Handling Untranslatable Strings (#1133)

This commit is contained in:
Ludy
2024-04-28 00:26:12 +02:00
committed by GitHub
parent 4fea8d10f8
commit 318076254d
4 changed files with 279 additions and 39 deletions

View File

@@ -10,49 +10,77 @@ Author: Ludy87
Example:
To use this script, simply run it from command line:
$ python counter_translation.py
"""
import os
""" # noqa: D205
import glob
import os
import re
from typing import List, Tuple
import tomlkit
import tomlkit.toml_file
def write_readme(progress_list: List[Tuple[str, int]]) -> None:
"""
Updates the progress status in the README.md file based
def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
"""Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.
Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.
Parameters:
data (tomlkit.TOMLDocument): The original TOML document containing the data.
Returns:
tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
""" # noqa: D205
sorted_data = tomlkit.document()
for key in sorted(data.keys()):
value = data[key]
if isinstance(value, dict):
new_table = tomlkit.table()
for subkey in ("ignore", "missing"):
if subkey in value:
# Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability
unique_sorted_array = sorted(set(value[subkey]))
array = tomlkit.array()
array.multiline(True)
for item in unique_sorted_array:
array.append(item)
new_table[subkey] = array
sorted_data[key] = new_table
else:
# Add other types of data unchanged
sorted_data[key] = value
return sorted_data
def write_readme(progress_list: list[tuple[str, int]]) -> None:
"""Updates the progress status in the README.md file based
on the provided progress list.
Parameters:
progress_list (List[Tuple[str, int]]): A list of tuples containing
progress_list (list[tuple[str, int]]): A list of tuples containing
language and progress percentage.
Returns:
None
"""
with open("README.md", "r", encoding="utf-8") as file:
content = file.read()
""" # noqa: D205
with open("README.md", encoding="utf-8") as file:
content = file.readlines()
lines = content.split("\n")
for i, line in enumerate(lines[2:], start=2):
for i, line in enumerate(content[2:], start=2):
for progress in progress_list:
language, value = progress
if language in line:
match = re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line)
if match:
lines[i] = line.replace(
if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
content[i] = line.replace(
match.group(0),
f"![{value}%](https://geps.dev/progress/{value})",
)
new_content = "\n".join(lines)
with open("README.md", "w", encoding="utf-8") as file:
file.write(new_content)
file.writelines(content)
def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
"""
Compares the default properties file with other
def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:
"""Compares the default properties file with other
properties files in the directory.
Parameters:
@@ -60,20 +88,22 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
files_directory (str): The directory containing other properties files.
Returns:
List[Tuple[str, int]]: A list of tuples containing
list[tuple[str, int]]: A list of tuples containing
language and progress percentage.
"""
file_paths = glob.glob(os.path.join(files_directory, "messages_*.properties"))
num_lines = sum(1 for _ in open(default_file_path, encoding="utf-8"))
""" # noqa: D205
num_lines = sum(
1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")
)
result_list = []
sort_translation_status: tomlkit.TOMLDocument
# read toml
with open(translation_status_file, encoding="utf-8") as f:
sort_translation_status = tomlkit.parse(f.read())
for file_path in file_paths:
language = (
os.path.basename(file_path)
.split("messages_", 1)[1]
.split(".properties", 1)[0]
)
language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]
fails = 0
if "en_GB" in language or "en_US" in language:
@@ -81,9 +111,21 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
result_list.append(("en_US", 100))
continue
with open(default_file_path, "r", encoding="utf-8") as default_file, open(
file_path, "r", encoding="utf-8"
) as file:
if language not in sort_translation_status:
sort_translation_status[language] = tomlkit.table()
if (
"ignore" not in sort_translation_status[language]
or len(sort_translation_status[language].get("ignore", [])) < 1
):
sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])
# if "missing" not in sort_translation_status[language]:
# sort_translation_status[language]["missing"] = tomlkit.array()
# elif "language.direction" in sort_translation_status[language]["missing"]:
# sort_translation_status[language]["missing"].remove("language.direction")
with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
for _ in range(5):
next(default_file)
try:
@@ -91,24 +133,47 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
except StopIteration:
fails = num_lines
for _, (line_default, line_file) in enumerate(
zip(default_file, file), start=6
):
for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):
try:
# Ignoring empty lines and lines start with #
if line_default.strip() == "" or line_default.startswith("#"):
continue
default_key, default_value = line_default.split("=", 1)
file_key, file_value = line_file.split("=", 1)
if (
line_default.split("=", 1)[1].strip()
== line_file.split("=", 1)[1].strip()
default_value.strip() == file_value.strip()
and default_key.strip() not in sort_translation_status[language]["ignore"]
):
print(f"{language}: Line {line_num} is missing the translation.")
# if default_key.strip() not in sort_translation_status[language]["missing"]:
# missing_array = tomlkit.array()
# missing_array.append(default_key.strip())
# missing_array.multiline(True)
# sort_translation_status[language]["missing"].extend(missing_array)
fails += 1
# elif default_key.strip() in sort_translation_status[language]["ignore"]:
# if default_key.strip() in sort_translation_status[language]["missing"]:
# sort_translation_status[language]["missing"].remove(default_key.strip())
if default_value.strip() != file_value.strip():
# if default_key.strip() in sort_translation_status[language]["missing"]:
# sort_translation_status[language]["missing"].remove(default_key.strip())
if default_key.strip() in sort_translation_status[language]["ignore"]:
sort_translation_status[language]["ignore"].remove(default_key.strip())
except IndexError:
pass
print(f"{language}: {fails} out of {num_lines} lines are not translated.")
result_list.append(
(
language,
int((num_lines - fails) * 100 / num_lines),
)
)
translation_status = convert_to_multiline(sort_translation_status)
with open(translation_status_file, "w", encoding="utf-8") as file:
file.write(tomlkit.dumps(translation_status))
unique_data = list(set(result_list))
unique_data.sort(key=lambda x: x[1], reverse=True)
@@ -118,5 +183,10 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
if __name__ == "__main__":
directory = os.path.join(os.getcwd(), "src", "main", "resources")
messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
reference_file = os.path.join(directory, "messages_en_GB.properties")
write_readme(compare_files(reference_file, directory))
scripts_directory = os.path.join(os.getcwd(), "scripts")
translation_state_file = os.path.join(scripts_directory, "translation_status.toml")
write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))

View File

@@ -0,0 +1,154 @@
[ar_AR]
ignore = [
'language.direction',
]
[bg_BG]
ignore = [
'language.direction',
]
[ca_CA]
ignore = [
'language.direction',
]
[de_DE]
ignore = [
'AddStampRequest.alphabet',
'AddStampRequest.position',
'PDFToBook.selectText.1',
'PDFToText.tags',
'addPageNumbers.selectText.3',
'alphabet',
'certSign.name',
'language.direction',
'licenses.version',
'pipeline.title',
'pipelineOptions.pipelineHeader',
'sponsor',
'text',
'watermark.type.1',
]
[el_GR]
ignore = [
'language.direction',
]
[es_ES]
ignore = [
'adminUserSettings.roles',
'color',
'language.direction',
'no',
'showJS.tags',
]
[eu_ES]
ignore = [
'language.direction',
]
[fr_FR]
ignore = [
'language.direction',
]
[hi_IN]
ignore = [
'language.direction',
]
[hu_HU]
ignore = [
'language.direction',
]
[id_ID]
ignore = [
'language.direction',
]
[it_IT]
ignore = [
'font',
'language.direction',
'no',
'password',
'pipeline.title',
'pipelineOptions.pipelineHeader',
'removePassword.selectText.2',
'showJS.tags',
'sponsor',
]
[ja_JP]
ignore = [
'language.direction',
]
[ko_KR]
ignore = [
'language.direction',
]
[nl_NL]
ignore = [
'language.direction',
]
[pl_PL]
ignore = [
'language.direction',
]
[pt_BR]
ignore = [
'language.direction',
]
[pt_PT]
ignore = [
'language.direction',
]
[ro_RO]
ignore = [
'language.direction',
]
[ru_RU]
ignore = [
'language.direction',
]
[sr_LATN_RS]
ignore = [
'language.direction',
]
[sv_SE]
ignore = [
'language.direction',
]
[tr_TR]
ignore = [
'language.direction',
]
[uk_UA]
ignore = [
'language.direction',
]
[zh_CN]
ignore = [
'language.direction',
]
[zh_TW]
ignore = [
'language.direction',
]