Handling Untranslatable Strings (#1133)
This commit is contained in:
@@ -10,49 +10,77 @@ Author: Ludy87
|
||||
Example:
|
||||
To use this script, simply run it from command line:
|
||||
$ python counter_translation.py
|
||||
"""
|
||||
import os
|
||||
""" # noqa: D205
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
|
||||
import tomlkit
|
||||
import tomlkit.toml_file
|
||||
|
||||
|
||||
def write_readme(progress_list: List[Tuple[str, int]]) -> None:
|
||||
"""
|
||||
Updates the progress status in the README.md file based
|
||||
def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
    """Return a new TOML document with sorted top-level keys and tidy arrays.

    Every top-level entry that is a table (dict-like) is rebuilt as a fresh
    table holding only its 'ignore' and 'missing' arrays, when present. Each
    of those arrays is de-duplicated, sorted and flagged as multiline.
    Top-level entries that are not tables are copied over unchanged.

    Parameters:
        data (tomlkit.TOMLDocument): The original TOML document.

    Returns:
        tomlkit.TOMLDocument: A new document whose first-level keys were
            inserted in sorted order.
    """
    result = tomlkit.document()
    for name in sorted(data.keys()):
        entry = data[name]
        if not isinstance(entry, dict):
            # Non-table values are carried over as-is.
            result[name] = entry
            continue

        table = tomlkit.table()
        for field in ("ignore", "missing"):
            if field not in entry:
                continue
            # A set drops duplicate entries; sorting keeps the output stable.
            entries = sorted(set(entry[field]))
            multiline_array = tomlkit.array()
            multiline_array.multiline(True)
            for element in entries:
                multiline_array.append(element)
            table[field] = multiline_array
        result[name] = table
    return result
|
||||
|
||||
|
||||
def write_readme(progress_list: list[tuple[str, int]]) -> None:
|
||||
"""Updates the progress status in the README.md file based
|
||||
on the provided progress list.
|
||||
|
||||
Parameters:
|
||||
progress_list (List[Tuple[str, int]]): A list of tuples containing
|
||||
progress_list (list[tuple[str, int]]): A list of tuples containing
|
||||
language and progress percentage.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
with open("README.md", "r", encoding="utf-8") as file:
|
||||
content = file.read()
|
||||
""" # noqa: D205
|
||||
with open("README.md", encoding="utf-8") as file:
|
||||
content = file.readlines()
|
||||
|
||||
lines = content.split("\n")
|
||||
for i, line in enumerate(lines[2:], start=2):
|
||||
for i, line in enumerate(content[2:], start=2):
|
||||
for progress in progress_list:
|
||||
language, value = progress
|
||||
if language in line:
|
||||
match = re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line)
|
||||
if match:
|
||||
lines[i] = line.replace(
|
||||
if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
|
||||
content[i] = line.replace(
|
||||
match.group(0),
|
||||
f"",
|
||||
)
|
||||
|
||||
new_content = "\n".join(lines)
|
||||
|
||||
with open("README.md", "w", encoding="utf-8") as file:
|
||||
file.write(new_content)
|
||||
file.writelines(content)
|
||||
|
||||
|
||||
def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
|
||||
"""
|
||||
Compares the default properties file with other
|
||||
def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:
|
||||
"""Compares the default properties file with other
|
||||
properties files in the directory.
|
||||
|
||||
Parameters:
|
||||
@@ -60,20 +88,22 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
|
||||
files_directory (str): The directory containing other properties files.
|
||||
|
||||
Returns:
|
||||
List[Tuple[str, int]]: A list of tuples containing
|
||||
list[tuple[str, int]]: A list of tuples containing
|
||||
language and progress percentage.
|
||||
"""
|
||||
file_paths = glob.glob(os.path.join(files_directory, "messages_*.properties"))
|
||||
num_lines = sum(1 for _ in open(default_file_path, encoding="utf-8"))
|
||||
""" # noqa: D205
|
||||
num_lines = sum(
|
||||
1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")
|
||||
)
|
||||
|
||||
result_list = []
|
||||
sort_translation_status: tomlkit.TOMLDocument
|
||||
|
||||
# read toml
|
||||
with open(translation_status_file, encoding="utf-8") as f:
|
||||
sort_translation_status = tomlkit.parse(f.read())
|
||||
|
||||
for file_path in file_paths:
|
||||
language = (
|
||||
os.path.basename(file_path)
|
||||
.split("messages_", 1)[1]
|
||||
.split(".properties", 1)[0]
|
||||
)
|
||||
language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]
|
||||
|
||||
fails = 0
|
||||
if "en_GB" in language or "en_US" in language:
|
||||
@@ -81,9 +111,21 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
|
||||
result_list.append(("en_US", 100))
|
||||
continue
|
||||
|
||||
with open(default_file_path, "r", encoding="utf-8") as default_file, open(
|
||||
file_path, "r", encoding="utf-8"
|
||||
) as file:
|
||||
if language not in sort_translation_status:
|
||||
sort_translation_status[language] = tomlkit.table()
|
||||
|
||||
if (
|
||||
"ignore" not in sort_translation_status[language]
|
||||
or len(sort_translation_status[language].get("ignore", [])) < 1
|
||||
):
|
||||
sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])
|
||||
|
||||
# if "missing" not in sort_translation_status[language]:
|
||||
# sort_translation_status[language]["missing"] = tomlkit.array()
|
||||
# elif "language.direction" in sort_translation_status[language]["missing"]:
|
||||
# sort_translation_status[language]["missing"].remove("language.direction")
|
||||
|
||||
with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
|
||||
for _ in range(5):
|
||||
next(default_file)
|
||||
try:
|
||||
@@ -91,24 +133,47 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
|
||||
except StopIteration:
|
||||
fails = num_lines
|
||||
|
||||
for _, (line_default, line_file) in enumerate(
|
||||
zip(default_file, file), start=6
|
||||
):
|
||||
for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):
|
||||
try:
|
||||
# Ignoring empty lines and lines start with #
|
||||
if line_default.strip() == "" or line_default.startswith("#"):
|
||||
continue
|
||||
|
||||
default_key, default_value = line_default.split("=", 1)
|
||||
file_key, file_value = line_file.split("=", 1)
|
||||
if (
|
||||
line_default.split("=", 1)[1].strip()
|
||||
== line_file.split("=", 1)[1].strip()
|
||||
default_value.strip() == file_value.strip()
|
||||
and default_key.strip() not in sort_translation_status[language]["ignore"]
|
||||
):
|
||||
print(f"{language}: Line {line_num} is missing the translation.")
|
||||
# if default_key.strip() not in sort_translation_status[language]["missing"]:
|
||||
# missing_array = tomlkit.array()
|
||||
# missing_array.append(default_key.strip())
|
||||
# missing_array.multiline(True)
|
||||
# sort_translation_status[language]["missing"].extend(missing_array)
|
||||
fails += 1
|
||||
# elif default_key.strip() in sort_translation_status[language]["ignore"]:
|
||||
# if default_key.strip() in sort_translation_status[language]["missing"]:
|
||||
# sort_translation_status[language]["missing"].remove(default_key.strip())
|
||||
if default_value.strip() != file_value.strip():
|
||||
# if default_key.strip() in sort_translation_status[language]["missing"]:
|
||||
# sort_translation_status[language]["missing"].remove(default_key.strip())
|
||||
if default_key.strip() in sort_translation_status[language]["ignore"]:
|
||||
sort_translation_status[language]["ignore"].remove(default_key.strip())
|
||||
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
print(f"{language}: {fails} out of {num_lines} lines are not translated.")
|
||||
result_list.append(
|
||||
(
|
||||
language,
|
||||
int((num_lines - fails) * 100 / num_lines),
|
||||
)
|
||||
)
|
||||
translation_status = convert_to_multiline(sort_translation_status)
|
||||
with open(translation_status_file, "w", encoding="utf-8") as file:
|
||||
file.write(tomlkit.dumps(translation_status))
|
||||
|
||||
unique_data = list(set(result_list))
|
||||
unique_data.sort(key=lambda x: x[1], reverse=True)
|
||||
@@ -118,5 +183,10 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
|
||||
|
||||
if __name__ == "__main__":
|
||||
directory = os.path.join(os.getcwd(), "src", "main", "resources")
|
||||
messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
|
||||
reference_file = os.path.join(directory, "messages_en_GB.properties")
|
||||
write_readme(compare_files(reference_file, directory))
|
||||
|
||||
scripts_directory = os.path.join(os.getcwd(), "scripts")
|
||||
translation_state_file = os.path.join(scripts_directory, "translation_status.toml")
|
||||
|
||||
write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))
|
||||
|
||||
154
scripts/translation_status.toml
Normal file
154
scripts/translation_status.toml
Normal file
@@ -0,0 +1,154 @@
|
||||
[ar_AR]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[bg_BG]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[ca_CA]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[de_DE]
|
||||
ignore = [
|
||||
'AddStampRequest.alphabet',
|
||||
'AddStampRequest.position',
|
||||
'PDFToBook.selectText.1',
|
||||
'PDFToText.tags',
|
||||
'addPageNumbers.selectText.3',
|
||||
'alphabet',
|
||||
'certSign.name',
|
||||
'language.direction',
|
||||
'licenses.version',
|
||||
'pipeline.title',
|
||||
'pipelineOptions.pipelineHeader',
|
||||
'sponsor',
|
||||
'text',
|
||||
'watermark.type.1',
|
||||
]
|
||||
|
||||
[el_GR]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[es_ES]
|
||||
ignore = [
|
||||
'adminUserSettings.roles',
|
||||
'color',
|
||||
'language.direction',
|
||||
'no',
|
||||
'showJS.tags',
|
||||
]
|
||||
|
||||
[eu_ES]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[fr_FR]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[hi_IN]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[hu_HU]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[id_ID]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[it_IT]
|
||||
ignore = [
|
||||
'font',
|
||||
'language.direction',
|
||||
'no',
|
||||
'password',
|
||||
'pipeline.title',
|
||||
'pipelineOptions.pipelineHeader',
|
||||
'removePassword.selectText.2',
|
||||
'showJS.tags',
|
||||
'sponsor',
|
||||
]
|
||||
|
||||
[ja_JP]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[ko_KR]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[nl_NL]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[pl_PL]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[pt_BR]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[pt_PT]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[ro_RO]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[ru_RU]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[sr_LATN_RS]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[sv_SE]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[tr_TR]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[uk_UA]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[zh_CN]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
[zh_TW]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
]
|
||||
Reference in New Issue
Block a user