Handling Untranslatable Strings (#1133)

2024-04-28 00:26:12 +02:00
parent 4fea8d10f8
commit 318076254d
4 changed files with 279 additions and 39 deletions
@@ -10,49 +10,77 @@ Author: Ludy87
 Example:
    To use this script, simply run it from command line:
        $ python counter_translation.py
-"""
-import os
+"""  # noqa: D205
+
 import glob
+import os
 import re
-from typing import List, Tuple
+
+import tomlkit
+import tomlkit.toml_file


-def write_readme(progress_list: List[Tuple[str, int]]) -> None:
-    """
-    Updates the progress status in the README.md file based
+def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
+    """Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.
+    Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.
+
+    Parameters:
+        data (tomlkit.TOMLDocument): The original TOML document containing the data.
+
+    Returns:
+        tomlkit.TOMLDocument: A new document with sorted keys; tables keep only their 'ignore'/'missing' arrays.
+    """  # noqa: D205
+    sorted_data = tomlkit.document()
+    for key in sorted(data.keys()):
+        value = data[key]
+        if isinstance(value, dict):
+            new_table = tomlkit.table()
+            for subkey in ("ignore", "missing"):
+                if subkey in value:
+                    # Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability
+                    unique_sorted_array = sorted(set(value[subkey]))
+                    array = tomlkit.array()
+                    array.multiline(True)
+                    for item in unique_sorted_array:
+                        array.append(item)
+                    new_table[subkey] = array
+            sorted_data[key] = new_table
+        else:
+            # Add other types of data unchanged
+            sorted_data[key] = value
+    return sorted_data
+
+
+def write_readme(progress_list: list[tuple[str, int]]) -> None:
+    """Updates the progress status in the README.md file based
    on the provided progress list.

    Parameters:
-        progress_list (List[Tuple[str, int]]): A list of tuples containing
+        progress_list (list[tuple[str, int]]): A list of tuples containing
        language and progress percentage.

    Returns:
        None
-    """
-    with open("README.md", "r", encoding="utf-8") as file:
-        content = file.read()
+    """  # noqa: D205
+    with open("README.md", encoding="utf-8") as file:
+        content = file.readlines()

-    lines = content.split("\n")
-    for i, line in enumerate(lines[2:], start=2):
+    for i, line in enumerate(content[2:], start=2):
        for progress in progress_list:
            language, value = progress
            if language in line:
-                match = re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line)
-                if match:
-                    lines[i] = line.replace(
+                if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
+                    content[i] = line.replace(
                        match.group(0),
                        f"![{value}%](https://geps.dev/progress/{value})",
                    )

-    new_content = "\n".join(lines)
-
    with open("README.md", "w", encoding="utf-8") as file:
-        file.write(new_content)
+        file.writelines(content)


-def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
-    """
-    Compares the default properties file with other
+def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:
+    """Compares the default properties file with other
    properties files in the directory.

    Parameters:
@@ -60,20 +88,22 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
        files_directory (str): The directory containing other properties files.

    Returns:
-        List[Tuple[str, int]]: A list of tuples containing
+        list[tuple[str, int]]: A list of tuples containing
        language and progress percentage.
-    """
-    file_paths = glob.glob(os.path.join(files_directory, "messages_*.properties"))
-    num_lines = sum(1 for _ in open(default_file_path, encoding="utf-8"))
+    """  # noqa: D205
+    num_lines = sum(
+        1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")
+    )

    result_list = []
+    sort_translation_status: tomlkit.TOMLDocument
+
+    # read toml
+    with open(translation_status_file, encoding="utf-8") as f:
+        sort_translation_status = tomlkit.parse(f.read())

    for file_path in file_paths:
-        language = (
-            os.path.basename(file_path)
-            .split("messages_", 1)[1]
-            .split(".properties", 1)[0]
-        )
+        language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]

        fails = 0
        if "en_GB" in language or "en_US" in language:
@@ -81,9 +111,21 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
            result_list.append(("en_US", 100))
            continue

-        with open(default_file_path, "r", encoding="utf-8") as default_file, open(
-            file_path, "r", encoding="utf-8"
-        ) as file:
+        if language not in sort_translation_status:
+            sort_translation_status[language] = tomlkit.table()
+
+        if (
+            "ignore" not in sort_translation_status[language]
+            or len(sort_translation_status[language].get("ignore", [])) < 1
+        ):
+            sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])
+
+        # if "missing" not in sort_translation_status[language]:
+        #     sort_translation_status[language]["missing"] = tomlkit.array()
+        # elif "language.direction" in sort_translation_status[language]["missing"]:
+        #     sort_translation_status[language]["missing"].remove("language.direction")
+
+        with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
            for _ in range(5):
                next(default_file)
                try:
@@ -91,24 +133,47 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
                except StopIteration:
                    fails = num_lines

-            for _, (line_default, line_file) in enumerate(
-                zip(default_file, file), start=6
-            ):
+            for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):
                try:
+                    # Skip empty lines and lines starting with #
+                    if line_default.strip() == "" or line_default.startswith("#"):
+                        continue
+
+                    default_key, default_value = line_default.split("=", 1)
+                    file_key, file_value = line_file.split("=", 1)
                    if (
-                        line_default.split("=", 1)[1].strip()
-                        == line_file.split("=", 1)[1].strip()
+                        default_value.strip() == file_value.strip()
+                        and default_key.strip() not in sort_translation_status[language]["ignore"]
                    ):
+                        print(f"{language}: Line {line_num} is missing the translation.")
+                        # if default_key.strip() not in sort_translation_status[language]["missing"]:
+                        #     missing_array = tomlkit.array()
+                        #     missing_array.append(default_key.strip())
+                        #     missing_array.multiline(True)
+                        #     sort_translation_status[language]["missing"].extend(missing_array)
                        fails += 1
+                    # elif default_key.strip() in sort_translation_status[language]["ignore"]:
+                    #     if default_key.strip() in sort_translation_status[language]["missing"]:
+                    #         sort_translation_status[language]["missing"].remove(default_key.strip())
+                    if default_value.strip() != file_value.strip():
+                        # if default_key.strip() in sort_translation_status[language]["missing"]:
+                        #     sort_translation_status[language]["missing"].remove(default_key.strip())
+                        if default_key.strip() in sort_translation_status[language]["ignore"]:
+                            sort_translation_status[language]["ignore"].remove(default_key.strip())
+
                except IndexError:
                    pass

+        print(f"{language}: {fails} out of {num_lines} lines are not translated.")
        result_list.append(
            (
                language,
                int((num_lines - fails) * 100 / num_lines),
            )
        )
+    translation_status = convert_to_multiline(sort_translation_status)
+    with open(translation_status_file, "w", encoding="utf-8") as file:
+        file.write(tomlkit.dumps(translation_status))

    unique_data = list(set(result_list))
    unique_data.sort(key=lambda x: x[1], reverse=True)
@@ -118,5 +183,10 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:

 if __name__ == "__main__":
    directory = os.path.join(os.getcwd(), "src", "main", "resources")
+    messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
    reference_file = os.path.join(directory, "messages_en_GB.properties")
-    write_readme(compare_files(reference_file, directory))
+
+    scripts_directory = os.path.join(os.getcwd(), "scripts")
+    translation_state_file = os.path.join(scripts_directory, "translation_status.toml")
+
+    write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))
@@ -0,0 +1,154 @@
+[ar_AR]
+ignore = [
+    'language.direction',
+]
+
+[bg_BG]
+ignore = [
+    'language.direction',
+]
+
+[ca_CA]
+ignore = [
+    'language.direction',
+]
+
+[de_DE]
+ignore = [
+    'AddStampRequest.alphabet',
+    'AddStampRequest.position',
+    'PDFToBook.selectText.1',
+    'PDFToText.tags',
+    'addPageNumbers.selectText.3',
+    'alphabet',
+    'certSign.name',
+    'language.direction',
+    'licenses.version',
+    'pipeline.title',
+    'pipelineOptions.pipelineHeader',
+    'sponsor',
+    'text',
+    'watermark.type.1',
+]
+
+[el_GR]
+ignore = [
+    'language.direction',
+]
+
+[es_ES]
+ignore = [
+    'adminUserSettings.roles',
+    'color',
+    'language.direction',
+    'no',
+    'showJS.tags',
+]
+
+[eu_ES]
+ignore = [
+    'language.direction',
+]
+
+[fr_FR]
+ignore = [
+    'language.direction',
+]
+
+[hi_IN]
+ignore = [
+    'language.direction',
+]
+
+[hu_HU]
+ignore = [
+    'language.direction',
+]
+
+[id_ID]
+ignore = [
+    'language.direction',
+]
+
+[it_IT]
+ignore = [
+    'font',
+    'language.direction',
+    'no',
+    'password',
+    'pipeline.title',
+    'pipelineOptions.pipelineHeader',
+    'removePassword.selectText.2',
+    'showJS.tags',
+    'sponsor',
+]
+
+[ja_JP]
+ignore = [
+    'language.direction',
+]
+
+[ko_KR]
+ignore = [
+    'language.direction',
+]
+
+[nl_NL]
+ignore = [
+    'language.direction',
+]
+
+[pl_PL]
+ignore = [
+    'language.direction',
+]
+
+[pt_BR]
+ignore = [
+    'language.direction',
+]
+
+[pt_PT]
+ignore = [
+    'language.direction',
+]
+
+[ro_RO]
+ignore = [
+    'language.direction',
+]
+
+[ru_RU]
+ignore = [
+    'language.direction',
+]
+
+[sr_LATN_RS]
+ignore = [
+    'language.direction',
+]
+
+[sv_SE]
+ignore = [
+    'language.direction',
+]
+
+[tr_TR]
+ignore = [
+    'language.direction',
+]
+
+[uk_UA]
+ignore = [
+    'language.direction',
+]
+
+[zh_CN]
+ignore = [
+    'language.direction',
+]
+
+[zh_TW]
+ignore = [
+    'language.direction',
+]