From 05add001fbb6ef7fa6d19c2c19f95d122d3710e7 Mon Sep 17 00:00:00 2001 From: Ludy Date: Tue, 21 Jan 2025 12:09:01 +0100 Subject: [PATCH] clean up and more (#2756) # Description of Changes This PR introduces multiple updates across various files and workflows: ### **What was changed:** 1. **Deleted Scripts:** - `check_duplicates.py`: Removed script that checked for duplicate keys in properties files. - `check_tabulator.py`: Removed script that ensured no tab characters existed in HTML, CSS, or JS files. 2. **Updated GitHub Actions Workflow (`pre_commit.yml`):** - Added a weekly schedule trigger (`cron: "0 0 * * 1"`) for the pre-commit workflow. - Excluded workflow files (`.github/workflows/.*`) from the `end-of-file-fixer` and `trailing-whitespace` hooks; this exclusion is configured in `.pre-commit-config.yaml`, not in the workflow file itself. - Improved detection and handling of staged changes during commit creation. 3. **`.pre-commit-config.yaml`:** - Escaped the literal dot in `.github` in the `ruff` and `ruff-format` file patterns, and removed `.properties` files from the `codespell` hook's file pattern. - Removed local hooks that relied on deleted scripts. 4. **Scripts (`counter_translation.py`):** - Updated file writing methods to enforce consistent newline characters (`newline="\n"`). ### **Why the change was made:** - To simplify the repository by removing unnecessary or outdated scripts (`check_duplicates.py` and `check_tabulator.py`). - To enhance the workflow automation by introducing a scheduled run for pre-commit checks. - To improve code formatting and file consistency by addressing newline character issues and refining file exclusions in `pre-commit`. ### **Challenges encountered:** - Ensuring that all references to deleted scripts were properly removed from configuration files. - Verifying that workflow and pre-commit changes do not introduce regressions in existing automation. 
Closes # (issue_number) --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. 
--- .github/scripts/check_duplicates.py | 51 ----------------- .github/scripts/check_tabulator.py | 85 ----------------------------- .github/workflows/pre_commit.yml | 2 + .pre-commit-config.yaml | 26 ++------- scripts/counter_translation.py | 4 +- 5 files changed, 9 insertions(+), 159 deletions(-) delete mode 100644 .github/scripts/check_duplicates.py delete mode 100644 .github/scripts/check_tabulator.py diff --git a/.github/scripts/check_duplicates.py b/.github/scripts/check_duplicates.py deleted file mode 100644 index 9c919414..00000000 --- a/.github/scripts/check_duplicates.py +++ /dev/null @@ -1,51 +0,0 @@ -import sys - - -def find_duplicate_keys(file_path): - """ - Finds duplicate keys in a properties file and returns their occurrences. - - This function reads a properties file, identifies any keys that occur more than - once, and returns a dictionary with these keys and the line numbers of their occurrences. - - Parameters: - file_path (str): The path to the properties file to be checked. - - Returns: - dict: A dictionary where each key is a duplicated key in the file, and the value is a list - of line numbers where the key occurs. 
- """ - with open(file_path, "r", encoding="utf-8") as file: - lines = file.readlines() - - keys = {} - duplicates = {} - - for line_number, line in enumerate(lines, start=1): - line = line.strip() - if line and not line.startswith("#") and "=" in line: - key = line.split("=", 1)[0].strip() - if key in keys: - # If the key already exists, add the current line number - duplicates.setdefault(key, []).append(line_number) - # Also add the first instance of the key if not already done - if keys[key] not in duplicates[key]: - duplicates[key].insert(0, keys[key]) - else: - # Store the line number of the first instance of the key - keys[key] = line_number - - return duplicates - - -if __name__ == "__main__": - failed = False - for ar in sys.argv[1:]: - duplicates = find_duplicate_keys(ar) - if duplicates: - for key, lines in duplicates.items(): - lines_str = ", ".join(map(str, lines)) - print(f"{key} duplicated in {ar} on lines {lines_str}") - failed = True - if failed: - sys.exit(1) diff --git a/.github/scripts/check_tabulator.py b/.github/scripts/check_tabulator.py deleted file mode 100644 index dea57092..00000000 --- a/.github/scripts/check_tabulator.py +++ /dev/null @@ -1,85 +0,0 @@ -"""check_tabulator.py""" - -import argparse -import sys - - -def check_tabs(file_path): - """ - Checks for tabs in the specified file. - - Args: - file_path (str): The path to the file to be checked. - - Returns: - bool: True if tabs are found, False otherwise. - """ - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - - if "\t" in content: - print(f"Tab found in {file_path}") - return True - return False - - -def replace_tabs_with_spaces(file_path, replace_with=" "): - """ - Replaces tabs with a specified number of spaces in the file. - - Args: - file_path (str): The path to the file where tabs will be replaced. - replace_with (str): The character(s) to replace tabs with. Defaults to two spaces. 
- """ - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - - updated_content = content.replace("\t", replace_with) - - with open(file_path, "w", encoding="utf-8") as file: - file.write(updated_content) - - -def main(): - """ - Main function to replace tabs with spaces in the provided files. - The replacement character and files to check are taken from command line arguments. - """ - # Create ArgumentParser instance - parser = argparse.ArgumentParser( - description="Replace tabs in files with specified characters." - ) - - # Define optional argument `--replace_with` - parser.add_argument( - "--replace_with", - default=" ", - help="Character(s) to replace tabs with. Default is two spaces.", - ) - - # Define argument for file paths - parser.add_argument("files", metavar="FILE", nargs="+", help="Files to process.") - - # Parse arguments - args = parser.parse_args() - - # Extract replacement characters and files from the parsed arguments - replace_with = args.replace_with - files_checked = args.files - - error = False - - for file_path in files_checked: - if check_tabs(file_path): - replace_tabs_with_spaces(file_path, replace_with) - error = True - - if error: - print("Error: Originally found tabs in HTML files, now replaced.") - sys.exit(1) - - sys.exit(0) - - -if __name__ == "__main__": - main() diff --git a/.github/workflows/pre_commit.yml b/.github/workflows/pre_commit.yml index e0d94e6a..b71bba01 100644 --- a/.github/workflows/pre_commit.yml +++ b/.github/workflows/pre_commit.yml @@ -2,6 +2,8 @@ name: Pre-commit on: workflow_dispatch: + schedule: + - cron: "0 0 * * 1" permissions: contents: read diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5256f897..9edd51a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,10 +6,10 @@ repos: args: - --fix - --line-length=127 - files: ^((.github/scripts|scripts)/.+)?[^/]+\.py$ + files: ^((\.github/scripts|scripts)/.+)?[^/]+\.py$ exclude: (split_photos.py) - 
id: ruff-format - files: ^((.github/scripts|scripts)/.+)?[^/]+\.py$ + files: ^((\.github/scripts|scripts)/.+)?[^/]+\.py$ exclude: (split_photos.py) - repo: https://github.com/codespell-project/codespell rev: v2.3.0 @@ -19,7 +19,7 @@ repos: - --ignore-words-list= - --skip="./.*,*.csv,*.json,*.ambr" - --quiet-level=2 - files: \.(properties|html|css|js|py|md)$ + files: \.(html|css|js|py|md)$ exclude: (.vscode|.devcontainer|src/main/resources|Dockerfile|.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js) - repo: https://github.com/gitleaks/gitleaks rev: v8.22.0 @@ -35,23 +35,7 @@ repos: hooks: - id: end-of-file-fixer files: ^.*(\.js|\.java|\.py|\.yml)$ - exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js$) + exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js|\.github/workflows/.*$) - id: trailing-whitespace files: ^.*(\.js|\.java|\.py|\.yml)$ - exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js$) - - - repo: local - hooks: - - id: check-duplicate-properties-keys - name: Check Duplicate Properties Keys - entry: python .github/scripts/check_duplicates.py - language: python - files: ^(src)/.+\.properties$ - - id: check-html-tabs - name: Check HTML for tabs - description: Ensures HTML/CSS/JS files do not contain tab characters - # args: ["--replace_with= "] - entry: python .github/scripts/check_tabulator.py - language: python - exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js$) - files: ^.*(\.html|\.css|\.js)$ \ No newline at end of file + exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js|\.github/workflows/.*$) diff --git a/scripts/counter_translation.py b/scripts/counter_translation.py index ba4ab67b..789cb7c1 100644 --- a/scripts/counter_translation.py +++ b/scripts/counter_translation.py @@ -75,7 +75,7 @@ def write_readme(progress_list: list[tuple[str, int]]) -> None: f"![{value}%](https://geps.dev/progress/{value})", ) - with open("README.md", 
"w", encoding="utf-8") as file: + with open("README.md", "w", encoding="utf-8", newline="\n") as file: file.writelines(content) @@ -196,7 +196,7 @@ def compare_files( ) ) ignore_translation = convert_to_multiline(sort_ignore_translation) - with open(ignore_translation_file, "w", encoding="utf-8") as file: + with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file: file.write(tomlkit.dumps(ignore_translation)) unique_data = list(set(result_list))