Improves security when processing properties files (#2303)

* Improves security when processing properties files

* Check for spaces in the key

---------

Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Ludy
2024-11-23 12:49:49 +01:00
committed by GitHub
parent 9a96109ea2
commit 3a27aa16d5
2 changed files with 77 additions and 30 deletions

View File

@@ -9,7 +9,7 @@ The script also provides functionality to update the translation files to match
adjusting the format.
Usage:
python script_name.py --reference-file <path_to_reference_file> --branch <branch_name> [--files <list_of_changed_files>]
python check_language_properties.py --reference-file <path_to_reference_file> --branch <branch_name> [--actor <actor_name>] [--files <list_of_changed_files>]
"""
import copy
@@ -19,6 +19,10 @@ import argparse
import re
# Maximum size for properties files (e.g., 200 KB)
MAX_FILE_SIZE = 200 * 1024
def parse_properties_file(file_path):
"""Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers)."""
properties_list = []
@@ -96,7 +100,7 @@ def write_json_file(file_path, updated_properties):
def update_missing_keys(reference_file, file_list, branch=""):
reference_properties = parse_properties_file(reference_file)
for file_path in file_list:
basename_current_file = os.path.basename(branch + file_path)
basename_current_file = os.path.basename(os.path.join(branch, file_path))
if (
basename_current_file == os.path.basename(reference_file)
or not file_path.endswith(".properties")
@@ -104,7 +108,7 @@ def update_missing_keys(reference_file, file_list, branch=""):
):
continue
current_properties = parse_properties_file(branch + file_path)
current_properties = parse_properties_file(os.path.join(branch, file_path))
updated_properties = []
for ref_entry in reference_properties:
ref_entry_copy = copy.deepcopy(ref_entry)
@@ -115,15 +119,15 @@ def update_missing_keys(reference_file, file_list, branch=""):
if ref_entry_copy["key"] == current_entry["key"]:
ref_entry_copy["value"] = current_entry["value"]
updated_properties.append(ref_entry_copy)
write_json_file(branch + file_path, updated_properties)
write_json_file(os.path.join(branch, file_path), updated_properties)
def check_for_missing_keys(reference_file, file_list, branch):
update_missing_keys(reference_file, file_list, branch + "/")
update_missing_keys(reference_file, file_list, branch)
def read_properties(file_path):
if (os.path.isfile(file_path) and os.path.exists(file_path)):
if os.path.isfile(file_path) and os.path.exists(file_path):
with open(file_path, "r", encoding="utf-8") as file:
return file.read().splitlines()
return [""]
@@ -142,18 +146,36 @@ def check_for_differences(reference_file, file_list, branch, actor):
only_reference_file = True
for file_path in file_list[0].split():
basename_current_file = os.path.basename(branch + "/" + file_path)
file_arr = file_list
if len(file_list) == 1:
file_arr = file_list[0].split()
base_dir = os.path.abspath(os.path.join(os.getcwd(), "src", "main", "resources"))
for file_path in file_arr:
absolute_path = os.path.abspath(file_path)
# Verify that file is within the expected directory
if not absolute_path.startswith(base_dir):
raise ValueError(f"Unsafe file found: {file_path}")
# Verify file size before processing
if os.path.getsize(os.path.join(branch, file_path)) > MAX_FILE_SIZE:
raise ValueError(
f"The file {file_path} is too large and could pose a security risk."
)
basename_current_file = os.path.basename(os.path.join(branch, file_path))
if (
basename_current_file == basename_reference_file
or not file_path.startswith("src/main/resources/messages_")
or not file_path.startswith(
os.path.join("src", "main", "resources", "messages_")
)
or not file_path.endswith(".properties")
or not basename_current_file.startswith("messages_")
):
continue
only_reference_file = False
report.append(f"#### 🗂️ **Checking File:** `{basename_current_file}`...")
current_lines = read_properties(branch + "/" + file_path)
report.append(f"#### 📃 **Checking File:** `{basename_current_file}`...")
current_lines = read_properties(os.path.join(branch, file_path))
reference_line_count = len(reference_lines)
current_line_count = len(current_lines)
@@ -197,6 +219,11 @@ def check_for_differences(reference_file, file_list, branch, actor):
extra_keys_str = "`, `".join(extra_keys_list)
report.append("- **Test 2 Status:** ❌ Failed")
if missing_keys_list:
for key in missing_keys_list:
if " " in key:
report.append(
f" - **Issue:** One or more keys in ***{basename_current_file}*** contain spaces `{missing_keys_str}`!"
)
report.append(
f" - **Issue:** There are keys in ***{basename_current_file}*** `{missing_keys_str}` that are not present in ***{basename_reference_file}***!"
)
@@ -252,10 +279,20 @@ if __name__ == "__main__":
)
args = parser.parse_args()
# Sanitize --actor input to avoid injection attacks
if args.actor:
args.actor = re.sub(r"[^a-zA-Z0-9_\\-]", "", args.actor)
# Sanitize --branch input to avoid injection attacks
if args.branch:
args.branch = re.sub(r"[^a-zA-Z0-9\\-]", "", args.branch)
file_list = args.files
if file_list is None:
file_list = glob.glob(
os.getcwd() + "/src/**/messages_*.properties", recursive=True
os.path.join(
os.getcwd(), "src", "main", "resources", "messages_*.properties"
)
)
update_missing_keys(args.reference_file, file_list)
else: