fix: Use UTF-8 encoding when processing and reading files

Paul Gauthier (aider) 2024-08-22 10:17:15 -07:00
parent f54070f0f5
commit e81f83ffc5

@@ -13,11 +13,14 @@ from aider.dump import dump  # noqa: F401

 def process_markdown(filename, fh):
     try:
-        with open(filename, "r") as file:
+        with open(filename, "r", encoding="utf-8") as file:
             content = file.read()
     except FileNotFoundError:
         print(f"@@@ File '{filename}' not found.", "@" * 20, file=fh, flush=True)
         return
+    except UnicodeDecodeError:
+        print(f"@@@ File '{filename}' has an encoding issue. Make sure it's UTF-8 encoded.", "@" * 20, file=fh, flush=True)
+        return

     # Split the content into sections based on '####' headers
     sections = re.split(r"(?=####\s)", content)
@@ -82,7 +85,7 @@ class TestFindOrBlocks(unittest.TestCase):
         actual_output = output.getvalue()

         # Read the expected output
-        with open(expected_output_file, "r") as f:
+        with open(expected_output_file, "r", encoding="utf-8") as f:
             expected_output = f.read()

         # Compare the actual and expected outputs
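
For reference, the pattern this commit applies is to pass encoding="utf-8" explicitly to open(), so reads do not depend on the platform's default locale encoding, and to catch UnicodeDecodeError alongside FileNotFoundError. Below is a minimal standalone sketch of that pattern; the read_utf8 helper and the sample filename are hypothetical and not part of the aider code base.

import sys

def read_utf8(path, fh):
    # An explicit encoding avoids falling back to the platform's default
    # locale encoding (e.g. cp1252 on Windows).
    try:
        with open(path, "r", encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError:
        print(f"@@@ File '{path}' not found.", "@" * 20, file=fh, flush=True)
    except UnicodeDecodeError:
        print(f"@@@ File '{path}' is not valid UTF-8.", "@" * 20, file=fh, flush=True)
    return None

# Example usage: report problems to stderr, proceed only if the read succeeded.
content = read_utf8("example.md", sys.stderr)
if content is not None:
    print(len(content))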