From e81f83ffc5349ca3034a5f1887312447695ddcde Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)" <aider@paulg.org>
Date: Thu, 22 Aug 2024 10:17:15 -0700
Subject: [PATCH] fix: Use UTF-8 encoding when processing and reading files

---
 tests/basic/test_find_or_blocks.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/basic/test_find_or_blocks.py b/tests/basic/test_find_or_blocks.py
index 275389d03..583c8810f 100755
--- a/tests/basic/test_find_or_blocks.py
+++ b/tests/basic/test_find_or_blocks.py
@@ -13,11 +13,14 @@ from aider.dump import dump  # noqa: F401
 
 def process_markdown(filename, fh):
     try:
-        with open(filename, "r") as file:
+        with open(filename, "r", encoding="utf-8") as file:
             content = file.read()
     except FileNotFoundError:
         print(f"@@@ File '{filename}' not found.", "@" * 20, file=fh, flush=True)
         return
+    except UnicodeDecodeError:
+        print(f"@@@ File '{filename}' has an encoding issue. Make sure it's UTF-8 encoded.", "@" * 20, file=fh, flush=True)
+        return
 
     # Split the content into sections based on '####' headers
     sections = re.split(r"(?=####\s)", content)
@@ -82,7 +85,7 @@ class TestFindOrBlocks(unittest.TestCase):
         actual_output = output.getvalue()
 
         # Read the expected output
-        with open(expected_output_file, "r") as f:
+        with open(expected_output_file, "r", encoding="utf-8") as f:
             expected_output = f.read()
 
         # Compare the actual and expected outputs