Merge pull request #4150 from muravvv/fix_encoding
Some checks are pending
pre-commit / pre-commit (push) Waiting to run

Fix issues on repositories with non-Unicode encodings
This commit is contained in:
paul-gauthier 2025-06-01 12:46:17 -07:00 committed by GitHub
commit 3266eaca91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 5 deletions

View file

@ -749,7 +749,7 @@ class InputOutput:
if not self.llm_history_file:
return
timestamp = datetime.now().isoformat(timespec="seconds")
with open(self.llm_history_file, "a", encoding=self.encoding) as log_file:
with open(self.llm_history_file, "a", encoding="utf-8") as log_file:
log_file.write(f"{role.upper()} {timestamp}\n")
log_file.write(content + "\n")

View file

@ -391,14 +391,20 @@ class GitRepo:
try:
if current_branch_has_commits:
args = ["HEAD", "--"] + list(fnames)
diffs += self.repo.git.diff(*args)
diffs += self.repo.git.diff(*args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
return diffs
wd_args = ["--"] + list(fnames)
index_args = ["--cached"] + wd_args
diffs += self.repo.git.diff(*index_args)
diffs += self.repo.git.diff(*wd_args)
diffs += self.repo.git.diff(*index_args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
diffs += self.repo.git.diff(*wd_args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
return diffs
except ANY_GIT_ERROR as err:
@ -412,7 +418,9 @@ class GitRepo:
args += ["--color=never"]
args += [from_commit, to_commit]
diffs = self.repo.git.diff(*args)
diffs = self.repo.git.diff(*args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
return diffs

View file

@ -59,6 +59,28 @@ class TestRepo(unittest.TestCase):
self.assertIn("index", diffs)
self.assertIn("workingdir", diffs)
def test_diffs_with_single_byte_encoding(self):
    """Check that diffs of a cp1251-encoded file come back as UTF-8-encodable text."""
    codec = "cp1251"
    with GitTemporaryDirectory():
        raw_repo = git.Repo()
        target = Path("foo.txt")

        # Stage an ASCII-only version of the file so the index has content.
        target.write_text("index\n", encoding=codec)
        raw_repo.git.add(str(target))

        # Overwrite the working-dir copy with non-ASCII symbols.
        target.write_text("АБВ\n", encoding=codec)

        io = InputOutput(encoding=codec)
        diffs = GitRepo(io, None, ".").get_diffs()

        # Must not raise: all diff output has to be convertible to utf-8
        # before it is sent to the model.
        diffs.encode("utf-8")

        for expected in ("index", "АБВ"):
            self.assertIn(expected, diffs)
def test_diffs_detached_head(self):
with GitTemporaryDirectory():
repo = git.Repo()