add missing encoding conversion for diff contents

This commit is contained in:
muravvv 2025-06-01 14:56:23 +03:00
parent 395188043b
commit bfaad12cac
2 changed files with 34 additions and 4 deletions

View file

@ -391,14 +391,20 @@ class GitRepo:
try:
if current_branch_has_commits:
args = ["HEAD", "--"] + list(fnames)
diffs += self.repo.git.diff(*args)
diffs += self.repo.git.diff(*args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
return diffs
wd_args = ["--"] + list(fnames)
index_args = ["--cached"] + wd_args
diffs += self.repo.git.diff(*index_args)
diffs += self.repo.git.diff(*wd_args)
diffs += self.repo.git.diff(*index_args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
diffs += self.repo.git.diff(*wd_args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
return diffs
except ANY_GIT_ERROR as err:
@ -412,7 +418,9 @@ class GitRepo:
args += ["--color=never"]
args += [from_commit, to_commit]
diffs = self.repo.git.diff(*args)
diffs = self.repo.git.diff(*args, stdout_as_string=False).decode(
self.io.encoding, "replace"
)
return diffs

View file

@ -59,6 +59,28 @@ class TestRepo(unittest.TestCase):
self.assertIn("index", diffs)
self.assertIn("workingdir", diffs)
def test_diffs_with_single_byte_encoding(self):
    """Check get_diffs() output for a file stored in a single-byte encoding.

    The test stages ``foo.txt`` with ASCII-only content, then rewrites the
    working copy with Cyrillic text encoded as cp1251. The text returned by
    ``GitRepo.get_diffs()`` must be encodable as UTF-8 and must contain both
    the staged content and the non-ASCII working-directory content.
    """
    codec = "cp1251"
    with GitTemporaryDirectory():
        raw_repo = git.Repo()

        # Stage an ASCII-only version of the file in the index.
        path = Path("foo.txt")
        path.write_text("index\n", encoding=codec)
        raw_repo.git.add(str(path))

        # Make a change with non-ASCII symbols in the working dir
        path.write_text("АБВ\n", encoding=codec)

        io = InputOutput(encoding=codec)
        diffs = GitRepo(io, None, ".").get_diffs()

        # check that all diff output can be converted to utf-8 for sending to model
        diffs.encode("utf-8")

        for expected in ("index", "АБВ"):
            self.assertIn(expected, diffs)
def test_diffs_detached_head(self):
with GitTemporaryDirectory():
repo = git.Repo()