fix: Ensure token count calculation handles small text samples

Author: Paul Gauthier, 2024-08-05 17:14:42 -03:00; committed by Paul Gauthier (aider)
parent 75c3c40354
commit b6760e26c7


@@ -79,7 +79,11 @@ class RepoMap:
             return len(text) / self.tokens_per_char
         sample_text = text.splitlines(keepends=True)
-        sample_text = "".join(random.sample(sample_text, 150))
+        samples = 150
+        if len(sample_text) < samples:
+            return self.main_model.token_count(text)
+        sample_text = "".join(random.sample(sample_text, samples))
         tokens = self.main_model.token_count(sample_text)
         self.tokens_per_char = tokens / len(sample_text)
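The guard matters because random.sample() raises ValueError when asked for more items than the population contains, so a text with fewer than 150 lines has to be tokenized directly. Below is a minimal standalone sketch of the sampling-based estimation technique, assuming an exact but slower counter in place of the main_model.token_count() referenced above; the estimate_tokens name and its parameters are illustrative, not aider's API.

import random


def estimate_tokens(text, exact_token_count, samples=150):
    """Estimate the token count of text by tokenizing a random sample of lines.

    exact_token_count is assumed to be a precise but slow callable, standing
    in for the main_model.token_count() shown in the diff above.
    """
    lines = text.splitlines(keepends=True)

    # Small inputs: count exactly. random.sample() raises ValueError when
    # asked for more lines than the text contains, which is the edge case
    # this commit guards against.
    if len(lines) < samples:
        return exact_token_count(text)

    # Tokenize only a sample, derive a tokens-per-character ratio, and
    # scale it by the full text length.
    sample_text = "".join(random.sample(lines, samples))
    tokens_per_char = exact_token_count(sample_text) / len(sample_text)
    return int(len(text) * tokens_per_char)

Sampling a fixed number of lines keeps the expensive tokenizer call bounded on large files, while the exact path covers small inputs where sampling would fail or be unrepresentative.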