fix to ignore long (!) lines in repomap

This commit is contained in:
Paul Gauthier 2024-05-14 06:01:24 -07:00
parent 292250db4f
commit e96420f999

View file

@ -64,7 +64,7 @@ class RepoMap:
max_map_tokens = self.max_map_tokens
if not chat_files:
# with no code in the chat, give a bigger view of the entire repo
max_map_tokens *= 4
max_map_tokens *= 8
try:
files_listing = self.get_ranked_tags_map(chat_files, other_files, max_map_tokens)
@ -78,7 +78,7 @@ class RepoMap:
num_tokens = self.token_count(files_listing)
if self.verbose:
self.io.tool_output(f"Repo-map: {num_tokens/1024:.1f} k-tokens")
self.io.tool_output(f"Repo-map: {num_tokens/1024:.1f} k-tokens") # noqa: E226, E231
if chat_files:
other = "other "
@ -338,22 +338,46 @@ class RepoMap:
if not max_map_tokens:
max_map_tokens = self.max_map_tokens
dump(max_map_tokens, self.max_map_tokens)
ranked_tags = self.get_ranked_tags(chat_fnames, other_fnames)
# dump(ranked_tags)
num_tags = len(ranked_tags)
lower_bound = 0
upper_bound = num_tags
best_tree = None
best_tree_tokens = 0
chat_rel_fnames = [self.get_rel_fname(fname) for fname in chat_fnames]
if False:
for i in range(num_tags):
print("making tree...")
tree = self.to_tree(ranked_tags[:i], chat_rel_fnames)
print("tokenizing")
num_tokens = self.token_count(tree)
# print('*'*50)
dump(i, num_tokens)
# print(tree)
while lower_bound <= upper_bound:
middle = (lower_bound + upper_bound) // 2
# print("making tree...")
tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
# print("tokenizing")
num_tokens = self.token_count(tree)
dump(lower_bound, middle, upper_bound)
dump(num_tokens)
# dump(len(tree))
if num_tokens < max_map_tokens and num_tokens > best_tree_tokens:
print("best_tree", num_tokens)
best_tree = tree
best_tree_tokens = num_tokens
if num_tokens < max_map_tokens:
best_tree = tree
lower_bound = middle + 1
else:
upper_bound = middle - 1
@ -389,6 +413,8 @@ class RepoMap:
if type(tag) is Tag:
code = self.io.read_text(tag.fname) or ""
if not code.endswith("\n"):
code += "\n"
context = TreeContext(
tag.rel_fname,
@ -408,6 +434,9 @@ class RepoMap:
if context:
context.add_lines_of_interest([tag.line])
# truncate long lines, in case we get minified js or something else crazy
output = "".join([line[:100] for line in output.splitlines(keepends=True)])
return output
@ -425,7 +454,7 @@ def find_src_files(directory):
def get_random_color():
hue = random.random()
r, g, b = [int(x * 255) for x in colorsys.hsv_to_rgb(hue, 1, 0.75)]
res = f"#{r:02x}{g:02x}{b:02x}"
res = f"#{r:02x}{g:02x}{b:02x}" # noqa: E231
return res