This commit is contained in:
jayesh thakare 2025-05-16 16:13:22 -04:00 committed by GitHub
commit 24d02981af
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -63,37 +63,38 @@ class ChatSummary:
if split_index <= min_split: if split_index <= min_split:
return self.summarize_all(messages) return self.summarize_all(messages)
# Split head and tail
head = messages[:split_index] head = messages[:split_index]
tail = messages[split_index:] tail = messages[split_index:]
sized = sized[:split_index] # Only size the head once
head.reverse() sized_head = sized[:split_index]
sized.reverse()
# Precompute token limit (fallback to 4096 if undefined)
model_max_input_tokens = self.models[0].info.get("max_input_tokens") or 4096
model_max_input_tokens -= 512 # reserve buffer for safety
keep = [] keep = []
total = 0 total = 0
# These sometimes come set with value = None # Iterate in original order, summing tokens until limit
model_max_input_tokens = self.models[0].info.get("max_input_tokens") or 4096 for tokens, msg in sized_head:
model_max_input_tokens -= 512 total += tokens
for i in range(split_index):
total += sized[i][0]
if total > model_max_input_tokens: if total > model_max_input_tokens:
break break
keep.append(head[i]) keep.append(msg)
# No need to reverse lists back and forth
keep.reverse()
summary = self.summarize_all(keep) summary = self.summarize_all(keep)
tail_tokens = sum(tokens for tokens, msg in sized[split_index:]) # If the combined summary and tail still fits, return directly
summary_tokens = self.token_count(summary) summary_tokens = self.token_count(summary)
tail_tokens = sum(tokens for tokens, _ in sized[split_index:])
result = summary + tail
if summary_tokens + tail_tokens < self.max_tokens: if summary_tokens + tail_tokens < self.max_tokens:
return result return summary + tail
return self.summarize_real(result, depth + 1) # Otherwise recurse with increased depth
return self.summarize_real(summary + tail, depth + 1)
def summarize_all(self, messages): def summarize_all(self, messages):
content = "" content = ""