fix: set max_tokens to 1 for cache warming

Paul Gauthier 2024-09-27 16:46:03 -07:00 committed by Paul Gauthier (aider)
parent aab01086a2
commit 7e4e6782d1


@@ -1071,14 +1071,15 @@ class Coder:
 self.warming_pings_left -= 1
 self.next_cache_warm = time.time() + delay
+kwargs = self.main_model.extra_params or dict()
+kwargs["max_tokens"] = 1
 try:
     completion = litellm.completion(
         model=self.main_model.name,
         messages=self.cache_warming_chunks.cacheable_messages(),
         stream=False,
-        max_tokens=1,
         extra_headers=self.main_model.extra_headers,
-        **self.main_model.extra_params,  # Use **kwargs here
+        **kwargs,
     )
 except Exception as err:
     self.io.tool_warning(f"Cache warming error: {str(err)}")