feat(api): allow to pass videos to backends (#3601)

This prepares the API to receive videos as well for video understanding. It works similarly to images, where the request should be in the form: { "type": "video_url", "video_url": { "url": "url or base64 data" } } Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 10:35:01 +00:00 · 2024-09-19 11:21:59 +02:00 · 2024-09-19 11:21:59 +02:00 · fbb9facda4
commit fbb9facda4
parent c6a819e92f
8 changed files with 47 additions and 27 deletions
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@ -31,7 +31,7 @@ type TokenUsage struct {
 	Completion int
 }

-func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model
 	threads := c.Threads
 	if *threads == 0 && o.Threads != 0 {
@ -101,6 +101,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 		opts.Messages = protoMessages
 		opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
 		opts.Images = images
+		opts.Videos = videos

 		tokenUsage := TokenUsage{}