diff --git a/.github/labeler.yml b/.github/labeler.yml index 7be4dec9..ce4b0290 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,4 +1,4 @@ -enhancements: +enhancement: - head-branch: ['^feature', 'feature'] dependencies: diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 092110df..bd2de83d 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -9,7 +9,7 @@ jobs: fail-fast: false matrix: include: - - repository: "ggerganov/llama.cpp" + - repository: "ggml-org/llama.cpp" variable: "CPPLLAMA_VERSION" branch: "master" - repository: "ggerganov/whisper.cpp" diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml index 00d51322..33ddd698 100644 --- a/.github/workflows/deploy-explorer.yaml +++ b/.github/workflows/deploy-explorer.yaml @@ -33,7 +33,7 @@ jobs: run: | CGO_ENABLED=0 make build-api - name: rm - uses: appleboy/ssh-action@v1.2.0 + uses: appleboy/ssh-action@v1.2.2 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} @@ -53,7 +53,7 @@ jobs: rm: true target: ./local-ai - name: restarting - uses: appleboy/ssh-action@v1.2.0 + uses: appleboy/ssh-action@v1.2.2 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index d9469077..44497d3d 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -2,9 +2,10 @@ name: 'generate and publish GRPC docker caches' on: workflow_dispatch: - push: - branches: - - master + + schedule: + # daily at midnight + - cron: '0 0 * * *' concurrency: group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }} @@ -84,7 +85,7 @@ jobs: build-args: | GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.70.0 + GRPC_VERSION=v1.65.0 context: . file: ./Dockerfile cache-to: type=gha,ignore-error=true diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 7ea76b21..a84af8d0 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -236,7 +236,7 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.70.0 + GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . @@ -265,7 +265,7 @@ jobs: BASE_IMAGE=${{ inputs.base-image }} GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} GRPC_MAKEFLAGS=--jobs=4 --output-sync=target - GRPC_VERSION=v1.70.0 + GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . 
@@ -310,6 +310,11 @@ jobs: tags: ${{ steps.meta_aio_dockerhub.outputs.tags }} labels: ${{ steps.meta_aio_dockerhub.outputs.labels }} + - name: Cleanup + run: | + docker builder prune -f + docker system prune --force --volumes --all + - name: Latest tag # run this on branches, when it is a tag and there is a latest-image defined if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' diff --git a/Dockerfile b/Dockerfile index 742a995c..64861a8a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,7 @@ RUN apt-get update && \ ca-certificates \ curl libssl-dev \ git \ + git-lfs \ unzip upx-ucl && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -198,7 +199,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI ARG GRPC_MAKEFLAGS="-j4 -Otarget" -ARG GRPC_VERSION=v1.70.0 +ARG GRPC_VERSION=v1.65.0 ARG CMAKE_FROM_SOURCE=false ARG CMAKE_VERSION=3.26.4 diff --git a/LICENSE b/LICENSE index 82df78b6..65ebf260 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io) +Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 8d66907c..ac164186 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai DETECT_LIBS?=true # llama.cpp versions -CPPLLAMA_VERSION?=19b392d58dc08c366d0b29bd3b9c6991fa4e1662 +CPPLLAMA_VERSION?=5dec47dcd411fdf815a3708fd6194e2b13d19006 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp @@ -22,7 +22,7 @@ BARKCPP_VERSION?=v1.0.0 # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=d46ed5e184b97c2018dc2e8105925bdb8775e02c +STABLEDIFFUSION_GGML_VERSION?=19d876ee300a055629926ff836489901f734f2b7 ONNX_VERSION?=1.20.0 ONNX_ARCH?=x64 diff --git a/README.md b/README.md index 78267e04..ff6358ca 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ A huge thank you to our generous sponsors who support this project covering CI e

diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml index 9aa845b0..f9e0ca5d 100644 --- a/aio/cpu/embeddings.yaml +++ b/aio/cpu/embeddings.yaml @@ -1,7 +1,7 @@ -name: text-embedding-ada-002 embeddings: true +name: text-embedding-ada-002 parameters: - model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf + model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf usage: | You can test this model with curl like this: diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 74f46817..c6802137 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -1,101 +1,57 @@ -name: gpt-4 -mmap: true -parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf context_size: 8192 - -stopwords: -- "<|im_end|>" -- "" -- "" -- "<|eot_id|>" -- "<|end_of_text|>" - +f16: true function: - # disable injecting the "answer" tool - disable_no_action: true - grammar: - # This allows the grammar to also return messages - mixed_mode: true - # Suffix to add to the grammar - #prefix: '\n' - # Force parallel calls in the grammar - # parallel_calls: true - - return_name_in_function_response: true - # Without grammar uncomment the lines below - # Warning: this is relying only on the capability of the - # LLM model to generate the correct function call. - json_regex_match: - - "(?s)(.*?)" - - "(?s)(.*?)" - replace_llm_results: - # Drop the scratchpad content from responses - - key: "(?s).*" - value: "" - replace_function_results: - # Replace everything that is not JSON array or object - # - - key: '(?s)^[^{\[]*' - value: "" - - key: '(?s)[^}\]]*$' - value: "" - - key: "'([^']*?)'" - value: "_DQUOTE_${1}_DQUOTE_" - - key: '\\"' - value: "__TEMP_QUOTE__" - - key: "\'" - value: "'" - - key: "_DQUOTE_" - value: '"' - - key: "__TEMP_QUOTE__" - value: '"' - # Drop the scratchpad content from responses - - key: "(?s).*" - value: "" - + no_mixed_free_string: true + schema_type: llama3.1 # or JSON is supported too (json) + response_regex: + - \w+)>(?P.*) +mmap: true +name: gpt-4 +parameters: + model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf +stopwords: +- <|im_end|> +- +- <|eot_id|> +- <|end_of_text|> template: chat: | - {{.Input -}} - <|im_start|>assistant + <|begin_of_text|><|start_header_id|>system<|end_header_id|> + You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|> + {{.Input }} + <|start_header_id|>assistant<|end_header_id|> chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }} - {{- if .Content}} - {{.Content }} - {{- end }} - {{- if .FunctionCall}} - {{toJson .FunctionCall}} - {{- end }} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }}<|im_end|> + <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + {{ if .FunctionCall -}} + {{ else if eq .RoleName "tool" -}} + The Function was executed and the response was: + {{ end -}} + {{ if .Content -}} + {{.Content -}} + {{ else if .FunctionCall -}} + {{ range .FunctionCall }} + [{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})] + {{ end }} + {{ end -}} + <|eot_id|> completion: | 
{{.Input}} - function: |- - <|im_start|>system - You are a function calling AI model. - Here are the available tools: - - {{range .Functions}} - {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} - {{end}} - - You should call the tools provided to you sequentially - Please use XML tags to record your reasoning and planning before you call the functions as follows: - - {step-by-step reasoning and plan in bullet points} - - For each function call return a json object with function name and arguments within XML tags as follows: - - {"arguments": , "name": } - <|im_end|> - {{.Input -}} - <|im_start|>assistant + function: | + <|start_header_id|>system<|end_header_id|> + You are an expert in composing functions. You are given a question and a set of possible functions. + Based on the question, you will need to make one or more function/tool calls to achieve the purpose. + If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections. + If you decide to invoke any of the function(s), you MUST put it in the format as follows: + [func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)] + You SHOULD NOT include any other text in the response. + Here is a list of functions in JSON format that you can invoke. + {{toJson .Functions}} + <|eot_id|><|start_header_id|>user<|end_header_id|> + {{.Input}} + <|eot_id|><|start_header_id|>assistant<|end_header_id|> + +download_files: +- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf + sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5 + uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf \ No newline at end of file diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml index 4052fa39..5325f99c 100644 --- a/aio/cpu/vision.yaml +++ b/aio/cpu/vision.yaml @@ -1,31 +1,49 @@ -backend: llama-cpp context_size: 4096 f16: true mmap: true +mmproj: minicpm-v-2_6-mmproj-f16.gguf name: gpt-4o - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: bakllava-mmproj.gguf parameters: - model: bakllava.gguf - + model: minicpm-v-2_6-Q4_K_M.gguf +stopwords: +- <|im_end|> +- +- +- <|endoftext|> template: chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input -}} + <|im_start|>assistant + chat_message: | + <|im_start|>{{ .RoleName }} + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + {{ if .FunctionCall -}} + {{toJson .FunctionCall}} + {{ end -}}<|im_end|> + completion: | {{.Input}} - ASSISTANT: + function: | + <|im_start|>system + You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + For each function call return a json object with function name and arguments + <|im_end|> + {{.Input -}} + <|im_start|>assistant download_files: -- filename: bakllava.gguf - uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf -- filename: bakllava-mmproj.gguf - uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "gpt-4-vision-preview", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' +- filename: minicpm-v-2_6-Q4_K_M.gguf + sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf +- filename: minicpm-v-2_6-mmproj-f16.gguf + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf + sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd \ No newline at end of file diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml index 99a74ef7..f9e0ca5d 100644 --- a/aio/gpu-8g/embeddings.yaml +++ b/aio/gpu-8g/embeddings.yaml @@ -1,7 +1,7 @@ +embeddings: true name: text-embedding-ada-002 -backend: sentencetransformers parameters: - model: all-MiniLM-L6-v2 + model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf usage: | You can test this model with curl like this: diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml index 62674a38..f9c5f17b 100644 --- a/aio/gpu-8g/text-to-text.yaml +++ b/aio/gpu-8g/text-to-text.yaml @@ -1,101 +1,53 @@ -name: gpt-4 -mmap: true -parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf -context_size: 8192 - -stopwords: -- "<|im_end|>" -- "" -- "" -- "<|eot_id|>" -- "<|end_of_text|>" - +context_size: 4096 +f16: true function: - # disable injecting the "answer" tool - disable_no_action: true - + capture_llm_results: + - (?s)(.*?) grammar: - # This allows the grammar to also return messages - mixed_mode: true - # Suffix to add to the grammar - #prefix: '\n' - # Force parallel calls in the grammar - # parallel_calls: true - - return_name_in_function_response: true - # Without grammar uncomment the lines below - # Warning: this is relying only on the capability of the - # LLM model to generate the correct function call. - json_regex_match: - - "(?s)(.*?)" - - "(?s)(.*?)" + properties_order: name,arguments + json_regex_match: + - (?s)(.*?) replace_llm_results: - # Drop the scratchpad content from responses - - key: "(?s).*" + - key: (?s)(.*?) 
value: "" - replace_function_results: - # Replace everything that is not JSON array or object - # - - key: '(?s)^[^{\[]*' - value: "" - - key: '(?s)[^}\]]*$' - value: "" - - key: "'([^']*?)'" - value: "_DQUOTE_${1}_DQUOTE_" - - key: '\\"' - value: "__TEMP_QUOTE__" - - key: "\'" - value: "'" - - key: "_DQUOTE_" - value: '"' - - key: "__TEMP_QUOTE__" - value: '"' - # Drop the scratchpad content from responses - - key: "(?s).*" - value: "" - +mmap: true +name: gpt-4 +parameters: + model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf +stopwords: +- <|im_end|> +- +- template: chat: | {{.Input -}} <|im_start|>assistant chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }} - {{- if .Content}} + <|im_start|>{{ .RoleName }} + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} {{.Content }} - {{- end }} - {{- if .FunctionCall}} + {{ end -}} + {{ if .FunctionCall -}} {{toJson .FunctionCall}} - {{- end }} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }}<|im_end|> + {{ end -}}<|im_end|> completion: | {{.Input}} - function: |- + function: | <|im_start|>system - You are a function calling AI model. - Here are the available tools: - + You are an AI assistant that executes function calls, and these are the tools at your disposal: {{range .Functions}} {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} {{end}} - - You should call the tools provided to you sequentially - Please use XML tags to record your reasoning and planning before you call the functions as follows: - - {step-by-step reasoning and plan in bullet points} - - For each function call return a json object with function name and arguments within XML tags as follows: - - {"arguments": , "name": } - <|im_end|> + <|im_end|> {{.Input -}} - <|im_start|>assistant \ No newline at end of file + <|im_start|>assistant + +download_files: +- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf + sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5 + uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf \ No newline at end of file diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml index 4f5e10b3..5325f99c 100644 --- a/aio/gpu-8g/vision.yaml +++ b/aio/gpu-8g/vision.yaml @@ -1,35 +1,49 @@ -backend: llama-cpp context_size: 4096 f16: true mmap: true +mmproj: minicpm-v-2_6-mmproj-f16.gguf name: gpt-4o - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: llava-v1.6-7b-mmproj-f16.gguf parameters: - model: llava-v1.6-mistral-7b.Q5_K_M.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 - + model: minicpm-v-2_6-Q4_K_M.gguf +stopwords: +- <|im_end|> +- +- +- <|endoftext|> template: chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
+ {{.Input -}} + <|im_start|>assistant + chat_message: | + <|im_start|>{{ .RoleName }} + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + {{ if .FunctionCall -}} + {{toJson .FunctionCall}} + {{ end -}}<|im_end|> + completion: | {{.Input}} - ASSISTANT: + function: | + <|im_start|>system + You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + For each function call return a json object with function name and arguments + <|im_end|> + {{.Input -}} + <|im_start|>assistant download_files: -- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf -- filename: llava-v1.6-7b-mmproj-f16.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "gpt-4-vision-preview", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' +- filename: minicpm-v-2_6-Q4_K_M.gguf + sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf +- filename: minicpm-v-2_6-mmproj-f16.gguf + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf + sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd \ No newline at end of file diff --git a/aio/intel/embeddings.yaml b/aio/intel/embeddings.yaml index 99a74ef7..f9e0ca5d 100644 --- a/aio/intel/embeddings.yaml +++ b/aio/intel/embeddings.yaml @@ -1,7 +1,7 @@ +embeddings: true name: text-embedding-ada-002 -backend: sentencetransformers parameters: - model: all-MiniLM-L6-v2 + model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf usage: | You can test this model with curl like this: diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml index 893b9acf..f9c5f17b 100644 --- a/aio/intel/text-to-text.yaml +++ b/aio/intel/text-to-text.yaml @@ -1,103 +1,53 @@ -name: gpt-4 -mmap: false -context_size: 8192 - -f16: false -parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf - -stopwords: -- "<|im_end|>" -- "" -- "" -- "<|eot_id|>" -- "<|end_of_text|>" - +context_size: 4096 +f16: true function: - # disable injecting the "answer" tool - disable_no_action: true - + capture_llm_results: + - (?s)(.*?) grammar: - # This allows the grammar to also return messages - mixed_mode: true - # Suffix to add to the grammar - #prefix: '\n' - # Force parallel calls in the grammar - # parallel_calls: true - - return_name_in_function_response: true - # Without grammar uncomment the lines below - # Warning: this is relying only on the capability of the - # LLM model to generate the correct function call. 
- json_regex_match: - - "(?s)(.*?)" - - "(?s)(.*?)" + properties_order: name,arguments + json_regex_match: + - (?s)(.*?) replace_llm_results: - # Drop the scratchpad content from responses - - key: "(?s).*" + - key: (?s)(.*?) value: "" - replace_function_results: - # Replace everything that is not JSON array or object - # - - key: '(?s)^[^{\[]*' - value: "" - - key: '(?s)[^}\]]*$' - value: "" - - key: "'([^']*?)'" - value: "_DQUOTE_${1}_DQUOTE_" - - key: '\\"' - value: "__TEMP_QUOTE__" - - key: "\'" - value: "'" - - key: "_DQUOTE_" - value: '"' - - key: "__TEMP_QUOTE__" - value: '"' - # Drop the scratchpad content from responses - - key: "(?s).*" - value: "" - +mmap: true +name: gpt-4 +parameters: + model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf +stopwords: +- <|im_end|> +- +- template: chat: | {{.Input -}} <|im_start|>assistant chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }} - {{- if .Content}} + <|im_start|>{{ .RoleName }} + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} {{.Content }} - {{- end }} - {{- if .FunctionCall}} + {{ end -}} + {{ if .FunctionCall -}} {{toJson .FunctionCall}} - {{- end }} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }}<|im_end|> + {{ end -}}<|im_end|> completion: | {{.Input}} - function: |- + function: | <|im_start|>system - You are a function calling AI model. - Here are the available tools: - + You are an AI assistant that executes function calls, and these are the tools at your disposal: {{range .Functions}} {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} {{end}} - - You should call the tools provided to you sequentially - Please use XML tags to record your reasoning and planning before you call the functions as follows: - - {step-by-step reasoning and plan in bullet points} - - For each function call return a json object with function name and arguments within XML tags as follows: - - {"arguments": , "name": } - <|im_end|> + <|im_end|> {{.Input -}} <|im_start|>assistant + +download_files: +- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf + sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5 + uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf \ No newline at end of file diff --git a/aio/intel/vision.yaml b/aio/intel/vision.yaml index 37067362..264d9d0a 100644 --- a/aio/intel/vision.yaml +++ b/aio/intel/vision.yaml @@ -1,35 +1,50 @@ -backend: llama-cpp context_size: 4096 -mmap: false -f16: false +f16: true +mmap: true +mmproj: minicpm-v-2_6-mmproj-f16.gguf name: gpt-4o - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: llava-v1.6-7b-mmproj-f16.gguf parameters: - model: llava-v1.6-mistral-7b.Q5_K_M.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 - + model: minicpm-v-2_6-Q4_K_M.gguf +stopwords: +- <|im_end|> +- +- +- <|endoftext|> template: chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
+ {{.Input -}} + <|im_start|>assistant + chat_message: | + <|im_start|>{{ .RoleName }} + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + {{ if .FunctionCall -}} + {{toJson .FunctionCall}} + {{ end -}}<|im_end|> + completion: | {{.Input}} - ASSISTANT: + function: | + <|im_start|>system + You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + For each function call return a json object with function name and arguments + <|im_end|> + {{.Input -}} + <|im_start|>assistant + download_files: -- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf -- filename: llava-v1.6-7b-mmproj-f16.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "gpt-4-vision-preview", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' +- filename: minicpm-v-2_6-Q4_K_M.gguf + sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf +- filename: minicpm-v-2_6-mmproj-f16.gguf + uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf + sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd \ No newline at end of file diff --git a/backend/backend.proto b/backend/backend.proto index bd75adc5..cbb81c66 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -165,7 +165,6 @@ message Reply { message GrammarTrigger { string word = 1; - bool at_start = 2; } message ModelOptions { @@ -229,6 +228,11 @@ message ModelOptions { int32 MaxModelLen = 54; int32 TensorParallelSize = 55; string LoadFormat = 58; + bool DisableLogStatus = 66; + string DType = 67; + int32 LimitImagePerPrompt = 68; + int32 LimitVideoPerPrompt = 69; + int32 LimitAudioPerPrompt = 70; string MMProj = 41; diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 4daf84c6..883fbf8f 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -467,9 +467,10 @@ struct llama_server_context bool all_slots_are_idle = false; bool add_bos_token = true; bool has_eos_token = true; + bool has_gpu = false; bool grammar_lazy = false; - std::vector grammar_trigger_words; + std::vector grammar_triggers; int32_t n_ctx; // total context for all clients / slots @@ -511,7 +512,10 @@ struct llama_server_context if (!params.mmproj.empty()) { multimodal = true; LOG_INFO("Multi Modal Mode Enabled", {}); - clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1); + clp_ctx = clip_init(params.mmproj.c_str(), clip_context_params { + /* use_gpu */ has_gpu, + /*verbosity=*/ 1, + }); if(clp_ctx == nullptr) { LOG_ERR("unable to load clip model: %s", 
params.mmproj.c_str()); return false; @@ -709,7 +713,7 @@ struct llama_server_context slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep); - slot->sparams.grammar_trigger_words = grammar_trigger_words; + slot->sparams.grammar_triggers = grammar_triggers; slot->sparams.grammar_lazy = grammar_lazy; if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) { @@ -1155,6 +1159,14 @@ struct llama_server_context slot.has_next_token = false; } + if (slot.n_past >= slot.n_ctx) { + slot.truncated = true; + slot.stopped_limit = true; + slot.has_next_token = false; + + LOG_VERBOSE("stopped due to running out of context capacity", {}); + } + if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok)) { slot.stopped_eos = true; @@ -1342,7 +1354,7 @@ struct llama_server_context queue_results.send(res); } - void send_embedding(llama_client_slot &slot) + void send_embedding(llama_client_slot &slot, const llama_batch & batch) { task_result res; res.id = slot.task_id; @@ -1364,10 +1376,38 @@ struct llama_server_context else { const float *data = llama_get_embeddings(ctx); - std::vector embedding(data, data + n_embd); + std::vector embd_res(n_embd, 0.0f); + std::vector> embedding; + for (int i = 0; i < batch.n_tokens; ++i) { + if (!batch.logits[i] || batch.seq_id[i][0] != slot.id) { + continue; + } + + const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]); + if (embd == NULL) { + embd = llama_get_embeddings_ith(ctx, i); + } + + if (embd == NULL) { + LOG("failed to get embeddings"); + + continue; + } + + // normalize only when there is pooling + // TODO: configurable + if (llama_pooling_type(ctx) != LLAMA_POOLING_TYPE_NONE) { + common_embd_normalize(embd, embd_res.data(), n_embd, 2); + embedding.push_back(embd_res); + } else { + embedding.push_back({ embd, embd + n_embd }); + } + } + + // OAI compat res.result_json = json { - {"embedding", embedding }, + {"embedding", embedding[0] }, }; } queue_results.send(res); @@ -1627,17 +1667,17 @@ struct llama_server_context { if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx) { + // this check is redundant (for good) + // we should never get here, because generation should already stopped in process_token() + // START LOCALAI changes // Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969) // See: https://github.com/mudler/LocalAI/issues/1333 // Context is exhausted, release the slot slot.release(); send_final_response(slot); - slot.cache_tokens.clear(); - slot.n_past = 0; - slot.truncated = false; - slot.has_next_token = true; - LOG("Context exhausted. 
Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size()); + slot.has_next_token = false; + LOG_ERROR("context is exhausted, release the slot", {}); continue; // END LOCALAI changes @@ -1988,7 +2028,7 @@ struct llama_server_context // prompt evaluated for embedding if (slot.embedding) { - send_embedding(slot); + send_embedding(slot, batch_view); slot.release(); slot.i_batch = -1; continue; @@ -2278,7 +2318,7 @@ static std::string get_all_kv_cache_types() { } static void params_parse(const backend::ModelOptions* request, - common_params & params) { + common_params & params, llama_server_context &llama) { // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809 @@ -2316,6 +2356,20 @@ static void params_parse(const backend::ModelOptions* request, add_rpc_devices(std::string(llama_grpc_servers)); } + // decode options. Options are in form optname:optvale, or if booleans only optname. + for (int i = 0; i < request->options_size(); i++) { + std::string opt = request->options(i); + char *optname = strtok(&opt[0], ":"); + char *optval = strtok(NULL, ":"); + if (optval == NULL) { + optval = "true"; + } + + if (!strcmp(optname, "gpu")) { + llama.has_gpu = true; + } + } + // TODO: Add yarn if (!request->tensorsplit().empty()) { @@ -2385,12 +2439,12 @@ static void params_parse(const backend::ModelOptions* request, llama.grammar_lazy = true; for (int i = 0; i < request->grammartriggers_size(); i++) { common_grammar_trigger trigger; - trigger.word = request->grammartriggers(i).word(); - trigger.at_start = request->grammartriggers(i).at_start(); - llama.grammar_trigger_words.push_back(trigger); + trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_WORD; + trigger.value = request->grammartriggers(i).word(); + // trigger.at_start = request->grammartriggers(i).at_start(); + llama.grammar_triggers.push_back(trigger); LOG_INFO("grammar trigger", { - { "word", trigger.word }, - { "at_start", trigger.at_start } + { "word", trigger.value }, }); } } @@ -2409,7 +2463,7 @@ public: grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) { // Implement LoadModel RPC common_params params; - params_parse(request, params); + params_parse(request, params, llama); llama_backend_init(); llama_numa_init(params.numa); diff --git a/backend/go/image/stablediffusion-ggml/gosd.cpp b/backend/go/image/stablediffusion-ggml/gosd.cpp index 8653aa1e..4c7c161a 100644 --- a/backend/go/image/stablediffusion-ggml/gosd.cpp +++ b/backend/go/image/stablediffusion-ggml/gosd.cpp @@ -35,6 +35,8 @@ const char* sample_method_str[] = { "ipndm", "ipndm_v", "lcm", + "ddim_trailing", + "tcd", }; // Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h @@ -173,6 +175,7 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps, -1, //clip_skip cfg_scale, // sfg_scale 3.5f, + 0, // eta width, height, sample_method, diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index af596d9e..4b879746 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,6 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.70.0 +grpcio==1.71.0 protobuf certifi transformers \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index f4beaec1..2f40b320 100644 --- a/backend/python/bark/requirements.txt +++ 
b/backend/python/bark/requirements.txt @@ -1,4 +1,4 @@ bark==0.1.5 -grpcio==1.70.0 +grpcio==1.71.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index 125b18dd..e4d843df 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf grpcio-tools \ No newline at end of file diff --git a/backend/python/coqui/requirements-cpu.txt b/backend/python/coqui/requirements-cpu.txt index c5201d62..787877bd 100644 --- a/backend/python/coqui/requirements-cpu.txt +++ b/backend/python/coqui/requirements-cpu.txt @@ -1,4 +1,4 @@ -transformers +transformers==4.48.3 accelerate torch==2.4.1 coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt index 35fd4f42..97e1ef0a 100644 --- a/backend/python/coqui/requirements-cublas11.txt +++ b/backend/python/coqui/requirements-cublas11.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.4.1+cu118 torchaudio==2.4.1+cu118 -transformers +transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt index fac719d4..53ed2ebc 100644 --- a/backend/python/coqui/requirements-cublas12.txt +++ b/backend/python/coqui/requirements-cublas12.txt @@ -1,5 +1,5 @@ torch==2.4.1 torchaudio==2.4.1 -transformers +transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 359e5867..55cdcddd 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch==2.4.1+rocm6.0 torchaudio==2.4.1+rocm6.0 -transformers +transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index 202dd4ad..c45ce166 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -5,6 +5,6 @@ torchaudio==2.3.1+cxx11.abi oneccl_bind_pt==2.3.100+xpu optimum[openvino] setuptools -transformers +transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index 5ec13b5f..108d30ba 100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf certifi packaging==24.1 \ No newline at end of file diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index 25c0a7ae..3668b016 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -159,6 +159,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): torchType = torch.float16 variant = "fp16" + options = request.Options + + # empty dict + self.options = {} + + # The options are a list of strings in this form optname:optvalue + # We are storing all the options in a dict so we can use it later when + # generating the images + for opt in options: + key, value = opt.split(":") + self.options[key] = value + local = False modelFile = request.Model @@ -447,6 +459,9 @@ 
class BackendServicer(backend_pb2_grpc.BackendServicer): # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults kwargs = {key: options.get(key) for key in keys if key in options} + # populate kwargs from self.options. + kwargs.update(self.options) + # Set seed if request.seed > 0: kwargs["generator"] = torch.Generator(device=self.device).manual_seed( diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index 8c450dca..8cfe88a7 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,5 +1,5 @@ setuptools -grpcio==1.70.0 +grpcio==1.71.0 pillow protobuf certifi diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index cb622d0c..ed8ffec4 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf certifi wheel diff --git a/backend/python/faster-whisper/requirements.txt b/backend/python/faster-whisper/requirements.txt index 125b18dd..e4d843df 100644 --- a/backend/python/faster-whisper/requirements.txt +++ b/backend/python/faster-whisper/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf grpcio-tools \ No newline at end of file diff --git a/backend/python/kokoro/requirements.txt b/backend/python/kokoro/requirements.txt index 06e60389..cf0f0143 100644 --- a/backend/python/kokoro/requirements.txt +++ b/backend/python/kokoro/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf phonemizer scipy diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 566fdae0..931cb146 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index c0fa0c0b..9ba6c861 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf certifi setuptools diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py index 98ac5081..238ba0e3 100644 --- a/backend/python/vllm/backend.py +++ b/backend/python/vllm/backend.py @@ -109,6 +109,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): engine_args.swap_space = request.SwapSpace if request.MaxModelLen != 0: engine_args.max_model_len = request.MaxModelLen + if request.DisableLogStatus: + engine_args.disable_log_status = request.DisableLogStatus + if request.DType != "": + engine_args.dtype = request.DType + if request.LimitImagePerPrompt != 0 or request.LimitVideoPerPrompt != 0 or request.LimitAudioPerPrompt != 0: + # limit-mm-per-prompt defaults to 1 per modality, based on vLLM docs + engine_args.limit_mm_per_prompt = { + "image": max(request.LimitImagePerPrompt, 1), + "video": max(request.LimitVideoPerPrompt, 1), + "audio": max(request.LimitAudioPerPrompt, 1) + } try: self.llm = AsyncLLMEngine.from_engine_args(engine_args) @@ -269,7 +280,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): def load_image(self, image_path: str): """ Load an image from the given file path or base64 encoded data. - + Args: image_path (str): The path to the image file or base64 encoded data. 
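For reference, the `optname:optvalue` strings decoded above come from the backend options list of a model definition: the diffusers backend now folds key:value pairs (kept as strings) into the generation kwargs, while the llama.cpp server treats a bare `gpu` entry as a boolean switch for GPU-accelerated CLIP loading. A minimal sketch follows; the `options` field name is assumed from the existing config schema, and the model and file names are placeholders, not part of this change.

# Sketch only: enabling the new "gpu" option for a multimodal llama.cpp model.
name: vision-example                 # placeholder name
backend: llama-cpp
mmproj: mmproj-f16.gguf              # placeholder file
parameters:
  model: model.gguf                  # placeholder file
options:
  - gpu                              # no ":value" part, so it is read as "true" and sets has_gpu for clip_init
  # a diffusers model would instead pass key:value pairs, e.g. "guidance_scale:7.5" (hypothetical value, stored as a string)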
@@ -288,7 +299,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): def load_video(self, video_path: str): """ Load a video from the given file path. - + Args: video_path (str): The path to the image file. @@ -335,4 +346,4 @@ if __name__ == "__main__": ) args = parser.parse_args() - asyncio.run(serve(args.addr)) \ No newline at end of file + asyncio.run(serve(args.addr)) diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index 1f92add8..f1771cc4 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.70.0 +grpcio==1.71.0 protobuf certifi setuptools \ No newline at end of file diff --git a/core/backend/llm.go b/core/backend/llm.go index 1cad6db5..14eb8569 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -116,6 +116,11 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im } if tokenCallback != nil { + + if c.TemplateConfig.ReplyPrefix != "" { + tokenCallback(c.TemplateConfig.ReplyPrefix, tokenUsage) + } + ss := "" var partialRune []byte @@ -165,8 +170,13 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing + response := string(reply.Message) + if c.TemplateConfig.ReplyPrefix != "" { + response = c.TemplateConfig.ReplyPrefix + response + } + return LLMResponse{ - Response: string(reply.Message), + Response: response, Usage: tokenUsage, }, err } diff --git a/core/backend/options.go b/core/backend/options.go index 3201142d..d98e136c 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -122,7 +122,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers { triggers = append(triggers, &pb.GrammarTrigger{ Word: t.Word, - AtStart: t.AtStart, }) } @@ -159,6 +158,12 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { SwapSpace: int32(c.SwapSpace), MaxModelLen: int32(c.MaxModelLen), TensorParallelSize: int32(c.TensorParallelSize), + DisableLogStatus: c.DisableLogStatus, + DType: c.DType, + // LimitMMPerPrompt vLLM + LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt), + LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt), + LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt), MMProj: c.MMProj, FlashAttention: c.FlashAttention, CacheTypeKey: c.CacheTypeK, diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 7d04996a..56ffa38c 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -130,25 +130,28 @@ type LLMConfig struct { TrimSpace []string `yaml:"trimspace"` TrimSuffix []string `yaml:"trimsuffix"` - ContextSize *int `yaml:"context_size"` - NUMA bool `yaml:"numa"` - LoraAdapter string `yaml:"lora_adapter"` - LoraBase string `yaml:"lora_base"` - LoraAdapters []string `yaml:"lora_adapters"` - LoraScales []float32 `yaml:"lora_scales"` - LoraScale float32 `yaml:"lora_scale"` - NoMulMatQ bool `yaml:"no_mulmatq"` - DraftModel string `yaml:"draft_model"` - NDraft int32 `yaml:"n_draft"` - Quantization string `yaml:"quantization"` - LoadFormat string `yaml:"load_format"` - GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM - TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM - EnforceEager bool `yaml:"enforce_eager"` // vLLM - SwapSpace int `yaml:"swap_space"` // vLLM - 
MaxModelLen int `yaml:"max_model_len"` // vLLM - TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM - MMProj string `yaml:"mmproj"` + ContextSize *int `yaml:"context_size"` + NUMA bool `yaml:"numa"` + LoraAdapter string `yaml:"lora_adapter"` + LoraBase string `yaml:"lora_base"` + LoraAdapters []string `yaml:"lora_adapters"` + LoraScales []float32 `yaml:"lora_scales"` + LoraScale float32 `yaml:"lora_scale"` + NoMulMatQ bool `yaml:"no_mulmatq"` + DraftModel string `yaml:"draft_model"` + NDraft int32 `yaml:"n_draft"` + Quantization string `yaml:"quantization"` + LoadFormat string `yaml:"load_format"` + GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM + TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM + EnforceEager bool `yaml:"enforce_eager"` // vLLM + SwapSpace int `yaml:"swap_space"` // vLLM + MaxModelLen int `yaml:"max_model_len"` // vLLM + TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM + DisableLogStatus bool `yaml:"disable_log_stats"` // vLLM + DType string `yaml:"dtype"` // vLLM + LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt"` // vLLM + MMProj string `yaml:"mmproj"` FlashAttention bool `yaml:"flash_attention"` NoKVOffloading bool `yaml:"no_kv_offloading"` @@ -166,6 +169,13 @@ type LLMConfig struct { CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale } +// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM +type LimitMMPerPrompt struct { + LimitImagePerPrompt int `yaml:"image"` + LimitVideoPerPrompt int `yaml:"video"` + LimitAudioPerPrompt int `yaml:"audio"` +} + // AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend type AutoGPTQ struct { ModelBaseName string `yaml:"model_base_name"` @@ -203,6 +213,8 @@ type TemplateConfig struct { Multimodal string `yaml:"multimodal"` JinjaTemplate bool `yaml:"jinja_template"` + + ReplyPrefix string `yaml:"reply_prefix"` } func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error { @@ -212,7 +224,15 @@ func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error { return err } *c = BackendConfig(aux) + c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings) + // Make sure the usecases are valid, we rewrite with what we identified + c.KnownUsecaseStrings = []string{} + for k, usecase := range GetAllBackendConfigUsecases() { + if c.HasUsecases(usecase) { + c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k) + } + } return nil } @@ -472,6 +492,10 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases { } } +func stringToFlag(s string) string { + return "FLAG_" + strings.ToUpper(s) +} + func GetUsecasesFromYAML(input []string) *BackendConfigUsecases { if len(input) == 0 { return nil @@ -479,7 +503,7 @@ func GetUsecasesFromYAML(input []string) *BackendConfigUsecases { result := FLAG_ANY flags := GetAllBackendConfigUsecases() for _, str := range input { - flag, exists := flags["FLAG_"+strings.ToUpper(str)] + flag, exists := flags[stringToFlag(str)] if exists { result |= flag } diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go index 3a60e618..0c540052 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -29,6 +29,8 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s if err != nil { return err } + config.Description = model.Description + config.License = model.License } else if len(model.ConfigFile) > 0 { // TODO: is this worse than using the override method with a blank cfg yaml? 
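Taken together, the yaml tags introduced above expose the new vLLM knobs (`dtype`, `disable_log_stats`, `limit_mm_per_prompt`) and the `reply_prefix` template option. A rough sketch of a model file using them is shown below; the model id and all values are illustrative only and are not taken from this change.

# Sketch only: the new fields in a vLLM model definition (illustrative values).
name: multimodal-example             # placeholder name
backend: vllm
parameters:
  model: some-org/some-vl-model      # placeholder model id
dtype: float16                       # forwarded to the engine arguments via the new DType field
disable_log_stats: true              # forwarded via the new DisableLogStatus field
limit_mm_per_prompt:                 # per-modality caps, clamped to at least 1 by the backend
  image: 2
  video: 1
  audio: 1
template:
  reply_prefix: "<think>"            # emitted first on streamed output and prepended to the final response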
reYamlConfig, err := yaml.Marshal(model.ConfigFile) @@ -114,7 +116,7 @@ func FindModel(models []*GalleryModel, name string, basePath string) *GalleryMod // List available models // Models galleries are a list of yaml files that are hosted on a remote server (for example github). // Each yaml file contains a list of models that can be downloaded and optionally overrides to define a new model setting. -func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*GalleryModel, error) { +func AvailableGalleryModels(galleries []config.Gallery, basePath string) (GalleryModels, error) { var models []*GalleryModel // Get models from galleries diff --git a/core/gallery/request.go b/core/gallery/request.go index 72d078a1..5e7308fd 100644 --- a/core/gallery/request.go +++ b/core/gallery/request.go @@ -62,3 +62,15 @@ func (gm GalleryModels) FindByName(name string) *GalleryModel { } return nil } + +func (gm GalleryModels) Paginate(pageNum int, itemsNum int) GalleryModels { + start := (pageNum - 1) * itemsNum + end := start + itemsNum + if start > len(gm) { + start = len(gm) + } + if end > len(gm) { + end = len(gm) + } + return gm[start:end] +} diff --git a/core/http/app.go b/core/http/app.go index ddce573a..9cbeefff 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -139,6 +139,28 @@ func API(application *application.Application) (*fiber.App, error) { return nil, fmt.Errorf("failed to create key auth config: %w", err) } + httpFS := http.FS(embedDirStatic) + + router.Use(favicon.New(favicon.Config{ + URL: "/favicon.ico", + FileSystem: httpFS, + File: "static/favicon.ico", + })) + + router.Use("/static", filesystem.New(filesystem.Config{ + Root: httpFS, + PathPrefix: "static", + Browse: true, + })) + + if application.ApplicationConfig().ImageDir != "" { + router.Static("/generated-images", application.ApplicationConfig().ImageDir) + } + + if application.ApplicationConfig().AudioDir != "" { + router.Static("/generated-audio", application.ApplicationConfig().AudioDir) + } + // Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration router.Use(v2keyauth.New(*kaConfig)) @@ -176,20 +198,6 @@ func API(application *application.Application) (*fiber.App, error) { } routes.RegisterJINARoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) - httpFS := http.FS(embedDirStatic) - - router.Use(favicon.New(favicon.Config{ - URL: "/favicon.ico", - FileSystem: httpFS, - File: "static/favicon.ico", - })) - - router.Use("/static", filesystem.New(filesystem.Config{ - Root: httpFS, - PathPrefix: "static", - Browse: true, - })) - // Define a custom 404 handler // Note: keep this at the bottom! 
router.Use(notFoundHandler) diff --git a/core/http/elements/buttons.go b/core/http/elements/buttons.go index 2364a0b3..b2ce904b 100644 --- a/core/http/elements/buttons.go +++ b/core/http/elements/buttons.go @@ -13,7 +13,7 @@ func installButton(galleryName string) elem.Node { attrs.Props{ "data-twe-ripple-init": "", "data-twe-ripple-color": "light", - "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "class": "float-right inline-flex items-center rounded-lg bg-blue-600 hover:bg-blue-700 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out shadow hover:shadow-lg", "hx-swap": "outerHTML", // post the Model ID as param "hx-post": "browse/install/model/" + galleryName, @@ -52,7 +52,7 @@ func infoButton(m *gallery.GalleryModel) elem.Node { attrs.Props{ "data-twe-ripple-init": "", "data-twe-ripple-color": "light", - "class": "float-left inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "class": "inline-flex items-center rounded-lg bg-gray-700 hover:bg-gray-600 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out", "data-modal-target": modalName(m), "data-modal-toggle": modalName(m), }, diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 5ab68508..539627e4 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -17,7 +17,7 @@ const ( func cardSpan(text, icon string) elem.Node { return elem.Span( attrs.Props{ - "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", + "class": "inline-flex items-center px-3 py-1 rounded-lg text-xs font-medium bg-gray-700/70 text-gray-300 border border-gray-600/50 mr-2 mb-2", }, elem.I(attrs.Props{ "class": icon + " pr-2", @@ -39,19 +39,20 @@ func searchableElement(text, icon string) elem.Node { ), elem.Span( attrs.Props{ - "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2", + "class": "inline-flex items-center text-xs px-3 py-1 rounded-full bg-gray-700/60 text-gray-300 border border-gray-600/50 hover:bg-gray-600 hover:text-gray-100 transition duration-200 ease-in-out", }, elem.A( attrs.Props{ // "name": "search", // "value": text, //"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", - "href": "#!", - "hx-post": "browse/search/models", - "hx-target": "#search-results", + //"href": "#!", + "href": "browse?term=" + text, + //"hx-post": "browse/search/models", + //"hx-target": "#search-results", // TODO: this doesn't work // "hx-vals": `{ \"search\": \"` + text + `\" }`, - "hx-indicator": ".htmx-indicator", + //"hx-indicator": ".htmx-indicator", 
}, elem.I(attrs.Props{ "class": icon + " pr-2", @@ -101,7 +102,7 @@ func modalName(m *gallery.GalleryModel) string { return m.Name + "-modal" } -func modelDescription(m *gallery.GalleryModel) elem.Node { +func modelModal(m *gallery.GalleryModel) elem.Node { urls := []elem.Node{} for _, url := range m.URLs { urls = append(urls, @@ -116,6 +117,125 @@ func modelDescription(m *gallery.GalleryModel) elem.Node { ) } + return elem.Div( + attrs.Props{ + "id": modalName(m), + "tabindex": "-1", + "aria-hidden": "true", + "class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full", + }, + elem.Div( + attrs.Props{ + "class": "relative p-4 w-full max-w-2xl max-h-full", + }, + elem.Div( + attrs.Props{ + "class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700", + }, + // header + elem.Div( + attrs.Props{ + "class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600", + }, + elem.H3( + attrs.Props{ + "class": "text-xl font-semibold text-gray-900 dark:text-white", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)), + ), + elem.Button( // close button + attrs.Props{ + "class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", + "data-modal-hide": modalName(m), + }, + elem.Raw( + ``, + ), + elem.Span( + attrs.Props{ + "class": "sr-only", + }, + elem.Text("Close modal"), + ), + ), + ), + // body + elem.Div( + attrs.Props{ + "class": "p-4 md:p-5 space-y-4", + }, + elem.Div( + attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.Img(attrs.Props{ + // "class": "rounded-t-lg object-fit object-center h-96", + "class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded", + "src": m.Icon, + "loading": "lazy", + }), + ), + elem.P( + attrs.Props{ + "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)), + ), + elem.Hr( + attrs.Props{}, + ), + elem.P( + attrs.Props{ + "class": "text-sm font-semibold text-gray-900 dark:text-white", + }, + elem.Text("Links"), + ), + elem.Ul( + attrs.Props{}, + urls..., + ), + elem.If( + len(m.Tags) > 0, + elem.Div( + attrs.Props{}, + elem.P( + attrs.Props{ + "class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white", + }, + elem.Text("Tags"), + ), + elem.Div( + attrs.Props{ + "class": "flex flex-row flex-wrap content-center", + }, + tagsNodes..., + ), + ), + elem.Div(attrs.Props{}), + ), + ), + // Footer + elem.Div( + attrs.Props{ + "class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600", + }, + elem.Button( + attrs.Props{ + "data-modal-hide": modalName(m), + "class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700", + }, + elem.Text("Close"), + ), + ), + ), + ), + ) + +} + +func modelDescription(m *gallery.GalleryModel) elem.Node { return elem.Div( attrs.Props{ "class": "p-6 text-surface dark:text-white", @@ -132,122 +252,6 @@ func modelDescription(m *gallery.GalleryModel) elem.Node { }, 
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)), ), - - elem.Div( - attrs.Props{ - "id": modalName(m), - "tabindex": "-1", - "aria-hidden": "true", - "class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full", - }, - elem.Div( - attrs.Props{ - "class": "relative p-4 w-full max-w-2xl max-h-full", - }, - elem.Div( - attrs.Props{ - "class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700", - }, - // header - elem.Div( - attrs.Props{ - "class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600", - }, - elem.H3( - attrs.Props{ - "class": "text-xl font-semibold text-gray-900 dark:text-white", - }, - elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)), - ), - elem.Button( // close button - attrs.Props{ - "class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", - "data-modal-hide": modalName(m), - }, - elem.Raw( - ``, - ), - elem.Span( - attrs.Props{ - "class": "sr-only", - }, - elem.Text("Close modal"), - ), - ), - ), - // body - elem.Div( - attrs.Props{ - "class": "p-4 md:p-5 space-y-4", - }, - elem.Div( - attrs.Props{ - "class": "flex justify-center items-center", - }, - elem.Img(attrs.Props{ - // "class": "rounded-t-lg object-fit object-center h-96", - "class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded", - "src": m.Icon, - "loading": "lazy", - }), - ), - elem.P( - attrs.Props{ - "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", - }, - elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)), - ), - elem.Hr( - attrs.Props{}, - ), - elem.P( - attrs.Props{ - "class": "text-sm font-semibold text-gray-900 dark:text-white", - }, - elem.Text("Links"), - ), - elem.Ul( - attrs.Props{}, - urls..., - ), - elem.If( - len(m.Tags) > 0, - elem.Div( - attrs.Props{}, - elem.P( - attrs.Props{ - "class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white", - }, - elem.Text("Tags"), - ), - elem.Div( - attrs.Props{ - "class": "flex flex-row flex-wrap content-center", - }, - tagsNodes..., - ), - ), - elem.Div(attrs.Props{}), - ), - ), - // Footer - elem.Div( - attrs.Props{ - "class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600", - }, - elem.Button( - attrs.Props{ - "data-modal-hide": modalName(m), - "class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700", - }, - elem.Text("Close"), - ), - ), - ), - ), - ), ) } @@ -397,7 +401,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g modelsElements = append(modelsElements, elem.Div( attrs.Props{ - "class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2", + "class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2 bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden transition-all duration-300 hover:shadow-lg hover:shadow-blue-900/20 hover:-translate-y-1 
hover:border-blue-700/50", }, elem.Div( attrs.Props{ @@ -406,6 +410,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g elems..., ), ), + modelModal(m), ) } diff --git a/core/http/elements/p2p.go b/core/http/elements/p2p.go index 7eb10df5..6c0a5a57 100644 --- a/core/http/elements/p2p.go +++ b/core/http/elements/p2p.go @@ -2,6 +2,7 @@ package elements import ( "fmt" + "time" "github.com/chasefleming/elem-go" "github.com/chasefleming/elem-go/attrs" @@ -18,19 +19,6 @@ func renderElements(n []elem.Node) string { } func P2PNodeStats(nodes []p2p.NodeData) string { - /* -
-		Total Workers Detected: {{ len .Nodes }}
-		{{ $online := 0 }}
-		{{ range .Nodes }}
-			{{ if .IsOnline }}
-				{{ $online = add $online 1 }}
-			{{ end }}
-		{{ end }}
-		Total Online Workers: {{$online}}
- */ - online := 0 for _, n := range nodes { if n.IsOnline() { @@ -38,27 +26,21 @@ func P2PNodeStats(nodes []p2p.NodeData) string { } } - class := "text-green-500" + class := "text-blue-400" if online == 0 { - class = "text-red-500" + class = "text-red-400" } - /* - - */ - circle := elem.I(attrs.Props{ - "class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1", - }) + nodesElements := []elem.Node{ elem.Span( attrs.Props{ - "class": class, + "class": class + " font-bold text-xl", }, - circle, elem.Text(fmt.Sprintf("%d", online)), ), elem.Span( attrs.Props{ - "class": "text-gray-200", + "class": "text-gray-300 text-xl", }, elem.Text(fmt.Sprintf("/%d", len(nodes))), ), @@ -68,77 +50,73 @@ func P2PNodeStats(nodes []p2p.NodeData) string { } func P2PNodeBoxes(nodes []p2p.NodeData) string { - /* -
-		{{.ID}}
-		Status:
-		{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
- */ - nodesElements := []elem.Node{} for _, n := range nodes { + nodeID := bluemonday.StrictPolicy().Sanitize(n.ID) + + // Define status-specific classes + statusIconClass := "text-green-400" + statusText := "Online" + statusTextClass := "text-green-400" + + if !n.IsOnline() { + statusIconClass = "text-red-400" + statusText = "Offline" + statusTextClass = "text-red-400" + } nodesElements = append(nodesElements, elem.Div( attrs.Props{ - "class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left", + "class": "bg-gray-800/80 border border-gray-700/50 rounded-xl p-4 shadow-lg transition-all duration-300 hover:shadow-blue-900/20 hover:border-blue-700/50", }, - elem.P( + // Node ID and status indicator in top row + elem.Div( attrs.Props{ - "class": "text-sm text-gray-400 mt-2 flex", + "class": "flex items-center justify-between mb-3", }, - elem.I( + // Node ID with icon + elem.Div( attrs.Props{ - "class": "fas fa-desktop text-gray-400 mr-2", + "class": "flex items-center", }, - ), - elem.Text("Name: "), - elem.Span( - attrs.Props{ - "class": "text-gray-200 font-semibold ml-2 mr-1", - }, - elem.Text(bluemonday.StrictPolicy().Sanitize(n.ID)), - ), - elem.Text("Status: "), - elem.If( - n.IsOnline(), elem.I( attrs.Props{ - "class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1", + "class": "fas fa-server text-blue-400 mr-2", }, ), - elem.I( - attrs.Props{ - "class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1", - }, - ), - ), - elem.If( - n.IsOnline(), - elem.Span( - attrs.Props{ - "class": "text-green-400", - }, - - elem.Text("Online"), - ), elem.Span( attrs.Props{ - "class": "text-red-400", + "class": "text-white font-medium", }, - elem.Text("Offline"), + elem.Text(nodeID), ), ), + // Status indicator + elem.Div( + attrs.Props{ + "class": "flex items-center", + }, + elem.I( + attrs.Props{ + "class": "fas fa-circle animate-pulse " + statusIconClass + " mr-1.5", + }, + ), + elem.Span( + attrs.Props{ + "class": statusTextClass, + }, + elem.Text(statusText), + ), + ), + ), + // Bottom section with timestamp + elem.Div( + attrs.Props{ + "class": "text-xs text-gray-400 pt-1 border-t border-gray-700/30", + }, + elem.Text("Last updated: "+time.Now().UTC().Format("2006-01-02 15:04:05")), ), )) } diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index 548eeb2b..fd17613a 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -112,14 +112,6 @@ func RegisterOpenAIRoutes(app *fiber.App, re.SetOpenAIRequest, openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) - if application.ApplicationConfig().ImageDir != "" { - app.Static("/generated-images", application.ApplicationConfig().ImageDir) - } - - if application.ApplicationConfig().AudioDir != "" { - app.Static("/generated-audio", application.ApplicationConfig().AudioDir) - } - // List models app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 92d20544..373a983b 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -3,7 +3,9 @@ package routes import ( "fmt" "html/template" + "math" "sort" + "strconv" "strings" "github.com/mudler/LocalAI/core/config" @@ -126,6 +128,8 @@ func RegisterUIRoutes(app *fiber.App, // Show the Models page (all models) 
app.Get("/browse", func(c *fiber.Ctx) error { term := c.Query("term") + page := c.Query("page") + items := c.Query("items") models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) @@ -164,6 +168,47 @@ func RegisterUIRoutes(app *fiber.App, // "ApplicationConfig": appConfig, } + if page == "" { + page = "1" + } + + if page != "" { + // return a subset of the models + pageNum, err := strconv.Atoi(page) + if err != nil { + return c.Status(fiber.StatusBadRequest).SendString("Invalid page number") + } + + if pageNum == 0 { + return c.Render("views/models", summary) + } + + itemsNum, err := strconv.Atoi(items) + if err != nil { + itemsNum = 21 + } + + totalPages := int(math.Ceil(float64(len(models)) / float64(itemsNum))) + + models = models.Paginate(pageNum, itemsNum) + + prevPage := pageNum - 1 + nextPage := pageNum + 1 + if prevPage < 1 { + prevPage = 1 + } + if nextPage > totalPages { + nextPage = totalPages + } + if prevPage != pageNum { + summary["PrevPage"] = prevPage + } + summary["NextPage"] = nextPage + summary["TotalPages"] = totalPages + summary["CurrentPage"] = pageNum + summary["Models"] = template.HTML(elements.ListModels(models, processingModels, galleryService)) + } + // Render index return c.Render("views/models", summary) }) @@ -171,6 +216,9 @@ func RegisterUIRoutes(app *fiber.App, // Show the models, filtered from the user input // https://htmx.org/examples/active-search/ app.Post("/browse/search/models", func(c *fiber.Ctx) error { + page := c.Query("page") + items := c.Query("items") + form := struct { Search string `form:"search"` }{} @@ -180,7 +228,26 @@ func RegisterUIRoutes(app *fiber.App, models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) - return c.SendString(elements.ListModels(gallery.GalleryModels(models).Search(form.Search), processingModels, galleryService)) + if page != "" { + // return a subset of the models + pageNum, err := strconv.Atoi(page) + if err != nil { + return c.Status(fiber.StatusBadRequest).SendString("Invalid page number") + } + + itemsNum, err := strconv.Atoi(items) + if err != nil { + itemsNum = 21 + } + + models = models.Paginate(pageNum, itemsNum) + } + + if form.Search != "" { + models = models.Search(form.Search) + } + + return c.SendString(elements.ListModels(models, processingModels, galleryService)) }) /* @@ -305,23 +372,6 @@ func RegisterUIRoutes(app *fiber.App, }) } - // Show the Chat page - app.Get("/chat/:model", func(c *fiber.Ctx) error { - backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) - - summary := fiber.Map{ - "Title": "LocalAI - Chat with " + c.Params("model"), - "BaseURL": utils.BaseURL(c), - "ModelsConfig": backendConfigs, - "Model": c.Params("model"), - "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), - } - - // Render index - return c.Render("views/chat", summary) - }) - app.Get("/talk/", func(c *fiber.Ctx) error { backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) @@ -344,21 +394,73 @@ func RegisterUIRoutes(app *fiber.App, }) app.Get("/chat/", func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) - backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) - - if len(backendConfigs) == 0 { + if len(backendConfigs)+len(modelsWithoutConfig) == 0 { // If no model is available redirect to the 
index which suggests how to install models return c.Redirect(utils.BaseURL(c)) } + modelThatCanBeUsed := "" + galleryConfigs := map[string]*gallery.Config{} + + for _, m := range backendConfigs { + cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) + if err != nil { + continue + } + galleryConfigs[m.Name] = cfg + } + + title := "LocalAI - Chat" + + for _, b := range backendConfigs { + if b.HasUsecases(config.FLAG_CHAT) { + modelThatCanBeUsed = b.Name + title = "LocalAI - Chat with " + modelThatCanBeUsed + break + } + } summary := fiber.Map{ - "Title": "LocalAI - Chat with " + backendConfigs[0], - "BaseURL": utils.BaseURL(c), - "ModelsConfig": backendConfigs, - "Model": backendConfigs[0], - "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), + "Title": title, + "BaseURL": utils.BaseURL(c), + "ModelsWithoutConfig": modelsWithoutConfig, + "GalleryConfig": galleryConfigs, + "ModelsConfig": backendConfigs, + "Model": modelThatCanBeUsed, + "Version": internal.PrintableVersion(), + "IsP2PEnabled": p2p.IsP2PEnabled(), + } + + // Render index + return c.Render("views/chat", summary) + }) + + // Show the Chat page + app.Get("/chat/:model", func(c *fiber.Ctx) error { + backendConfigs := cl.GetAllBackendConfigs() + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) + + galleryConfigs := map[string]*gallery.Config{} + + for _, m := range backendConfigs { + cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) + if err != nil { + continue + } + galleryConfigs[m.Name] = cfg + } + + summary := fiber.Map{ + "Title": "LocalAI - Chat with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), + "ModelsConfig": backendConfigs, + "GalleryConfig": galleryConfigs, + "ModelsWithoutConfig": modelsWithoutConfig, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -367,14 +469,16 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/text2image/:model", func(c *fiber.Ctx) error { backendConfigs := cl.GetAllBackendConfigs() + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) summary := fiber.Map{ - "Title": "LocalAI - Generate images with " + c.Params("model"), - "BaseURL": utils.BaseURL(c), - "ModelsConfig": backendConfigs, - "Model": c.Params("model"), - "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), + "Title": "LocalAI - Generate images with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), + "ModelsConfig": backendConfigs, + "ModelsWithoutConfig": modelsWithoutConfig, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -382,21 +486,33 @@ func RegisterUIRoutes(app *fiber.App, }) app.Get("/text2image/", func(c *fiber.Ctx) error { - backendConfigs := cl.GetAllBackendConfigs() + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) - if len(backendConfigs) == 0 { + if len(backendConfigs)+len(modelsWithoutConfig) == 0 { // If no model is available redirect to the index which suggests how to install models return c.Redirect(utils.BaseURL(c)) } + modelThatCanBeUsed := "" + title := "LocalAI - Generate images" + + for _, b := range backendConfigs { + if b.HasUsecases(config.FLAG_IMAGE) { + modelThatCanBeUsed = b.Name + title = "LocalAI - Generate images with " + modelThatCanBeUsed + break + } + } + summary := fiber.Map{ - "Title": "LocalAI - Generate 
images with " + backendConfigs[0].Name, - "BaseURL": utils.BaseURL(c), - "ModelsConfig": backendConfigs, - "Model": backendConfigs[0].Name, - "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), + "Title": title, + "BaseURL": utils.BaseURL(c), + "ModelsConfig": backendConfigs, + "ModelsWithoutConfig": modelsWithoutConfig, + "Model": modelThatCanBeUsed, + "Version": internal.PrintableVersion(), + "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -405,14 +521,16 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/tts/:model", func(c *fiber.Ctx) error { backendConfigs := cl.GetAllBackendConfigs() + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) summary := fiber.Map{ - "Title": "LocalAI - Generate images with " + c.Params("model"), - "BaseURL": utils.BaseURL(c), - "ModelsConfig": backendConfigs, - "Model": c.Params("model"), - "Version": internal.PrintableVersion(), - "IsP2PEnabled": p2p.IsP2PEnabled(), + "Title": "LocalAI - Generate images with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), + "ModelsConfig": backendConfigs, + "ModelsWithoutConfig": modelsWithoutConfig, + "Model": c.Params("model"), + "Version": internal.PrintableVersion(), + "IsP2PEnabled": p2p.IsP2PEnabled(), } // Render index @@ -420,21 +538,32 @@ func RegisterUIRoutes(app *fiber.App, }) app.Get("/tts/", func(c *fiber.Ctx) error { - backendConfigs := cl.GetAllBackendConfigs() + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) - if len(backendConfigs) == 0 { + if len(backendConfigs)+len(modelsWithoutConfig) == 0 { // If no model is available redirect to the index which suggests how to install models return c.Redirect(utils.BaseURL(c)) } + modelThatCanBeUsed := "" + title := "LocalAI - Generate audio" + + for _, b := range backendConfigs { + if b.HasUsecases(config.FLAG_TTS) { + modelThatCanBeUsed = b.Name + title = "LocalAI - Generate audio with " + modelThatCanBeUsed + break + } + } summary := fiber.Map{ - "Title": "LocalAI - Generate audio with " + backendConfigs[0].Name, - "BaseURL": utils.BaseURL(c), - "ModelsConfig": backendConfigs, - "Model": backendConfigs[0].Name, - "IsP2PEnabled": p2p.IsP2PEnabled(), - "Version": internal.PrintableVersion(), + "Title": title, + "BaseURL": utils.BaseURL(c), + "ModelsConfig": backendConfigs, + "ModelsWithoutConfig": modelsWithoutConfig, + "Model": modelThatCanBeUsed, + "IsP2PEnabled": p2p.IsP2PEnabled(), + "Version": internal.PrintableVersion(), } // Render index diff --git a/core/http/static/chat.js b/core/http/static/chat.js index 67e0bb60..0dce445b 100644 --- a/core/http/static/chat.js +++ b/core/http/static/chat.js @@ -27,10 +27,19 @@ SOFTWARE. 
*/ -function submitKey(event) { - event.preventDefault(); - localStorage.setItem("key", document.getElementById("apiKey").value); - document.getElementById("apiKey").blur(); +function toggleLoader(show) { + const loader = document.getElementById('loader'); + const sendButton = document.getElementById('send-button'); + + if (show) { + loader.style.display = 'block'; + sendButton.style.display = 'none'; + document.getElementById("input").disabled = true; + } else { + document.getElementById("input").disabled = false; + loader.style.display = 'none'; + sendButton.style.display = 'block'; + } } function submitSystemPrompt(event) { @@ -47,10 +56,9 @@ function submitPrompt(event) { const input = document.getElementById("input").value; Alpine.store("chat").add("user", input, image); document.getElementById("input").value = ""; - const key = localStorage.getItem("key"); const systemPrompt = localStorage.getItem("system_prompt"); - - promptGPT(systemPrompt, key, input); + Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); }); + promptGPT(systemPrompt, input); } function readInputImage() { @@ -67,14 +75,13 @@ function readInputImage() { } - async function promptGPT(systemPrompt, key, input) { + async function promptGPT(systemPrompt, input) { const model = document.getElementById("chat-model").value; // Set class "loader" to the element with "loader" id //document.getElementById("loader").classList.add("loader"); // Make the "loader" visible - document.getElementById("loader").style.display = "block"; - document.getElementById("input").disabled = true; - document.getElementById('messages').scrollIntoView(false) + toggleLoader(true); + messages = Alpine.store("chat").messages(); @@ -146,7 +153,6 @@ function readInputImage() { const response = await fetch("v1/chat/completions", { method: "POST", headers: { - Authorization: `Bearer ${key}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -181,8 +187,8 @@ function readInputImage() { const chatStore = Alpine.store("chat"); chatStore.add("assistant", token); // Efficiently scroll into view without triggering multiple reflows - const messages = document.getElementById('messages'); - messages.scrollTop = messages.scrollHeight; + // const messages = document.getElementById('messages'); + // messages.scrollTop = messages.scrollHeight; }; let buffer = ""; @@ -244,30 +250,20 @@ function readInputImage() { } // Remove class "loader" from the element with "loader" id - //document.getElementById("loader").classList.remove("loader"); - document.getElementById("loader").style.display = "none"; - // enable input - document.getElementById("input").disabled = false; + toggleLoader(false); + // scroll to the bottom of the chat document.getElementById('messages').scrollIntoView(false) // set focus to the input document.getElementById("input").focus(); } - document.getElementById("key").addEventListener("submit", submitKey); document.getElementById("system_prompt").addEventListener("submit", submitSystemPrompt); document.getElementById("prompt").addEventListener("submit", submitPrompt); document.getElementById("input").focus(); document.getElementById("input_image").addEventListener("change", readInputImage); - storeKey = localStorage.getItem("key"); - if (storeKey) { - document.getElementById("apiKey").value = storeKey; - } else { - document.getElementById("apiKey").value = null; - } - storesystemPrompt = localStorage.getItem("system_prompt"); if (storesystemPrompt) { document.getElementById("systemPrompt").value = 
storesystemPrompt; diff --git a/core/http/static/general.css b/core/http/static/general.css index 63007cf5..7caa384a 100644 --- a/core/http/static/general.css +++ b/core/http/static/general.css @@ -10,18 +10,6 @@ body { .htmx-request .htmx-indicator{ opacity:1 } -/* Loader (https://cssloaders.github.io/) */ -.loader { - width: 12px; - height: 12px; - border-radius: 50%; - display: block; - margin:15px auto; - position: relative; - color: #FFF; - box-sizing: border-box; - animation: animloader 2s linear infinite; -} @keyframes animloader { 0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; } diff --git a/core/http/static/image.js b/core/http/static/image.js index 079c9dc0..0b85ad61 100644 --- a/core/http/static/image.js +++ b/core/http/static/image.js @@ -1,48 +1,11 @@ -/* - -https://github.com/david-haerer/chatapi - -MIT License - -Copyright (c) 2023 David Härer -Copyright (c) 2024 Ettore Di Giacinto - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- -*/ -function submitKey(event) { - event.preventDefault(); - localStorage.setItem("key", document.getElementById("apiKey").value); - document.getElementById("apiKey").blur(); - } - - function genImage(event) { event.preventDefault(); const input = document.getElementById("input").value; - const key = localStorage.getItem("key"); - - promptDallE(key, input); + promptDallE(input); } -async function promptDallE(key, input) { +async function promptDallE(input) { document.getElementById("loader").style.display = "block"; document.getElementById("input").value = ""; document.getElementById("input").disabled = true; @@ -51,7 +14,6 @@ async function promptDallE(key, input) { const response = await fetch("v1/images/generations", { method: "POST", headers: { - Authorization: `Bearer ${key}`, "Content-Type": "application/json", }, body: JSON.stringify({ @@ -84,13 +46,6 @@ async function promptDallE(key, input) { document.getElementById("input").focus(); } -document.getElementById("key").addEventListener("submit", submitKey); document.getElementById("input").focus(); document.getElementById("genimage").addEventListener("submit", genImage); document.getElementById("loader").style.display = "none"; - -const storeKey = localStorage.getItem("key"); -if (storeKey) { - document.getElementById("apiKey").value = storeKey; -} - diff --git a/core/http/static/talk.js b/core/http/static/talk.js index ecaa0f0b..56080816 100644 --- a/core/http/static/talk.js +++ b/core/http/static/talk.js @@ -9,10 +9,6 @@ let isRecording = false; let conversationHistory = []; let resetTimer; -function getApiKey() { - return document.getElementById('apiKey').value; -} - function getModel() { return document.getElementById('modelSelect').value; } @@ -99,34 +95,13 @@ function stopRecording() { }; } -function submitKey(event) { - event.preventDefault(); - localStorage.setItem("key", document.getElementById("apiKey").value); - document.getElementById("apiKey").blur(); -} - -document.getElementById("key").addEventListener("submit", submitKey); - - -storeKey = localStorage.getItem("key"); -if (storeKey) { - document.getElementById("apiKey").value = storeKey; -} else { - document.getElementById("apiKey").value = null; -} - - async function sendAudioToWhisper(audioBlob) { const formData = new FormData(); formData.append('file', audioBlob); formData.append('model', getWhisperModel()); - API_KEY = localStorage.getItem("key"); const response = await fetch('v1/audio/transcriptions', { method: 'POST', - headers: { - 'Authorization': `Bearer ${API_KEY}` - }, body: formData }); @@ -137,14 +112,9 @@ async function sendAudioToWhisper(audioBlob) { async function sendTextToChatGPT(text) { conversationHistory.push({ role: "user", content: text }); - API_KEY = localStorage.getItem("key"); const response = await fetch('v1/chat/completions', { method: 'POST', - headers: { - 'Authorization': `Bearer ${API_KEY}`, - 'Content-Type': 'application/json' - }, body: JSON.stringify({ model: getModel(), messages: conversationHistory @@ -161,13 +131,10 @@ async function sendTextToChatGPT(text) { } async function getTextToSpeechAudio(text) { - API_KEY = localStorage.getItem("key"); - const response = await fetch('v1/audio/speech', { method: 'POST', headers: { - 'Authorization': `Bearer ${API_KEY}`, 'Content-Type': 'application/json' }, body: JSON.stringify({ diff --git a/core/http/static/tts.js b/core/http/static/tts.js index daead3a8..ab53c8f0 100644 --- a/core/http/static/tts.js +++ b/core/http/static/tts.js @@ -1,64 +1,204 @@ -function submitKey(event) { - 
event.preventDefault(); - localStorage.setItem("key", document.getElementById("apiKey").value); - document.getElementById("apiKey").blur(); - } - +// Initialize Alpine store for API key management +document.addEventListener('alpine:init', () => { + Alpine.store('chat', { }); +}); function genAudio(event) { event.preventDefault(); const input = document.getElementById("input").value; - const key = localStorage.getItem("key"); - tts(key, input); -} - -async function tts(key, input) { - document.getElementById("loader").style.display = "block"; - document.getElementById("input").value = ""; - document.getElementById("input").disabled = true; - - const model = document.getElementById("tts-model").value; - const response = await fetch("tts", { - method: "POST", - headers: { - Authorization: `Bearer ${key}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: model, - input: input, - }), - }); - if (!response.ok) { - const jsonData = await response.json(); // Now safely parse JSON - var div = document.getElementById('result'); - div.innerHTML = '
-            Error: ' + jsonData.error.message + '
'; + if (!input.trim()) { + showNotification('error', 'Please enter text to convert to speech'); return; } - var div = document.getElementById('result'); // Get the div by its ID - var link=document.createElement('a'); - link.className = "m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"; - link.innerHTML = " Download result"; - const blob = await response.blob(); - link.href=window.URL.createObjectURL(blob); + tts(input); +} - div.innerHTML = ''; // Clear the existing content of the div - div.appendChild(link); // Add the new img element to the div - console.log(link) - document.getElementById("loader").style.display = "none"; - document.getElementById("input").disabled = false; +function showNotification(type, message) { + // Remove any existing notification + const existingNotification = document.getElementById('notification'); + if (existingNotification) { + existingNotification.remove(); + } + + // Create new notification + const notification = document.createElement('div'); + notification.id = 'notification'; + notification.classList.add( + 'fixed', 'top-24', 'right-4', 'z-50', 'p-4', 'rounded-lg', 'shadow-lg', + 'transform', 'transition-all', 'duration-300', 'ease-in-out', 'translate-y-0', + 'flex', 'items-center', 'gap-2' + ); + + // Style based on notification type + if (type === 'error') { + notification.classList.add('bg-red-900/90', 'border', 'border-red-700', 'text-red-200'); + notification.innerHTML = '' + message; + } else if (type === 'warning') { + notification.classList.add('bg-yellow-900/90', 'border', 'border-yellow-700', 'text-yellow-200'); + notification.innerHTML = '' + message; + } else if (type === 'success') { + notification.classList.add('bg-green-900/90', 'border', 'border-green-700', 'text-green-200'); + notification.innerHTML = '' + message; + } else { + notification.classList.add('bg-blue-900/90', 'border', 'border-blue-700', 'text-blue-200'); + notification.innerHTML = '' + message; + } + + // Add close button + const closeBtn = document.createElement('button'); + closeBtn.innerHTML = ''; + closeBtn.classList.add('ml-auto', 'text-gray-400', 'hover:text-white', 'transition-colors'); + closeBtn.onclick = () => { + notification.classList.add('opacity-0', 'translate-y-[-20px]'); + setTimeout(() => notification.remove(), 300); + }; + notification.appendChild(closeBtn); + + // Add to DOM + document.body.appendChild(notification); + + // Animate in + setTimeout(() => { + notification.classList.add('opacity-0', 'translate-y-[-20px]'); + notification.offsetHeight; // Force reflow + notification.classList.remove('opacity-0', 'translate-y-[-20px]'); + }, 10); + + // Auto dismiss after 5 seconds + setTimeout(() => { + if (document.getElementById('notification')) { + notification.classList.add('opacity-0', 'translate-y-[-20px]'); + setTimeout(() => notification.remove(), 300); + } + }, 5000); +} + +async function tts(input) { + // Show loader and prepare UI + const loader = document.getElementById("loader"); + const inputField = document.getElementById("input"); + const resultDiv = document.getElementById("result"); + + loader.style.display = "block"; + 
inputField.value = ""; + inputField.disabled = true; + resultDiv.innerHTML = '
Processing your request...
'; + + // Get the model and make API request + const model = document.getElementById("tts-model").value; + try { + const response = await fetch("tts", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + input: input, + }), + }); + + if (!response.ok) { + const jsonData = await response.json(); + resultDiv.innerHTML = ` +
+                ${jsonData.error.message || 'An error occurred'}
+ `; + showNotification('error', 'Failed to generate audio'); + return; + } + + // Handle successful response + const blob = await response.blob(); + const audioUrl = window.URL.createObjectURL(blob); + + // Create audio player + const audioPlayer = document.createElement('div'); + audioPlayer.className = 'flex flex-col items-center space-y-4 w-full'; + + // Create audio element with styled controls + const audio = document.createElement('audio'); + audio.controls = true; + audio.src = audioUrl; + audio.className = 'w-full my-4'; + audioPlayer.appendChild(audio); + + // Create action buttons container + const actionButtons = document.createElement('div'); + actionButtons.className = 'flex flex-wrap justify-center gap-3'; + + // Download button + const downloadLink = document.createElement('a'); + downloadLink.href = audioUrl; + downloadLink.download = `tts-${model}-${new Date().toISOString().slice(0, 10)}.mp3`; + downloadLink.className = 'group flex items-center bg-blue-600 hover:bg-blue-700 text-white py-2 px-4 rounded-lg transition duration-300 ease-in-out transform hover:scale-105 hover:shadow-lg'; + downloadLink.innerHTML = ` + + Download + + `; + actionButtons.appendChild(downloadLink); + + // Replay button + const replayButton = document.createElement('button'); + replayButton.className = 'group flex items-center bg-purple-600 hover:bg-purple-700 text-white py-2 px-4 rounded-lg transition duration-300 ease-in-out transform hover:scale-105 hover:shadow-lg'; + replayButton.innerHTML = ` + + Replay + `; + replayButton.onclick = () => audio.play(); + actionButtons.appendChild(replayButton); + + // Add text display + const textDisplay = document.createElement('div'); + textDisplay.className = 'mt-4 p-4 bg-gray-800/50 border border-gray-700/50 rounded-lg text-gray-300 text-center italic'; + textDisplay.textContent = `"${input}"`; + + // Add all elements to result div + audioPlayer.appendChild(actionButtons); + resultDiv.innerHTML = ''; + resultDiv.appendChild(audioPlayer); + resultDiv.appendChild(textDisplay); + + // Play audio automatically + audio.play(); + + // Show success notification + showNotification('success', 'Audio generated successfully'); + + } catch (error) { + console.error('Error generating audio:', error); + resultDiv.innerHTML = ` +
+                Network error: Failed to connect to the server
+ `; + showNotification('error', 'Network error occurred'); + } finally { + // Reset UI state + loader.style.display = "none"; + inputField.disabled = false; + inputField.focus(); + } +} + +// Set up event listeners when DOM is loaded +document.addEventListener('DOMContentLoaded', () => { document.getElementById("input").focus(); -} - -document.getElementById("key").addEventListener("submit", submitKey); -document.getElementById("input").focus(); -document.getElementById("tts").addEventListener("submit", genAudio); -document.getElementById("loader").style.display = "none"; - -const storeKey = localStorage.getItem("key"); -if (storeKey) { - document.getElementById("apiKey").value = storeKey; -} - + document.getElementById("tts").addEventListener("submit", genAudio); + document.getElementById("loader").style.display = "none"; + + // Add basic keyboard shortcuts + document.addEventListener('keydown', (e) => { + // Submit on Ctrl+Enter + if (e.key === 'Enter' && e.ctrlKey && document.activeElement.id === 'input') { + e.preventDefault(); + document.getElementById("tts").dispatchEvent(new Event('submit')); + } + }); +}); \ No newline at end of file diff --git a/core/http/views/404.html b/core/http/views/404.html index 2f5a4386..a57a3702 100644 --- a/core/http/views/404.html +++ b/core/http/views/404.html @@ -1,28 +1,51 @@ - {{template "views/partials/head" .}} - +
+    {{template "views/partials/navbar" .}}
-    Welcome to your LocalAI instance!
- + + - + \ No newline at end of file diff --git a/core/http/views/chat.html b/core/http/views/chat.html index b0f11281..66e9b1da 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -4,7 +4,7 @@ Part of this page is based on the OpenAI Chatbot example by David Härer: https://github.com/david-haerer/chatapi MIT License Copyright (c) 2023 David Härer - Copyright (c) 2024 Ettore Di Giacinto + Copyright (c) 2024-2025 Ettore Di Giacinto Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,145 +29,355 @@ SOFTWARE. {{template "views/partials/head" .}} - - -
+    {{ $allGalleryConfigs:=.GalleryConfig }}
+    {{ $model:=.Model}}
+    {{template "views/partials/navbar" .}}
+    {{ if $model }}
+    {{ $galleryConfig:= index $allGalleryConfigs $model}}
+    {{ if $galleryConfig }}
+    {{ if $galleryConfig.Icon }}{{end}}
+    {{ end }}
+    {{ end }}
+    Chat {{ if .Model }} with {{.Model}} {{ end }}
-    Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
-    For models that support images, you can upload an image by clicking the paperclip icon.