Compare commits

master..v2.4.0

No commits in common. "master" and "v2.4.0" have entirely different histories.

988 changed files with 58477 additions and 329597 deletions

View file

@@ -1,17 +0,0 @@
#!/bin/bash
cd /workspace
# Get the files into the volume without a bind mount
if [ ! -d ".git" ]; then
git clone https://github.com/mudler/LocalAI.git .
else
git fetch
fi
echo "Standard Post-Create script completed."
if [ -f "/devcontainer-customization/postcreate.sh" ]; then
echo "Launching customization postcreate.sh"
bash "/devcontainer-customization/postcreate.sh"
fi

View file

@@ -1,16 +0,0 @@
#!/bin/bash
cd /workspace
# Grab the pre-stashed backend assets to avoid build issues
cp -r /build/backend-assets /workspace/backend-assets
# Ensures generated source files are present upon load
make prepare
echo "Standard Post-Start script completed."
if [ -f "/devcontainer-customization/poststart.sh" ]; then
echo "Launching customization poststart.sh"
bash "/devcontainer-customization/poststart.sh"
fi

View file

@@ -1,55 +0,0 @@
#!/bin/bash
# This file contains some really simple functions that are useful when building up customization scripts.
# Checks if the git config has a user registered - and sets it up if not.
#
# Param 1: name
# Param 2: email
#
config_user() {
echo "Configuring git for $1 <$2>"
local gcn=$(git config --global user.name)
if [ -z "${gcn}" ]; then
echo "Setting up git user / remote"
git config --global user.name "$1"
git config --global user.email "$2"
fi
}
# Checks if the git remote is configured - and sets it up if not. Fetches either way.
#
# Param 1: remote name
# Param 2: remote url
#
config_remote() {
echo "Adding git remote and fetching $2 as $1"
local gr=$(git remote -v | grep $1)
if [ -z "${gr}" ]; then
git remote add $1 $2
fi
git fetch $1
}
# Setup special .ssh files
# Prints out lines of text to make things pretty
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
setup_ssh() {
echo "starting ~/.ssh directory setup..."
mkdir -p "${HOME}/.ssh"
chmod 0700 "${HOME}/.ssh"
echo "-----"
local files=("$@")
for file in "${files[@]}" ; do
local cfile="/devcontainer-customization/${file}"
local hfile="${HOME}/.ssh/${file}"
if [ ! -f "${hfile}" ]; then
echo "copying \"${file}\""
cp "${cfile}" "${hfile}"
chmod 600 "${hfile}"
fi
done
echo "~/.ssh directory setup complete!"
}

View file

@@ -1,25 +0,0 @@
Place any additional resources your environment requires in this directory.
Script hooks are currently called for:
`postcreate.sh` and `poststart.sh`
If files with those names exist here, they will be called at the end of the normal script.
This is a good place to set things like `git config --global user.name` - and to handle any other files that are mounted via this directory.
To assist in doing so, `source /.devcontainer-scripts/utils.sh` provides utility functions that may be useful - for example:
```
#!/bin/bash
source "/.devcontainer-scripts/utils.sh"
sshfiles=("config" "key.pub")
setup_ssh "${sshfiles[@]}"
config_user "YOUR NAME" "YOUR EMAIL"
config_remote "REMOTE NAME" "REMOTE URL"
```
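
For instance, the snippet above can be saved as one of the two hooks; a minimal sketch, assuming the host-side path `.devcontainer/customization/` that the dev container compose file later bind-mounts to `/devcontainer-customization` (names and emails are placeholders):
```
# Hypothetical example: drop a poststart hook into the mounted customization directory
cat > .devcontainer/customization/poststart.sh <<'EOF'
#!/bin/bash
source "/.devcontainer-scripts/utils.sh"
config_user "YOUR NAME" "YOUR EMAIL"
EOF
chmod +x .devcontainer/customization/poststart.sh
```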

View file

@@ -1,24 +0,0 @@
{
  "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
  "name": "LocalAI",
  "workspaceFolder": "/workspace",
  "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
  "service": "api",
  "shutdownAction": "stopCompose",
  "customizations": {
    "vscode": {
      "extensions": [
        "golang.go",
        "ms-vscode.makefile-tools",
        "ms-azuretools.vscode-docker",
        "ms-python.python",
        "ms-python.debugpy",
        "wayou.vscode-todo-highlight",
        "waderyan.gitblame"
      ]
    }
  },
  "forwardPorts": [8080, 3000],
  "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
  "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
}
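
For reference, a container defined by this file can also be brought up without VS Code; a minimal sketch using the Dev Containers CLI, assuming it is installed via npm and run from the repository root:
```
# Sketch: start the compose-based dev container headlessly (assumes @devcontainers/cli)
npm install -g @devcontainers/cli
# Builds the "api" service and runs the post-create / post-start hooks referenced above
devcontainer up --workspace-folder .
# Run a command inside the running container
devcontainer exec --workspace-folder . make prepare
```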

View file

@@ -1,48 +0,0 @@
services:
  api:
    build:
      context: ..
      dockerfile: Dockerfile
      target: devcontainer
      args:
        - FFMPEG=true
        - IMAGE_TYPE=extras
        - GO_TAGS=p2p tts
    env_file:
      - ../.env
    ports:
      - 8080:8080
    volumes:
      - localai_workspace:/workspace
      - ../models:/host-models
      - ./customization:/devcontainer-customization
    command: /bin/sh -c "while sleep 1000; do :; done"
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp:unconfined
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    ports:
      - 9090:9090
    restart: unless-stopped
    volumes:
      - ./prometheus:/etc/prometheus
      - prom_data:/prometheus
  grafana:
    image: grafana/grafana
    container_name: grafana
    ports:
      - 3000:3000
    restart: unless-stopped
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=grafana
    volumes:
      - ./grafana:/etc/grafana/provisioning/datasources
volumes:
  prom_data:
  localai_workspace:
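
The same file can be driven directly with Docker Compose when debugging the monitoring stack outside the dev container flow; a minimal sketch, assuming it is run from the `.devcontainer/` directory next to the `customization/`, `prometheus/` and `grafana/` folders it references:
```
# Sketch: bring up the API plus the Prometheus/Grafana services defined above
docker compose -f docker-compose-devcontainer.yml up -d --build
docker compose -f docker-compose-devcontainer.yml ps
# API on :8080, Prometheus on :9090, Grafana on :3000 (admin / grafana per the environment above)
```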

View file

@@ -1,10 +0,0 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    url: http://prometheus:9090
    isDefault: true
    access: proxy
    editable: true

View file

@@ -1,21 +0,0 @@
global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s
alerting:
  alertmanagers:
    - static_configs:
        - targets: []
      scheme: http
      timeout: 10s
      api_version: v1
scrape_configs:
  - job_name: prometheus
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    static_configs:
      - targets:
          - localhost:9090
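
With the stack above running, the scrape configuration and the provisioned datasource can be checked over HTTP; a small sketch assuming the ports and credentials published by the compose file above (and that `jq` is installed):
```
# Health of the Prometheus self-scrape target defined above
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[].health'
# Grafana datasources provisioned from the YAML above
curl -s -u admin:grafana http://localhost:3000/api/datasources | jq '.[].name'
```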

View file

@@ -1,17 +1,5 @@
.idea .idea
.github
.vscode
.devcontainer
models models
examples/chatbot-ui/models examples/chatbot-ui/models
examples/rwkv/models examples/rwkv/models
examples/**/models examples/**/models
Dockerfile*
__pycache__
# SonarQube
.scannerwork
# backend virtual environments
**/venv
backend/python/**/source

View file

@@ -1,31 +0,0 @@
root = true
[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.go]
indent_style = tab
[Makefile]
indent_style = tab
[*.proto]
indent_size = 2
[*.py]
indent_size = 4
[*.js]
indent_size = 2
[*.yaml]
indent_size = 2
[*.md]
trim_trailing_whitespace = false

.env (64 changes)
View file

@@ -1,36 +1,33 @@
## Set number of threads. ## Set number of threads.
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably. ## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
# LOCALAI_THREADS=14 # THREADS=14
## Specify a different bind address (defaults to ":8080") ## Specify a different bind address (defaults to ":8080")
# LOCALAI_ADDRESS=127.0.0.1:8080 # ADDRESS=127.0.0.1:8080
## Default models context size ## Default models context size
# LOCALAI_CONTEXT_SIZE=512 # CONTEXT_SIZE=512
# #
## Define galleries. ## Define galleries.
## models to install will be visible in `/models/available` ## models to install will be visible in `/models/available`
# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}] # GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
## CORS settings ## CORS settings
# LOCALAI_CORS=true # CORS=true
# LOCALAI_CORS_ALLOW_ORIGINS=* # CORS_ALLOW_ORIGINS=*
## Default path for models ## Default path for models
# #
# LOCALAI_MODELS_PATH=/models MODELS_PATH=/models
## Enable debug mode ## Enable debug mode
# LOCALAI_LOG_LEVEL=debug # DEBUG=true
## Disables COMPEL (Diffusers) ## Disables COMPEL (Diffusers)
# COMPEL=0 # COMPEL=0
## Enable/Disable single backend (useful if only one GPU is available) ## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true # SINGLE_ACTIVE_BACKEND=true
# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
## Specify a build type. Available: cublas, openblas, clblas. ## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit. ## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
@@ -41,21 +38,21 @@
## Uncomment and set to true to enable rebuilding from source ## Uncomment and set to true to enable rebuilding from source
# REBUILD=true # REBUILD=true
## Enable go tags, available: p2p, tts ## Enable go tags, available: stablediffusion, tts
## p2p: enable distributed inferencing ## stablediffusion: image generation with stablediffusion
## tts: enables text-to-speech with go-piper ## tts: enables text-to-speech with go-piper
## (requires REBUILD=true) ## (requires REBUILD=true)
# #
# GO_TAGS=p2p # GO_TAGS=stablediffusion
## Path where to store generated images ## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images # IMAGE_PATH=/tmp
## Specify a default upload limit in MB (whisper) ## Specify a default upload limit in MB (whisper)
# LOCALAI_UPLOAD_LIMIT=15 # UPLOAD_LIMIT
## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/) ## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py # EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
### Advanced settings ### ### Advanced settings ###
### Those are not really used by LocalAI, but from components in the stack ### ### Those are not really used by LocalAI, but from components in the stack ###
@@ -74,36 +71,19 @@
### Define the number of parallel LLAMA.cpp workers (Defaults to 1) ### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1 # LLAMACPP_PARALLEL=1
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""
### Enable to run parallel requests ### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true # PARALLEL_REQUESTS=true
# Enable to allow p2p mode
# LOCALAI_P2P=true
# Enable to use federated mode
# LOCALAI_FEDERATED=true
# Enable to start federation server
# FEDERATED_SERVER=true
# Define to use federation token
# TOKEN=""
### Watchdog settings ### Watchdog settings
### ###
# Enables watchdog to kill backends that are inactive for too much time # Enables watchdog to kill backends that are inactive for too much time
# LOCALAI_WATCHDOG_IDLE=true # WATCHDOG_IDLE=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
# #
# Enables watchdog to kill backends that are busy for too much time # Enables watchdog to kill backends that are busy for too much time
# LOCALAI_WATCHDOG_BUSY=true # WATCHDOG_BUSY=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
# WATCHDOG_IDLE_TIMEOUT=5m
# #
# Time in duration format (e.g. 1h30m) after which a backend is considered busy # Time in duration format (e.g. 1h30m) after which a backend is considered busy
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m # WATCHDOG_BUSY_TIMEOUT=5m
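
Putting the master-side variables above together, a minimal hypothetical `.env` might look like this (values are illustrative; the shipped file keeps everything commented out):
```
# Hypothetical minimal .env using the LOCALAI_* names shown on the left-hand side above
LOCALAI_THREADS=4
LOCALAI_ADDRESS=:8080
LOCALAI_MODELS_PATH=/models
LOCALAI_LOG_LEVEL=debug
LOCALAI_SINGLE_ACTIVE_BACKEND=true
```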

.gitattributes vendored (1 change)
View file

@@ -1,2 +1 @@
*.sh text eol=lf *.sh text eol=lf
backend/cpp/llama/*.hpp linguist-vendored

View file

@@ -2,7 +2,9 @@
name: Bug report name: Bug report
about: Create a report to help us improve about: Create a report to help us improve
title: '' title: ''
labels: bug, unconfirmed, up-for-grabs labels: bug
assignees: mudler
--- ---
<!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. --> <!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->

View file

@@ -2,7 +2,9 @@
name: Feature request name: Feature request
about: Suggest an idea for this project about: Suggest an idea for this project
title: '' title: ''
labels: enhancement, up-for-grabs labels: enhancement
assignees: mudler
--- ---
<!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. --> <!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->

.github/bump_deps.sh vendored (13 changes)
View file

@@ -6,17 +6,4 @@ VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH") LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
# Read $VAR from Makefile (only first match)
set +e
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
set -e
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/" sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
if [ -z "$CURRENT_COMMIT" ]; then
echo "Could not find $VAR in Makefile."
exit 0
fi
echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"

View file

@@ -2,6 +2,6 @@
set -xe set -xe
REPO=$1 REPO=$1
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name') LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json

View file

@@ -1,85 +0,0 @@
import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os

uri = sys.argv[1]
file_name = uri.split('/')[-1]

# Function to parse the URI and determine download method
def parse_uri(uri):
    if uri.startswith('huggingface://'):
        repo_id = uri.split('://')[1]
        return 'huggingface', repo_id.rsplit('/', 1)[0]
    elif 'huggingface.co' in uri:
        parts = uri.split('/resolve/')
        if len(parts) > 1:
            repo_path = parts[0].split('https://huggingface.co/')[-1]
            return 'huggingface', repo_path
    return 'direct', uri

def calculate_sha256(file_path):
    sha256_hash = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for byte_block in iter(lambda: f.read(4096), b''):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()

def manual_safety_check_hf(repo_id):
    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
    scan = scanResponse.json()
    # Check if 'hasUnsafeFile' exists in the response
    if 'hasUnsafeFile' in scan:
        if scan['hasUnsafeFile']:
            return scan
        else:
            return None
    else:
        return None

download_type, repo_id_or_url = parse_uri(uri)

new_checksum = None
file_path = None

# Decide download method based on URI type
if download_type == 'huggingface':
    # Check if the repo is flagged as dangerous by HF
    hazard = manual_safety_check_hf(repo_id_or_url)
    if hazard is not None:
        print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', file=sys.stderr)
        sys.exit(5)
    # Use HF API to pull sha
    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
        try:
            new_checksum = file.lfs.sha256
            break
        except Exception as e:
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
    if new_checksum is None:
        try:
            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
        except Exception as e:
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
else:
    response = requests.get(repo_id_or_url)
    if response.status_code == 200:
        with open(file_name, 'wb') as f:
            f.write(response.content)
        file_path = file_name
    elif response.status_code == 404:
        print(f'File not found: {response.status_code}', file=sys.stderr)
        sys.exit(2)
    else:
        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
        sys.exit(1)

if new_checksum is None:
    new_checksum = calculate_sha256(file_path)
    print(new_checksum)
    os.remove(file_path)
else:
    print(new_checksum)
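
The script takes a single model URI argument and prints the file's SHA256 on stdout, exiting 2 when the file cannot be fetched and 5 when Hugging Face flags the repo as unsafe; a hedged invocation with a placeholder URI:
```
# Hypothetical URI — substitute a real `uri:` value from gallery/index.yaml
python3 .github/check_and_update.py "huggingface://SOME-ORG/SOME-REPO/model.Q4_K_M.gguf"
```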

View file

@@ -1,63 +0,0 @@
#!/bin/bash
# This script needs yq and huggingface_hub to be installed
# to install huggingface_hub run: pip install huggingface_hub
# Path to the input YAML file
input_yaml=$1
# Function to download file and check checksum using Python
function check_and_update_checksum() {
model_name="$1"
file_name="$2"
uri="$3"
old_checksum="$4"
idx="$5"
# Download the file and calculate new checksum using Python
new_checksum=$(python3 ./.github/check_and_update.py $uri)
result=$?
if [[ $result -eq 5 ]]; then
echo "Contaminated entry detected, deleting entry for $model_name..."
yq eval -i "del([$idx])" "$input_yaml"
return
fi
if [[ "$new_checksum" == "" ]]; then
echo "Error calculating checksum for $file_name. Skipping..."
return
fi
echo "Checksum for $file_name: $new_checksum"
# Compare and update the YAML file if checksums do not match
if [[ $result -eq 2 ]]; then
echo "File not found, deleting entry for $file_name..."
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
elif [[ "$old_checksum" != "$new_checksum" ]]; then
echo "Checksum mismatch for $file_name. Updating..."
yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
elif [[ $result -ne 0 ]]; then
echo "Error downloading file $file_name. Skipping..."
else
echo "Checksum match for $file_name. No update needed."
fi
}
# Read the YAML and process each file
len=$(yq eval '. | length' "$input_yaml")
for ((i=0; i<$len; i++))
do
name=$(yq eval ".[$i].name" "$input_yaml")
files_len=$(yq eval ".[$i].files | length" "$input_yaml")
for ((j=0; j<$files_len; j++))
do
filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
done
done
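
The checksum workflow further down runs this script against the model gallery index; a minimal sketch of the same invocation, assuming `yq` v4 and `huggingface_hub` are installed as the header notes:
```
pip install huggingface_hub            # dependency noted in the script header
bash .github/checksum_checker.sh gallery/index.yaml
```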

View file

@@ -1,304 +0,0 @@
package main
import (
"fmt"
"html/template"
"io/ioutil"
"os"
"github.com/microcosm-cc/bluemonday"
"gopkg.in/yaml.v3"
)
var modelPageTemplate string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LocalAI models</title>
<link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
<script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
/>
<script
defer
src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
></script>
<script
defer
src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
></script>
<script
defer
src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
></script>
<script
defer
src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
></script>
<link href="/static/general.css" rel="stylesheet" />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
<link
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
rel="stylesheet" />
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
<script>
tailwind.config = {
darkMode: "class",
theme: {
fontFamily: {
sans: ["Roboto", "sans-serif"],
body: ["Roboto", "sans-serif"],
mono: ["ui-monospace", "monospace"],
},
},
corePlugins: {
preflight: false,
},
};
</script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
</head>
<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">
<nav class="bg-gray-800 shadow-lg">
<div class="container mx-auto px-4 py-4">
<div class="flex items-center justify-between">
<div class="flex items-center">
<a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
<a href="/" class="text-white text-xl font-bold">LocalAI</a>
</div>
<!-- Menu button for small screens -->
<div class="lg:hidden">
<button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
<i class="fas fa-bars fa-lg"></i>
</button>
</div>
<!-- Navigation links -->
<div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
</div>
</div>
<!-- Collapsible menu for small screens -->
<div class="hidden lg:hidden" id="mobile-menu">
<div class="pt-4 pb-3 border-t border-gray-700">
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
</div>
</div>
</div>
</nav>
<style>
.is-hidden {
display: none;
}
</style>
<div class="container mx-auto px-4 flex-grow">
<div class="models mt-12">
<h2 class="text-center text-3xl font-semibold text-gray-100">
LocalAI model gallery list </h2><br>
<h2 class="text-center text-3xl font-semibold text-gray-100">
🖼 Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
<i class="fas fa-circle-info pr-2"></i>
</a></h2>
<h3>
Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
You can install models with the CLI command <code>local-ai models install <model-name></code>, or by using the WebUI.
</h3>
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
id="searchbox" placeholder="Live search keyword..">
<div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
{{ range $_, $model := .Models }}
<div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
<div>
{{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
{{ if $model.Icon }}
{{ $icon = $model.Icon }}
{{ end }}
<div class="flex justify-center items-center">
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
</div>
<div class="p-6 text-surface dark:text-white">
<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
</div>
<div class="px-6 pt-4 pb-2">
<!-- Modal toggle -->
<button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
More info
</button>
<!-- Main modal -->
<div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
<div class="relative p-4 w-full max-w-2xl max-h-full">
<!-- Modal content -->
<div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
<!-- Modal header -->
<div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
<h3 class="text-xl font-semibold text-gray-900 dark:text-white">
{{ $model.Name}}
</h3>
<button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>
<span class="sr-only">Close modal</span>
</button>
</div>
<!-- Modal body -->
<div class="p-4 md:p-5 space-y-4">
<div class="flex justify-center items-center">
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
</div>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
{{ $model.Description }}
</p>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
To install the model with the CLI, run: <br>
<code> local-ai models install {{$model.Name}} </code> <br>
<hr>
See also <a href="https://localai.io/models/" target="_blank" >
Installation <i class="fas fa-circle-info pr-2"></i>
</a> to see how to install models with the REST API.
</p>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
<ul>
{{ range $_, $u := $model.URLs }}
<li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
{{ end }}
</ul>
</p>
</div>
<!-- Modal footer -->
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
<button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{{ end }}
</div>
</div>
</div>
<script>
var lazyLoadInstance = new LazyLoad({
// Your custom settings go here
});
let cards = document.querySelectorAll('.box')
function liveSearch() {
let search_query = document.getElementById("searchbox").value;
//Use innerText if all contents are visible
//Use textContent for including hidden elements
for (var i = 0; i < cards.length; i++) {
if(cards[i].textContent.toLowerCase()
.includes(search_query.toLowerCase())) {
cards[i].classList.remove("is-hidden");
} else {
cards[i].classList.add("is-hidden");
}
}
}
//A little delay
let typingTimer;
let typeInterval = 500;
let searchInput = document.getElementById('searchbox');
searchInput.addEventListener('keyup', () => {
clearTimeout(typingTimer);
typingTimer = setTimeout(liveSearch, typeInterval);
});
</script>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
</body>
</html>
`
type GalleryModel struct {
Name string `json:"name" yaml:"name"`
URLs []string `json:"urls" yaml:"urls"`
Icon string `json:"icon" yaml:"icon"`
Description string `json:"description" yaml:"description"`
}
func main() {
// read the YAML file which contains the models
f, err := ioutil.ReadFile(os.Args[1])
if err != nil {
fmt.Println("Error reading file:", err)
return
}
models := []*GalleryModel{}
err = yaml.Unmarshal(f, &models)
if err != nil {
// write to stderr
os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
return
}
// Ensure that all arbitrary text content is sanitized before display
for i, m := range models {
models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
}
// render the template
data := struct {
Models []*GalleryModel
AvailableModels int
}{
Models: models,
AvailableModels: len(models),
}
tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))
err = tmpl.Execute(os.Stdout, data)
if err != nil {
fmt.Println("Error executing template:", err)
return
}
}
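
The generator reads the gallery YAML passed as its first argument and writes the rendered HTML page to stdout; a hedged invocation sketch (both the source path and the output file name are assumptions, not confirmed by this diff):
```
# Assumed location of the generator shown above; adjust the path to wherever the file lives
go run ./.github/ci/modelslist.go gallery/index.yaml > models.html
```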

.github/dependabot.yml vendored (123 changes)
View file

@@ -1,123 +0,0 @@
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "gitsubmodule"
directory: "/"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "weekly"
ignore:
- dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
- package-ecosystem: "github-actions"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "docker"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/common/template"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/coqui"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/diffusers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama2"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/mamba"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/openvoice"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/sentencetransformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vllm"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/chainlit"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/functions"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain/langchainpy-localai-example"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain-chroma"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/streamlit-bot"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/k8sgpt"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/kubernetes"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/langchain"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/examples/semantic-todo"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/telegram-bot"
schedule:
interval: "weekly"

.github/labeler.yml vendored (33 changes)
View file

@@ -1,33 +0,0 @@
enhancement:
- head-branch: ['^feature', 'feature']
dependencies:
- any:
- changed-files:
- any-glob-to-any-file: 'Makefile'
- changed-files:
- any-glob-to-any-file: '*.mod'
- changed-files:
- any-glob-to-any-file: '*.sum'
kind/documentation:
- any:
- changed-files:
- any-glob-to-any-file: 'docs/*'
- changed-files:
- any-glob-to-any-file: '*.md'
area/ai-model:
- any:
- changed-files:
- any-glob-to-any-file: 'gallery/*'
examples:
- any:
- changed-files:
- any-glob-to-any-file: 'examples/*'
ci:
- any:
- changed-files:
- any-glob-to-any-file: '.github/*'

.github/release.yml vendored (13 changes)
View file

@@ -12,23 +12,10 @@ changelog:
- title: "Bug fixes :bug:" - title: "Bug fixes :bug:"
labels: labels:
- bug - bug
- regression
- title: "🖧 P2P area"
labels:
- area/p2p
- title: Exciting New Features 🎉 - title: Exciting New Features 🎉
labels: labels:
- Semver-Minor - Semver-Minor
- enhancement - enhancement
- ux
- roadmap
- title: 🧠 Models
labels:
- area/ai-model
- title: 📖 Documentation and examples
labels:
- kind/documentation
- examples
- title: 👒 Dependencies - title: 👒 Dependencies
labels: labels:
- dependencies - dependencies

View file

@@ -9,17 +9,32 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
include: include:
- repository: "ggml-org/llama.cpp" - repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION" variable: "CPPLLAMA_VERSION"
branch: "master" branch: "master"
- repository: "ggml-org/whisper.cpp" - repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION" variable: "WHISPER_CPP_VERSION"
branch: "master" branch: "master"
- repository: "PABannier/bark.cpp" - repository: "go-skynet/go-bert.cpp"
variable: "BARKCPP_VERSION" variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main" branch: "main"
- repository: "leejet/stable-diffusion.cpp" - repository: "nomic-ai/gpt4all"
variable: "STABLEDIFFUSION_GGML_VERSION" variable: "GPT4ALL_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION"
branch: "master" branch: "master"
- repository: "mudler/go-stable-diffusion" - repository: "mudler/go-stable-diffusion"
variable: "STABLEDIFFUSION_VERSION" variable: "STABLEDIFFUSION_VERSION"
@@ -31,30 +46,17 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Bump dependencies 🔧 - name: Bump dependencies 🔧
id: bump
run: | run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
{
echo 'message<<EOF'
cat "${{ matrix.variable }}_message.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
{
echo 'commit<<EOF'
cat "${{ matrix.variable }}_commit.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
rm -rfv ${{ matrix.variable }}_message.txt
rm -rfv ${{ matrix.variable }}_commit.txt
- name: Create Pull Request - name: Create Pull Request
uses: peter-evans/create-pull-request@v7 uses: peter-evans/create-pull-request@v5
with: with:
token: ${{ secrets.UPDATE_BOT_TOKEN }} token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}' commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`' title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}" branch: "update/${{ matrix.variable }}"
body: ${{ steps.bump.outputs.message }} body: Bump of ${{ matrix.repository }} version
signoff: true signoff: true

View file

@@ -17,12 +17,12 @@ jobs:
run: | run: |
bash .github/bump_docs.sh ${{ matrix.repository }} bash .github/bump_docs.sh ${{ matrix.repository }}
- name: Create Pull Request - name: Create Pull Request
uses: peter-evans/create-pull-request@v7 uses: peter-evans/create-pull-request@v5
with: with:
token: ${{ secrets.UPDATE_BOT_TOKEN }} token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}' commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}' title: ':arrow_up: Update docs version ${{ matrix.repository }}'
branch: "update/docs" branch: "update/docs"
body: Bump of ${{ matrix.repository }} version inside docs body: Bump of ${{ matrix.repository }} version inside docs
signoff: true signoff: true

View file

@@ -1,47 +0,0 @@
name: Check if checksums are up-to-date
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
checksum_check:
runs-on: arc-runner-set
steps:
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- uses: actions/checkout@v4
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y pip wget
sudo pip install --upgrade pip
pip install huggingface_hub
- name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.3.1
with:
version: 'v4.44.2'
download-compressed: true
force: true
- name: Checksum checker 🔧
run: |
export HF_HOME=/hf_cache
sudo mkdir /hf_cache
sudo chmod 777 /hf_cache
bash .github/checksum_checker.sh gallery/index.yaml
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'chore(model-gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View file

@@ -1,43 +0,0 @@
name: Dependabot auto-merge
on:
- pull_request_target
permissions:
contents: write
pull-requests: write
packages: read
jobs:
dependabot:
runs-on: ubuntu-latest
if: ${{ github.actor == 'dependabot[bot]' }}
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2.4.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true
- name: Checkout repository
uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |
gh pr checkout "$PR_URL"
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
then
gh pr review --approve "$PR_URL"
else
echo "PR already approved.";
fi
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
- name: Enable auto-merge for Dependabot PRs
if: ${{ contains(github.event.pull_request.title, 'bump')}}
run: gh pr merge --auto --squash "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

View file

@@ -1,64 +0,0 @@
name: Explorer deployment
on:
push:
branches:
- master
tags:
- 'v*'
concurrency:
group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
jobs:
build-linux:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
make protogen-go
- name: Build api
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.2.2
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
script: |
sudo rm -rf local-ai/ || true
- name: copy file via ssh
uses: appleboy/scp-action@v1.0.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
source: "local-ai"
overwrite: true
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.2.2
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
script: |
sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
sudo systemctl restart local-ai

View file

@@ -1,83 +0,0 @@
name: Comment PRs
on:
pull_request_target:
jobs:
comment-pr:
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: "${{ github.event.pull_request.merge_commit_sha }}"
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
base_branch: ${{ github.event.pull_request.base.sha }}
- name: Show diff
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
run: |
cat $DIFF
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: mshick/add-pr-comment@v2
if: always()
with:
repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
message: ${{ steps.summarize.outputs.message }}
message-failure: |
Uh oh! Could not analyze this PR, maybe it's too big?

View file

@@ -1,95 +0,0 @@
name: 'generate and publish GRPC docker caches'
on:
workflow_dispatch:
schedule:
# daily at midnight
- cron: '0 0 * * *'
concurrency:
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
generate_caches:
strategy:
matrix:
include:
- grpc-base-image: ubuntu:22.04
runs-on: 'arc-runner-set'
platforms: 'linux/amd64,linux/arm64'
runs-on: ${{matrix.runs-on}}
steps:
- name: Release space from worker
if: matrix.runs-on == 'ubuntu-latest'
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v4
- name: Cache GRPC
uses: docker/build-push-action@v6
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
build-args: |
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
context: .
file: ./Dockerfile
cache-to: type=gha,ignore-error=true
cache-from: type=gha
target: grpc
platforms: ${{ matrix.platforms }}
push: false

View file

@@ -1,59 +0,0 @@
name: 'generate and publish intel docker caches'
on:
workflow_dispatch:
push:
branches:
- master
concurrency:
group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
generate_caches:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Login to quay
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v4
- name: Cache Intel images
uses: docker/build-push-action@v6
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=${{ matrix.base-image }}
context: .
file: ./Dockerfile
tags: quay.io/go-skynet/intel-oneapi-base:latest
push: true
target: intel
platforms: ${{ matrix.platforms }}

View file

@@ -1,150 +0,0 @@
---
name: 'build container images tests'
on:
pull_request:
concurrency:
group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
extras-image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
fail-fast: false
matrix:
include:
# This is basically covered by the AIO test
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg'
# ffmpeg: 'true'
# image-type: 'extras'
# runs-on: 'arc-runner-set'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
# core-image-build:
# uses: ./.github/workflows/image_build.yml
# with:
# tag-latest: ${{ matrix.tag-latest }}
# tag-suffix: ${{ matrix.tag-suffix }}
# ffmpeg: ${{ matrix.ffmpeg }}
# image-type: ${{ matrix.image-type }}
# build-type: ${{ matrix.build-type }}
# cuda-major-version: ${{ matrix.cuda-major-version }}
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
# platforms: ${{ matrix.platforms }}
# runs-on: ${{ matrix.runs-on }}
# base-image: ${{ matrix.base-image }}
# grpc-base-image: ${{ matrix.grpc-base-image }}
# makeflags: ${{ matrix.makeflags }}
# secrets:
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
# strategy:
# matrix:
# include:
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'sycl_f16'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'vulkan'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-vulkan-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"

View file

@@ -2,6 +2,7 @@
name: 'build container images' name: 'build container images'
on: on:
pull_request:
push: push:
branches: branches:
- master - master
@@ -13,7 +14,7 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
hipblas-jobs: extras-image-build:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
with: with:
tag-latest: ${{ matrix.tag-latest }} tag-latest: ${{ matrix.tag-latest }}
@@ -25,12 +26,6 @@ jobs:
cuda-minor-version: ${{ matrix.cuda-minor-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }} platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }} runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets: secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -39,140 +34,68 @@ jobs:
strategy: strategy:
# Pushing with all jobs in parallel # Pushing with all jobs in parallel
# eats the bandwidth of all the nodes # eats the bandwidth of all the nodes
max-parallel: 2 max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix: matrix:
include: include:
- build-type: 'hipblas' - build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-hipblas-extras' tag-suffix: ''
ffmpeg: 'true' ffmpeg: ''
image-type: 'extras' image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas-extras'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" - build-type: ''
- build-type: 'hipblas'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-hipblas' tag-suffix: '-ffmpeg'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-hipblas'
self-hosted-jobs:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
matrix:
include:
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
cuda-minor-version: "7" cuda-minor-version: "7"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda11-extras' tag-suffix: '-cublas-cuda11'
ffmpeg: 'true' ffmpeg: ''
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
latest-image: 'latest-gpu-nvidia-cuda-11-extras'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12-extras' tag-suffix: '-cublas-cuda12'
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" - build-type: 'cublas'
aio: "-aio-gpu-nvidia-cuda-12" cuda-major-version: "12"
latest-image: 'latest-gpu-nvidia-cuda-12-extras' cuda-minor-version: "1"
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" tag-suffix: '-cublas-cuda12-ffmpeg'
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-extras'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f16" - build-type: ''
latest-image: 'latest-gpu-intel-f16-extras' #platforms: 'linux/amd64,linux/arm64'
latest-image-aio: 'latest-aio-gpu-intel-f16'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" tag-suffix: ''
grpc-base-image: "ubuntu:22.04" ffmpeg: ''
tag-suffix: '-sycl-f32-extras'
ffmpeg: 'true'
image-type: 'extras' image-type: 'extras'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f32"
latest-image: 'latest-gpu-intel-f32-extras'
latest-image-aio: 'latest-aio-gpu-intel-f32'
makeflags: "--jobs=3 --output-sync=target"
# Core images
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f16'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f32'
core-image-build: core-image-build:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
with: with:
@@ -185,109 +108,54 @@ jobs:
cuda-minor-version: ${{ matrix.cuda-minor-version }} cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }} platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }} runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets: secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy: strategy:
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix: matrix:
include: include:
- build-type: '' - build-type: ''
platforms: 'linux/amd64,linux/arm64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'false'
tag-suffix: '' tag-suffix: '-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest'
runs-on: 'arc-runner-set'
aio: "-aio-cpu"
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
cuda-minor-version: "7" cuda-minor-version: "7"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda11' tag-suffix: '-cublas-cuda11-core'
ffmpeg: 'true' ffmpeg: ''
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
latest-image: 'latest-gpu-nvidia-cuda-12'
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12' tag-suffix: '-cublas-cuda12-core'
ffmpeg: 'true' ffmpeg: ''
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04" - build-type: 'cublas'
skip-drivers: 'false' cuda-major-version: "11"
makeflags: "--jobs=4 --output-sync=target" cuda-minor-version: "7"
latest-image: 'latest-gpu-nvidia-cuda-12'
- build-type: 'vulkan'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-vulkan' tag-suffix: '-cublas-cuda11-ffmpeg-core'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-vulkan'
gh-runner:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "1"
platforms: 'linux/arm64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-nvidia-l4t-arm64' tag-suffix: '-cublas-cuda12-ffmpeg-core'
latest-image: 'latest-nvidia-l4t-arm64'
ffmpeg: 'true' ffmpeg: 'true'
image-type: 'core' image-type: 'core'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" runs-on: 'ubuntu-latest'
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'true'

View file

@@ -4,26 +4,17 @@ name: 'build container images (reusable)'
on: on:
workflow_call: workflow_call:
inputs: inputs:
base-image:
description: 'Base image'
required: true
type: string
grpc-base-image:
description: 'GRPC Base image, must be a compatible image with base-image'
required: false
default: ''
type: string
build-type: build-type:
description: 'Build type' description: 'Build type'
default: '' default: ''
type: string type: string
cuda-major-version: cuda-major-version:
description: 'CUDA major version' description: 'CUDA major version'
default: "12" default: "11"
type: string type: string
cuda-minor-version: cuda-minor-version:
description: 'CUDA minor version' description: 'CUDA minor version'
default: "4" default: "7"
type: string type: string
platforms: platforms:
description: 'Platforms' description: 'Platforms'
@@ -33,14 +24,6 @@ on:
description: 'Tag latest' description: 'Tag latest'
default: '' default: ''
type: string type: string
latest-image:
description: 'Tag latest'
default: ''
type: string
latest-image-aio:
description: 'Tag latest'
default: ''
type: string
tag-suffix: tag-suffix:
description: 'Tag suffix' description: 'Tag suffix'
default: '' default: ''
@@ -49,10 +32,6 @@ on:
description: 'FFMPEG' description: 'FFMPEG'
default: '' default: ''
type: string type: string
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
type: string
image-type: image-type:
description: 'Image type' description: 'Image type'
default: '' default: ''
@@ -62,16 +41,6 @@ on:
required: true required: true
default: '' default: ''
type: string type: string
makeflags:
description: 'Make Flags'
required: false
default: '--jobs=4 --output-sync=target'
type: string
aio:
description: 'AIO Image Name'
required: false
default: ''
type: string
secrets: secrets:
dockerUsername: dockerUsername:
required: true required: true
@@ -95,52 +64,44 @@ jobs:
&& sudo apt-get install -y git && sudo apt-get install -y git
- name: Checkout - name: Checkout
uses: actions/checkout@v4 uses: actions/checkout@v4
# - name: Release space from worker
- name: Release space from worker # run: |
if: inputs.runs-on == 'ubuntu-latest' # echo "Listing top largest packages"
run: | # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
echo "Listing top largest packages" # head -n 30 <<< "${pkgs}"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) # echo
head -n 30 <<< "${pkgs}" # df -h
echo # echo
df -h # sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
echo # sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true # sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true # sudo rm -rf /usr/local/lib/android
sudo apt-get purge --auto-remove android-sdk-platform-tools || true # sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/local/lib/android # sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true # sudo apt-get remove -y '^mono-.*' || true
sudo rm -rf /usr/share/dotnet # sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '^mono-.*' || true # sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y '^ghc-.*' || true # sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true # sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y 'php.*' || true # sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true # sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^google-.*' || true # sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y azure-cli || true # sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true # sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y '^gfortran-.*' || true # sudo apt-get remove -y firefox || true
sudo apt-get remove -y microsoft-edge-stable || true # sudo apt-get remove -y powershell || true
sudo apt-get remove -y firefox || true # sudo apt-get remove -y r-base-core || true
sudo apt-get remove -y powershell || true # sudo apt-get autoremove -y
sudo apt-get remove -y r-base-core || true # sudo apt-get clean
sudo apt-get autoremove -y # echo
sudo apt-get clean # echo "Listing top largest packages"
echo # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
echo "Listing top largest packages" # head -n 30 <<< "${pkgs}"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) # echo
head -n 30 <<< "${pkgs}" # sudo rm -rfv build || true
echo # df -h
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Docker meta - name: Docker meta
id: meta id: meta
if: github.event_name != 'pull_request'
uses: docker/metadata-action@v5 uses: docker/metadata-action@v5
with: with:
images: | images: |
@@ -153,46 +114,6 @@ jobs:
flavor: | flavor: |
latest=${{ inputs.tag-latest }} latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }} suffix=${{ inputs.tag-suffix }}
- name: Docker meta for PR
id: meta_pull_request
if: github.event_name == 'pull_request'
uses: docker/metadata-action@v5
with:
images: |
ttl.sh/localai-ci-pr-${{ github.event.number }}
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
- name: Docker meta AIO (quay.io)
if: inputs.aio != ''
id: meta_aio
uses: docker/metadata-action@v5
with:
images: |
quay.io/go-skynet/local-ai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
id: meta_aio_dockerhub
uses: docker/metadata-action@v5
with:
images: |
localai/localai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
suffix=${{ inputs.aio }}
- name: Set up QEMU - name: Set up QEMU
uses: docker/setup-qemu-action@master uses: docker/setup-qemu-action@master
@@ -219,128 +140,21 @@ jobs:
password: ${{ secrets.quayPassword }} password: ${{ secrets.quayPassword }}
- name: Build and push - name: Build and push
uses: docker/build-push-action@v6 uses: docker/build-push-action@v5
if: github.event_name != 'pull_request'
with: with:
builder: ${{ steps.buildx.outputs.name }} builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
build-args: | build-args: |
BUILD_TYPE=${{ inputs.build-type }} BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }} FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }} IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: . context: .
file: ./Dockerfile file: ./Dockerfile
cache-from: type=gha
platforms: ${{ inputs.platforms }} platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }} push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }} tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
### Start testing image
- name: Build and push
uses: docker/build-push-action@v6
if: github.event_name == 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: .
file: ./Dockerfile
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: true
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
- name: Testing image
if: github.event_name == 'pull_request'
run: |
echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
## End testing image
- name: Build and push AIO image
if: inputs.aio != ''
uses: docker/build-push-action@v6
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio.outputs.tags }}
labels: ${{ steps.meta_aio.outputs.labels }}
- name: Build and push AIO image (dockerhub)
if: inputs.aio != ''
uses: docker/build-push-action@v6
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: Cleanup
run: |
docker builder prune -f
docker system prune --force --volumes --all
- name: Latest tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta.outputs.version }}
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
docker push localai/localai:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- name: Latest AIO tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
docker push localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- name: job summary - name: job summary
run: | run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
- name: job summary(AIO)
if: inputs.aio != ''
run: |
echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

View file

@@ -1,12 +0,0 @@
name: "Pull Request Labeler"
on:
- pull_request_target
jobs:
labeler:
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v5

View file

@@ -1,35 +0,0 @@
name: LocalAI-bot auto-merge
on:
- pull_request_target
permissions:
contents: write
pull-requests: write
packages: read
jobs:
dependabot:
runs-on: ubuntu-latest
if: ${{ github.actor == 'localai-bot' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |
gh pr checkout "$PR_URL"
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
then
gh pr review --approve "$PR_URL"
else
echo "PR already approved.";
fi
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
- name: Enable auto-merge for LocalAIBot PRs
run: gh pr merge --auto --squash "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

View file

@@ -1,168 +0,0 @@
name: Notifications for new models
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: Eomm/why-don-t-you-tweet@v2
with:
tweet-message: ${{ steps.summarize.outputs.message }}
env:
# Get your tokens from https://developer.twitter.com/apps
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

View file

@@ -1,63 +0,0 @@
name: Release notifications
on:
release:
types:
- published
jobs:
notify-discord:
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- uses: mudler/localai-github-action@v1
with:
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
- name: Summarize
id: summarize
run: |
input="$RELEASE_TITLE\b$RELEASE_BODY"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a discord message with a bullet point summary of the release notes."
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI API
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary=$(echo $response | jq -r '.choices[0].message.content')
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}

View file

@@ -1,28 +0,0 @@
name: Check PR style
on:
pull_request_target:
types:
- opened
- reopened
- edited
- synchronize
jobs:
title-lint:
runs-on: ubuntu-latest
permissions:
statuses: write
steps:
- uses: aslafy-z/conventional-pr-title-action@v3
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# check-pr-description:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v2
# - uses: jadrol/pr-description-checker-action@v1.0.0
# id: description-checker
# with:
# repo-token: ${{ secrets.GITHUB_TOKEN }}
# exempt-labels: no qa

View file

@@ -1,15 +1,6 @@
name: Build and Release name: Build and Release
on: on: push
push:
branches:
- master
tags:
- 'v*'
pull_request:
env:
GRPC_VERSION: v1.65.0
permissions: permissions:
contents: write contents: write
@@ -19,306 +10,91 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
build-linux:
build-linux-arm: strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- uses: actions/setup-go@v5 - uses: actions/setup-go@v4
with: with:
go-version: '1.21.x' go-version: '>=1.21.0'
cache: false
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk sudo apt-get install build-essential ffmpeg
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
make install-go-tools
- name: Install CUDA Dependencies
run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
CUDA_VERSION: 12-4
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make -j12 install
- name: Install gRPC
run: |
GNU_HOST=aarch64-linux-gnu
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
# https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
GRPC_DIR=$PWD/grpc
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
mkdir -p $GRPC_CROSS_BUILD_DIR && \
cd $GRPC_CROSS_BUILD_DIR && \
cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
../.. && \
sudo make -j`nproc` install
- name: Build - name: Build
id: build id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: | run: |
GNU_HOST=aarch64-linux-gnu STATIC=true make dist
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc - uses: actions/upload-artifact@v3
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
- uses: actions/upload-artifact@v4
with: with:
name: LocalAI-linux-arm64 name: ${{ matrix.build }}
path: release/ path: release/
- name: Release - name: Release
uses: softprops/action-gh-release@v2 uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/') if: startsWith(github.ref, 'refs/tags/')
with: with:
files: | files: |
release/* release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-linux:
runs-on: arc-runner-set
steps:
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
make install-go-tools
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt update
sudo apt install -y intel-basekit
- name: Install CUDA Dependencies
run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
env:
CUDA_VERSION: 12-5
- name: "Install Hipblas"
env:
ROCM_VERSION: "6.1"
AMDGPU_VERSION: "6.1"
run: |
set -ex
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt-get update
build-macOS:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: macOS-latest
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
hipblas-dev rocm-dev \
rocblas-dev
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- name: Build
id: build
run: |
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
make -j4 dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-linux
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-x86_64:
runs-on: macos-13
steps: steps:
- name: Clone - name: Clone
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- uses: actions/setup-go@v5 - uses: actions/setup-go@v4
with: with:
go-version: '1.21.x' go-version: '>=1.21.0'
cache: false
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc brew install protobuf grpc
make install-go-tools
- name: Build - name: Build
id: build id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: | run: |
export C_INCLUDE_PATH=/usr/local/include export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
make dist make dist
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v3
with: with:
name: LocalAI-MacOS-x86_64 name: ${{ matrix.build }}
path: release/ path: release/
- name: Release - name: Release
uses: softprops/action-gh-release@v2 uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/') if: startsWith(github.ref, 'refs/tags/')
with: with:
files: | files: |
release/* release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-arm64:
runs-on: macos-14
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc libomp llvm
make install-go-tools
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export CC=/opt/homebrew/opt/llvm/bin/clang
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-arm64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

View file

@@ -1,30 +0,0 @@
name: "Security Scan"
# Run workflow each time code is pushed to your repository and on a schedule.
# The scheduled workflow runs every at 00:00 on Sunday UTC time.
on:
push:
schedule:
- cron: '0 0 * * 0'
jobs:
tests:
runs-on: ubuntu-latest
env:
GO111MODULE: on
steps:
- name: Checkout Source
uses: actions/checkout@v4
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.22.4
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'
- name: Upload SARIF file
if: ${{ github.actor != 'dependabot[bot]' }}
uses: github/codeql-action/upload-sarif@v3
with:
# Path to SARIF file relative to the root of the repository
sarif_file: results.sarif

View file

@@ -25,17 +25,25 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ffmpeg sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test transformers
run: |
make --jobs=5 --output-sync=target -C backend/python/transformers
make --jobs=5 --output-sync=target -C backend/python/transformers test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/transformers
make -C backend/python/transformers test
tests-rerankers:
tests-sentencetransformers:
runs-on: ubuntu-latest
steps: steps:
- name: Clone - name: Clone
@@ -46,16 +54,23 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ffmpeg sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test rerankers
run: |
make --jobs=5 --output-sync=target -C backend/python/rerankers
make --jobs=5 --output-sync=target -C backend/python/rerankers test
- name: Test sentencetransformers
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/sentencetransformers
make -C backend/python/sentencetransformers test
tests-diffusers: tests-diffusers:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@@ -67,120 +82,116 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install -y build-essential ffmpeg
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test diffusers
run: |
make --jobs=5 --output-sync=target -C backend/python/diffusers
make --jobs=5 --output-sync=target -C backend/python/diffusers test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/diffusers
make -C backend/python/diffusers test
#tests-vllm:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install -y build-essential ffmpeg
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test vllm backend
# run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm
# make --jobs=5 --output-sync=target -C backend/python/vllm test
# tests-transformers-musicgen:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test transformers-musicgen
# run: |
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
tests-transformers-musicgen:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
# tests-bark:
# runs-on: ubuntu-latest
# steps:
# - name: Release space from worker
# run: |
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# df -h
# echo
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
# sudo apt-get remove --auto-remove android-sdk-platform-tools || true
# sudo apt-get purge --auto-remove android-sdk-platform-tools || true
# sudo rm -rf /usr/local/lib/android
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
# sudo rm -rf /usr/share/dotnet
# sudo apt-get remove -y '^mono-.*' || true
# sudo apt-get remove -y '^ghc-.*' || true
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
# sudo apt-get remove -y 'php.*' || true
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
# sudo apt-get remove -y '^google-.*' || true
# sudo apt-get remove -y azure-cli || true
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
# sudo apt-get remove -y '^gfortran-.*' || true
# sudo apt-get remove -y microsoft-edge-stable || true
# sudo apt-get remove -y firefox || true
# sudo apt-get remove -y powershell || true
# sudo apt-get remove -y r-base-core || true
# sudo apt-get autoremove -y
# sudo apt-get clean
# echo
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# sudo rm -rfv build || true
# sudo rm -rf /usr/share/dotnet || true
# sudo rm -rf /opt/ghc || true
# sudo rm -rf "/usr/local/share/boost" || true
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
# df -h
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test bark
# run: |
# make --jobs=5 --output-sync=target -C backend/python/bark
# make --jobs=5 --output-sync=target -C backend/python/bark test
- name: Test transformers-musicgen
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/transformers-musicgen
make -C backend/python/transformers-musicgen test
tests-petals:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test petals
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/petals
make -C backend/python/petals test
tests-bark:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test bark
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/bark
make -C backend/python/bark test
# Below tests needs GPU. Commented out for now
@@ -196,15 +207,47 @@ jobs:
# run: | # run: |
# sudo apt-get update # sudo apt-get update
# sudo apt-get install build-essential ffmpeg # sudo apt-get install build-essential ffmpeg
# # Install UV # curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
# curl -LsSf https://astral.sh/uv/install.sh | sh # sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip # gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
# sudo apt-get install -y libopencv-dev # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
# pip install --user --no-cache-dir grpcio-tools==1.64.1 # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
# sudo apt-get update && \
# sudo apt-get install -y conda
# sudo apt-get install -y ca-certificates cmake curl patch
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
# sudo rm -rfv /usr/bin/conda || true
# - name: Test vllm # - name: Test vllm
# run: | # run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm # export PATH=$PATH:/opt/conda/bin
# make --jobs=5 --output-sync=target -C backend/python/vllm test # make -C backend/python/vllm
# make -C backend/python/vllm test
tests-vallex:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test vall-e-x
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/vall-e-x
make -C backend/python/vall-e-x test
tests-coqui: tests-coqui:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@ -217,11 +260,18 @@ jobs:
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ffmpeg sudo apt-get install build-essential ffmpeg
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
# Install UV sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
curl -LsSf https://astral.sh/uv/install.sh | sh gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
pip install --user --no-cache-dir grpcio-tools==1.64.1 sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng
sudo rm -rfv /usr/bin/conda || true
- name: Test coqui - name: Test coqui
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/coqui export PATH=$PATH:/opt/conda/bin
make --jobs=5 --output-sync=target -C backend/python/coqui test make -C backend/python/coqui
make -C backend/python/coqui test


@ -9,9 +9,6 @@ on:
tags: tags:
- '*' - '*'
env:
GRPC_VERSION: v1.65.0
concurrency: concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }} group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true cancel-in-progress: true
@ -60,150 +57,46 @@ jobs:
with: with:
submodules: true submodules: true
- name: Setup Go ${{ matrix.go-version }} - name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5 uses: actions/setup-go@v4
with: with:
go-version: ${{ matrix.go-version }} go-version: ${{ matrix.go-version }}
cache: false
# You can test your matrix by printing the current Go version # You can test your matrix by printing the current Go version
- name: Display Go version - name: Display Go version
run: go version run: go version
- name: Dependencies - name: Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg sudo apt-get install build-essential ffmpeg
sudo apt-get install -y libgmock-dev clang
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \ sudo apt-get update && \
sudo apt-get install -y conda sudo apt-get install -y conda
# Install UV sudo apt-get install -y ca-certificates cmake curl patch
curl -LsSf https://astral.sh/uv/install.sh | sh sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
sudo apt-get install -y libopencv-dev
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ sudo rm -rfv /usr/bin/conda || true
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
rm protoc.zip
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
export CUDACXX=/usr/local/cuda/bin/nvcc
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
# The python3-grpc-tools package in 22.04 is too old
pip install --user grpcio-tools
make -C backend/python/transformers
# Pre-build piper before we start tests in order to have shared libraries in place # Pre-build piper before we start tests in order to have shared libraries in place
make sources/go-piper && \ make sources/go-piper && \
GO_TAGS="tts" make -C sources/go-piper piper.o && \ GO_TAGS="tts" make -C sources/go-piper piper.o && \
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
env: # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
CUDA_VERSION: 12-4 GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make --jobs 5 install
- name: Test
run: |
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-aio-container: git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
runs-on: ubuntu-latest cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
steps: -DgRPC_BUILD_TESTS=OFF \
- name: Release space from worker ../.. && sudo make -j12 install
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
# Install protoc
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build images
run: |
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test - name: Test
run: | run: |
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \ GO_TAGS="stablediffusion tts" make test
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-apple: tests-apple:
runs-on: macOS-14 runs-on: macOS-latest
strategy: strategy:
matrix: matrix:
go-version: ['1.21.x'] go-version: ['1.21.x']
@ -213,30 +106,17 @@ jobs:
with: with:
submodules: true submodules: true
- name: Setup Go ${{ matrix.go-version }} - name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5 uses: actions/setup-go@v4
with: with:
go-version: ${{ matrix.go-version }} go-version: ${{ matrix.go-version }}
cache: false
# You can test your matrix by printing the current Go version # You can test your matrix by printing the current Go version
- name: Display Go version - name: Display Go version
run: go version run: go version
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm brew install protobuf grpc
pip install --user --no-cache-dir grpcio-tools
go install github.com/GeertJohan/go.rice/rice@latest
- name: Test - name: Test
run: | run: |
export C_INCLUDE_PATH=/usr/local/include export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include
export CC=/opt/homebrew/opt/llvm/bin/clang CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
# Used to run the newer GNUMake version from brew that supports --output-sync
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true


@ -1,37 +0,0 @@
name: Update swagger
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
swagger:
strategy:
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 'stable'
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install protobuf-compiler
- run: |
go install github.com/swaggo/swag/cmd/swag@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Bump swagger 🔧
run: |
make protogen-go swagger
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: 'feat(swagger): update swagger'
title: 'feat(swagger): update swagger'
branch: "update/swagger"
body: Update swagger
signoff: true
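The scheduled job above only automates steps that can also be run by hand; a minimal local equivalent is sketched below (assumes Go and the protobuf compiler from the Dependencies step are available):

```bash
# Regenerate protobuf bindings and the swagger spec locally (sketch).
go install github.com/swaggo/swag/cmd/swag@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
make protogen-go swagger
```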


@ -1,18 +0,0 @@
name: 'Yamllint GitHub Actions'
on:
- pull_request
jobs:
yamllint:
name: 'Yamllint'
runs-on: ubuntu-latest
steps:
- name: 'Checkout'
uses: actions/checkout@master
- name: 'Yamllint'
uses: karancode/yamllint-github-action@master
with:
yamllint_file_or_dir: 'gallery'
yamllint_strict: false
yamllint_comment: true
env:
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
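The same check can be run locally before opening a pull request; a sketch, assuming yamllint is installed (for example via pip):

```bash
# Lint the gallery YAML files locally (sketch).
pip install yamllint   # assumption: yamllint obtained from PyPI
yamllint gallery       # without --strict, warnings do not fail the run (matching yamllint_strict: false)
```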

.gitignore vendored

@ -2,17 +2,14 @@
/sources/ /sources/
__pycache__/ __pycache__/
*.a *.a
*.o
get-sources get-sources
prepare-sources prepare-sources
/backend/cpp/llama/grpc-server /backend/cpp/llama/grpc-server
/backend/cpp/llama/llama.cpp /backend/cpp/llama/llama.cpp
/backend/cpp/llama-*
*.log
go-ggml-transformers go-ggml-transformers
go-gpt2 go-gpt2
go-rwkv
whisper.cpp whisper.cpp
/bloomz /bloomz
go-bert go-bert
@ -24,7 +21,6 @@ local-ai
!charts/* !charts/*
# prevent above rules from omitting the api/localai folder # prevent above rules from omitting the api/localai folder
!api/localai !api/localai
!core/**/localai
# Ignore models # Ignore models
models/* models/*
@ -38,22 +34,6 @@ release/
.idea .idea
# Generated during build # Generated during build
backend-assets/* backend-assets/
!backend-assets/.keep
prepare prepare
/ggml-metal.metal /ggml-metal.metal
docs/static/gallery.html
# Protobuf generated files
*.pb.go
*pb2.py
*pb2_grpc.py
# SonarQube
.scannerwork
# backend virtual environments
**/venv
# per-developer customization files for the development container
.devcontainer/customization/*

.gitmodules vendored

@ -1,6 +1,3 @@
[submodule "docs/themes/hugo-theme-relearn"] [submodule "docs/themes/hugo-theme-relearn"]
path = docs/themes/hugo-theme-relearn path = docs/themes/hugo-theme-relearn
url = https://github.com/McShelby/hugo-theme-relearn.git url = https://github.com/McShelby/hugo-theme-relearn.git
[submodule "docs/themes/lotusdocs"]
path = docs/themes/lotusdocs
url = https://github.com/colinwilson/lotusdocs


@ -1,5 +0,0 @@
{
"recommendations": [
"golang.go"
]
}

.vscode/launch.json vendored

@ -3,12 +3,12 @@
"configurations": [ "configurations": [
{ {
"name": "Python: Current File", "name": "Python: Current File",
"type": "debugpy", "type": "python",
"request": "launch", "request": "launch",
"program": "${file}", "program": "${file}",
"console": "integratedTerminal", "console": "integratedTerminal",
"justMyCode": false, "justMyCode": false,
"cwd": "${fileDirname}", "cwd": "${workspaceFolder}/examples/langchain-chroma",
"env": { "env": {
"OPENAI_API_BASE": "http://localhost:8080/v1", "OPENAI_API_BASE": "http://localhost:8080/v1",
"OPENAI_API_KEY": "abc" "OPENAI_API_KEY": "abc"
@ -19,16 +19,15 @@
"type": "go", "type": "go",
"request": "launch", "request": "launch",
"mode": "debug", "mode": "debug",
"program": "${workspaceRoot}", "program": "${workspaceFolder}/main.go",
"args": [], "args": [
"api"
],
"env": { "env": {
"LOCALAI_LOG_LEVEL": "debug", "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"LOCALAI_P2P": "true", "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"LOCALAI_FEDERATED": "true" "DEBUG": "true"
}, }
"buildFlags": ["-tags", "p2p tts", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
} }
] ]
} }


@ -1,4 +0,0 @@
extends: default
rules:
line-length: disable


@ -1,4 +1,4 @@
# Contributing to LocalAI # Contributing to localAI
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines. Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
@ -15,6 +15,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
- [Documentation](#documentation) - [Documentation](#documentation)
- [Community and Communication](#community-and-communication) - [Community and Communication](#community-and-communication)
## Getting Started ## Getting Started
### Prerequisites ### Prerequisites
@ -27,9 +29,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git` 1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
2. Navigate to the project directory: `cd LocalAI` 2. Navigate to the project directory: `cd LocalAI`
3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally ) 3. Install the required dependencies: `make prepare`
4. Build LocalAI: `make build` 4. Run LocalAI: `make run`
5. Run LocalAI: `./local-ai`
## Contributing ## Contributing
@ -52,33 +53,20 @@ If you find a bug, have a feature request, or encounter any issues, please check
## Coding Guidelines ## Coding Guidelines
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here. - No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
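For reference, a typical invocation of that linter from the repository root is sketched below (assumes golangci-lint is installed separately):

```bash
# Run the suggested Go linter over the whole module (sketch).
golangci-lint run ./...
```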
## Testing ## Testing
`make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed. `make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed.
### Running AIO tests
All-In-One images has a set of tests that automatically verifies that most of the endpoints works correctly, a flow can be :
```bash
# Build the LocalAI docker image
make DOCKER_IMAGE=local-ai docker
# Build the corresponding AIO image
BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
# Run the AIO e2e tests
LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
```
## Documentation ## Documentation
We are welcome the contribution of the documents, please open new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs - We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website)
## Community and Communication ## Community and Communication
- You can reach out via the Github issue tracker. - You can reach out via the Github issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions) - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy) - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
---


@ -1,382 +1,136 @@
ARG GO_VERSION=1.21-bullseye
ARG IMAGE_TYPE=extras ARG IMAGE_TYPE=extras
ARG BASE_IMAGE=ubuntu:22.04 # extras or core
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements-core
USER root FROM golang:$GO_VERSION as requirements-core
ARG GO_VERSION=1.22.6 ARG BUILD_TYPE
ARG CMAKE_VERSION=3.26.4 ARG CUDA_MAJOR_VERSION=11
ARG CMAKE_FROM_SOURCE=false ARG CUDA_MINOR_VERSION=7
ARG TARGETARCH ARG TARGETARCH
ARG TARGETVARIANT ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
ARG GO_TAGS="stablediffusion tinydream tts"
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
build-essential \
ccache \
ca-certificates \
curl libssl-dev \
git \
git-lfs \
unzip upx-ucl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers and rice
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
go install github.com/GeertJohan/go.rice/rice@latest
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates RUN update-ca-certificates
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions # Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH" RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT" RUN echo "Target Variant: $TARGETVARIANT"
# Cuda # CuBLAS requirements
ENV PATH=/usr/local/cuda/bin:${PATH} RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get install -y software-properties-common && \
# HipBLAS requirements apt-add-repository contrib && \
ENV PATH=/opt/rocm/bin:${PATH} curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm -f cuda-keyring_1.0-1_all.deb && \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}
# OpenBLAS requirements and stable diffusion # OpenBLAS requirements and stable diffusion
RUN apt-get update && \ RUN apt-get install -y \
apt-get install -y --no-install-recommends \ libopenblas-dev \
libopenblas-dev && \ libopencv-dev \
apt-get clean && \ && apt-get clean
rm -rf /var/lib/apt/lists/*
# Set up OpenCV
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
WORKDIR /build WORKDIR /build
################################### RUN test -n "$TARGETARCH" \
################################### || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it # Extras requirements
FROM requirements-core AS requirements-extras FROM requirements-core as requirements-extras
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
apt-get update && \
apt-get install -y conda
# Install uv as a system package
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
ENV PATH="/root/.cargo/bin:${PATH}" ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get update && \ RUN apt-get install -y espeak-ng espeak
apt-get install -y --no-install-recommends \
espeak-ng \
espeak \
python3-pip \
python-is-python3 \
python3-dev llvm \
python3-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
pip install --upgrade pip
# Install grpcio-tools (the version in 22.04 is too old)
RUN pip install --user grpcio-tools
################################### ###################################
################################### ###################################
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here. FROM requirements-${IMAGE_TYPE} as builder
# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE ARG GO_TAGS="stablediffusion tts"
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi
###################################
###################################
# Temporary workaround for Intel's repository to work correctly
# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
# This is a temporary workaround until Intel fixes their repository
FROM ${INTEL_BASE_IMAGE} AS intel
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
###################################
###################################
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0
ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.26.4
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
WORKDIR /build
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
build-essential curl libssl-dev \
git && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
# and running make install in the target container
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
mkdir -p /build/grpc/cmake/build && \
cd /build/grpc/cmake/build && \
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
make && \
make install && \
rm -rf /build
###################################
###################################
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
FROM requirements-drivers AS builder-base
ARG GO_TAGS="tts p2p"
ARG GRPC_BACKENDS ARG GRPC_BACKENDS
ARG MAKEFLAGS ARG BUILD_GRPC=true
ARG LD_FLAGS="-s -w"
ENV GRPC_BACKENDS=${GRPC_BACKENDS} ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS} ENV GO_TAGS=${GO_TAGS}
ENV MAKEFLAGS=${MAKEFLAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all ENV NVIDIA_VISIBLE_DEVICES=all
ENV LD_FLAGS=${LD_FLAGS}
RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
WORKDIR /build
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
# here so that we can generate the grpc code for the stablediffusion build
RUN <<EOT bash
if [ "amd64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
EOT
###################################
###################################
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM builder-base AS builder
# Install the pre-built GRPC
COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build WORKDIR /build
COPY . . COPY . .
COPY .git . COPY .git .
RUN make prepare RUN make prepare
## Build the binary # stablediffusion does not tolerate a newer version of abseil, build it first
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
## Otherwise just run the normal build
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \ RUN if [ "${BUILD_GRPC}" = "true" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \ git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
else \ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
make build; \ -DgRPC_BUILD_TESTS=OFF \
fi ../.. && make -j12 install \
; fi
# Rebuild with defaults backends
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \ mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \ touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
; fi ; fi
################################### ###################################
################################### ###################################
# The devcontainer target is not used on CI. It is a target for developers to use locally - FROM requirements-${IMAGE_TYPE}
# rather than copying files it mounts them locally and leaves building to the developer
FROM builder-base AS devcontainer
ARG FFMPEG
COPY --from=grpc /opt/grpc /usr/local
COPY .devcontainer-scripts /.devcontainer-scripts
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less wget
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
RUN go install github.com/go-delve/delve/cmd/dlv@latest
RUN go install github.com/mikefarah/yq/v4@latest
###################################
###################################
# This is the final target. The result of this target will be the image uploaded to the registry.
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
ARG FFMPEG ARG FFMPEG
ARG BUILD_TYPE ARG BUILD_TYPE
ARG TARGETARCH ARG TARGETARCH
ARG IMAGE_TYPE=extras ARG IMAGE_TYPE=extras
ARG EXTRA_BACKENDS
ARG MAKEFLAGS
ENV BUILD_TYPE=${BUILD_TYPE} ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}
ARG CUDA_MAJOR_VERSION=12 ARG CUDA_MAJOR_VERSION=11
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all ENV NVIDIA_VISIBLE_DEVICES=all
# Add FFmpeg # Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \ RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \ apt-get install -y ffmpeg \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi ; fi
WORKDIR /build WORKDIR /build
@ -388,9 +142,9 @@ WORKDIR /build
COPY . . COPY . .
COPY --from=builder /build/sources ./sources/ COPY --from=builder /build/sources ./sources/
COPY --from=grpc /opt/grpc /usr/local COPY --from=builder /build/grpc ./grpc/
RUN make prepare-sources RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
# Copy the binary # Copy the binary
COPY --from=builder /build/local-ai ./ COPY --from=builder /build/local-ai ./
@ -398,53 +152,50 @@ COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper # Copy shared libraries for piper
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/ COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# Change the shell to bash so we can use [[ tests below # do not let stablediffusion rebuild (requires an older version of absl)
SHELL ["/bin/bash", "-c"] COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \ ## Duplicated from Makefile to avoid having a big layer that's hard to push
apt-get -qq -y install espeak-ng \ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
; fi ; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
make -C backend/python/coqui \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/faster-whisper \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/diffusers \
; fi ; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
make -C backend/python/kokoro \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama2 \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers \
; fi ; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
make -C backend/python/vllm \ ; fi
; fi && \ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
make -C backend/python/bark \ ; fi
; fi && \ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
make -C backend/python/rerankers \ ; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \
; fi ; fi
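As a usage note (not part of the Dockerfile itself): an extras image containing only selected Python backends can be built by passing EXTRA_BACKENDS at build time; the flags and tag below mirror the tests-aio-container CI job shown earlier in this changeset.

```bash
# Build an extras image with only the rerankers backend enabled (sketch).
docker build \
  --build-arg FFMPEG=true \
  --build-arg IMAGE_TYPE=extras \
  --build-arg EXTRA_BACKENDS=rerankers \
  --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \
  -t local-ai:tests -f Dockerfile .
```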
# Make sure the models directory exists
RUN mkdir -p /build/models
# Define the health check command # Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
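As a usage note (not a Dockerfile instruction): the readiness endpoint that this health check polls can also be probed by hand against a running container, using the default HEALTHCHECK_ENDPOINT value set earlier in this file:

```bash
# Manually probe the readiness endpoint used by the HEALTHCHECK above (sketch).
curl -f http://localhost:8080/readyz
```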
VOLUME /build/models
EXPOSE 8080 EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ] ENTRYPOINT [ "/build/entrypoint.sh" ]


@ -1,8 +0,0 @@
ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE}
RUN apt-get update && apt-get install -y pciutils && apt-get clean
COPY aio/ /aio
ENTRYPOINT [ "/aio/entrypoint.sh" ]


@ -1,6 +1,6 @@
MIT License MIT License
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io) Copyright (c) 2023 Ettore Di Giacinto
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

Makefile
File diff suppressed because it is too large
README.md
@ -1,6 +1,7 @@
<h1 align="center"> <h1 align="center">
<br> <br>
<img height="300" src="./core/http/static/logo.png"> <br> <img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
LocalAI
<br> <br>
</h1> </h1>
@ -19,230 +20,56 @@
</a> </a>
</p> </p>
<p align="center"> > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
<a href="https://hub.docker.com/r/localai/localai" target="blank"> >
<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/> > [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
</a>
<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank"> [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
</a>
</p>
<p align="center"> <p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank"> <a href="https://twitter.com/LocalAI_API" target="blank">
<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/> <img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
</a> </a>
<a href="https://discord.gg/uJAeKSAGDy" target="blank"> <a href="https://discord.gg/uJAeKSAGDy" target="blank">
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/> <img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
</a> </a>
</p>
<p align="center"> **LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API thats compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) ## 🔥🔥 Hot topics / Roadmap
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler). - Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
- Inline templates: https://github.com/mudler/LocalAI/pull/1452
- Mixtral: https://github.com/mudler/LocalAI/pull/1449
- Img2vid https://github.com/mudler/LocalAI/pull/1442
- Musicgen https://github.com/mudler/LocalAI/pull/1387
Hot topics (looking for contributors):
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
## 📚🆕 Local Stack Family If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together: ## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
<table>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalAGI">
<img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
<p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
</td>
</tr>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalRecall">
<img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
<p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
</td>
</tr>
</table>
## Screenshots
| Talk Interface | Generate Audio |
| --- | --- |
| ![Screenshot 2025-03-31 at 12-01-36 LocalAI - Talk](./docs/assets/images/screenshots/screenshot_tts.png) | ![Screenshot 2025-03-31 at 12-01-29 LocalAI - Generate audio with voice-en-us-ryan-low](./docs/assets/images/screenshots/screenshot_tts.png) |
| Models Overview | Generate Images |
| --- | --- |
| ![Screenshot 2025-03-31 at 12-01-20 LocalAI - Models](./docs/assets/images/screenshots/screenshot_gallery.png) | ![Screenshot 2025-03-31 at 12-31-41 LocalAI - Generate images with flux 1-dev](./docs/assets/images/screenshots/screenshot_image.png) |
| Chat Interface | Home |
| --- | --- |
| ![Screenshot 2025-03-31 at 11-57-44 LocalAI - Chat with localai-functioncall-qwen2 5-7b-v0 5](./docs/assets/images/screenshots/screenshot_chat.png) | ![Screenshot 2025-03-31 at 11-57-23 LocalAI API - c2a39e3 (c2a39e3639227cfd94ffffe9f5691239acc275a8)](./docs/assets/images/screenshots/screenshot_home.png) |
| Login | Swarm |
| --- | --- |
|![Screenshot 2025-03-31 at 12-09-59 ](./docs/assets/images/screenshots/screenshot_login.png) | ![Screenshot 2025-03-31 at 12-10-39 LocalAI - P2P dashboard](./docs/assets/images/screenshots/screenshot_p2p.png) |
## 💻 Quickstart
Run the installer script:
```bash
# Basic installation
curl https://localai.io/install.sh | sh
```
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
Or run with docker:
### CPU only image:
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
```
### NVIDIA GPU Images:
```bash
# CUDA 12.0 with core features
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CUDA 12.0 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12-extras
# CUDA 11.7 with core features
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
# CUDA 11.7 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11-extras
# NVIDIA Jetson (L4T) ARM64
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
```
### AMD GPU Images (ROCm):
```bash
# ROCm with core features
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
# ROCm with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas-extras
```
### Intel GPU Images (oneAPI):
```bash
# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
# Intel GPU with FP16 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16-extras
# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
# Intel GPU with FP32 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32-extras
```
### Vulkan GPU Images:
```bash
# Vulkan with core features
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
### AIO Images (pre-downloaded models):
```bash
# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# NVIDIA CUDA 11 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
```
For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
To load models:
```bash
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
local-ai run llama-3.2-1b-instruct:q4_k_m
# Start LocalAI with the phi-2 model directly from huggingface
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
# Install and run a model from the Ollama OCI registry
local-ai run ollama://gemma:2b
# Run a model from a configuration file
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
local-ai run oci://localai/phi-2:latest
```
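Once a model is installed, it can be queried through the OpenAI-compatible endpoints. A minimal sketch of a chat completion request, assuming the `llama-3.2-1b-instruct:q4_k_m` model from the gallery example above and the default port:
```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-3.2-1b-instruct:q4_k_m",
    "messages": [{"role": "user", "content": "Write a haiku about local inference."}],
    "temperature": 0.7
  }'
```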
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
## 📰 Latest project news
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OuteTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## 🚀 [Features](https://localai.io/features/)
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation](https://localai.io/features/image-generation)
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface](https://localai.io/models/)
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!
## 💻 Usage
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
### 🔗 Community and integrations
@ -252,7 +79,6 @@ Build and deploy custom containers:
WebUIs:
- https://github.com/Jirubizu/localai-admin
- https://github.com/go-skynet/LocalAI-frontend
- QA-Pilot (an interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repositories): https://github.com/reid41/QA-Pilot
Model galleries
- https://github.com/go-skynet/model-gallery
@ -260,35 +86,23 @@ Model galleries
Other:
- Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
- Terminal utility https://github.com/djcopley/ShellOracle
- Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot (interact with LLMs using LocalAI models via pure shell scripts on your Linux or macOS system): https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Another Telegram Bot https://github.com/JackBekket/Hellper
- Auto-documentation https://github.com/JackBekket/Reflexia
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
- Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
### 🔗 Resources
- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
- [How to build locally](https://localai.io/basics/build/index.html)
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
- [How tos section](https://io.midori-ai.xyz/howtos/) (curated by our community)
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
@ -314,16 +128,17 @@ If you utilize this repository, data in a downstream project, please consider ci
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
A huge thank you to our generous sponsors who support this project by covering CI expenses, and to everyone on our [Sponsor list](https://github.com/sponsors/mudler):
<p align="center">
<a href="https://www.spectrocloud.com/" target="blank">
<img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
</a>
<a href="https://www.premai.io/" target="blank">
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
</a>
</p>
## 🌟 Star history
@ -333,7 +148,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
MIT - Author Ettore Di Giacinto <mudler@localai.io>
## 🙇 Acknowledgements
@ -345,7 +160,9 @@ LocalAI couldn't have been built without the help of great software already avai
- https://github.com/antimatter15/alpaca.cpp
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/ggerganov/whisper.cpp
- https://github.com/saharNooby/rwkv.cpp
- https://github.com/rhasspy/piper
- https://github.com/cmp-nct/ggllm.cpp
## 🤗 Contributors

View file

@ -1,42 +0,0 @@
# Security Policy
## Introduction
At LocalAI, we take the security of our software seriously. We understand the importance of protecting our community from vulnerabilities and are committed to ensuring the safety and security of our users.
## Supported Versions
We provide support and updates for certain versions of our software. The following table outlines which versions are currently supported with security updates:
| Version | Supported |
| ------- | ------------------ |
| > 2.0 | :white_check_mark: |
| < 2.0 | :x: |
Please ensure that you are using a supported version to receive the latest security updates.
## Reporting a Vulnerability
We encourage the responsible disclosure of any security vulnerabilities. If you believe you've found a security issue in our software, we kindly ask you to follow the steps below to report it to us:
1. **Email Us:** Send an email to [security@localai.io](mailto:security@localai.io) with a detailed report. Please do not disclose the vulnerability publicly or to any third parties before it has been addressed by us.
2. **Expect a Response:** We aim to acknowledge receipt of vulnerability reports within 48 hours. Our security team will review your report and work closely with you to understand the impact and ensure a thorough investigation.
3. **Collaboration:** If the vulnerability is accepted, we will work with you and our community to address the issue promptly. We'll keep you informed throughout the resolution process and may request additional information or collaboration.
4. **Disclosure:** Once the vulnerability has been resolved, we encourage a coordinated disclosure. We believe in transparency and will work with you to ensure that our community is informed in a responsible manner.
## Use of Third-Party Platforms
As a Free and Open Source Software (FOSS) organization, we do not offer monetary bounties. However, researchers who wish to report vulnerabilities can also do so via [Huntr](https://huntr.dev/bounties), a platform that recognizes contributions to open source security.
## Contact
For any security-related inquiries beyond vulnerability reporting, please contact us at [security@localai.io](mailto:security@localai.io).
## Acknowledgments
We appreciate the efforts of those who contribute to the security of our project. Your responsible disclosure is invaluable to the safety and integrity of LocalAI.
Thank you for helping us keep LocalAI secure.

View file

@ -1,5 +0,0 @@
## AIO CPU size
Use this image with CPU-only.
Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).

View file

@ -1,12 +0,0 @@
embeddings: true
name: text-embedding-ada-002
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View file

@ -1,23 +0,0 @@
name: stablediffusion
backend: stablediffusion-ggml
cfg_scale: 4.5
options:
- sampler:euler
parameters:
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
step: 25
download_files:
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View file

@ -1,27 +0,0 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View file

@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View file

@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"voice-en-us-amy-low",
"input": "Hi, this is a test."
}'

View file

@ -1,57 +0,0 @@
context_size: 8192
f16: true
function:
grammar:
no_mixed_free_string: true
schema_type: llama3.1 # or JSON is supported too (json)
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
mmap: true
name: gpt-4
parameters:
model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- <|eot_id|>
- <|end_of_text|>
template:
chat: |
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
{{ else if eq .RoleName "tool" -}}
The Function was executed and the response was:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ range .FunctionCall }}
[{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
{{ end }}
{{ end -}}
<|eot_id|>
completion: |
{{.Input}}
function: |
<|start_header_id|>system<|end_header_id|>
You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
If you decide to invoke any of the function(s), you MUST put it in the format as follows:
[func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
You SHOULD NOT include any other text in the response.
Here is a list of functions in JSON format that you can invoke.
{{toJson .Functions}}
<|eot_id|><|start_header_id|>user<|end_header_id|>
{{.Input}}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
download_files:
- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
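This file registers the model under the name `gpt-4`, with function calling wired through the llama3.1-style template above. As a hedged sketch (the endpoint and request shape follow the OpenAI tools convention that LocalAI mirrors; the weather function is purely illustrative), a tool call could be exercised like this:
```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "What is the weather like in Boston?"}],
    "tools": [{
      "type": "function",
      "function": {
        "name": "get_current_weather",
        "description": "Get the current weather for a city",
        "parameters": {
          "type": "object",
          "properties": {"location": {"type": "string"}},
          "required": ["location"]
        }
      }
    }],
    "tool_choice": "auto"
  }'
```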

View file

@ -1,8 +0,0 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View file

@ -1,49 +0,0 @@
context_size: 4096
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-2_6-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
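This file registers the MiniCPM-V model under the name `gpt-4o`, with the mmproj projector enabling image inputs. A minimal sketch of a request using the OpenAI-compatible vision format (the image URL is just a placeholder):
```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "What is in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}
      ]
    }]
  }'
```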

View file

@ -1,138 +0,0 @@
#!/bin/bash
echo "===> LocalAI All-in-One (AIO) container starting..."
GPU_ACCELERATION=false
GPU_VENDOR=""
function check_intel() {
if lspci | grep -E 'VGA|3D' | grep -iq intel; then
echo "Intel GPU detected"
if [ -d /opt/intel ]; then
GPU_ACCELERATION=true
GPU_VENDOR=intel
else
echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia_wsl() {
if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
# We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
# Make sure the container was run with `--gpus all` as the only required parameter
echo "NVIDIA GPU detected via WSL2"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_amd() {
if lspci | grep -E 'VGA|3D' | grep -iq amd; then
echo "AMD GPU detected"
# Check if ROCm is installed
if [ -d /opt/rocm ]; then
GPU_ACCELERATION=true
GPU_VENDOR=amd
else
echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia() {
if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
echo "NVIDIA GPU detected"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_metal() {
if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
echo "Apple Metal supported GPU detected"
GPU_ACCELERATION=true
GPU_VENDOR=apple
fi
}
function detect_gpu() {
case "$(uname -s)" in
Linux)
check_nvidia
check_amd
check_intel
check_nvidia_wsl
;;
Darwin)
check_metal
;;
esac
}
function detect_gpu_size() {
# Attempting to find GPU memory size for NVIDIA GPUs
if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
echo "NVIDIA GPU detected. Attempting to find memory size..."
# Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
# If handling multiple GPUs is required in the future, this is the place to do it
nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
if [ ! -z "$nvidia_sm" ]; then
echo "Total GPU Memory: $nvidia_sm MiB"
# if bigger than 8GB, use 16GB
#if [ "$nvidia_sm" -gt 8192 ]; then
# GPU_SIZE=gpu-16g
#else
GPU_SIZE=gpu-8g
#fi
else
echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
GPU_SIZE=gpu-8g
fi
elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
GPU_SIZE=intel
# Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
elif [ "$GPU_ACCELERATION" = true ]; then
echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
GPU_SIZE=gpu-8g
# default to cpu if GPU_SIZE is not set
else
echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
GPU_SIZE=cpu
fi
}
function check_vars() {
if [ -z "$MODELS" ]; then
echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
exit 1
fi
if [ -z "$PROFILE" ]; then
echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
exit 1
fi
}
detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # fall back to the detected GPU_SIZE (cpu when no supported GPU was found)
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
exec /build/entrypoint.sh "$@"
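Because the script only falls back to auto-detection when `PROFILE` and `MODELS` are unset, both can be overridden when starting an AIO container. A short sketch using the image tags from the quickstart above (profile names and the `/aio/<profile>/*.yaml` paths are taken from this script):
```bash
# Force the gpu-8g profile instead of relying on auto-detection
docker run -ti --name local-ai -p 8080:8080 --gpus all \
  -e PROFILE=gpu-8g \
  localai/localai:latest-aio-gpu-nvidia-cuda-12

# Load only a subset of the bundled AIO configurations
docker run -ti --name local-ai -p 8080:8080 \
  -e PROFILE=cpu \
  -e MODELS=/aio/cpu/text-to-text.yaml,/aio/cpu/embeddings.yaml \
  localai/localai:latest-aio-cpu
```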

View file

@ -1,12 +0,0 @@
embeddings: true
name: text-embedding-ada-002
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View file

@ -1,25 +0,0 @@
name: stablediffusion
parameters:
model: DreamShaper_8_pruned.safetensors
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
download_files:
- filename: DreamShaper_8_pruned.safetensors
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View file

@ -1,27 +0,0 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View file

@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View file

@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View file

@ -1,53 +0,0 @@
context_size: 4096
f16: true
function:
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
- key: (?s)<Thought>(.*?)</Thought>
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf

View file

@ -1,8 +0,0 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View file

@ -1,49 +0,0 @@
context_size: 4096
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-2_6-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd

View file

@ -1,12 +0,0 @@
embeddings: true
name: text-embedding-ada-002
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View file

@ -1,20 +0,0 @@
name: stablediffusion
parameters:
model: Lykon/dreamshaper-8
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View file

@ -1,27 +0,0 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View file

@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View file

@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View file

@ -1,53 +0,0 @@
context_size: 4096
f16: true
function:
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
- key: (?s)<Thought>(.*?)</Thought>
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf

View file

@ -1,8 +0,0 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View file

@ -1,50 +0,0 @@
context_size: 4096
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-2_6-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd

302
api/api.go Normal file
View file

@ -0,0 +1,302 @@
package api
import (
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/localai"
"github.com/go-skynet/LocalAI/api/openai"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/metrics"
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, error) {
options := options.NewOptions(opts...)
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if options.Debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
modelPath := options.Loader.ModelPath
if len(options.ModelsURL) > 0 {
for _, url := range options.ModelsURL {
if utils.LooksLikeURL(url) {
// md5 of model name
md5Name := utils.MD5(url)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
err := utils.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if err != nil {
log.Error().Msgf("error loading model: %s", err.Error())
}
}
}
}
}
cl := config.NewConfigLoader()
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
}
if options.ConfigFile != "" {
if err := cl.LoadConfigFile(options.ConfigFile); err != nil {
log.Error().Msgf("error loading config file: %s", err.Error())
}
}
if err := cl.Preload(options.Loader.ModelPath); err != nil {
log.Error().Msgf("error downloading models: %s", err.Error())
}
if options.PreloadJSONModels != "" {
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
if options.PreloadModelsFromPath != "" {
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
if options.Debug {
for _, v := range cl.ListConfigs() {
cfg, _ := cl.GetConfig(v)
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
}
}
if options.AssetsDestination != "" {
// Extract files from the embedded FS
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
if err != nil {
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
}
}
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
options.Loader.StopAllGRPC()
}()
if options.WatchDog {
wd := model.NewWatchDog(
options.Loader,
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
options.Loader.SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
wd.Shutdown()
}()
}
return options, cl, nil
}
func App(opts ...options.AppOption) (*fiber.App, error) {
options, cl, err := Startup(opts...)
if err != nil {
return nil, fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
// Return errors as JSON responses
app := fiber.New(fiber.Config{
BodyLimit: options.UploadLimitMB * 1024 * 1024, // override Fiber's default 4MB body limit with the configured upload limit
DisableStartupMessage: options.DisableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
code := fiber.StatusInternalServerError
// Retrieve the custom status code if it's a *fiber.Error
var e *fiber.Error
if errors.As(err, &e) {
code = e.Code
}
// Send custom error page
return ctx.Status(code).JSON(
schema.ErrorResponse{
Error: &schema.APIError{Message: err.Error(), Code: code},
},
)
},
})
if options.Debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
}
// Default middleware config
app.Use(recover.New())
if options.Metrics != nil {
app.Use(metrics.APIMiddleware(options.Metrics))
}
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
if len(options.ApiKeys) == 0 {
return c.Next()
}
// Check for api_keys.json file
fileContent, err := os.ReadFile("api_keys.json")
if err == nil {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
}
// Add file keys to options.ApiKeys
options.ApiKeys = append(options.ApiKeys, fileKeys...)
}
if len(options.ApiKeys) == 0 {
return c.Next()
}
authHeader := c.Get("Authorization")
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
}
authHeaderParts := strings.Split(authHeader, " ")
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
}
apiKey := authHeaderParts[1]
for _, key := range options.ApiKeys {
if apiKey == key {
return c.Next()
}
}
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
}
if options.CORS {
var c func(ctx *fiber.Ctx) error
if options.CORSAllowOrigins == "" {
c = cors.New()
} else {
c = cors.New(cors.Config{AllowOrigins: options.CORSAllowOrigins})
}
app.Use(c)
}
// LocalAI API endpoints
galleryService := localai.NewGalleryService(options.Loader.ModelPath)
galleryService.Start(options.Context, cl)
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})
modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", auth, modelGalleryService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", auth, modelGalleryService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", auth, modelGalleryService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryService.GetAllStatusEndpoint())
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, options))
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, options))
// edit
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, options))
app.Post("/edits", auth, openai.EditEndpoint(cl, options))
// completion
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, options))
app.Post("/completions", auth, openai.CompletionEndpoint(cl, options))
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, options))
// embeddings
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
// audio
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, options))
app.Post("/tts", auth, localai.TTSEndpoint(cl, options))
// images
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, options))
if options.ImageDir != "" {
app.Static("/generated-images", options.ImageDir)
}
if options.AudioDir != "" {
app.Static("/generated-audio", options.AudioDir)
}
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
// Kubernetes health checks
app.Get("/healthz", ok)
app.Get("/readyz", ok)
// Experimental Backend Statistics Module
backendMonitor := localai.NewBackendMonitor(cl, options) // Split out for now
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor))
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor))
// models
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
app.Get("/metrics", metrics.MetricsHandler())
return app, nil
}
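With the `auth` middleware above, any key listed in `options.ApiKeys` or in an `api_keys.json` file in the working directory is accepted as a standard Bearer token. A quick sketch of calling two of the routes registered in `App` (the `$LOCALAI_API_KEY` variable is just a placeholder; the header is only required when keys are configured):
```bash
# Version endpoint registered in App()
curl http://localhost:8080/version \
  -H "Authorization: Bearer $LOCALAI_API_KEY"

# OpenAI-compatible model listing
curl http://localhost:8080/v1/models \
  -H "Authorization: Bearer $LOCALAI_API_KEY"
```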

File diff suppressed because it is too large

View file

@ -1,4 +1,4 @@
package http_test
import (
	"testing"

92
api/backend/embeddings.go Normal file
View file

@ -0,0 +1,92 @@
package backend
import (
"fmt"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
if !c.Embeddings {
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
}
modelFile := c.Model
grpcOpts := gRPCModelOpts(c)
var inferenceModel interface{}
var err error
opts := modelOpts(c, o, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(c.Threads)),
model.WithAssetDir(o.AssetsDestination),
model.WithModel(modelFile),
model.WithContext(o.Context),
})
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
opts = append(opts, model.WithBackendString(c.Backend))
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return nil, err
}
var fn func() ([]float32, error)
switch model := inferenceModel.(type) {
case *grpc.Client:
fn = func() ([]float32, error) {
predictOptions := gRPCPredictOpts(c, loader.ModelPath)
if len(tokens) > 0 {
embeds := []int32{}
for _, t := range tokens {
embeds = append(embeds, int32(t))
}
predictOptions.EmbeddingTokens = embeds
res, err := model.Embeddings(o.Context, predictOptions)
if err != nil {
return nil, err
}
return res.Embeddings, nil
}
predictOptions.Embeddings = s
res, err := model.Embeddings(o.Context, predictOptions)
if err != nil {
return nil, err
}
return res.Embeddings, nil
}
default:
fn = func() ([]float32, error) {
return nil, fmt.Errorf("embeddings not supported by the backend")
}
}
return func() ([]float32, error) {
embeds, err := fn()
if err != nil {
return embeds, err
}
// Remove trailing 0s
for i := len(embeds) - 1; i >= 0; i-- {
if embeds[i] == 0.0 {
embeds = embeds[:i]
} else {
break
}
}
return embeds, nil
}, nil
}

61
api/backend/image.go Normal file
View file

@ -0,0 +1,61 @@
package backend
import (
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
opts := modelOpts(c, o, []model.Option{
model.WithBackendString(c.Backend),
model.WithAssetDir(o.AssetsDestination),
model.WithThreads(uint32(c.Threads)),
model.WithContext(o.Context),
model.WithModel(c.Model),
model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.Diffusers.CFGScale,
LoraAdapter: c.LoraAdapter,
LoraScale: c.LoraScale,
LoraBase: c.LoraBase,
IMG2IMG: c.Diffusers.IMG2IMG,
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
}),
})
inferenceModel, err := loader.BackendLoader(
opts...,
)
if err != nil {
return nil, err
}
fn := func() error {
_, err := inferenceModel.GenerateImage(
o.Context,
&proto.GenerateImageRequest{
Height: int32(height),
Width: int32(width),
Mode: int32(mode),
Step: int32(step),
Seed: int32(seed),
CLIPSkip: int32(c.Diffusers.ClipSkip),
PositivePrompt: positive_prompt,
NegativePrompt: negative_prompt,
Dst: dst,
Src: src,
EnableParameters: c.Diffusers.EnableParameters,
})
return err
}
return fn, nil
}

167
api/backend/llm.go Normal file
View file

@ -0,0 +1,167 @@
package backend
import (
"context"
"os"
"regexp"
"strings"
"sync"
"unicode/utf8"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/grpc"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
type LLMResponse struct {
Response string // should this be []byte?
Usage TokenUsage
}
type TokenUsage struct {
Prompt int
Completion int
}
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
grpcOpts := gRPCModelOpts(c)
var inferenceModel *grpc.Client
var err error
opts := modelOpts(c, o, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(c.Threads)), // some models use this to allocate threads during startup
model.WithAssetDir(o.AssetsDestination),
model.WithModel(modelFile),
model.WithContext(o.Context),
})
if c.Backend != "" {
opts = append(opts, model.WithBackendString(c.Backend))
}
// Check if the modelFile exists, if it doesn't try to load it from the gallery
if o.AutoloadGalleries { // experimental
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it
err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
if err != nil {
return nil, err
}
}
}
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return nil, err
}
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts.Prompt = s
opts.Images = images
tokenUsage := TokenUsage{}
// check the per-model feature flag for usage, since tokenCallback may have a cost.
// Defaults to off as for now it is still experimental
if c.FeatureFlag.Enabled("usage") {
userTokenCallback := tokenCallback
if userTokenCallback == nil {
userTokenCallback = func(token string, usage TokenUsage) bool {
return true
}
}
promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts)
if pErr == nil && promptInfo.Length > 0 {
tokenUsage.Prompt = int(promptInfo.Length)
}
tokenCallback = func(token string, usage TokenUsage) bool {
tokenUsage.Completion++
return userTokenCallback(token, tokenUsage)
}
}
if tokenCallback != nil {
ss := ""
var partialRune []byte
err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
partialRune = append(partialRune, chars...)
for len(partialRune) > 0 {
r, size := utf8.DecodeRune(partialRune)
if r == utf8.RuneError {
// incomplete rune, wait for more bytes
break
}
tokenCallback(string(r), tokenUsage)
ss += string(r)
partialRune = partialRune[size:]
}
})
return LLMResponse{
Response: ss,
Usage: tokenUsage,
}, err
} else {
// TODO: Is the chicken bit the only way to get here? is that acceptable?
reply, err := inferenceModel.Predict(ctx, opts)
if err != nil {
return LLMResponse{}, err
}
return LLMResponse{
Response: string(reply.Message),
Usage: tokenUsage,
}, err
}
}
return fn, nil
}
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
var mu sync.Mutex = sync.Mutex{}
func Finetune(config config.Config, input, prediction string) string {
if config.Echo {
prediction = input + prediction
}
for _, c := range config.Cutstrings {
mu.Lock()
reg, ok := cutstrings[c]
if !ok {
cutstrings[c] = regexp.MustCompile(c)
reg = cutstrings[c]
}
mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}
for _, c := range config.TrimSuffix {
prediction = strings.TrimSpace(strings.TrimSuffix(prediction, c))
}
return prediction
}

127
api/backend/options.go Normal file
View file

@ -0,0 +1,127 @@
package backend
import (
"os"
"path/filepath"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
)
func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.Option {
if o.SingleBackend {
opts = append(opts, model.WithSingleActiveBackend())
}
if o.ParallelBackendRequests {
opts = append(opts, model.EnableParallelRequests)
}
if c.GRPC.Attempts != 0 {
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
}
if c.GRPC.AttemptsSleepTime != 0 {
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
}
for k, v := range o.ExternalGRPCBackends {
opts = append(opts, model.WithExternalBackend(k, v))
}
return opts
}
func gRPCModelOpts(c config.Config) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
}
return &pb.ModelOptions{
ContextSize: int32(c.ContextSize),
Seed: int32(c.Seed),
NBatch: int32(b),
NoMulMatQ: c.NoMulMatQ,
CUDA: c.CUDA, // diffusers, transformers
DraftModel: c.DraftModel,
AudioPath: c.VallE.AudioPath,
Quantization: c.Quantization,
MMProj: c.MMProj,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,
YarnBetaFast: c.YarnBetaFast,
YarnBetaSlow: c.YarnBetaSlow,
LoraAdapter: c.LoraAdapter,
LoraBase: c.LoraBase,
LoraScale: c.LoraScale,
NGQA: c.NGQA,
RMSNormEps: c.RMSNormEps,
F16Memory: c.F16,
MLock: c.MMlock,
RopeFreqBase: c.RopeFreqBase,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: c.Embeddings,
LowVRAM: c.LowVRAM,
NGPULayers: int32(c.NGPULayers),
MMap: c.MMap,
MainGPU: c.MainGPU,
Threads: int32(c.Threads),
TensorSplit: c.TensorSplit,
// AutoGPTQ
ModelBaseName: c.AutoGPTQ.ModelBaseName,
Device: c.AutoGPTQ.Device,
UseTriton: c.AutoGPTQ.Triton,
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
// RWKV
Tokenizer: c.Tokenizer,
}
}
func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
promptCachePath := ""
if c.PromptCachePath != "" {
p := filepath.Join(modelPath, c.PromptCachePath)
os.MkdirAll(filepath.Dir(p), 0755)
promptCachePath = p
}
return &pb.PredictOptions{
Temperature: float32(c.Temperature),
TopP: float32(c.TopP),
NDraft: c.NDraft,
TopK: int32(c.TopK),
Tokens: int32(c.Maxtokens),
Threads: int32(c.Threads),
PromptCacheAll: c.PromptCacheAll,
PromptCacheRO: c.PromptCacheRO,
PromptCachePath: promptCachePath,
F16KV: c.F16,
DebugMode: c.Debug,
Grammar: c.Grammar,
NegativePromptScale: c.NegativePromptScale,
RopeFreqBase: c.RopeFreqBase,
RopeFreqScale: c.RopeFreqScale,
NegativePrompt: c.NegativePrompt,
Mirostat: int32(c.LLMConfig.Mirostat),
MirostatETA: float32(c.LLMConfig.MirostatETA),
MirostatTAU: float32(c.LLMConfig.MirostatTAU),
Debug: c.Debug,
StopPrompts: c.StopWords,
Repeat: int32(c.RepeatPenalty),
NKeep: int32(c.Keep),
Batch: int32(c.Batch),
IgnoreEOS: c.IgnoreEOS,
Seed: int32(c.Seed),
FrequencyPenalty: float32(c.FrequencyPenalty),
MLock: c.MMlock,
MMap: c.MMap,
MainGPU: c.MainGPU,
TensorSplit: c.TensorSplit,
TailFreeSamplingZ: float32(c.TFZ),
TypicalP: float32(c.TypicalP),
}
}

39
api/backend/transcript.go Normal file
View file

@ -0,0 +1,39 @@
package backend
import (
"context"
"fmt"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*schema.Result, error) {
opts := modelOpts(c, o, []model.Option{
model.WithBackendString(model.WhisperBackend),
model.WithModel(c.Model),
model.WithContext(o.Context),
model.WithThreads(uint32(c.Threads)),
model.WithAssetDir(o.AssetsDestination),
})
whisperModel, err := o.Loader.BackendLoader(opts...)
if err != nil {
return nil, err
}
if whisperModel == nil {
return nil, fmt.Errorf("could not load whisper model")
}
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Dst: audio,
Language: language,
Threads: uint32(c.Threads),
})
}
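A hedged usage sketch for ModelTranscription; the audio path is an invented example, and `cfg`/`o` are assumed to be an already-populated config.Config and *options.Option:
res, err := ModelTranscription("/tmp/audio.wav", "en", o.Loader, cfg, o)
if err != nil {
	return err
}
// res holds the whisper transcription result for the given audio file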

79
api/backend/tts.go Normal file
View file

@ -0,0 +1,79 @@
package backend
import (
"context"
"fmt"
"os"
"path/filepath"
api_config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
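// generateUniqueFileName returns baseName+ext if no such file exists in dir yet,
// otherwise baseName_2+ext, baseName_3+ext, ... until an unused name is found.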
func generateUniqueFileName(dir, baseName, ext string) string {
counter := 1
fileName := baseName + ext
for {
filePath := filepath.Join(dir, fileName)
_, err := os.Stat(filePath)
if os.IsNotExist(err) {
return fileName
}
counter++
fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
}
}
func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}
opts := modelOpts(api_config.Config{}, o, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(o.Context),
model.WithAssetDir(o.AssetsDestination),
})
piperModel, err := o.Loader.BackendLoader(opts...)
if err != nil {
return "", nil, err
}
if piperModel == nil {
return "", nil, fmt.Errorf("could not load piper model")
}
if err := os.MkdirAll(o.AudioDir, 0755); err != nil {
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")
filePath := filepath.Join(o.AudioDir, fileName)
// If the model file is not empty, we pass it joined with the model path
modelPath := ""
if modelFile != "" {
if bb != model.TransformersMusicGen {
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
return "", nil, err
}
} else {
modelPath = modelFile
}
}
res, err := piperModel.TTS(context.Background(), &proto.TTSRequest{
Text: text,
Model: modelPath,
Dst: filePath,
})
return filePath, res, err
}
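A hedged usage sketch for ModelTTS; the voice file name is an invented example, and `o` is assumed to be an already-populated *options.Option. An empty backend string falls back to Piper, as the code above shows:
wavPath, _, err := ModelTTS("", "Hello from LocalAI", "en-us-voice.onnx", o.Loader, o)
if err != nil {
	return err
}
// wavPath points at a freshly created file inside o.AudioDir, e.g. piper.wav or piper_2.wav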

359
api/config/config.go Normal file
View file

@ -0,0 +1,359 @@
package api_config
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"sync"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
type Config struct {
PredictionOptions `yaml:"parameters"`
Name string `yaml:"name"`
F16 bool `yaml:"f16"`
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
PromptStrings, InputStrings []string `yaml:"-"`
InputToken [][]int `yaml:"-"`
functionCallString, functionCallNameString string `yaml:"-"`
FunctionsConfig Functions `yaml:"function"`
FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline"`
// AutoGPTQ specifics
AutoGPTQ AutoGPTQ `yaml:"autogptq"`
// Diffusers
Diffusers Diffusers `yaml:"diffusers"`
Step int `yaml:"step"`
// GRPC Options
GRPC GRPC `yaml:"grpc"`
// Vall-e-x
VallE VallE `yaml:"vall-e"`
// CUDA
// Explicitly enable CUDA or not (some backends might need it)
CUDA bool `yaml:"cuda"`
DownloadFiles []File `yaml:"download_files"`
}
type File struct {
Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"`
URI string `yaml:"uri" json:"uri"`
}
type VallE struct {
AudioPath string `yaml:"audio_path"`
}
type FeatureFlag map[string]*bool
func (ff FeatureFlag) Enabled(s string) bool {
v, exist := ff[s]
return exist && v != nil && *v
}
type GRPC struct {
Attempts int `yaml:"attempts"`
AttemptsSleepTime int `yaml:"attempts_sleep_time"`
}
type Diffusers struct {
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // Comma-separated list of request parameters to forward to the pipeline
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip"` // CLIP skip: number of final CLIP text-encoder layers to skip
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
}
type LLMConfig struct {
SystemPrompt string `yaml:"system_prompt"`
TensorSplit string `yaml:"tensor_split"`
MainGPU string `yaml:"main_gpu"`
RMSNormEps float32 `yaml:"rms_norm_eps"`
NGQA int32 `yaml:"ngqa"`
PromptCachePath string `yaml:"prompt_cache_path"`
PromptCacheAll bool `yaml:"prompt_cache_all"`
PromptCacheRO bool `yaml:"prompt_cache_ro"`
MirostatETA float64 `yaml:"mirostat_eta"`
MirostatTAU float64 `yaml:"mirostat_tau"`
Mirostat int `yaml:"mirostat"`
NGPULayers int `yaml:"gpu_layers"`
MMap bool `yaml:"mmap"`
MMlock bool `yaml:"mmlock"`
LowVRAM bool `yaml:"low_vram"`
Grammar string `yaml:"grammar"`
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`
ContextSize int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
MMProj string `yaml:"mmproj"`
RopeScaling string `yaml:"rope_scaling"`
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
}
type AutoGPTQ struct {
ModelBaseName string `yaml:"model_base_name"`
Device string `yaml:"device"`
Triton bool `yaml:"triton"`
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
}
type Functions struct {
DisableNoAction bool `yaml:"disable_no_action"`
NoActionFunctionName string `yaml:"no_action_function_name"`
NoActionDescriptionName string `yaml:"no_action_description_name"`
}
type TemplateConfig struct {
Chat string `yaml:"chat"`
ChatMessage string `yaml:"chat_message"`
Completion string `yaml:"completion"`
Edit string `yaml:"edit"`
Functions string `yaml:"function"`
}
type ConfigLoader struct {
configs map[string]Config
sync.Mutex
}
func (c *Config) SetFunctionCallString(s string) {
c.functionCallString = s
}
func (c *Config) SetFunctionCallNameString(s string) {
c.functionCallNameString = s
}
func (c *Config) ShouldUseFunctions() bool {
return ((c.functionCallString != "none" || c.functionCallString == "") || c.ShouldCallSpecificFunction())
}
func (c *Config) ShouldCallSpecificFunction() bool {
return len(c.functionCallNameString) > 0
}
func (c *Config) FunctionToCall() string {
return c.functionCallNameString
}
func defaultPredictOptions(modelFile string) PredictionOptions {
return PredictionOptions{
TopP: 0.7,
TopK: 80,
Maxtokens: 512,
Temperature: 0.9,
Model: modelFile,
}
}
func DefaultConfig(modelFile string) *Config {
return &Config{
PredictionOptions: defaultPredictOptions(modelFile),
}
}
func NewConfigLoader() *ConfigLoader {
return &ConfigLoader{
configs: make(map[string]Config),
}
}
func ReadConfigFile(file string) ([]*Config, error) {
c := &[]*Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return *c, nil
}
func ReadConfig(file string) (*Config, error) {
c := &Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return c, nil
}
func (cm *ConfigLoader) LoadConfigFile(file string) error {
cm.Lock()
defer cm.Unlock()
c, err := ReadConfigFile(file)
if err != nil {
return fmt.Errorf("cannot load config file: %w", err)
}
for _, cc := range c {
cm.configs[cc.Name] = *cc
}
return nil
}
func (cm *ConfigLoader) LoadConfig(file string) error {
cm.Lock()
defer cm.Unlock()
c, err := ReadConfig(file)
if err != nil {
return fmt.Errorf("cannot read config file: %w", err)
}
cm.configs[c.Name] = *c
return nil
}
func (cm *ConfigLoader) GetConfig(m string) (Config, bool) {
cm.Lock()
defer cm.Unlock()
v, exists := cm.configs[m]
return v, exists
}
func (cm *ConfigLoader) GetAllConfigs() []Config {
cm.Lock()
defer cm.Unlock()
var res []Config
for _, v := range cm.configs {
res = append(res, v)
}
return res
}
func (cm *ConfigLoader) ListConfigs() []string {
cm.Lock()
defer cm.Unlock()
var res []string
for k := range cm.configs {
res = append(res, k)
}
return res
}
// Preload prepares models that are not local files (e.g. URLs or Hugging Face repositories) by downloading them up front
func (cm *ConfigLoader) Preload(modelPath string) error {
cm.Lock()
defer cm.Unlock()
status := func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
}
log.Info().Msgf("Preloading models from %s", modelPath)
for i, config := range cm.configs {
// Download files and verify their SHA
for _, file := range config.DownloadFiles {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
return err
}
// Create file path
filePath := filepath.Join(modelPath, file.Filename)
if err := utils.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
return err
}
}
modelURL := config.PredictionOptions.Model
modelURL = utils.ConvertURL(modelURL)
if utils.LooksLikeURL(modelURL) {
// md5 of model name
md5Name := utils.MD5(modelURL)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
if err != nil {
return err
}
}
cc := cm.configs[i]
c := &cc
c.PredictionOptions.Model = md5Name
cm.configs[i] = *c
}
}
return nil
}
func (cm *ConfigLoader) LoadConfigs(path string) error {
cm.Lock()
defer cm.Unlock()
entries, err := os.ReadDir(path)
if err != nil {
return err
}
files := make([]fs.FileInfo, 0, len(entries))
for _, entry := range entries {
info, err := entry.Info()
if err != nil {
return err
}
files = append(files, info)
}
for _, file := range files {
// Only load YAML/YML configs; skip templates, .keep files and anything else
if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
continue
}
c, err := ReadConfig(filepath.Join(path, file.Name()))
if err == nil {
cm.configs[c.Name] = *c
}
}
return nil
}
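A minimal usage sketch of the loader above, assuming a models directory that contains one or more *.yaml model definitions:
cl := NewConfigLoader()
if err := cl.LoadConfigs("/models"); err != nil {
	return err
}
if err := cl.Preload("/models"); err != nil { // downloads any remote model URIs referenced by the configs
	return err
}
for _, name := range cl.ListConfigs() {
	cfg, _ := cl.GetConfig(name)
	log.Debug().Msgf("loaded model config %q (backend: %s)", name, cfg.Backend)
}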

56
api/config/config_test.go Normal file
View file

@ -0,0 +1,56 @@
package api_config_test
import (
"os"
. "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("Test cases for config related functions", func() {
var (
configFile string
)
Context("Test Read configuration functions", func() {
configFile = os.Getenv("CONFIG_FILE")
It("Test ReadConfigFile", func() {
config, err := ReadConfigFile(configFile)
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
// two configs in config.yaml
Expect(config[0].Name).To(Equal("list1"))
Expect(config[1].Name).To(Equal("list2"))
})
It("Test LoadConfigs", func() {
cm := NewConfigLoader()
opts := options.NewOptions()
modelLoader := model.NewModelLoader(os.Getenv("MODELS_PATH"))
options.WithModelLoader(modelLoader)(opts)
err := cm.LoadConfigs(opts.Loader.ModelPath)
Expect(err).To(BeNil())
Expect(cm.ListConfigs()).ToNot(BeNil())
// the loaded configs should include the gpt4all model's config
Expect(cm.ListConfigs()).To(ContainElements("gpt4all"))
// the loaded configs should include the gpt4all-2 model's config
Expect(cm.ListConfigs()).To(ContainElements("gpt4all-2"))
// the loaded configs should include the text-embedding-ada-002 model's config
Expect(cm.ListConfigs()).To(ContainElements("text-embedding-ada-002"))
// the loaded configs should include the rwkv_test model's config
Expect(cm.ListConfigs()).To(ContainElements("rwkv_test"))
// the loaded configs should include the whisper-1 model's config
Expect(cm.ListConfigs()).To(ContainElements("whisper-1"))
})
})
})

View file

@ -1,46 +1,46 @@
-package schema
+package api_config
 type PredictionOptions struct {
 // Also part of the OpenAI official spec
-BasicModelRequest `yaml:",inline"`
+Model string `json:"model" yaml:"model"`
 // Also part of the OpenAI official spec
 Language string `json:"language"`
-// Only for audio transcription
-Translate bool `json:"translate"`
 // Also part of the OpenAI official spec. use it for returning multiple results
 N int `json:"n"`
 // Common options between all the API calls, part of the OpenAI spec
-TopP *float64 `json:"top_p" yaml:"top_p"`
-TopK *int `json:"top_k" yaml:"top_k"`
-Temperature *float64 `json:"temperature" yaml:"temperature"`
-Maxtokens *int `json:"max_tokens" yaml:"max_tokens"`
+TopP float64 `json:"top_p" yaml:"top_p"`
+TopK int `json:"top_k" yaml:"top_k"`
+Temperature float64 `json:"temperature" yaml:"temperature"`
+Maxtokens int `json:"max_tokens" yaml:"max_tokens"`
 Echo bool `json:"echo"`
 // Custom parameters - not present in the OpenAI API
 Batch int `json:"batch" yaml:"batch"`
+F16 bool `json:"f16" yaml:"f16"`
 IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
 RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
-RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
 Keep int `json:"n_keep" yaml:"n_keep"`
+MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
+MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
+Mirostat int `json:"mirostat" yaml:"mirostat"`
 FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
-PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
-TFZ *float64 `json:"tfz" yaml:"tfz"`
-TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
-Seed *int `json:"seed" yaml:"seed"`
+TFZ float64 `json:"tfz" yaml:"tfz"`
+TypicalP float64 `json:"typical_p" yaml:"typical_p"`
+Seed int `json:"seed" yaml:"seed"`
 NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
 RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
 RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
 NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
+// AutoGPTQ
+UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
 // Diffusers
 ClipSkip int `json:"clip_skip" yaml:"clip_skip"`

View file

@ -0,0 +1,162 @@
package localai
import (
"context"
"fmt"
"strings"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/api/options"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
gopsutil "github.com/shirou/gopsutil/v3/process"
)
type BackendMonitorRequest struct {
Model string `json:"model" yaml:"model"`
}
type BackendMonitorResponse struct {
MemoryInfo *gopsutil.MemoryInfoStat
MemoryPercent float32
CPUPercent float64
}
type BackendMonitor struct {
configLoader *config.ConfigLoader
options *options.Option // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
}
func NewBackendMonitor(configLoader *config.ConfigLoader, options *options.Option) BackendMonitor {
return BackendMonitor{
configLoader: configLoader,
options: options,
}
}
func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*BackendMonitorResponse, error) {
config, exists := bm.configLoader.GetConfig(model)
var backend string
if exists {
backend = config.Model
} else {
// Last ditch effort: use it raw, see if a backend happens to match.
backend = model
}
if !strings.HasSuffix(backend, ".bin") {
backend = fmt.Sprintf("%s.bin", backend)
}
pid, err := bm.options.Loader.GetGRPCPID(backend)
if err != nil {
log.Error().Msgf("model %s : failed to find pid %+v", model, err)
return nil, err
}
// Name is slightly frightening but this does _not_ create a new process, rather it looks up an existing process by PID.
backendProcess, err := gopsutil.NewProcess(int32(pid))
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err)
return nil, err
}
memInfo, err := backendProcess.MemoryInfo()
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err)
return nil, err
}
memPercent, err := backendProcess.MemoryPercent()
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err)
return nil, err
}
cpuPercent, err := backendProcess.CPUPercent()
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err)
return nil, err
}
return &BackendMonitorResponse{
MemoryInfo: memInfo,
MemoryPercent: memPercent,
CPUPercent: cpuPercent,
}, nil
}
func (bm BackendMonitor) getModelLoaderIDFromCtx(c *fiber.Ctx) (string, error) {
input := new(BackendMonitorRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return "", err
}
config, exists := bm.configLoader.GetConfig(input.Model)
var backendId string
if exists {
backendId = config.Model
} else {
// Last ditch effort: use it raw, see if a backend happens to match.
backendId = input.Model
}
if !strings.HasSuffix(backendId, ".bin") {
backendId = fmt.Sprintf("%s.bin", backendId)
}
return backendId, nil
}
func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
backendId, err := bm.getModelLoaderIDFromCtx(c)
if err != nil {
return err
}
model := bm.options.Loader.CheckIsLoaded(backendId)
if model == "" {
return fmt.Errorf("backend %s is not currently loaded", backendId)
}
status, rpcErr := model.GRPC(false, nil).Status(context.TODO())
if rpcErr != nil {
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
val, slbErr := bm.SampleLocalBackendProcess(backendId)
if slbErr != nil {
return fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
}
return c.JSON(proto.StatusResponse{
State: proto.StatusResponse_ERROR,
Memory: &proto.MemoryUsageData{
Total: val.MemoryInfo.VMS,
Breakdown: map[string]uint64{
"gopsutil-RSS": val.MemoryInfo.RSS,
},
},
})
}
return c.JSON(status)
}
}
func BackendShutdownEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
backendId, err := bm.getModelLoaderIDFromCtx(c)
if err != nil {
return err
}
return bm.options.Loader.ShutdownModel(backendId)
}
}
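A hypothetical wiring sketch for these two handlers with Fiber; the route paths and HTTP methods below are assumptions for illustration, not necessarily the routes LocalAI registers:
bm := NewBackendMonitor(cl, opts) // cl: *config.ConfigLoader, opts: *options.Option
app.Get("/backend/monitor", BackendMonitorEndpoint(bm))
app.Post("/backend/shutdown", BackendShutdownEndpoint(bm))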

326
api/localai/gallery.go Normal file
View file

@ -0,0 +1,326 @@
package localai
import (
"context"
"fmt"
"os"
"slices"
"strings"
"sync"
json "github.com/json-iterator/go"
"gopkg.in/yaml.v3"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
)
type galleryOp struct {
req gallery.GalleryModel
id string
galleries []gallery.Gallery
galleryName string
}
type galleryOpStatus struct {
FileName string `json:"file_name"`
Error error `json:"error"`
Processed bool `json:"processed"`
Message string `json:"message"`
Progress float64 `json:"progress"`
TotalFileSize string `json:"file_size"`
DownloadedFileSize string `json:"downloaded_size"`
}
type galleryApplier struct {
modelPath string
sync.Mutex
C chan galleryOp
statuses map[string]*galleryOpStatus
}
func NewGalleryService(modelPath string) *galleryApplier {
return &galleryApplier{
modelPath: modelPath,
C: make(chan galleryOp),
statuses: make(map[string]*galleryOpStatus),
}
}
func prepareModel(modelPath string, req gallery.GalleryModel, cm *config.ConfigLoader, downloadStatus func(string, string, string, float64)) error {
config, err := gallery.GetGalleryConfigFromURL(req.URL)
if err != nil {
return err
}
config.Files = append(config.Files, req.AdditionalFiles...)
return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
}
func (g *galleryApplier) updateStatus(s string, op *galleryOpStatus) {
g.Lock()
defer g.Unlock()
g.statuses[s] = op
}
func (g *galleryApplier) getStatus(s string) *galleryOpStatus {
g.Lock()
defer g.Unlock()
return g.statuses[s]
}
func (g *galleryApplier) getAllStatus() map[string]*galleryOpStatus {
g.Lock()
defer g.Unlock()
return g.statuses
}
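// Start launches a background worker that consumes gallery operations from g.C until the
// context is cancelled, installing the requested models and updating the per-operation
// status map as downloads progress.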
func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
go func() {
for {
select {
case <-c.Done():
return
case op := <-g.C:
utils.ResetDownloadTimers()
g.updateStatus(op.id, &galleryOpStatus{Message: "processing", Progress: 0})
// updates the status with an error
updateError := func(e error) {
g.updateStatus(op.id, &galleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()})
}
// displayDownload displays the download progress
progressCallback := func(fileName string, current string, total string, percentage float64) {
g.updateStatus(op.id, &galleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current})
utils.DisplayDownloadFunction(fileName, current, total, percentage)
}
var err error
// if the request contains a gallery name, we apply the gallery from the gallery list
if op.galleryName != "" {
if strings.Contains(op.galleryName, "@") {
err = gallery.InstallModelFromGallery(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback)
} else {
err = gallery.InstallModelFromGalleryByName(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback)
}
} else {
err = prepareModel(g.modelPath, op.req, cm, progressCallback)
}
if err != nil {
updateError(err)
continue
}
// Reload models
err = cm.LoadConfigs(g.modelPath)
if err != nil {
updateError(err)
continue
}
err = cm.Preload(g.modelPath)
if err != nil {
updateError(err)
continue
}
g.updateStatus(op.id, &galleryOpStatus{Processed: true, Message: "completed", Progress: 100})
}
}
}()
}
type galleryModel struct {
gallery.GalleryModel `yaml:",inline"` // https://github.com/go-yaml/yaml/issues/63
ID string `json:"id"`
}
func processRequests(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error {
var err error
for _, r := range requests {
utils.ResetDownloadTimers()
if r.ID == "" {
err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
} else {
if strings.Contains(r.ID, "@") {
err = gallery.InstallModelFromGallery(
galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
} else {
err = gallery.InstallModelFromGalleryByName(
galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
}
}
}
return err
}
func ApplyGalleryFromFile(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error {
dat, err := os.ReadFile(s)
if err != nil {
return err
}
var requests []galleryModel
if err := yaml.Unmarshal(dat, &requests); err != nil {
return err
}
return processRequests(modelPath, s, cm, galleries, requests)
}
func ApplyGalleryFromString(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error {
var requests []galleryModel
err := json.Unmarshal([]byte(s), &requests)
if err != nil {
return err
}
return processRequests(modelPath, s, cm, galleries, requests)
}
/// Endpoint Service
type ModelGalleryService struct {
galleries []gallery.Gallery
modelPath string
galleryApplier *galleryApplier
}
type GalleryModel struct {
ID string `json:"id"`
gallery.GalleryModel
}
func CreateModelGalleryService(galleries []gallery.Gallery, modelPath string, galleryApplier *galleryApplier) ModelGalleryService {
return ModelGalleryService{
galleries: galleries,
modelPath: modelPath,
galleryApplier: galleryApplier,
}
}
func (mgs *ModelGalleryService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
status := mgs.galleryApplier.getStatus(c.Params("uuid"))
if status == nil {
return fmt.Errorf("could not find any status for ID")
}
return c.JSON(status)
}
}
func (mgs *ModelGalleryService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
return c.JSON(mgs.galleryApplier.getAllStatus())
}
}
func (mgs *ModelGalleryService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(GalleryModel)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
uuid, err := uuid.NewUUID()
if err != nil {
return err
}
mgs.galleryApplier.C <- galleryOp{
req: input.GalleryModel,
id: uuid.String(),
galleryName: input.ID,
galleries: mgs.galleries,
}
return c.JSON(struct {
ID string `json:"uuid"`
StatusURL string `json:"status"`
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
}
}
func (mgs *ModelGalleryService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
if err != nil {
return err
}
log.Debug().Msgf("Models found from galleries: %+v", models)
for _, m := range models {
log.Debug().Msgf("Model found from galleries: %+v", m)
}
dat, err := json.Marshal(models)
if err != nil {
return err
}
return c.Send(dat)
}
}
// NOTE: This is different (and much simpler) from the endpoint above: it only lists the model galleries that have been configured, not their contents.
func (mgs *ModelGalleryService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing model galleries %+v", mgs.galleries)
dat, err := json.Marshal(mgs.galleries)
if err != nil {
return err
}
return c.Send(dat)
}
}
func (mgs *ModelGalleryService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(gallery.Gallery)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
return gallery.Name == input.Name
}) {
return fmt.Errorf("%s already exists", input.Name)
}
dat, err := json.Marshal(mgs.galleries)
if err != nil {
return err
}
log.Debug().Msgf("Adding %+v to gallery list", *input)
mgs.galleries = append(mgs.galleries, *input)
return c.Send(dat)
}
}
func (mgs *ModelGalleryService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(gallery.Gallery)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
return gallery.Name == input.Name
}) {
return fmt.Errorf("%s is not currently registered", input.Name)
}
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
return gallery.Name == input.Name
})
return c.Send(nil)
}
}

32
api/localai/localai.go Normal file
View file

@ -0,0 +1,32 @@
package localai
import (
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/gofiber/fiber/v2"
)
type TTSRequest struct {
Model string `json:"model" yaml:"model"`
Input string `json:"input" yaml:"input"`
Backend string `json:"backend" yaml:"backend"`
}
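// Example request body (illustrative values only):
//
//	{"model": "en-us-voice.onnx", "input": "Hello from LocalAI", "backend": "piper"}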
func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
if err != nil {
return err
}
return c.Download(filePath)
}
}

399
api/openai/chat.go Normal file
View file

@ -0,0 +1,399 @@
package openai
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
emptyMessage := ""
id := uuid.New().String()
created := int(time.Now().Unix())
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
initialMessage := schema.OpenAIResponse{
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
resp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
Object: "chat.completion.chunk",
Usage: schema.OpenAIUsage{
PromptTokens: usage.Prompt,
CompletionTokens: usage.Completion,
TotalTokens: usage.Prompt + usage.Completion,
},
}
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
processFunctions := false
funcs := grammar.Functions{}
modelFile, input, err := readInput(c, o, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Configuration read: %+v", config)
// Allow the user to set custom actions via config file
// to be "embedded" in each model
noActionName := "answer"
noActionDescription := "use this action to answer without performing any action"
if config.FunctionsConfig.NoActionFunctionName != "" {
noActionName = config.FunctionsConfig.NoActionFunctionName
}
if config.FunctionsConfig.NoActionDescriptionName != "" {
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
}
if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
}
// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions")
processFunctions = true
noActionGrammar := grammar.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
"properties": map[string]interface{}{
"message": map[string]interface{}{
"type": "string",
"description": "The message to reply the user with",
}},
},
}
// Append the no action function
funcs = append(funcs, input.Functions...)
if !config.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {
funcs = funcs.Select(config.FunctionToCall())
}
// Update input grammar
jsStruct := funcs.ToJSONStructure()
config.Grammar = jsStruct.Grammar("")
} else if input.JSONFunctionGrammarObject != nil {
config.Grammar = input.JSONFunctionGrammarObject.Grammar("")
}
// functions are not supported in stream mode (yet?)
toStream := input.Stream && !processFunctions
log.Debug().Msgf("Parameters: %+v", config)
var predInput string
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
var content string
role := i.Role
// if this is a function call, we may want to customize the role so it is clearer that the assistant invoked a JSON action
// if an "assistant_function_call" role is defined, we use it, otherwise we keep the role passed in the request
if i.FunctionCall != nil && i.Role == "assistant" {
roleFn := "assistant_function_call"
r := config.Roles[roleFn]
if r != "" {
role = roleFn
}
}
r := config.Roles[role]
contentExists := i.Content != nil && i.StringContent != ""
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: i.StringContent,
MessageIndex: messageIndex,
}
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
if err != nil {
log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, config.TemplateConfig.ChatMessage, err)
} else {
if templatedChatMessage == "" {
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the fmt.Sprint fallback from running for this message
}
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
content = templatedChatMessage
}
}
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
if err == nil {
if contentExists {
content += "\n" + fmt.Sprint(r, " ", string(j))
} else {
content = fmt.Sprint(r, " ", string(j))
}
}
}
} else {
if contentExists {
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
if err == nil {
if contentExists {
content += "\n" + string(j)
} else {
content = string(j)
}
}
}
}
// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
if contentExists && role == "system" {
suppressConfigSystemPrompt = true
}
}
mess = append(mess, content)
}
predInput = strings.Join(mess, "\n")
log.Debug().Msgf("Prompt (before templating): %s", predInput)
if toStream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
// c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
}
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Chat != "" && !processFunctions {
templateFile = config.TemplateConfig.Chat
}
if config.TemplateConfig.Functions != "" && processFunctions {
templateFile = config.TemplateConfig.Functions
}
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
SuppressSystemPrompt: suppressConfigSystemPrompt,
Input: predInput,
Functions: funcs,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
log.Debug().Msgf("Template failed loading: %s", err.Error())
}
}
log.Debug().Msgf("Prompt (after templating): %s", predInput)
if processFunctions {
log.Debug().Msgf("Grammar: %+v", config.Grammar)
}
if toStream {
responses := make(chan schema.OpenAIResponse)
go process(predInput, input, config, o.Loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
usage := &schema.OpenAIUsage{}
for ev := range responses {
usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
if err != nil {
log.Debug().Msgf("Sending chunk failed: %v", err)
input.Cancel()
break
}
w.Flush()
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: "stop",
Index: 0,
Delta: &schema.Message{Content: &emptyMessage},
}},
Object: "chat.completion.chunk",
Usage: *usage,
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
return nil
}
result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) {
if processFunctions {
// As we have to change the result before processing, we can't stream the answer (yet?)
ss := map[string]interface{}{}
// This prevents newlines from breaking JSON parsing for clients
s = utils.EscapeNewLines(s)
json.Unmarshal([]byte(s), &ss)
log.Debug().Msgf("Function return: %s %+v", s, ss)
// The grammar defines the function name as "function", while OpenAI returns "name"
func_name := ss["function"]
// Similarly, while here arguments is a map[string]interface{}, OpenAI actually wants a stringified object
args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
d, _ := json.Marshal(args)
ss["arguments"] = string(d)
ss["name"] = func_name
// if do nothing, reply with a message
if func_name == noActionName {
log.Debug().Msgf("nothing to do, computing a reply")
// If there is a message that the LLM already sends as part of the JSON reply, use it
arguments := map[string]interface{}{}
json.Unmarshal([]byte(d), &arguments)
m, exists := arguments["message"]
if exists {
switch message := m.(type) {
case string:
if message != "" {
log.Debug().Msgf("Reply received from LLM: %s", message)
message = backend.Finetune(*config, predInput, message)
log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}})
return
}
}
}
log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
// Note: This costs another inference pass (in terms of CPU)
config.Grammar = ""
images := []string{}
for _, m := range input.Messages {
images = append(images, m.StringImages...)
}
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
if err != nil {
log.Error().Msgf("inference error: %s", err.Error())
return
}
prediction, err := predFunc()
if err != nil {
log.Error().Msgf("inference error: %s", err.Error())
return
}
fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response)
*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}})
} else {
// otherwise reply with the function call
*c = append(*c, schema.Choice{
FinishReason: "function_call",
Message: &schema.Message{Role: "assistant", FunctionCall: ss},
})
}
return
}
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
}, nil)
if err != nil {
return err
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "chat.completion",
Usage: schema.OpenAIUsage{
PromptTokens: tokenUsage.Prompt,
CompletionTokens: tokenUsage.Completion,
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
},
}
respData, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", respData)
// Return the prediction in the response body
return c.JSON(resp)
}
}
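For context, this handler is designed for the OpenAI-compatible chat completions route; a hypothetical registration sketch (the actual routes are wired up elsewhere in the API setup, not in this file):
app.Post("/v1/chat/completions", ChatEndpoint(cm, options)) // cm: *config.ConfigLoader, options: *options.Option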

199
api/openai/completion.go Normal file
View file

@ -0,0 +1,199 @@
package openai
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
"time"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
// https://platform.openai.com/docs/api-reference/completions
func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
id := uuid.New().String()
created := int(time.Now().Unix())
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
resp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
Text: s,
},
},
Object: "text_completion",
Usage: schema.OpenAIUsage{
PromptTokens: usage.Prompt,
CompletionTokens: usage.Completion,
TotalTokens: usage.Prompt + usage.Completion,
},
}
log.Debug().Msgf("Sending goroutine: %s", s)
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
modelFile, input, err := readInput(c, o, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("`input`: %+v", input)
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
}
log.Debug().Msgf("Parameter Config: %+v", config)
if input.Stream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
//c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
}
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
if input.Stream {
if len(config.PromptStrings) > 1 {
return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
}
predInput := config.PromptStrings[0]
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
Input: predInput,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
}
responses := make(chan schema.OpenAIResponse)
go process(predInput, input, config, o.Loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
for ev := range responses {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
w.Flush()
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
FinishReason: "stop",
},
},
Object: "text_completion",
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
return nil
}
var result []schema.Choice
totalTokenUsage := backend.TokenUsage{}
for k, i := range config.PromptStrings {
if templateFile != "" {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(
input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
}, nil)
if err != nil {
return err
}
totalTokenUsage.Prompt += tokenUsage.Prompt
totalTokenUsage.Completion += tokenUsage.Completion
result = append(result, r...)
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "text_completion",
Usage: schema.OpenAIUsage{
PromptTokens: totalTokenUsage.Prompt,
CompletionTokens: totalTokenUsage.Completion,
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
},
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}

94
api/openai/edit.go Normal file
View file

@ -0,0 +1,94 @@
package openai
import (
"encoding/json"
"fmt"
"time"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
)
func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
modelFile, input, err := readInput(c, o, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
}
var result []schema.Choice
totalTokenUsage := backend.TokenUsage{}
for _, i := range config.InputStrings {
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s})
}, nil)
if err != nil {
return err
}
totalTokenUsage.Prompt += tokenUsage.Prompt
totalTokenUsage.Completion += tokenUsage.Completion
result = append(result, r...)
}
id := uuid.New().String()
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "edit",
Usage: schema.OpenAIUsage{
PromptTokens: totalTokenUsage.Prompt,
CompletionTokens: totalTokenUsage.Completion,
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
},
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}

Some files were not shown because too many files have changed in this diff.