diff --git a/examples/bruno/LocalAI Test Requests/Sound Generation/musicgen.bru b/.bruno/LocalAI Test Requests/Sound Generation/musicgen.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/Sound Generation/musicgen.bru rename to .bruno/LocalAI Test Requests/Sound Generation/musicgen.bru diff --git a/examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru b/.bruno/LocalAI Test Requests/backend monitor/backend monitor.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru rename to .bruno/LocalAI Test Requests/backend monitor/backend monitor.bru diff --git a/examples/bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru b/.bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru rename to .bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru diff --git a/examples/bruno/LocalAI Test Requests/bruno.json b/.bruno/LocalAI Test Requests/bruno.json similarity index 100% rename from examples/bruno/LocalAI Test Requests/bruno.json rename to .bruno/LocalAI Test Requests/bruno.json diff --git a/examples/bruno/LocalAI Test Requests/environments/localhost.bru b/.bruno/LocalAI Test Requests/environments/localhost.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/environments/localhost.bru rename to .bruno/LocalAI Test Requests/environments/localhost.bru diff --git a/examples/bruno/LocalAI Test Requests/get models list.bru b/.bruno/LocalAI Test Requests/get models list.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/get models list.bru rename to .bruno/LocalAI Test Requests/get models list.bru diff --git a/examples/bruno/LocalAI Test Requests/image generation/Generate image.bru b/.bruno/LocalAI Test Requests/image generation/Generate image.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/image generation/Generate image.bru rename to .bruno/LocalAI Test Requests/image generation/Generate image.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions.bru b/.bruno/LocalAI Test Requests/llm text/-completions.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/-completions.bru rename to .bruno/LocalAI Test Requests/llm text/-completions.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/-edits.bru b/.bruno/LocalAI Test Requests/llm text/-edits.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/-edits.bru rename to .bruno/LocalAI Test Requests/llm text/-edits.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/-embeddings.bru b/.bruno/LocalAI Test Requests/llm text/-embeddings.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/-embeddings.bru rename to .bruno/LocalAI Test Requests/llm text/-embeddings.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru b/.bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru rename to .bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru b/.bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru 
similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru rename to .bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru b/.bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru rename to .bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/add model gallery.bru b/.bruno/LocalAI Test Requests/model gallery/add model gallery.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/add model gallery.bru rename to .bruno/LocalAI Test Requests/model gallery/add model gallery.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/delete model gallery.bru b/.bruno/LocalAI Test Requests/model gallery/delete model gallery.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/delete model gallery.bru rename to .bruno/LocalAI Test Requests/model gallery/delete model gallery.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru b/.bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru rename to .bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru b/.bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru rename to .bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru diff --git a/.bruno/LocalAI Test Requests/model gallery/model delete.bru b/.bruno/LocalAI Test Requests/model gallery/model delete.bru new file mode 100644 index 00000000..b320dae3 --- /dev/null +++ b/.bruno/LocalAI Test Requests/model gallery/model delete.bru @@ -0,0 +1,11 @@ +meta { + name: model delete + type: http + seq: 7 +} + +post { + url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries + body: none + auth: none +} diff --git a/examples/bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru b/.bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru rename to .bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/model gallery apply.bru b/.bruno/LocalAI Test Requests/model gallery/model gallery apply.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/model gallery apply.bru rename to .bruno/LocalAI Test Requests/model gallery/model gallery apply.bru diff --git a/.bruno/LocalAI Test Requests/transcription/gb1.ogg b/.bruno/LocalAI Test Requests/transcription/gb1.ogg new file mode 100644 index 00000000..df22d636 Binary files /dev/null and b/.bruno/LocalAI Test Requests/transcription/gb1.ogg differ diff --git a/.bruno/LocalAI Test Requests/transcription/transcribe.bru b/.bruno/LocalAI Test Requests/transcription/transcribe.bru new file mode 100644 index 00000000..831aad90 --- /dev/null +++ b/.bruno/LocalAI 
Test Requests/transcription/transcribe.bru @@ -0,0 +1,16 @@ +meta { + name: transcribe + type: http + seq: 1 +} + +post { + url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions + body: multipartForm + auth: none +} + +body:multipart-form { + file: @file(transcription/gb1.ogg) + model: whisper-1 +} diff --git a/examples/bruno/LocalAI Test Requests/tts/-tts.bru b/.bruno/LocalAI Test Requests/tts/-tts.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/tts/-tts.bru rename to .bruno/LocalAI Test Requests/tts/-tts.bru diff --git a/examples/bruno/LocalAI Test Requests/tts/musicgen.bru b/.bruno/LocalAI Test Requests/tts/musicgen.bru similarity index 86% rename from examples/bruno/LocalAI Test Requests/tts/musicgen.bru rename to .bruno/LocalAI Test Requests/tts/musicgen.bru index a720b8b1..900173eb 100644 --- a/examples/bruno/LocalAI Test Requests/tts/musicgen.bru +++ b/.bruno/LocalAI Test Requests/tts/musicgen.bru @@ -16,7 +16,7 @@ headers { body:json { { - "backend": "transformers-musicgen", + "backend": "transformers", "model": "facebook/musicgen-small", "input": "80s Synths playing Jazz" } diff --git a/.devcontainer-scripts/utils.sh b/.devcontainer-scripts/utils.sh index 98ac063c..8416d43d 100644 --- a/.devcontainer-scripts/utils.sh +++ b/.devcontainer-scripts/utils.sh @@ -9,6 +9,7 @@ # Param 2: email # config_user() { + echo "Configuring git for $1 <$2>" local gcn=$(git config --global user.name) if [ -z "${gcn}" ]; then echo "Setting up git user / remote" @@ -24,6 +25,7 @@ config_user() { # Param 2: remote url # config_remote() { + echo "Adding git remote and fetching $2 as $1" local gr=$(git remote -v | grep $1) if [ -z "${gr}" ]; then git remote add $1 $2 diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml index 8795d64d..7ef22099 100644 --- a/.devcontainer/docker-compose-devcontainer.yml +++ b/.devcontainer/docker-compose-devcontainer.yml @@ -7,7 +7,7 @@ services: args: - FFMPEG=true - IMAGE_TYPE=extras - - GO_TAGS=stablediffusion p2p tts + - GO_TAGS=p2p tts env_file: - ../.env ports: diff --git a/.env b/.env index 9e5dbd79..ee8db74e 100644 --- a/.env +++ b/.env @@ -38,12 +38,12 @@ ## Uncomment and set to true to enable rebuilding from source # REBUILD=true -## Enable go tags, available: stablediffusion, tts -## stablediffusion: image generation with stablediffusion +## Enable go tags, available: p2p, tts +## p2p: enable distributed inferencing ## tts: enables text-to-speech with go-piper ## (requires REBUILD=true) # -# GO_TAGS=stablediffusion +# GO_TAGS=p2p ## Path where to store generated images # LOCALAI_IMAGE_PATH=/tmp/generated/images @@ -82,6 +82,15 @@ # Enable to allow p2p mode # LOCALAI_P2P=true +# Enable to use federated mode +# LOCALAI_FEDERATED=true + +# Enable to start federation server +# FEDERATED_SERVER=true + +# Define to use federation token +# TOKEN="" + ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time diff --git a/.gitattributes b/.gitattributes index dfdb8b77..ef774d4c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ *.sh text eol=lf +backend/cpp/llama/*.hpp linguist-vendored \ No newline at end of file diff --git a/.github/check_and_update.py b/.github/check_and_update.py index dcf1d04a..704b658e 100644 --- a/.github/check_and_update.py +++ b/.github/check_and_update.py @@ -29,9 +29,14 @@ def calculate_sha256(file_path): def manual_safety_check_hf(repo_id): scanResponse = requests.get('https://huggingface.co/api/models/' + 
repo_id + "/scan") scan = scanResponse.json() - if scan['hasUnsafeFile']: - return scan - return None + # Check if 'hasUnsafeFile' exists in the response + if 'hasUnsafeFile' in scan: + if scan['hasUnsafeFile']: + return scan + else: + return None + else: + return None download_type, repo_id_or_url = parse_uri(uri) diff --git a/.github/ci/modelslist.go b/.github/ci/modelslist.go index cdc31703..719cd094 100644 --- a/.github/ci/modelslist.go +++ b/.github/ci/modelslist.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "os" + "github.com/microcosm-cc/bluemonday" "gopkg.in/yaml.v3" ) @@ -279,6 +280,12 @@ func main() { return } + // Ensure that all arbitrary text content is sanitized before display + for i, m := range models { + models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name) + models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description) + } + // render the template data := struct { Models []*GalleryModel diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5016ebdb..570ac569 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,6 +9,8 @@ updates: directory: "/" schedule: interval: "weekly" + ignore: + - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto" - package-ecosystem: "github-actions" # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) directory: "/" @@ -79,14 +81,6 @@ updates: directory: "/backend/python/transformers" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/transformers-musicgen" - schedule: - interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/vall-e-x" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/vllm" schedule: diff --git a/.github/labeler.yml b/.github/labeler.yml index 687a90d1..7be4dec9 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,6 +1,15 @@ enhancements: - head-branch: ['^feature', 'feature'] +dependencies: +- any: + - changed-files: + - any-glob-to-any-file: 'Makefile' + - changed-files: + - any-glob-to-any-file: '*.mod' + - changed-files: + - any-glob-to-any-file: '*.sum' + kind/documentation: - any: - changed-files: diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index c94a134d..092110df 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -12,23 +12,14 @@ jobs: - repository: "ggerganov/llama.cpp" variable: "CPPLLAMA_VERSION" branch: "master" - - repository: "go-skynet/go-ggml-transformers.cpp" - variable: "GOGGMLTRANSFORMERS_VERSION" - branch: "master" - - repository: "donomii/go-rwkv.cpp" - variable: "RWKV_VERSION" - branch: "main" - repository: "ggerganov/whisper.cpp" variable: "WHISPER_CPP_VERSION" branch: "master" - - repository: "go-skynet/go-bert.cpp" - variable: "BERT_VERSION" - branch: "master" - - repository: "go-skynet/bloomz.cpp" - variable: "BLOOMZ_VERSION" + - repository: "PABannier/bark.cpp" + variable: "BARKCPP_VERSION" branch: "main" - - repository: "mudler/go-ggllm.cpp" - variable: "GOGGLLM_VERSION" + - repository: "leejet/stable-diffusion.cpp" + variable: "STABLEDIFFUSION_GGML_VERSION" branch: "master" - repository: "mudler/go-stable-diffusion" variable: "STABLEDIFFUSION_VERSION" diff --git a/.github/workflows/checksum_checker.yaml b/.github/workflows/checksum_checker.yaml index 7b85ad35..13244334 100644 --- a/.github/workflows/checksum_checker.yaml +++ 
b/.github/workflows/checksum_checker.yaml @@ -23,7 +23,7 @@ jobs: sudo pip install --upgrade pip pip install huggingface_hub - name: 'Setup yq' - uses: dcarbone/install-yq-action@v1.1.1 + uses: dcarbone/install-yq-action@v1.3.1 with: version: 'v4.44.2' download-compressed: true diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 951e65e1..5bcd84f6 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v2.2.0 + uses: dependabot/fetch-metadata@v2.3.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" skip-commit-verification: true diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml index 7b5c0484..00d51322 100644 --- a/.github/workflows/deploy-explorer.yaml +++ b/.github/workflows/deploy-explorer.yaml @@ -33,7 +33,7 @@ jobs: run: | CGO_ENABLED=0 make build-api - name: rm - uses: appleboy/ssh-action@v1.0.3 + uses: appleboy/ssh-action@v1.2.0 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} @@ -53,7 +53,7 @@ jobs: rm: true target: ./local-ai - name: restarting - uses: appleboy/ssh-action@v1.0.3 + uses: appleboy/ssh-action@v1.2.0 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 0c2a7670..8283964c 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 + - base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 runs-on: 'ubuntu-latest' platforms: 'linux/amd64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 395d7761..722d0f41 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -13,6 +13,78 @@ concurrency: cancel-in-progress: true jobs: + hipblas-jobs: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} + aio: ${{ matrix.aio }} + makeflags: ${{ matrix.makeflags }} + latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + # Pushing with all jobs in parallel + # eats the bandwidth of all the nodes + max-parallel: 2 + matrix: + include: + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-hipblas-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + aio: "-aio-gpu-hipblas" + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + latest-image: 'latest-gpu-hipblas' + latest-image-aio: 'latest-aio-gpu-hipblas' + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 
--output-sync=target" + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas' + ffmpeg: 'false' + image-type: 'extras' + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-core' + ffmpeg: 'false' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" self-hosted-jobs: uses: ./.github/workflows/image_build.yml with: @@ -39,7 +111,7 @@ jobs: strategy: # Pushing with all jobs in parallel # eats the bandwidth of all the nodes - max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }} + max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }} matrix: include: # Extra images @@ -122,29 +194,6 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-hipblas-ffmpeg' - ffmpeg: 'true' - image-type: 'extras' - aio: "-aio-gpu-hipblas" - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - latest-image: 'latest-gpu-hipblas' - latest-image-aio: 'latest-aio-gpu-hipblas' - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas' - ffmpeg: 'false' - image-type: 'extras' - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'auto' @@ -212,26 +261,6 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas-core' - ffmpeg: 'false' - image-type: 'core' - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" core-image-build: uses: ./.github/workflows/image_build.yml @@ -251,6 +280,7 @@ jobs: makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} latest-image-aio: ${{ matrix.latest-image-aio }} + skip-drivers: ${{ matrix.skip-drivers }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -272,6 +302,7 @@ jobs: latest-image: 'latest-cpu' latest-image-aio: 'latest-aio-cpu' makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -283,6 +314,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=4 --output-sync=target" 
+ skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -294,6 +326,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -305,6 +338,7 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -315,6 +349,7 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - build-type: 'vulkan' platforms: 'linux/amd64' @@ -325,4 +360,45 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" + gh-runner: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + aio: ${{ matrix.aio }} + base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} + makeflags: ${{ matrix.makeflags }} + latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} + skip-drivers: ${{ matrix.skip-drivers }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + matrix: + include: + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'false' + tag-suffix: '-nvidia-l4t-arm64-core' + latest-image: 'latest-nvidia-l4t-arm64-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'true' \ No newline at end of file diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 4a5735e5..9ad612b6 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -49,6 +49,10 @@ on: description: 'FFMPEG' default: '' type: string + skip-drivers: + description: 'Skip drivers by default' + default: 'false' + type: string image-type: description: 'Image type' default: '' @@ -234,6 +238,7 @@ jobs: GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} + SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . file: ./Dockerfile cache-from: type=gha @@ -262,6 +267,7 @@ jobs: GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} + SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . 
file: ./Dockerfile cache-from: type=gha diff --git a/.github/workflows/notify-models.yaml b/.github/workflows/notify-models.yaml index d6a7b210..b84e10e0 100644 --- a/.github/workflows/notify-models.yaml +++ b/.github/workflows/notify-models.yaml @@ -18,7 +18,7 @@ jobs: with: model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface:///file" # Check the PR diff using the current branch and the base branch of the PR - - uses: GrantBirki/git-diff-action@v2.7.0 + - uses: GrantBirki/git-diff-action@v2.8.0 id: git-diff-action with: json_diff_file_output: diff.json @@ -79,7 +79,7 @@ jobs: args: ${{ steps.summarize.outputs.message }} - name: Setup tmate session if fails if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -99,7 +99,7 @@ jobs: docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done # Check the PR diff using the current branch and the base branch of the PR - - uses: GrantBirki/git-diff-action@v2.7.0 + - uses: GrantBirki/git-diff-action@v2.8.0 id: git-diff-action with: json_diff_file_output: diff.json @@ -161,7 +161,7 @@ jobs: TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} - name: Setup tmate session if fails if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a1318b19..e133ecb6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -123,7 +123,7 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -232,45 +232,12 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 limit-access-to-actor: true - build-stablediffusion: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - uses: actions/setup-go@v5 - with: - go-version: '1.21.x' - cache: false - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - - name: Build stablediffusion - run: | - export PATH=$PATH:$GOPATH/bin - make backend-assets/grpc/stablediffusion - mkdir -p release && cp backend-assets/grpc/stablediffusion release - env: - GO_TAGS: stablediffusion - - uses: actions/upload-artifact@v4 - with: - name: stablediffusion - path: release/ - - name: Release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* + build-macOS-x86_64: runs-on: macos-13 @@ -308,7 +275,7 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 
@@ -350,7 +317,7 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index 08d7dfc6..228ac1d9 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -18,7 +18,7 @@ jobs: if: ${{ github.actor != 'dependabot[bot]' }} - name: Run Gosec Security Scanner if: ${{ github.actor != 'dependabot[bot]' }} - uses: securego/gosec@v2.21.2 + uses: securego/gosec@v2.22.0 with: # we let the report trigger content trigger a failure using the GitHub Security features. args: '-no-fail -fmt sarif -out results.sarif ./...' diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 8b37b52d..7f2445c8 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -35,30 +35,6 @@ jobs: run: | make --jobs=5 --output-sync=target -C backend/python/transformers make --jobs=5 --output-sync=target -C backend/python/transformers test - - tests-sentencetransformers: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test sentencetransformers - run: | - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test - - tests-rerankers: runs-on: ubuntu-latest steps: @@ -102,71 +78,27 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/diffusers make --jobs=5 --output-sync=target -C backend/python/diffusers test - tests-parler-tts: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 + # tests-transformers-musicgen: + # runs-on: ubuntu-latest + # steps: + # - name: Clone + # uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Dependencies + # run: | + # sudo apt-get update + # sudo apt-get install build-essential ffmpeg + # # Install UV + # curl -LsSf https://astral.sh/uv/install.sh | sh + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip + # sudo apt-get install -y libopencv-dev + # pip install --user --no-cache-dir grpcio-tools==1.64.1 - - name: Test parler-tts - run: | - make --jobs=5 --output-sync=target -C backend/python/parler-tts - make --jobs=5 --output-sync=target -C backend/python/parler-tts test - - tests-openvoice: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip 
install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test openvoice - run: | - make --jobs=5 --output-sync=target -C backend/python/openvoice - make --jobs=5 --output-sync=target -C backend/python/openvoice test - - tests-transformers-musicgen: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test transformers-musicgen - run: | - make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen - make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test + # - name: Test transformers-musicgen + # run: | + # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen + # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test # tests-bark: # runs-on: ubuntu-latest @@ -253,26 +185,6 @@ jobs: # run: | # make --jobs=5 --output-sync=target -C backend/python/vllm # make --jobs=5 --output-sync=target -C backend/python/vllm test - tests-vallex: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - name: Test vall-e-x - run: | - make --jobs=5 --output-sync=target -C backend/python/vall-e-x - make --jobs=5 --output-sync=target -C backend/python/vall-e-x test tests-coqui: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2af3fd00..444c89fb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,15 +100,12 @@ jobs: # The python3-grpc-tools package in 22.04 is too old pip install --user grpcio-tools - sudo rm -rfv /usr/bin/conda || true - PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers + make -C backend/python/transformers # Pre-build piper before we start tests in order to have shared libraries in place make sources/go-piper && \ GO_TAGS="tts" make -C sources/go-piper piper.o && \ - sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \ - # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build + sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. 
/usr/lib/ env: CUDA_VERSION: 12-4 - name: Cache grpc @@ -130,10 +127,10 @@ jobs: cd grpc && cd cmake/build && sudo make --jobs 5 install - name: Test run: | - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test + PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -178,17 +175,26 @@ jobs: uses: actions/checkout@v4 with: submodules: true + - name: Dependencies + run: | + # Install protoc + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + PATH="$PATH:$HOME/go/bin" make protogen-go - name: Build images run: | docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile . BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio - name: Test run: | - LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \ + PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \ make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -215,7 +221,7 @@ jobs: - name: Dependencies run: | brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm - pip install --user --no-cache-dir grpcio-tools==1.64.1 + pip install --user --no-cache-dir grpcio-tools - name: Test run: | export C_INCLUDE_PATH=/usr/local/include @@ -226,7 +232,7 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 diff --git a/.gitignore b/.gitignore index 65eb9257..d821c435 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /sources/ __pycache__/ *.a +*.o get-sources prepare-sources /backend/cpp/llama/grpc-server @@ -12,7 +13,6 @@ prepare-sources go-ggml-transformers go-gpt2 -go-rwkv whisper.cpp /bloomz go-bert diff --git a/.vscode/launch.json b/.vscode/launch.json index 50493421..f5e91508 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -26,7 +26,7 @@ "LOCALAI_P2P": "true", "LOCALAI_FEDERATED": "true" }, - "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"], + "buildFlags": ["-tags", "p2p tts", "-v"], "envFile": "${workspaceFolder}/.env", "cwd": "${workspaceRoot}" } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 593ad0ed..9fb20012 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,8 +15,6 @@ Thank you for your interest in contributing to LocalAI! 
We appreciate your time - [Documentation](#documentation) - [Community and Communication](#community-and-communication) - - ## Getting Started ### Prerequisites @@ -54,7 +52,7 @@ If you find a bug, have a feature request, or encounter any issues, please check ## Coding Guidelines -- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here. +- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here. ## Testing @@ -84,5 +82,3 @@ We are welcome the contribution of the documents, please open new PR or create a - You can reach out via the Github issue tracker. - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions) - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy) - ---- diff --git a/Dockerfile b/Dockerfile index f08cb9a0..2f2bcafa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,25 +9,38 @@ FROM ${BASE_IMAGE} AS requirements-core USER root ARG GO_VERSION=1.22.6 +ARG CMAKE_VERSION=3.26.4 +ARG CMAKE_FROM_SOURCE=false ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" - +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh" RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ ccache \ ca-certificates \ - cmake \ - curl \ + curl libssl-dev \ git \ unzip upx-ucl && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Install CMake (the version in 22.04 is too old) +RUN <

diff --git a/README.md b/README.md
+ [Trendshift badge: mudler%2FLocalAI | Trendshift]

+ > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) > -> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) +> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) @@ -56,41 +60,59 @@ curl https://localai.io/install.sh | sh Or run with docker: ```bash +# CPU only image: +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu + +# Nvidia GPU: +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 + +# CPU and GPU image (bigger size): +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest + +# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/) docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu -# Alternative images: -# - if you have an Nvidia GPU: -# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 -# - without preconfigured models -# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest -# - without preconfigured models for Nvidia GPUs -# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 +``` + +To load models: + +```bash +# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io) +local-ai run llama-3.2-1b-instruct:q4_k_m +# Start LocalAI with the phi-2 model directly from huggingface +local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf +# Install and run a model from the Ollama OCI registry +local-ai run ollama://gemma:2b +# Run a model from a configuration file +local-ai run https://gist.githubusercontent.com/.../phi-2.yaml +# Install and run a model from a standard OCI registry (e.g., Docker Hub) +local-ai run oci://localai/phi-2:latest ``` [💻 Getting started](https://localai.io/basics/getting_started/index.html) -## 🔥🔥 Hot topics / Roadmap - 
-[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +## 📰 Latest project news +- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603 +- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 ) +- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 ) +- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204 +- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples) - Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io) -- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723 -- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io -- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628 +- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 - May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/ -- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334 -- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328 - May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324 -- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222 - April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121 -Hot topics (looking for contributors): +Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) -- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 +## 🔥🔥 Hot topics (looking for help): + +- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729 +- Realtime API https://github.com/mudler/LocalAI/issues/3714 - WebUI improvements: https://github.com/mudler/LocalAI/issues/2156 - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - Assistant API: https://github.com/mudler/LocalAI/issues/1273 -- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999 - Vulkan: https://github.com/mudler/LocalAI/issues/1647 - Anthropic API: https://github.com/mudler/LocalAI/issues/1808 @@ -98,10 +120,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl ## 🚀 [Features](https://localai.io/features/) -- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table)) +- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... 
[:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table)) - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/) - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`) -- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation) +- 🎨 [Image generation](https://localai.io/features/image-generation) - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) @@ -109,6 +131,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl - 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 📈 [Reranker API](https://localai.io/features/reranker/) - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) +- 🔊 Voice activity detection (Silero-VAD support) - 🌍 Integrated WebUI! ## 💻 Usage @@ -131,6 +154,7 @@ Model galleries Other: - Helm chart https://github.com/go-skynet/helm-charts - VSCode extension https://github.com/badgooooor/localai-vscode-plugin +- Langchain: https://python.langchain.com/docs/integrations/providers/localai/ - Terminal utility https://github.com/djcopley/ShellOracle - Local Smart assistant https://github.com/mudler/LocalAGI - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision @@ -138,6 +162,9 @@ Other: - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot +- Another Telegram Bot https://github.com/JackBekket/Hellper +- Auto-documentation https://github.com/JackBekket/Reflexia +- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper - Github Actions: https://github.com/marketplace/actions/start-localai - Examples: https://github.com/mudler/LocalAI/tree/master/examples/ @@ -212,7 +239,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/antimatter15/alpaca.cpp - https://github.com/EdVince/Stable-Diffusion-NCNN - https://github.com/ggerganov/whisper.cpp -- https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper ## 🤗 Contributors diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml index 8576746f..9aa845b0 100644 --- a/aio/cpu/embeddings.yaml +++ b/aio/cpu/embeddings.yaml @@ -1,7 +1,7 @@ name: text-embedding-ada-002 -backend: bert-embeddings +embeddings: true parameters: - model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin + model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf usage: | You can test this model with curl like this: diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml index 9de88a3f..ef374572 100644 --- a/aio/cpu/image-gen.yaml +++ b/aio/cpu/image-gen.yaml @@ -1,56 +1,17 @@ name: stablediffusion -backend: stablediffusion +backend: stablediffusion-ggml +cfg_scale: 4.5 + +options: +- sampler:euler parameters: - model: stablediffusion_assets - -license: "BSD-3" -urls: -- https://github.com/EdVince/Stable-Diffusion-NCNN -- 
https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE - -description: | - Stable Diffusion in NCNN with c++, supported txt2img and img2img + model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf +step: 25 download_files: -- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" - sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" - sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" - sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" -- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" - sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" -- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" - sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" - sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" - sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" -- filename: "stablediffusion_assets/log_sigmas.bin" - sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" -- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" - sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" - sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" - sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" -- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" - sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" -- filename: "stablediffusion_assets/vocab.txt" - sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: 
"https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" +- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" + sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f" + uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" usage: | curl http://localhost:8080/v1/images/generations \ diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml index 3b466d37..4052fa39 100644 --- a/aio/cpu/vision.yaml +++ b/aio/cpu/vision.yaml @@ -2,7 +2,7 @@ backend: llama-cpp context_size: 4096 f16: true mmap: true -name: gpt-4-vision-preview +name: gpt-4o roles: user: "USER:" diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml index db039279..4f5e10b3 100644 --- a/aio/gpu-8g/vision.yaml +++ b/aio/gpu-8g/vision.yaml @@ -2,7 +2,7 @@ backend: llama-cpp context_size: 4096 f16: true mmap: true -name: gpt-4-vision-preview +name: gpt-4o roles: user: "USER:" diff --git a/aio/intel/vision.yaml b/aio/intel/vision.yaml index 52843162..37067362 100644 --- a/aio/intel/vision.yaml +++ b/aio/intel/vision.yaml @@ -2,7 +2,7 @@ backend: llama-cpp context_size: 4096 mmap: false f16: false -name: gpt-4-vision-preview +name: gpt-4o roles: user: "USER:" diff --git a/backend/backend.proto b/backend/backend.proto index 4a8f31a9..bd75adc5 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -26,6 +26,21 @@ service Backend { rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {} rpc Rerank(RerankRequest) returns (RerankResult) {} + + rpc GetMetrics(MetricsRequest) returns (MetricsResponse); + + rpc VAD(VADRequest) returns (VADResponse) {} +} + +// Define the empty request +message MetricsRequest {} + +message MetricsResponse { + int32 slot_id = 1; + string prompt_json_for_slot = 2; // Stores the prompt as a JSON string. 
+ float tokens_per_second = 3; + int32 tokens_generated = 4; + int32 prompt_tokens_processed = 5; } message RerankRequest { @@ -134,6 +149,9 @@ message PredictOptions { repeated string Images = 42; bool UseTokenizerTemplate = 43; repeated Message Messages = 44; + repeated string Videos = 45; + repeated string Audios = 46; + string CorrelationId = 47; } // The response message containing the result @@ -141,6 +159,13 @@ message Reply { bytes message = 1; int32 tokens = 2; int32 prompt_tokens = 3; + double timing_prompt_processing = 4; + double timing_token_generation = 5; +} + +message GrammarTrigger { + string word = 1; + bool at_start = 2; } message ModelOptions { @@ -203,6 +228,7 @@ message ModelOptions { int32 SwapSpace = 53; int32 MaxModelLen = 54; int32 TensorParallelSize = 55; + string LoadFormat = 58; string MMProj = 41; @@ -216,6 +242,18 @@ message ModelOptions { bool FlashAttention = 56; bool NoKVOffload = 57; + + string ModelPath = 59; + + repeated string LoraAdapters = 60; + repeated float LoraScales = 61; + + repeated string Options = 62; + + string CacheTypeKey = 63; + string CacheTypeValue = 64; + + repeated GrammarTrigger GrammarTriggers = 65; } message Result { @@ -271,6 +309,19 @@ message TTSRequest { optional string language = 5; } +message VADRequest { + repeated float audio = 1; +} + +message VADSegment { + float start = 1; + float end = 2; +} + +message VADResponse { + repeated VADSegment segments = 1; +} + message SoundGenerationRequest { string text = 1; string model = 2; @@ -306,4 +357,4 @@ message StatusResponse { message Message { string role = 1; string content = 2; -} \ No newline at end of file +} diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index 176cace6..17f55003 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas) CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) - CMAKE_ARGS+=-DGGML_HIPBLAS=ON + CMAKE_ARGS+=-DGGML_HIP=ON # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation # But if it's OSX without metal, disable it here else ifeq ($(OS),Darwin) @@ -30,9 +30,7 @@ else ifeq ($(OS),Darwin) CMAKE_ARGS+=-DGGML_METAL=OFF else CMAKE_ARGS+=-DGGML_METAL=ON -# Until this is tested properly, we disable embedded metal file -# as we already embed it as part of the LocalAI assets - CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON TARGET+=--target ggml-metal endif endif diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 56d59d21..4daf84c6 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -113,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end) std::string ret; for (; begin != end; ++begin) { - ret += llama_token_to_piece(ctx, *begin); + ret += common_token_to_piece(ctx, *begin); } return ret; } @@ -121,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end) // format incomplete utf-8 multibyte character for output static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token) { - std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token); + std::string out = token == -1 ? 
"" : common_token_to_piece(ctx, token); // if the size is 1 and first bit is 1, meaning it's a partial character // (size > 1 meaning it's already a known token) if (out.size() == 1 && (out[0] & 0x80) == 0x80) @@ -134,6 +134,32 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c return out; } +// Adds an RPC server +// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 +static void add_rpc_devices(std::string servers) { + auto rpc_servers = string_split(servers, ','); + if (rpc_servers.empty()) { + throw std::invalid_argument("no RPC servers specified"); + } + ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); + if (!rpc_reg) { + throw std::invalid_argument("failed to find RPC backend"); + } + typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); + ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); + if (!ggml_backend_rpc_add_device_fn) { + throw std::invalid_argument("failed to find RPC device add function"); + } + for (const auto & server : rpc_servers) { + ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); + if (dev) { + ggml_backend_device_register(dev); + } else { + throw std::invalid_argument("failed to register RPC device"); + } + } +} + // convert a vector of completion_token_output to json static json probs_vector_to_json(const llama_context *ctx, const std::vector &probs) { @@ -203,8 +229,8 @@ struct llama_client_slot std::string stopping_word; // sampling - struct gpt_sampler_params sparams; - gpt_sampler *ctx_sampling = nullptr; + struct common_params_sampling sparams; + common_sampler *ctx_sampling = nullptr; int32_t ga_i = 0; // group-attention state int32_t ga_n = 1; // group-attention factor @@ -257,7 +283,7 @@ struct llama_client_slot images.clear(); } - bool has_budget(gpt_params &global_params) { + bool has_budget(common_params &global_params) { if (params.n_predict == -1 && global_params.n_predict == -1) { return true; // limitless @@ -391,14 +417,48 @@ struct llama_metrics { } }; +struct llava_embd_batch { + std::vector pos; + std::vector n_seq_id; + std::vector seq_id_0; + std::vector seq_ids; + std::vector logits; + llama_batch batch; + llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) { + pos .resize(n_tokens); + n_seq_id.resize(n_tokens); + seq_ids .resize(n_tokens + 1); + logits .resize(n_tokens); + seq_id_0.resize(1); + seq_id_0[0] = seq_id; + seq_ids [n_tokens] = nullptr; + batch = { + /*n_tokens =*/ n_tokens, + /*tokens =*/ nullptr, + /*embd =*/ embd, + /*pos =*/ pos.data(), + /*n_seq_id =*/ n_seq_id.data(), + /*seq_id =*/ seq_ids.data(), + /*logits =*/ logits.data(), + }; + for (int i = 0; i < n_tokens; i++) { + batch.pos [i] = pos_0 + i; + batch.n_seq_id[i] = 1; + batch.seq_id [i] = seq_id_0.data(); + batch.logits [i] = false; + } + } +}; + struct llama_server_context { llama_model *model = nullptr; llama_context *ctx = nullptr; + const llama_vocab * vocab = nullptr; clip_ctx *clp_ctx = nullptr; - gpt_params params; + common_params params; llama_batch batch; @@ -406,6 +466,10 @@ struct llama_server_context bool clean_kv_cache = true; bool all_slots_are_idle = false; bool add_bos_token = true; + bool has_eos_token = true; + + bool grammar_lazy = false; + std::vector grammar_trigger_words; int32_t n_ctx; // total context for all clients 
/ slots @@ -441,7 +505,7 @@ struct llama_server_context } } - bool load_model(const gpt_params ¶ms_) + bool load_model(const common_params ¶ms_) { params = params_; if (!params.mmproj.empty()) { @@ -458,9 +522,9 @@ struct llama_server_context } } - llama_init_result llama_init = llama_init_from_gpt_params(params); - model = llama_init.model; - ctx = llama_init.context; + common_init_result common_init = common_init_from_params(params); + model = common_init.model.release(); + ctx = common_init.context.release(); if (model == nullptr) { LOG_ERR("unable to load model: %s", params.model.c_str()); @@ -469,7 +533,7 @@ struct llama_server_context if (multimodal) { const int n_embd_clip = clip_n_mmproj_embd(clp_ctx); - const int n_embd_llm = llama_n_embd(model); + const int n_embd_llm = llama_model_n_embd(model); if (n_embd_clip != n_embd_llm) { LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm); llama_free(ctx); @@ -478,21 +542,23 @@ struct llama_server_context } } + vocab = llama_model_get_vocab(model); n_ctx = llama_n_ctx(ctx); - add_bos_token = llama_add_bos_token(model); + add_bos_token = llama_vocab_get_add_bos(vocab); + has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL; return true; } - void validate_model_chat_template(server_params & sparams) { - llama_chat_message chat[] = {{"user", "test"}}; - std::vector buf(1); - int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size()); - if (res < 0) { - LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__); - sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template + llama_client_slot* get_active_slot() { + for (llama_client_slot& slot : slots) { + // Check if the slot is currently processing + if (slot.is_processing()) { + return &slot; // Return the active slot + } } + return nullptr; // No active slot found } void initialize() { @@ -568,12 +634,12 @@ struct llama_server_context std::vector p; if (first) { - p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); + p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); first = false; } else { - p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL); + p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL); } prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end()); } @@ -590,7 +656,7 @@ struct llama_server_context else { auto s = json_prompt.template get(); - prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); + prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); } return prompt_tokens; @@ -619,7 +685,7 @@ struct llama_server_context bool launch_slot_with_data(llama_client_slot* &slot, json data) { slot_params default_params; - gpt_sampler_params default_sparams; + common_params_sampling default_sparams; slot->params.stream = json_value(data, "stream", false); slot->params.cache_prompt = json_value(data, "cache_prompt", false); @@ -627,7 +693,6 @@ struct llama_server_context slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); - slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); slot->sparams.typ_p = json_value(data, "typical_p", 
default_sparams.typ_p); slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range); @@ -639,12 +704,13 @@ struct llama_server_context slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); slot->sparams.seed = json_value(data, "seed", default_sparams.seed); slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep); + slot->sparams.grammar_trigger_words = grammar_trigger_words; + slot->sparams.grammar_lazy = grammar_lazy; if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) { // Might be better to reject the request with a 400 ? @@ -684,8 +750,8 @@ struct llama_server_context slot->prompt = ""; } - if (json_value(data, "ignore_eos", false)) { - slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY}); + if (json_value(data, "ignore_eos", false) && has_eos_token) { + slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY}); } /* slot->sparams.penalty_prompt_tokens.clear(); @@ -724,13 +790,13 @@ struct llama_server_context } } */ - slot->sparams.logit_bias.clear(); const auto &logit_bias = data.find("logit_bias"); if (logit_bias != data.end() && logit_bias->is_array()) { - const int n_vocab = llama_n_vocab(model); + const llama_vocab * vocab = llama_model_get_vocab(model); + const int n_vocab = llama_vocab_n_tokens(vocab); for (const auto &el : *logit_bias) { if (el.is_array() && el.size() == 2) @@ -759,7 +825,7 @@ struct llama_server_context } else if (el[0].is_string()) { - auto toks = llama_tokenize(model, el[0].get(), false); + auto toks = common_tokenize(vocab, el[0].get(), false); for (auto tok : toks) { slot->sparams.logit_bias.push_back({tok, bias}); @@ -791,7 +857,7 @@ struct llama_server_context sampler_names.emplace_back(name); } } - slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false); + slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false); } else { @@ -875,9 +941,9 @@ struct llama_server_context if (slot->ctx_sampling != nullptr) { - gpt_sampler_free(slot->ctx_sampling); + common_sampler_free(slot->ctx_sampling); } - slot->ctx_sampling = gpt_sampler_init(model, slot->sparams); + slot->ctx_sampling = common_sampler_init(model, slot->sparams); //llama_set_rng_seed(ctx, slot->params.seed); slot->command = LOAD_PROMPT; @@ -904,13 +970,13 @@ struct llama_server_context system_tokens.clear(); if (!system_prompt.empty()) { - system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token); + system_tokens = common_tokenize(ctx, system_prompt, add_bos_token); - llama_batch_clear(batch); + common_batch_clear(batch); for (int i = 0; i < (int)system_tokens.size(); ++i) { - llama_batch_add(batch, system_tokens[i], i, { 0 }, false); + common_batch_add(batch, system_tokens[i], i, { 0 }, false); } for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) @@ -924,7 +990,6 @@ struct llama_server_context batch.n_seq_id + i, 
batch.seq_id + i, batch.logits + i, - 0, 0, 0, // unused }; if (llama_decode(ctx, batch_view) != 0) { @@ -999,7 +1064,7 @@ struct llama_server_context bool process_token(completion_token_output &result, llama_client_slot &slot) { // remember which tokens were sampled - used for repetition penalties during sampling - const std::string token_str = llama_token_to_piece(ctx, result.tok); + const std::string token_str = common_token_to_piece(ctx, result.tok); slot.sampled = result.tok; // search stop word and delete it @@ -1090,7 +1155,7 @@ struct llama_server_context slot.has_next_token = false; } - if (result.tok == llama_token_eos(model)) + if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok)) { slot.stopped_eos = true; slot.has_next_token = false; @@ -1150,7 +1215,7 @@ struct llama_server_context samplers.reserve(slot.sparams.samplers.size()); for (const auto & sampler : slot.sparams.samplers) { - samplers.emplace_back(gpt_sampler_type_to_str(sampler)); + samplers.emplace_back(common_sampler_type_to_str(sampler)); } return json { @@ -1164,7 +1229,6 @@ struct llama_server_context {"top_k", slot.sparams.top_k}, {"top_p", slot.sparams.top_p}, {"min_p", slot.sparams.min_p}, - {"tfs_z", slot.sparams.tfs_z}, {"typical_p", slot.sparams.typ_p}, {"repeat_last_n", slot.sparams.penalty_last_n}, {"repeat_penalty", slot.sparams.penalty_repeat}, @@ -1173,13 +1237,12 @@ struct llama_server_context {"mirostat", slot.sparams.mirostat}, {"mirostat_tau", slot.sparams.mirostat_tau}, {"mirostat_eta", slot.sparams.mirostat_eta}, - {"penalize_nl", slot.sparams.penalize_nl}, {"stop", slot.params.antiprompt}, {"n_predict", slot.params.n_predict}, {"n_keep", params.n_keep}, {"ignore_eos", slot.sparams.ignore_eos}, {"stream", slot.params.stream}, - // {"logit_bias", slot.sparams.logit_bias}, + // {"logit_bias", slot.sparams.logit_bias}, {"n_probs", slot.sparams.n_probs}, {"min_keep", slot.sparams.min_keep}, {"grammar", slot.sparams.grammar}, @@ -1206,7 +1269,7 @@ struct llama_server_context if (slot.sparams.n_probs > 0) { std::vector probs_output = {}; - const std::vector to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false); + const std::vector to_send_toks = common_tokenize(ctx, tkn.text_to_send, false); size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size()); size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size()); if (probs_pos < probs_stop_pos) @@ -1258,7 +1321,7 @@ struct llama_server_context std::vector probs = {}; if (!slot.params.stream && slot.stopped_word) { - const std::vector stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false); + const std::vector stop_word_toks = common_tokenize(ctx, slot.stopping_word, false); probs = std::vector(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size()); } else @@ -1287,7 +1350,7 @@ struct llama_server_context res.error = false; res.stop = true; - const int n_embd = llama_n_embd(model); + const int n_embd = llama_model_n_embd(model); if (!params.embedding) { LOG_WARNING("embedding disabled", { @@ -1369,7 +1432,6 @@ struct llama_server_context batch.n_seq_id + i, batch.seq_id + i, batch.logits + i, - 0, 0, 0, // unused }; if (llama_decode(ctx, batch_view)) { @@ -1387,9 +1449,10 @@ struct llama_server_context n_eval = n_batch; } - const int n_embd = llama_n_embd(model); - llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, 
slot.n_past, 1, 0, }; - if (llama_decode(ctx, batch_img)) + const int n_embd = llama_model_n_embd(model); + float * embd = img.image_embedding + i * n_embd; + llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0); + if (llama_decode(ctx, llava_batch.batch)) { LOG("%s : failed to eval image\n", __func__); return false; @@ -1398,7 +1461,7 @@ struct llama_server_context } image_idx++; - llama_batch_clear(batch); + common_batch_clear(batch); // append prefix of next image const auto json_prompt = (image_idx >= (int) slot.images.size()) ? @@ -1408,7 +1471,7 @@ struct llama_server_context std::vector append_tokens = tokenize(json_prompt, false); // has next image for (int i = 0; i < (int) append_tokens.size(); ++i) { - llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true); + common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true); slot.n_past += 1; } } @@ -1540,7 +1603,7 @@ struct llama_server_context update_system_prompt(); } - llama_batch_clear(batch); + common_batch_clear(batch); if (all_slots_are_idle) { @@ -1618,7 +1681,7 @@ struct llama_server_context // TODO: we always have to take into account the "system_tokens" // this is not great and needs to be improved somehow - llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true); + common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true); slot.n_past += 1; } @@ -1667,11 +1730,11 @@ struct llama_server_context suffix_tokens.erase(suffix_tokens.begin()); } - prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model)); - prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS - prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model)); + prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab)); + prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS + prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab)); prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); - prefix_tokens.push_back(llama_token_middle(model)); + prefix_tokens.push_back(llama_vocab_fim_mid(vocab)); prompt_tokens = prefix_tokens; } else @@ -1712,7 +1775,7 @@ struct llama_server_context if (!slot.params.cache_prompt) { - gpt_sampler_reset(slot.ctx_sampling); + common_sampler_reset(slot.ctx_sampling); slot.n_past = 0; slot.n_past_se = 0; @@ -1724,7 +1787,7 @@ struct llama_server_context // push the prompt into the sampling context (do not apply grammar) for (auto &token : prompt_tokens) { - gpt_sampler_accept(slot.ctx_sampling, token, false); + common_sampler_accept(slot.ctx_sampling, token, false); } slot.n_past = common_part(slot.cache_tokens, prompt_tokens); @@ -1816,7 +1879,7 @@ struct llama_server_context ga_i += ga_w/ga_n; } } - llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false); + common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false); slot_npast++; } @@ -1894,7 +1957,6 @@ struct llama_server_context batch.n_seq_id + i, batch.seq_id + i, batch.logits + i, - 0, 0, 0, // unused }; const int ret = llama_decode(ctx, batch_view); @@ -1933,9 +1995,9 @@ struct llama_server_context } completion_token_output result; - const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i); + const llama_token id = 
common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i); - gpt_sampler_accept(slot.ctx_sampling, id, true); + common_sampler_accept(slot.ctx_sampling, id, true); slot.n_decoded += 1; if (slot.n_decoded == 1) @@ -1946,7 +2008,7 @@ struct llama_server_context } result.tok = id; - const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling); + const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling); for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) { result.probs.push_back({ @@ -1999,7 +2061,7 @@ static json format_partial_response( struct token_translator { llama_context * ctx; - std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); } + std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); } std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); } }; @@ -2064,7 +2126,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); // slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); // slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); - // slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); // slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); // slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); // slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); @@ -2074,7 +2135,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); // slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); // slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - // slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); // slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); // slot->params.seed = json_value(data, "seed", default_params.seed); // slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); @@ -2088,7 +2148,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["n_predict"] = predict->tokens() == 0 ? 
-1 : predict->tokens(); data["top_k"] = predict->topk(); data["top_p"] = predict->topp(); - data["tfs_z"] = predict->tailfreesamplingz(); data["typical_p"] = predict->typicalp(); data["temperature"] = predict->temperature(); data["repeat_last_n"] = predict->repeat(); @@ -2098,7 +2157,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["mirostat"] = predict->mirostat(); data["mirostat_tau"] = predict->mirostattau(); data["mirostat_eta"] = predict->mirostateta(); - data["penalize_nl"] = predict->penalizenl(); data["n_keep"] = predict->nkeep(); data["seed"] = predict->seed(); data["grammar"] = predict->grammar(); @@ -2106,6 +2164,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["ignore_eos"] = predict->ignoreeos(); data["embeddings"] = predict->embeddings(); + // Add the correlationid to json data + data["correlation_id"] = predict->correlationid(); + // for each image in the request, add the image data // for (int i = 0; i < predict->images_size(); i++) { @@ -2132,7 +2193,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens(); // llama.params.sparams.top_k = predict->topk(); // llama.params.sparams.top_p = predict->topp(); -// llama.params.sparams.tfs_z = predict->tailfreesamplingz(); // llama.params.sparams.typical_p = predict->typicalp(); // llama.params.sparams.penalty_last_n = predict->repeat(); // llama.params.sparams.temp = predict->temperature(); @@ -2142,7 +2202,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // llama.params.sparams.mirostat = predict->mirostat(); // llama.params.sparams.mirostat_tau = predict->mirostattau(); // llama.params.sparams.mirostat_eta = predict->mirostateta(); -// llama.params.sparams.penalize_nl = predict->penalizenl(); // llama.params.n_keep = predict->nkeep(); // llama.params.seed = predict->seed(); // llama.params.sparams.grammar = predict->grammar(); @@ -2189,8 +2248,37 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // } // } +const std::vector kv_cache_types = { + GGML_TYPE_F32, + GGML_TYPE_F16, + GGML_TYPE_BF16, + GGML_TYPE_Q8_0, + GGML_TYPE_Q4_0, + GGML_TYPE_Q4_1, + GGML_TYPE_IQ4_NL, + GGML_TYPE_Q5_0, + GGML_TYPE_Q5_1, +}; + +static ggml_type kv_cache_type_from_str(const std::string & s) { + for (const auto & type : kv_cache_types) { + if (ggml_type_name(type) == s) { + return type; + } + } + throw std::runtime_error("Unsupported cache type: " + s); +} + +static std::string get_all_kv_cache_types() { + std::ostringstream msg; + for (const auto & type : kv_cache_types) { + msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", "); + } + return msg.str(); +} + static void params_parse(const backend::ModelOptions* request, - gpt_params & params) { + common_params & params) { // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809 @@ -2202,6 +2290,12 @@ static void params_parse(const backend::ModelOptions* request, } // params.model_alias ?? 
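The server-side parsing just below consumes two of the new ModelOptions fields: CacheTypeKey and CacheTypeValue select the KV-cache quantization and must match one of the ggml type names enumerated in kv_cache_types above (f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1), otherwise kv_cache_type_from_str throws; GrammarTriggers switches the sampler into lazy-grammar mode. A minimal sketch of a LoadModel call that sets these fields through the generated Go stubs; the client setup, model path and trigger word are all illustrative assumptions:

package main

import (
    "context"
    "log"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

func main() {
    conn, err := grpc.Dial("127.0.0.1:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatal(err)
    }
    defer conn.Close()

    client := pb.NewBackendClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) // model loads can be slow
    defer cancel()

    res, err := client.LoadModel(ctx, &pb.ModelOptions{
        ModelFile: "/models/model.gguf", // hypothetical path
        Threads:   8,
        // Quantize the KV cache; both values must be valid ggml type names.
        CacheTypeKey:   "q8_0",
        CacheTypeValue: "q8_0",
        // Lazy grammar: the grammar is only activated once a trigger word appears.
        GrammarTriggers: []*pb.GrammarTrigger{
            {Word: "<tool_call>", AtStart: true},
        },
    })
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("LoadModel result: %v", res)
}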
params.model_alias = request->modelfile(); + if (!request->cachetypekey().empty()) { + params.cache_type_k = kv_cache_type_from_str(request->cachetypekey()); + } + if (!request->cachetypevalue().empty()) { + params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue()); + } params.n_ctx = request->contextsize(); //params.memory_f16 = request->f16memory(); params.cpuparams.n_threads = request->threads(); @@ -2219,7 +2313,7 @@ static void params_parse(const backend::ModelOptions* request, const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS"); if (llama_grpc_servers != NULL) { - params.rpc_servers = std::string(llama_grpc_servers); + add_rpc_devices(std::string(llama_grpc_servers)); } // TODO: Add yarn @@ -2260,6 +2354,7 @@ static void params_parse(const backend::ModelOptions* request, params.use_mmap = request->mmap(); params.flash_attn = request->flashattention(); params.no_kv_offload = request->nokvoffload(); + params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops) params.embedding = request->embeddings(); @@ -2284,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request, if ( request->ropefreqscale() != 0.0f ) { params.rope_freq_scale = request->ropefreqscale(); } + + if (request->grammartriggers_size() > 0) { + LOG_INFO("configuring grammar triggers", {}); + llama.grammar_lazy = true; + for (int i = 0; i < request->grammartriggers_size(); i++) { + common_grammar_trigger trigger; + trigger.word = request->grammartriggers(i).word(); + trigger.at_start = request->grammartriggers(i).at_start(); + llama.grammar_trigger_words.push_back(trigger); + LOG_INFO("grammar trigger", { + { "word", trigger.word }, + { "at_start", trigger.at_start } + }); + } + } } @@ -2298,7 +2408,7 @@ public: grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) { // Implement LoadModel RPC - gpt_params params; + common_params params; params_parse(request, params); llama_backend_init(); @@ -2344,6 +2454,18 @@ public: int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); reply.set_prompt_tokens(tokens_evaluated); + if (result.result_json.contains("timings")) { + double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0); + reply.set_timing_prompt_processing(timing_prompt_processing); + double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0); + reply.set_timing_token_generation(timing_token_generation); + } + + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + // Send the reply writer->Write(reply); @@ -2367,12 +2489,25 @@ public: std::string completion_text; task_result result = llama.queue_results.recv(task_id); if (!result.error && result.stop) { + + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + completion_text = result.result_json.value("content", ""); int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0); int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); reply->set_prompt_tokens(tokens_evaluated); reply->set_tokens(tokens_predicted); reply->set_message(completion_text); + + if (result.result_json.contains("timings")) { + double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0); + reply->set_timing_prompt_processing(timing_prompt_processing); + double 
timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0); + reply->set_timing_token_generation(timing_token_generation); + } } else { @@ -2406,6 +2541,43 @@ public: return grpc::Status::OK; } + + grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){ + json data = parse_options(false, request, llama); + + std::vector tokens = llama.tokenize(data["prompt"],false); + + for (int i=0 ; i< tokens.size(); i++){ + response->add_tokens(tokens[i]); + } + + return grpc::Status::OK; + } + + grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) { + llama_client_slot* active_slot = llama.get_active_slot(); + + if (active_slot != nullptr) { + // Calculate the tokens per second using existing logic + double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded; + + // Populate the response with metrics + response->set_slot_id(active_slot->id); + response->set_prompt_json_for_slot(active_slot->prompt.dump()); + response->set_tokens_per_second(tokens_per_second); + response->set_tokens_generated(active_slot->n_decoded); + response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed); + } else { + // Handle case when no active slot exists + response->set_slot_id(0); + response->set_prompt_json_for_slot(""); + response->set_tokens_per_second(0); + response->set_tokens_generated(0); + response->set_prompt_tokens_processed(0); + } + + return grpc::Status::OK; + } }; void RunServer(const std::string& server_address) { diff --git a/backend/cpp/llama/patches/01-llava.patch b/backend/cpp/llama/patches/01-llava.patch index fa122da2..77124628 100644 --- a/backend/cpp/llama/patches/01-llava.patch +++ b/backend/cpp/llama/patches/01-llava.patch @@ -1,13 +1,13 @@ diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp -index 342042ff..224db9b5 100644 +index 3cd0d2fa..6c5e811a 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp -@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima - struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); - int* patches_data = (int*)malloc(ggml_nbytes(patches)); - for (int i = 0; i < num_patches; i++) { -- patches_data[i] = i + 1; -+ patches_data[i] = i; - } - ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); - free(patches_data); \ No newline at end of file +@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima + struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); + int* patches_data = (int*)malloc(ggml_nbytes(patches)); + for (int i = 0; i < num_patches; i++) { +- patches_data[i] = i + 1; ++ patches_data[i] = i; + } + ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); + free(patches_data); \ No newline at end of file diff --git a/backend/go/bark/Makefile b/backend/go/bark/Makefile new file mode 100644 index 00000000..e8902615 --- /dev/null +++ b/backend/go/bark/Makefile @@ -0,0 +1,25 @@ +INCLUDE_PATH := $(abspath ./) +LIBRARY_PATH := $(abspath ./) + +AR?=ar + +BUILD_TYPE?= +# keep standard at C11 and C++11 +CXXFLAGS = -I. 
-I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC +LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm + +# warnings +CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function + +gobark.o: + $(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS) + +libbark.a: gobark.o + cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./ + $(AR) rcs libbark.a gobark.o + $(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o + $(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o + $(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o + +clean: + rm -f gobark.o libbark.a \ No newline at end of file diff --git a/backend/go/bark/gobark.cpp b/backend/go/bark/gobark.cpp new file mode 100644 index 00000000..b5f414b8 --- /dev/null +++ b/backend/go/bark/gobark.cpp @@ -0,0 +1,85 @@ +#include +#include + +#include "bark.h" +#include "gobark.h" +#include "common.h" +#include "ggml.h" + +struct bark_context *c; + +void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) { + if (step == bark_encoding_step::SEMANTIC) { + printf("\rGenerating semantic tokens... %d%%", progress); + } else if (step == bark_encoding_step::COARSE) { + printf("\rGenerating coarse tokens... %d%%", progress); + } else if (step == bark_encoding_step::FINE) { + printf("\rGenerating fine tokens... %d%%", progress); + } + fflush(stdout); +} + +int load_model(char *model) { + // initialize bark context + struct bark_context_params ctx_params = bark_context_default_params(); + bark_params params; + + params.model_path = model; + + // ctx_params.verbosity = verbosity; + ctx_params.progress_callback = bark_print_progress_callback; + ctx_params.progress_callback_user_data = nullptr; + + struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed); + if (!bctx) { + fprintf(stderr, "%s: Could not load model\n", __func__); + return 1; + } + + c = bctx; + + return 0; +} + +int tts(char *text,int threads, char *dst ) { + + ggml_time_init(); + const int64_t t_main_start_us = ggml_time_us(); + + // generate audio + if (!bark_generate_audio(c, text, threads)) { + fprintf(stderr, "%s: An error occured. 
If the problem persists, feel free to open an issue to report it.\n", __func__); + return 1; + } + + const float *audio_data = bark_get_audio_data(c); + if (audio_data == NULL) { + fprintf(stderr, "%s: Could not get audio data\n", __func__); + return 1; + } + + const int audio_arr_size = bark_get_audio_data_size(c); + + std::vector audio_arr(audio_data, audio_data + audio_arr_size); + + write_wav_on_disk(audio_arr, dst); + + // report timing + { + const int64_t t_main_end_us = ggml_time_us(); + const int64_t t_load_us = bark_get_load_time(c); + const int64_t t_eval_us = bark_get_eval_time(c); + + printf("\n\n"); + printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f); + printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f); + printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f); + } + + return 0; +} + +int unload() { + bark_free(c); +} + diff --git a/backend/go/bark/gobark.go b/backend/go/bark/gobark.go new file mode 100644 index 00000000..133a4a39 --- /dev/null +++ b/backend/go/bark/gobark.go @@ -0,0 +1,52 @@ +package main + +// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers +// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon +// #include +// #include +import "C" + +import ( + "fmt" + "unsafe" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" +) + +type Bark struct { + base.SingleThread + threads int +} + +func (sd *Bark) Load(opts *pb.ModelOptions) error { + + sd.threads = int(opts.Threads) + + modelFile := C.CString(opts.ModelFile) + defer C.free(unsafe.Pointer(modelFile)) + + ret := C.load_model(modelFile) + if ret != 0 { + return fmt.Errorf("inference failed") + } + + return nil +} + +func (sd *Bark) TTS(opts *pb.TTSRequest) error { + t := C.CString(opts.Text) + defer C.free(unsafe.Pointer(t)) + + dst := C.CString(opts.Dst) + defer C.free(unsafe.Pointer(dst)) + + threads := C.int(sd.threads) + + ret := C.tts(t, threads, dst) + if ret != 0 { + return fmt.Errorf("inference failed") + } + + return nil +} diff --git a/backend/go/bark/gobark.h b/backend/go/bark/gobark.h new file mode 100644 index 00000000..06fb965d --- /dev/null +++ b/backend/go/bark/gobark.h @@ -0,0 +1,8 @@ +#ifdef __cplusplus +extern "C" { +#endif +int load_model(char *model); +int tts(char *text,int threads, char *dst ); +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/backend/go/image/stablediffusion/main.go b/backend/go/bark/main.go similarity index 83% rename from backend/go/image/stablediffusion/main.go rename to backend/go/bark/main.go index ae259fa7..840a687d 100644 --- a/backend/go/image/stablediffusion/main.go +++ b/backend/go/bark/main.go @@ -1,7 +1,6 @@ package main // Note: this is started internally by LocalAI and a server is allocated for each model - import ( "flag" @@ -15,7 +14,7 @@ var ( func main() { flag.Parse() - if err := grpc.StartServer(*addr, &Image{}); err != nil { + if err := grpc.StartServer(*addr, &Bark{}); err != nil { panic(err) } } diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile new file mode 100644 index 00000000..f92c3a77 --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/Makefile @@ -0,0 +1,96 @@ +INCLUDE_PATH := $(abspath 
./) +LIBRARY_PATH := $(abspath ./) + +AR?=ar +CMAKE_ARGS?= +BUILD_TYPE?= +ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh +# keep standard at C11 and C++11 +CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC + +# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static +CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF + +# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically +ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DGGML_CUDA=ON +# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +# to CMAKE_ARGS automatically +else ifeq ($(BUILD_TYPE),openblas) + CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +else ifeq ($(BUILD_TYPE),clblas) + CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ +else ifeq ($(BUILD_TYPE),hipblas) + CMAKE_ARGS+=-DGGML_HIP=ON +# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation +# But if it's OSX without metal, disable it here +else ifeq ($(OS),Darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DGGML_METAL=OFF + else + CMAKE_ARGS+=-DGGML_METAL=ON + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON + TARGET+=--target ggml-metal + endif +endif + +# ifeq ($(BUILD_TYPE),sycl_f16) +# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON +# endif + +# ifeq ($(BUILD_TYPE),sycl_f32) +# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON +# endif + +# warnings +CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function + +# Find all .a archives in ARCHIVE_DIR +# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive) +GGML_ARCHIVE_DIR := build/ggml/src/ +ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a') + +# Name of the single merged library +COMBINED_LIB := libggmlall.a + +# Rule to merge all the .a files into one +$(COMBINED_LIB): $(ALL_ARCHIVES) + @echo "Merging all .a into $(COMBINED_LIB)" + rm -f $@ + mkdir -p merge-tmp + for a in $(ALL_ARCHIVES); do \ + ( cd merge-tmp && ar x ../$$a ); \ + done + ( cd merge-tmp && ar rcs ../$@ *.o ) + # Ensure we have a proper index + ranlib $@ + # Clean up + rm -rf merge-tmp + +build/libstable-diffusion.a: + @echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" +ifneq (,$(findstring sycl,$(BUILD_TYPE))) + +bash -c "source $(ONEAPI_VARS); \ + mkdir -p build && \ + cd build && \ + cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \ + cmake --build . --config Release" +else + mkdir -p build && \ + cd build && \ + cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \ + cmake --build . 
--config Release +endif + $(MAKE) $(COMBINED_LIB) + +gosd.o: + $(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c + +libsd.a: gosd.o + cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a + $(AR) rcs libsd.a gosd.o + +clean: + rm -rf gosd.o libsd.a build $(COMBINED_LIB) \ No newline at end of file diff --git a/backend/go/image/stablediffusion-ggml/gosd.cpp b/backend/go/image/stablediffusion-ggml/gosd.cpp new file mode 100644 index 00000000..8653aa1e --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/gosd.cpp @@ -0,0 +1,228 @@ +#include +#include +#include +#include +#include +#include +#include +#include "gosd.h" + +// #include "preprocessing.hpp" +#include "flux.hpp" +#include "stable-diffusion.h" + +#define STB_IMAGE_IMPLEMENTATION +#define STB_IMAGE_STATIC +#include "stb_image.h" + +#define STB_IMAGE_WRITE_IMPLEMENTATION +#define STB_IMAGE_WRITE_STATIC +#include "stb_image_write.h" + +#define STB_IMAGE_RESIZE_IMPLEMENTATION +#define STB_IMAGE_RESIZE_STATIC +#include "stb_image_resize.h" + +// Names of the sampler method, same order as enum sample_method in stable-diffusion.h +const char* sample_method_str[] = { + "euler_a", + "euler", + "heun", + "dpm2", + "dpm++2s_a", + "dpm++2m", + "dpm++2mv2", + "ipndm", + "ipndm_v", + "lcm", +}; + +// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h +const char* schedule_str[] = { + "default", + "discrete", + "karras", + "exponential", + "ays", + "gits", +}; + +sd_ctx_t* sd_c; + +sample_method_t sample_method; + +int load_model(char *model, char* options[], int threads, int diff) { + fprintf (stderr, "Loading model!\n"); + + char *stableDiffusionModel = ""; + if (diff == 1 ) { + stableDiffusionModel = model; + model = ""; + } + + // decode options. Options are in form optname:optvale, or if booleans only optname. + char *clip_l_path = ""; + char *clip_g_path = ""; + char *t5xxl_path = ""; + char *vae_path = ""; + char *scheduler = ""; + char *sampler = ""; + + // If options is not NULL, parse options + for (int i = 0; options[i] != NULL; i++) { + char *optname = strtok(options[i], ":"); + char *optval = strtok(NULL, ":"); + if (optval == NULL) { + optval = "true"; + } + + if (!strcmp(optname, "clip_l_path")) { + clip_l_path = optval; + } + if (!strcmp(optname, "clip_g_path")) { + clip_g_path = optval; + } + if (!strcmp(optname, "t5xxl_path")) { + t5xxl_path = optval; + } + if (!strcmp(optname, "vae_path")) { + vae_path = optval; + } + if (!strcmp(optname, "scheduler")) { + scheduler = optval; + } + if (!strcmp(optname, "sampler")) { + sampler = optval; + } + } + + int sample_method_found = -1; + for (int m = 0; m < N_SAMPLE_METHODS; m++) { + if (!strcmp(sampler, sample_method_str[m])) { + sample_method_found = m; + } + } + if (sample_method_found == -1) { + fprintf(stderr, "Invalid sample method, default to EULER_A!\n"); + sample_method_found = EULER_A; + } + sample_method = (sample_method_t)sample_method_found; + + int schedule_found = -1; + for (int d = 0; d < N_SCHEDULES; d++) { + if (!strcmp(scheduler, schedule_str[d])) { + schedule_found = d; + fprintf (stderr, "Found scheduler: %s\n", scheduler); + + } + } + + if (schedule_found == -1) { + fprintf (stderr, "Invalid scheduler! 
using DEFAULT\n"); + schedule_found = DEFAULT; + } + + schedule_t schedule = (schedule_t)schedule_found; + + fprintf (stderr, "Creating context\n"); + sd_ctx_t* sd_ctx = new_sd_ctx(model, + clip_l_path, + clip_g_path, + t5xxl_path, + stableDiffusionModel, + vae_path, + "", + "", + "", + "", + "", + false, + false, + false, + threads, + SD_TYPE_COUNT, + STD_DEFAULT_RNG, + schedule, + false, + false, + false, + false); + + if (sd_ctx == NULL) { + fprintf (stderr, "failed loading model (generic error)\n"); + return 1; + } + fprintf (stderr, "Created context: OK\n"); + + sd_c = sd_ctx; + + return 0; +} + +int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) { + + sd_image_t* results; + + std::vector skip_layers = {7, 8, 9}; + + fprintf (stderr, "Generating image\n"); + + results = txt2img(sd_c, + text, + negativeText, + -1, //clip_skip + cfg_scale, // sfg_scale + 3.5f, + width, + height, + sample_method, + steps, + seed, + 1, + NULL, + 0.9f, + 20.f, + false, + "", + skip_layers.data(), + skip_layers.size(), + 0, + 0.01, + 0.2); + + if (results == NULL) { + fprintf (stderr, "NO results\n"); + return 1; + } + + if (results[0].data == NULL) { + fprintf (stderr, "Results with no data\n"); + return 1; + } + + fprintf (stderr, "Writing PNG\n"); + + fprintf (stderr, "DST: %s\n", dst); + fprintf (stderr, "Width: %d\n", results[0].width); + fprintf (stderr, "Height: %d\n", results[0].height); + fprintf (stderr, "Channel: %d\n", results[0].channel); + fprintf (stderr, "Data: %p\n", results[0].data); + + stbi_write_png(dst, results[0].width, results[0].height, results[0].channel, + results[0].data, 0, NULL); + fprintf (stderr, "Saved resulting image to '%s'\n", dst); + + // TODO: free results. Why does it crash? 
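gen_image above depends on load_model having parsed the backend options, which reach the C++ side as plain strings of the form optname:optval; a bare optname is read as a boolean flag, and the Go wrapper that follows treats diffusion_model that way and resolves path-valued options relative to the model directory. A hedged sketch of driving this backend over gRPC, again assuming the generated stubs in pkg/grpc/proto; every path and file name here is hypothetical:

package main

import (
    "context"
    "log"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

func main() {
    conn, err := grpc.Dial("127.0.0.1:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatal(err)
    }
    defer conn.Close()

    client := pb.NewBackendClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
    defer cancel()

    // Options use the optname:optval convention parsed by load_model in gosd.cpp.
    if _, err := client.LoadModel(ctx, &pb.ModelOptions{
        ModelFile: "/models/flux1-dev-Q4_0.gguf", // hypothetical
        Threads:   8,
        CFGScale:  3.5,
        Options: []string{
            "diffusion_model",                   // bare flag: load as a diffusion model
            "vae_path:ae.safetensors",           // path options resolve under the model dir
            "clip_l_path:clip_l.safetensors",
            "t5xxl_path:t5xxl_fp16.safetensors",
            "sampler:euler",                     // must match an entry of sample_method_str
            "scheduler:karras",                  // must match an entry of schedule_str
        },
    }); err != nil {
        log.Fatal(err)
    }

    // A single text-to-image request; the result is written to Dst as a PNG.
    if _, err := client.GenerateImage(ctx, &pb.GenerateImageRequest{
        PositivePrompt: "a watercolor painting of a lighthouse",
        NegativePrompt: "blurry, low quality",
        Width:          512,
        Height:         512,
        Step:           20,
        Seed:           42,
        Dst:            "/tmp/out.png",
    }); err != nil {
        log.Fatal(err)
    }
}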
+ + free(results[0].data); + results[0].data = NULL; + free(results); + fprintf (stderr, "gen_image is done", dst); + + return 0; +} + +int unload() { + free_sd_ctx(sd_c); +} + diff --git a/backend/go/image/stablediffusion-ggml/gosd.go b/backend/go/image/stablediffusion-ggml/gosd.go new file mode 100644 index 00000000..8c3bdb90 --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/gosd.go @@ -0,0 +1,96 @@ +package main + +// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include +// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp +// #include +// #include +import "C" + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "unsafe" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + "github.com/mudler/LocalAI/pkg/utils" +) + +type SDGGML struct { + base.SingleThread + threads int + sampleMethod string + cfgScale float32 +} + +func (sd *SDGGML) Load(opts *pb.ModelOptions) error { + + sd.threads = int(opts.Threads) + + modelFile := C.CString(opts.ModelFile) + defer C.free(unsafe.Pointer(modelFile)) + + var options **C.char + // prepare the options array to pass to C + + size := C.size_t(unsafe.Sizeof((*C.char)(nil))) + length := C.size_t(len(opts.Options)) + options = (**C.char)(C.malloc(length * size)) + view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)] + + var diffusionModel int + + var oo []string + for _, op := range opts.Options { + if op == "diffusion_model" { + diffusionModel = 1 + continue + } + + // If it's an option path, we resolve absolute path from the model path + if strings.Contains(op, ":") && strings.Contains(op, "path") { + data := strings.Split(op, ":") + data[1] = filepath.Join(opts.ModelPath, data[1]) + if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil { + oo = append(oo, strings.Join(data, ":")) + } + } else { + oo = append(oo, op) + } + } + + fmt.Fprintf(os.Stderr, "Options: %+v\n", oo) + + for i, x := range oo { + view[i] = C.CString(x) + } + + sd.cfgScale = opts.CFGScale + + ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel)) + if ret != 0 { + return fmt.Errorf("could not load model") + } + + return nil +} + +func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error { + t := C.CString(opts.PositivePrompt) + defer C.free(unsafe.Pointer(t)) + + dst := C.CString(opts.Dst) + defer C.free(unsafe.Pointer(dst)) + + negative := C.CString(opts.NegativePrompt) + defer C.free(unsafe.Pointer(negative)) + + ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale)) + if ret != 0 { + return fmt.Errorf("inference failed") + } + + return nil +} diff --git a/backend/go/image/stablediffusion-ggml/gosd.h b/backend/go/image/stablediffusion-ggml/gosd.h new file mode 100644 index 00000000..5297e871 --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/gosd.h @@ -0,0 +1,8 @@ +#ifdef __cplusplus +extern "C" { +#endif +int load_model(char *model, char* options[], int threads, int diffusionModel); +int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale); +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/backend/go/image/tinydream/main.go b/backend/go/image/stablediffusion-ggml/main.go similarity index 83% rename from 
backend/go/image/tinydream/main.go rename to backend/go/image/stablediffusion-ggml/main.go index ae259fa7..acee74fa 100644 --- a/backend/go/image/tinydream/main.go +++ b/backend/go/image/stablediffusion-ggml/main.go @@ -1,7 +1,6 @@ package main // Note: this is started internally by LocalAI and a server is allocated for each model - import ( "flag" @@ -15,7 +14,7 @@ var ( func main() { flag.Parse() - if err := grpc.StartServer(*addr, &Image{}); err != nil { + if err := grpc.StartServer(*addr, &SDGGML{}); err != nil { panic(err) } } diff --git a/backend/go/image/stablediffusion/stablediffusion.go b/backend/go/image/stablediffusion/stablediffusion.go deleted file mode 100644 index 1733bf99..00000000 --- a/backend/go/image/stablediffusion/stablediffusion.go +++ /dev/null @@ -1,33 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - "github.com/mudler/LocalAI/pkg/stablediffusion" -) - -type Image struct { - base.SingleThread - stablediffusion *stablediffusion.StableDiffusion -} - -func (image *Image) Load(opts *pb.ModelOptions) error { - var err error - // Note: the Model here is a path to a directory containing the model files - image.stablediffusion, err = stablediffusion.New(opts.ModelFile) - return err -} - -func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error { - return image.stablediffusion.GenerateImage( - int(opts.Height), - int(opts.Width), - int(opts.Mode), - int(opts.Step), - int(opts.Seed), - opts.PositivePrompt, - opts.NegativePrompt, - opts.Dst) -} diff --git a/backend/go/image/tinydream/tinydream.go b/backend/go/image/tinydream/tinydream.go deleted file mode 100644 index ad364c47..00000000 --- a/backend/go/image/tinydream/tinydream.go +++ /dev/null @@ -1,32 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - "github.com/mudler/LocalAI/pkg/tinydream" -) - -type Image struct { - base.SingleThread - tinydream *tinydream.TinyDream -} - -func (image *Image) Load(opts *pb.ModelOptions) error { - var err error - // Note: the Model here is a path to a directory containing the model files - image.tinydream, err = tinydream.New(opts.ModelFile) - return err -} - -func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error { - return image.tinydream.GenerateImage( - int(opts.Height), - int(opts.Width), - int(opts.Step), - int(opts.Seed), - opts.PositivePrompt, - opts.NegativePrompt, - opts.Dst) -} diff --git a/backend/go/llm/bert/bert.go b/backend/go/llm/bert/bert.go deleted file mode 100644 index a6a1d1c5..00000000 --- a/backend/go/llm/bert/bert.go +++ /dev/null @@ -1,34 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - bert "github.com/go-skynet/go-bert.cpp" - - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -type Embeddings struct { - base.SingleThread - bert *bert.Bert -} - -func (llm *Embeddings) Load(opts *pb.ModelOptions) error { - model, err 
:= bert.New(opts.ModelFile) - llm.bert = model - return err -} - -func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) { - - if len(opts.EmbeddingTokens) > 0 { - tokens := []int{} - for _, t := range opts.EmbeddingTokens { - tokens = append(tokens, int(t)) - } - return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads))) - } - - return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads))) -} diff --git a/backend/go/llm/bert/main.go b/backend/go/llm/bert/main.go deleted file mode 100644 index 3a022f40..00000000 --- a/backend/go/llm/bert/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &Embeddings{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/rwkv/rwkv.go b/backend/go/llm/rwkv/rwkv.go deleted file mode 100644 index fe9cd815..00000000 --- a/backend/go/llm/rwkv/rwkv.go +++ /dev/null @@ -1,95 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - "path/filepath" - - "github.com/donomii/go-rwkv.cpp" - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -const tokenizerSuffix = ".tokenizer.json" - -type LLM struct { - base.SingleThread - - rwkv *rwkv.RwkvState -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - tokenizerFile := opts.Tokenizer - if tokenizerFile == "" { - modelFile := filepath.Base(opts.ModelFile) - tokenizerFile = modelFile + tokenizerSuffix - } - modelPath := filepath.Dir(opts.ModelFile) - tokenizerPath := filepath.Join(modelPath, tokenizerFile) - - model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads())) - - if model == nil { - return fmt.Errorf("rwkv could not load model") - } - llm.rwkv = model - return nil -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - return "", err - } - - response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil) - - return response, nil -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - fmt.Println("Error processing input: ", err) - return - } - - llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool { - results <- s - return true - }) - close(results) - }() - - return nil -} - -func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) { - tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt) - if err != nil { - return pb.TokenizationResponse{}, err - } - - l := len(tokens) - i32Tokens := make([]int32, l) - - for i, t := range tokens { - i32Tokens[i] = int32(t.ID) - } - - return pb.TokenizationResponse{ - Length: int32(l), - Tokens: i32Tokens, - }, nil -} diff --git 
a/backend/go/stores/store.go b/backend/go/stores/store.go index a4849b57..c8788a9c 100644 --- a/backend/go/stores/store.go +++ b/backend/go/stores/store.go @@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) } func isNormalized(k []float32) bool { - var sum float32 + var sum float64 + for _, v := range k { - sum += v + v64 := float64(v) + sum += v64*v64 } - return sum == 1.0 + s := math.Sqrt(sum) + + return s >= 0.99 && s <= 1.01 } // TODO: This we could replace with handwritten SIMD code @@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 { dot += k1[i] * k2[i] } - assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot)) + assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot)) // 2.0 * (1.0 - dot) would be the Euclidean distance return dot @@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 { sim := float32(dot / (mag1 * math.Sqrt(mag2))) - assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim)) + assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim)) return sim } diff --git a/backend/go/llm/rwkv/main.go b/backend/go/vad/silero/main.go similarity index 83% rename from backend/go/llm/rwkv/main.go rename to backend/go/vad/silero/main.go index acf44087..28f51e49 100644 --- a/backend/go/llm/rwkv/main.go +++ b/backend/go/vad/silero/main.go @@ -15,7 +15,7 @@ var ( func main() { flag.Parse() - if err := grpc.StartServer(*addr, &LLM{}); err != nil { + if err := grpc.StartServer(*addr, &VAD{}); err != nil { panic(err) } } diff --git a/backend/go/vad/silero/vad.go b/backend/go/vad/silero/vad.go new file mode 100644 index 00000000..5a164d2a --- /dev/null +++ b/backend/go/vad/silero/vad.go @@ -0,0 +1,54 @@ +package main + +// This is a wrapper to statisfy the GRPC service interface +// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) +import ( + "fmt" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + "github.com/streamer45/silero-vad-go/speech" +) + +type VAD struct { + base.SingleThread + detector *speech.Detector +} + +func (vad *VAD) Load(opts *pb.ModelOptions) error { + v, err := speech.NewDetector(speech.DetectorConfig{ + ModelPath: opts.ModelFile, + SampleRate: 16000, + //WindowSize: 1024, + Threshold: 0.5, + MinSilenceDurationMs: 0, + SpeechPadMs: 0, + }) + if err != nil { + return fmt.Errorf("create silero detector: %w", err) + } + + vad.detector = v + return err +} + +func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) { + audio := req.Audio + + segments, err := vad.detector.Detect(audio) + if err != nil { + return pb.VADResponse{}, fmt.Errorf("detect: %w", err) + } + + vadSegments := []*pb.VADSegment{} + for _, s := range segments { + vadSegments = append(vadSegments, &pb.VADSegment{ + Start: float32(s.SpeechStartAt), + End: float32(s.SpeechEndAt), + }) + } + + return pb.VADResponse{ + Segments: vadSegments, + }, nil +} diff --git a/backend/python/autogptq/requirements-cublas11.txt b/backend/python/autogptq/requirements-cublas11.txt index 6461b696..cf469472 100644 --- a/backend/python/autogptq/requirements-cublas11.txt +++ b/backend/python/autogptq/requirements-cublas11.txt @@ -1,2 +1,2 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 diff --git a/backend/python/autogptq/requirements-cublas12.txt b/backend/python/autogptq/requirements-cublas12.txt index 12c6d5d5..20f84cf7 100644 --- 
a/backend/python/autogptq/requirements-cublas12.txt +++ b/backend/python/autogptq/requirements-cublas12.txt @@ -1 +1 @@ -torch +torch==2.4.1 \ No newline at end of file diff --git a/backend/python/autogptq/requirements-hipblas.txt b/backend/python/autogptq/requirements-hipblas.txt index 76018445..ecd817dc 100644 --- a/backend/python/autogptq/requirements-hipblas.txt +++ b/backend/python/autogptq/requirements-hipblas.txt @@ -1,2 +1,2 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch==2.4.1+rocm6.0 \ No newline at end of file diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index d5e0173e..07b502eb 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -1,5 +1,6 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index 150fcc1b..af596d9e 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,6 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi transformers \ No newline at end of file diff --git a/backend/python/bark/requirements-cpu.txt b/backend/python/bark/requirements-cpu.txt index 0b2c3bc7..12e376ad 100644 --- a/backend/python/bark/requirements-cpu.txt +++ b/backend/python/bark/requirements-cpu.txt @@ -1,4 +1,4 @@ transformers accelerate -torch -torchaudio \ No newline at end of file +torch==2.4.1 +torchaudio==2.4.1 \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt index 71a6a93f..9f8fe9ff 100644 --- a/backend/python/bark/requirements-cublas11.txt +++ b/backend/python/bark/requirements-cublas11.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch -torchaudio +torch==2.4.1+cu118 +torchaudio==2.4.1+cu118 transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt index 0fa27074..53716949 100644 --- a/backend/python/bark/requirements-cublas12.txt +++ b/backend/python/bark/requirements-cublas12.txt @@ -1,4 +1,4 @@ -torch -torchaudio +torch==2.4.1 +torchaudio==2.4.1 transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt index af9e820e..1d54fb16 100644 --- a/backend/python/bark/requirements-hipblas.txt +++ b/backend/python/bark/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchaudio +torch==2.4.1+rocm6.0 +torchaudio==2.4.1+rocm6.0 transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index c0e4dcaa..f24bd166 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -1,8 +1,9 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio 
+intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index 6404b98e..f4beaec1 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,4 +1,4 @@ bark==0.1.5 -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index 934b1fd3..6013cf76 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -17,6 +17,9 @@ # LIMIT_TARGETS="cublas12" # source $(dirname $0)/../common/libbackend.sh # + +PYTHON_VERSION="3.10" + function init() { # Name of the backend (directory name) BACKEND_NAME=${PWD##*/} @@ -88,7 +91,7 @@ function getBuildProfile() { # always result in an activated virtual environment function ensureVenv() { if [ ! -d "${EDIR}/venv" ]; then - uv venv ${EDIR}/venv + uv venv --python ${PYTHON_VERSION} ${EDIR}/venv echo "virtualenv created" fi diff --git a/backend/python/common/template/Makefile b/backend/python/common/template/Makefile index 6cc45707..c0e5169f 100644 --- a/backend/python/common/template/Makefile +++ b/backend/python/common/template/Makefile @@ -1,8 +1,9 @@ .DEFAULT_GOAL := install .PHONY: install -install: protogen +install: bash install.sh + $(MAKE) protogen .PHONY: protogen protogen: backend_pb2_grpc.py backend_pb2.py @@ -12,7 +13,7 @@ protogen-clean: $(RM) backend_pb2_grpc.py backend_pb2.py backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto + bash protogen.sh .PHONY: clean clean: protogen-clean diff --git a/backend/python/common/template/protogen.sh b/backend/python/common/template/protogen.sh new file mode 100644 index 00000000..32f39fbb --- /dev/null +++ b/backend/python/common/template/protogen.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto \ No newline at end of file diff --git a/backend/python/common/template/requirements-intel.txt b/backend/python/common/template/requirements-intel.txt index 6dc25a10..b5318a13 100644 --- a/backend/python/common/template/requirements-intel.txt +++ b/backend/python/common/template/requirements-intel.txt @@ -1,4 +1,5 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] \ No newline at end of file diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index 21610c1c..125b18dd 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,2 +1,3 @@ -grpcio==1.66.1 -protobuf \ No newline at end of file +grpcio==1.70.0 +protobuf +grpcio-tools \ No newline at end of file diff --git a/backend/python/coqui/requirements-cpu.txt b/backend/python/coqui/requirements-cpu.txt index bbcdc8cd..c5201d62 100644 --- a/backend/python/coqui/requirements-cpu.txt +++ b/backend/python/coqui/requirements-cpu.txt @@ -1,3 +1,4 @@ transformers accelerate -torch \ No newline at end of file +torch==2.4.1 +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt index 71a6a93f..35fd4f42 100644 --- a/backend/python/coqui/requirements-cublas11.txt +++ b/backend/python/coqui/requirements-cublas11.txt @@ -1,5 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch -torchaudio +torch==2.4.1+cu118 +torchaudio==2.4.1+cu118 transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt index 0fa27074..fac719d4 100644 --- a/backend/python/coqui/requirements-cublas12.txt +++ b/backend/python/coqui/requirements-cublas12.txt @@ -1,4 +1,5 @@ -torch -torchaudio +torch==2.4.1 +torchaudio==2.4.1 transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index af9e820e..359e5867 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,5 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchaudio +torch==2.4.1+rocm6.0 +torchaudio==2.4.1+rocm6.0 transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index c0e4dcaa..202dd4ad 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -1,8 +1,10 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index d7708363..5ec13b5f 
100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,4 +1,4 @@ -TTS==0.22.0 -grpcio==1.66.1 +grpcio==1.70.0 protobuf -certifi \ No newline at end of file +certifi +packaging==24.1 \ No newline at end of file diff --git a/backend/python/coqui/test.py b/backend/python/coqui/test.py index d1418fa3..e0b1a0bd 100644 --- a/backend/python/coqui/test.py +++ b/backend/python/coqui/test.py @@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase): This method sets up the gRPC service by starting the server """ self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) + time.sleep(30) def tearDown(self) -> None: """ diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index e7ad1cdd..c9aa02bc 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -17,7 +17,7 @@ import backend_pb2_grpc import grpc -from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ +from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker @@ -247,11 +247,16 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): use_safetensors=True, variant=variant) elif request.PipelineType == "FluxPipeline": + if fromSingleFile: + self.pipe = FluxPipeline.from_single_file(modelFile, + torch_dtype=torchType, + use_safetensors=True) + else: self.pipe = FluxPipeline.from_pretrained( request.Model, torch_dtype=torch.bfloat16) - if request.LowVRAM: - self.pipe.enable_model_cpu_offload() + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() elif request.PipelineType == "FluxTransformer2DModel": dtype = torch.bfloat16 # specify from environment or default to "ChuckMcSneed/FLUX.1-dev" @@ -270,6 +275,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.LowVRAM: self.pipe.enable_model_cpu_offload() + elif request.PipelineType == "SanaPipeline": + self.pipe = SanaPipeline.from_pretrained( + request.Model, + variant="bf16", + torch_dtype=torch.bfloat16) + self.pipe.vae.to(torch.bfloat16) + self.pipe.text_encoder.to(torch.bfloat16) if CLIPSKIP and request.CLIPSkip != 0: self.clip_skip = request.CLIPSkip @@ -296,22 +308,34 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.pipe.controlnet = self.controlnet else: self.controlnet = None - # Assume directory from request.ModelFile. 
- # Only if request.LoraAdapter it's not an absolute path - if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter: - # get base path of modelFile - modelFileBase = os.path.dirname(request.ModelFile) + + if request.LoraAdapter and not os.path.isabs(request.LoraAdapter): # modify LoraAdapter to be relative to modelFileBase - request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter) + request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter) + device = "cpu" if not request.CUDA else "cuda" self.device = device if request.LoraAdapter: # Check if its a local file and not a directory ( we load lora differently for a safetensor file ) if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter): - # self.load_lora_weights(request.LoraAdapter, 1, device, torchType) self.pipe.load_lora_weights(request.LoraAdapter) else: self.pipe.unet.load_attn_procs(request.LoraAdapter) + if len(request.LoraAdapters) > 0: + i = 0 + adapters_name = [] + adapters_weights = [] + for adapter in request.LoraAdapters: + if not os.path.isabs(adapter): + adapter = os.path.join(request.ModelPath, adapter) + self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}") + adapters_name.append(f"adapter_{i}") + i += 1 + + for adapters_weight in request.LoraScales: + adapters_weights.append(adapters_weight) + + self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights) if request.CUDA: self.pipe.to('cuda') @@ -392,8 +416,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): # create a dictionary of values for the parameters options = { "negative_prompt": request.negative_prompt, - "width": request.width, - "height": request.height, "num_inference_steps": steps, } @@ -411,13 +433,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): keys = options.keys() if request.EnableParameters != "": - keys = request.EnableParameters.split(",") + keys = [key.strip() for key in request.EnableParameters.split(",")] if request.EnableParameters == "none": keys = [] # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults - kwargs = {key: options[key] for key in keys} + kwargs = {key: options.get(key) for key in keys if key in options} # Set seed if request.seed > 0: @@ -428,6 +450,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if self.PipelineType == "FluxPipeline": kwargs["max_sequence_length"] = 256 + if request.width: + kwargs["width"] = request.width + + if request.height: + kwargs["height"] = request.height + if self.PipelineType == "FluxTransformer2DModel": kwargs["output_type"] = "pil" kwargs["generator"] = torch.Generator("cpu").manual_seed(0) @@ -447,6 +475,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): export_to_video(video_frames, request.dst) return backend_pb2.Result(message="Media generated successfully", success=True) + print(f"Generating image with {kwargs=}", file=sys.stderr) image = {} if COMPEL: conditioning, pooled = self.compel.build_conditioning_tensor(prompt) diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt index 235bb57e..20667cc0 100644 --- a/backend/python/diffusers/requirements-cpu.txt +++ b/backend/python/diffusers/requirements-cpu.txt @@ -5,5 +5,5 @@ accelerate compel peft sentencepiece -torch +torch==2.4.1 optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt 
b/backend/python/diffusers/requirements-cublas11.txt index 40e718cb..19e2d72e 100644 --- a/backend/python/diffusers/requirements-cublas11.txt +++ b/backend/python/diffusers/requirements-cublas11.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 diffusers opencv-python transformers diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt index 3bcc5397..3992b039 100644 --- a/backend/python/diffusers/requirements-cublas12.txt +++ b/backend/python/diffusers/requirements-cublas12.txt @@ -1,4 +1,4 @@ -torch +torch==2.4.1 diffusers opencv-python transformers diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index 566278a8..eb7448b0 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -1,9 +1,10 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchvision +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchvision==0.18.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools diffusers opencv-python transformers diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index 043c7aba..8c450dca 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,5 +1,5 @@ setuptools -grpcio==1.66.1 +grpcio==1.70.0 pillow protobuf certifi diff --git a/backend/python/exllama2/requirements-cpu.txt b/backend/python/exllama2/requirements-cpu.txt index bbcdc8cd..2021fc20 100644 --- a/backend/python/exllama2/requirements-cpu.txt +++ b/backend/python/exllama2/requirements-cpu.txt @@ -1,3 +1,3 @@ transformers accelerate -torch \ No newline at end of file +torch==2.4.1 \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt index 1dfb5b98..2d1958c7 100644 --- a/backend/python/exllama2/requirements-cublas11.txt +++ b/backend/python/exllama2/requirements-cublas11.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 transformers accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas12.txt b/backend/python/exllama2/requirements-cublas12.txt index 1ec544cd..93e62c5a 100644 --- a/backend/python/exllama2/requirements-cublas12.txt +++ b/backend/python/exllama2/requirements-cublas12.txt @@ -1,3 +1,3 @@ -torch +torch==2.4.1 transformers accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 6fb018a0..cb622d0c 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi wheel diff --git a/backend/python/openvoice/Makefile b/backend/python/faster-whisper/Makefile similarity index 54% rename from backend/python/openvoice/Makefile rename to backend/python/faster-whisper/Makefile index a187a00f..c0e5169f 100644 --- a/backend/python/openvoice/Makefile +++ b/backend/python/faster-whisper/Makefile @@ -1,8 +1,9 @@ .DEFAULT_GOAL := install .PHONY: install -install: protogen +install: bash install.sh + $(MAKE) protogen .PHONY: protogen protogen: backend_pb2_grpc.py backend_pb2.py 
@@ -12,14 +13,8 @@ protogen-clean: $(RM) backend_pb2_grpc.py backend_pb2.py backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto + bash protogen.sh .PHONY: clean clean: protogen-clean - rm -rf venv __pycache__ - -.PHONY: test -test: protogen - @echo "Testing openvoice..." - bash test.sh - @echo "openvoice tested." \ No newline at end of file + rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/sentencetransformers/backend.py b/backend/python/faster-whisper/backend.py similarity index 51% rename from backend/python/sentencetransformers/backend.py rename to backend/python/faster-whisper/backend.py index 2a20bf60..dbb8b3d9 100755 --- a/backend/python/sentencetransformers/backend.py +++ b/backend/python/faster-whisper/backend.py @@ -1,85 +1,65 @@ #!/usr/bin/env python3 """ -Extra gRPC server for HuggingFace SentenceTransformer models. +This is an extra gRPC server of LocalAI for faster-whisper audio transcription """ from concurrent import futures - +import time import argparse import signal import sys import os - -import time import backend_pb2 import backend_pb2_grpc +from faster_whisper import WhisperModel + import grpc -from sentence_transformers import SentenceTransformer _ONE_DAY_IN_SECONDS = 60 * 60 * 24 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) +COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None) # Implement the BackendServicer class with the service methods class BackendServicer(backend_pb2_grpc.BackendServicer): """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. + BackendServicer is the class that implements the gRPC service """ def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. + device = "cpu" + # Get device + # device = "cuda" if request.CUDA else "cpu" + if request.CUDA: + device = "cuda" - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model try: - self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) + print("Preparing models, please wait", file=sys.stderr) + self.model = WhisperModel(request.Model, device=device, compute_type="float16") except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - # Implement your logic here for the LoadModel service # Replace this with your desired response return backend_pb2.Result(message="Model loaded successfully", success=True) - def Embedding(self, request, context): - """ - A gRPC method that calculates embeddings for a given sentence. - - Args: - request: An EmbeddingRequest object that contains the request parameters.
- context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - An EmbeddingResult object that contains the calculated embeddings. - """ - # Implement your logic here for the Embedding service - # Replace this with your desired response - print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) - sentence_embeddings = self.model.encode(request.Embeddings) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings) + def AudioTranscription(self, request, context): + resultSegments = [] + text = "" + try: + segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False) + id = 0 + for segment in segments: + print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) + resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text)) + text += segment.text + id += 1 + except Exception as err: + print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr) + return backend_pb2.TranscriptResult(segments=resultSegments, text=text) def serve(address): server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/sentencetransformers/install.sh b/backend/python/faster-whisper/install.sh similarity index 100% rename from backend/python/sentencetransformers/install.sh rename to backend/python/faster-whisper/install.sh diff --git a/backend/python/faster-whisper/protogen.sh b/backend/python/faster-whisper/protogen.sh new file mode 100644 index 00000000..32f39fbb --- /dev/null +++ b/backend/python/faster-whisper/protogen.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cpu.txt b/backend/python/faster-whisper/requirements-cpu.txt new file mode 100644 index 00000000..3e03f3ad --- /dev/null +++ b/backend/python/faster-whisper/requirements-cpu.txt @@ -0,0 +1,8 @@ +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +torch==2.4.1 +optimum-quanto \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cublas11.txt b/backend/python/faster-whisper/requirements-cublas11.txt new file mode 100644 index 00000000..b7453295 --- /dev/null +++ b/backend/python/faster-whisper/requirements-cublas11.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.4.1+cu118 +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cublas12.txt b/backend/python/faster-whisper/requirements-cublas12.txt new file mode 100644 index 00000000..8f46fa4a --- /dev/null +++ b/backend/python/faster-whisper/requirements-cublas12.txt @@ -0,0 +1,8 @@ +torch==2.4.1 +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/openvoice/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt similarity index 74% rename from backend/python/openvoice/requirements-hipblas.txt rename to backend/python/faster-whisper/requirements-hipblas.txt index 76018445..29413f05 100644 --- a/backend/python/openvoice/requirements-hipblas.txt +++ b/backend/python/faster-whisper/requirements-hipblas.txt @@ -1,2 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +faster-whisper \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-intel.txt b/backend/python/faster-whisper/requirements-intel.txt new file mode 100644 index 00000000..417aa0b4 --- /dev/null +++ b/backend/python/faster-whisper/requirements-intel.txt @@ -0,0 +1,6 @@ +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu +optimum[openvino] +faster-whisper \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements.txt b/backend/python/faster-whisper/requirements.txt new file mode 100644 index 00000000..125b18dd --- /dev/null +++ b/backend/python/faster-whisper/requirements.txt @@ -0,0 +1,3 @@ +grpcio==1.70.0 +protobuf +grpcio-tools \ No newline at end of file diff --git a/backend/python/openvoice/run.sh b/backend/python/faster-whisper/run.sh similarity index 100% rename from backend/python/openvoice/run.sh rename to backend/python/faster-whisper/run.sh diff --git a/backend/python/mamba/test.sh b/backend/python/faster-whisper/test.sh similarity index 100% rename from backend/python/mamba/test.sh rename to backend/python/faster-whisper/test.sh diff --git a/backend/python/kokoro/Makefile b/backend/python/kokoro/Makefile new file mode 100644 index 00000000..c0e5169f --- /dev/null +++ b/backend/python/kokoro/Makefile @@ -0,0 +1,20 @@ +.DEFAULT_GOAL := install + +.PHONY: install +install: + bash install.sh + $(MAKE) protogen + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + bash protogen.sh + +.PHONY: 
clean +clean: protogen-clean + rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/parler-tts/backend.py b/backend/python/kokoro/backend.py old mode 100644 new mode 100755 similarity index 64% rename from backend/python/parler-tts/backend.py rename to backend/python/kokoro/backend.py index 655990d7..1fd1feb9 --- a/backend/python/parler-tts/backend.py +++ b/backend/python/kokoro/backend.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Extra gRPC server for MusicgenForConditionalGeneration models. +Extra gRPC server for Kokoro models. """ from concurrent import futures @@ -8,20 +8,17 @@ import argparse import signal import sys import os - import time import backend_pb2 import backend_pb2_grpc - +import soundfile as sf import grpc -from scipy.io.wavfile import write as write_wav - -from parler_tts import ParlerTTSForConditionalGeneration -from transformers import AutoTokenizer -import soundfile as sf +from models import build_model +from kokoro import generate import torch +SAMPLE_RATE = 22050 _ONE_DAY_IN_SECONDS = 60 * 60 * 24 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1 @@ -59,10 +56,31 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): A Result object that contains the result of the LoadModel operation. """ model_name = request.Model - device = "cuda:0" if torch.cuda.is_available() else "cpu" try: - self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device) - self.tokenizer = AutoTokenizer.from_pretrained(model_name) + device = "cuda:0" if torch.cuda.is_available() else "cpu" + self.MODEL = build_model(request.ModelFile, device) + options = request.Options + # Find the voice from the options, options are a list of strings in this form optname:optvalue: + VOICE_NAME = None + for opt in options: + if opt.startswith("voice:"): + VOICE_NAME = opt.split(":")[1] + break + if VOICE_NAME is None: + return backend_pb2.Result(success=False, message=f"No voice specified in options") + MODELPATH = request.ModelPath + # If voice name contains a plus, split it and load the two models and combine them + if "+" in VOICE_NAME: + voice1, voice2 = VOICE_NAME.split("+") + voice1 = torch.load(f'{MODELPATH}/{voice1}.pt', weights_only=True).to(device) + voice2 = torch.load(f'{MODELPATH}/{voice2}.pt', weights_only=True).to(device) + self.VOICEPACK = torch.mean(torch.stack([voice1, voice2]), dim=0) + else: + self.VOICEPACK = torch.load(f'{MODELPATH}/{VOICE_NAME}.pt', weights_only=True).to(device) + + self.VOICE_NAME = VOICE_NAME + + print(f'Loaded voice: {VOICE_NAME}') except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") @@ -70,38 +88,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): def TTS(self, request, context): model_name = request.model - voice = request.voice - if voice == "": - voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast." 
if model_name == "": return backend_pb2.Result(success=False, message="request.model is required") try: - device = "cuda:0" if torch.cuda.is_available() else "cpu" - input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device) - prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device) - - generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids) - audio_arr = generation.cpu().numpy().squeeze() - print("[parler-tts] TTS generated!", file=sys.stderr) - sf.write(request.dst, audio_arr, self.model.config.sampling_rate) - print("[parler-tts] TTS saved to", request.dst, file=sys.stderr) - print("[parler-tts] TTS for", file=sys.stderr) - print(request, file=sys.stderr) + audio, out_ps = generate(self.MODEL, request.text, self.VOICEPACK, lang=self.VOICE_NAME) + print(out_ps) + sf.write(request.dst, audio, SAMPLE_RATE) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=True) - def serve(address): server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) server.add_insecure_port(address) server.start() - print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr) + print("[Kokoro] Server started. Listening on: " + address, file=sys.stderr) # Define the signal handler function def signal_handler(sig, frame): - print("[parler-tts] Received termination signal. Shutting down...") + print("[Kokoro] Received termination signal. Shutting down...") server.stop(0) sys.exit(0) @@ -121,5 +127,5 @@ if __name__ == "__main__": "--addr", default="localhost:50051", help="The address to bind the server to." 
) args = parser.parse_args() - print(f"[parler-tts] startup: {args}", file=sys.stderr) + print(f"[Kokoro] startup: {args}", file=sys.stderr) serve(args.addr) diff --git a/backend/python/transformers-musicgen/install.sh b/backend/python/kokoro/install.sh similarity index 100% rename from backend/python/transformers-musicgen/install.sh rename to backend/python/kokoro/install.sh diff --git a/backend/python/kokoro/istftnet.py b/backend/python/kokoro/istftnet.py new file mode 100644 index 00000000..818fb912 --- /dev/null +++ b/backend/python/kokoro/istftnet.py @@ -0,0 +1,524 @@ +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/istftnet.py +# https://github.com/yl4579/StyleTTS2/blob/main/Modules/istftnet.py +from scipy.signal import get_window +from torch.nn import Conv1d, ConvTranspose1d +from torch.nn.utils import weight_norm, remove_weight_norm +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +# https://github.com/yl4579/StyleTTS2/blob/main/Modules/utils.py +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + +def get_padding(kernel_size, dilation=1): + return int((kernel_size*dilation - dilation)/2) + +LRELU_SLOPE = 0.1 + +class AdaIN1d(nn.Module): + def __init__(self, style_dim, num_features): + super().__init__() + self.norm = nn.InstanceNorm1d(num_features, affine=False) + self.fc = nn.Linear(style_dim, num_features*2) + + def forward(self, x, s): + h = self.fc(s) + h = h.view(h.size(0), h.size(1), 1) + gamma, beta = torch.chunk(h, chunks=2, dim=1) + return (1 + gamma) * self.norm(x) + beta + +class AdaINResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5), style_dim=64): + super(AdaINResBlock1, self).__init__() + self.convs1 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]))) + ]) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))) + ]) + self.convs2.apply(init_weights) + + self.adain1 = nn.ModuleList([ + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + ]) + + self.adain2 = nn.ModuleList([ + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + ]) + + self.alpha1 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs1))]) + self.alpha2 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs2))]) + + + def forward(self, x, s): + for c1, c2, n1, n2, a1, a2 in zip(self.convs1, self.convs2, self.adain1, self.adain2, self.alpha1, self.alpha2): + xt = n1(x, s) + xt = xt + (1 / a1) * (torch.sin(a1 * xt) ** 2) # Snake1D + xt = c1(xt) + xt = n2(xt, s) + xt = xt + (1 / a2) * (torch.sin(a2 * xt) ** 2) # Snake1D + xt = c2(xt) + x = xt + x + return x + + def 
remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + +class TorchSTFT(torch.nn.Module): + def __init__(self, filter_length=800, hop_length=200, win_length=800, window='hann'): + super().__init__() + self.filter_length = filter_length + self.hop_length = hop_length + self.win_length = win_length + self.window = torch.from_numpy(get_window(window, win_length, fftbins=True).astype(np.float32)) + + def transform(self, input_data): + forward_transform = torch.stft( + input_data, + self.filter_length, self.hop_length, self.win_length, window=self.window.to(input_data.device), + return_complex=True) + + return torch.abs(forward_transform), torch.angle(forward_transform) + + def inverse(self, magnitude, phase): + inverse_transform = torch.istft( + magnitude * torch.exp(phase * 1j), + self.filter_length, self.hop_length, self.win_length, window=self.window.to(magnitude.device)) + + return inverse_transform.unsqueeze(-2) # unsqueeze to stay consistent with conv_transpose1d implementation + + def forward(self, input_data): + self.magnitude, self.phase = self.transform(input_data) + reconstruction = self.inverse(self.magnitude, self.phase) + return reconstruction + +class SineGen(torch.nn.Module): + """ Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__(self, samp_rate, upsample_scale, harmonic_num=0, + sine_amp=0.1, noise_std=0.003, + voiced_threshold=0, + flag_for_pulse=False): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + self.flag_for_pulse = flag_for_pulse + self.upsample_scale = upsample_scale + + def _f02uv(self, f0): + # generate uv signal + uv = (f0 > self.voiced_threshold).type(torch.float32) + return uv + + def _f02sine(self, f0_values): + """ f0_values: (batchsize, length, dim) + where dim indicates fundamental tone and overtones + """ + # convert to F0 in rad. The interger part n can be ignored + # because 2 * np.pi * n doesn't affect phase + rad_values = (f0_values / self.sampling_rate) % 1 + + # initial phase noise (no noise for fundamental component) + rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \ + device=f0_values.device) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + + # instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad) + if not self.flag_for_pulse: +# # for normal case + +# # To prevent torch.cumsum numerical overflow, +# # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1. +# # Buffer tmp_over_one_idx indicates the time step to add -1. 
+# # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi +# tmp_over_one = torch.cumsum(rad_values, 1) % 1 +# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0 +# cumsum_shift = torch.zeros_like(rad_values) +# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + +# phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi + rad_values = torch.nn.functional.interpolate(rad_values.transpose(1, 2), + scale_factor=1/self.upsample_scale, + mode="linear").transpose(1, 2) + +# tmp_over_one = torch.cumsum(rad_values, 1) % 1 +# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0 +# cumsum_shift = torch.zeros_like(rad_values) +# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + + phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi + phase = torch.nn.functional.interpolate(phase.transpose(1, 2) * self.upsample_scale, + scale_factor=self.upsample_scale, mode="linear").transpose(1, 2) + sines = torch.sin(phase) + + else: + # If necessary, make sure that the first time step of every + # voiced segments is sin(pi) or cos(0) + # This is used for pulse-train generation + + # identify the last time step in unvoiced segments + uv = self._f02uv(f0_values) + uv_1 = torch.roll(uv, shifts=-1, dims=1) + uv_1[:, -1, :] = 1 + u_loc = (uv < 1) * (uv_1 > 0) + + # get the instantanouse phase + tmp_cumsum = torch.cumsum(rad_values, dim=1) + # different batch needs to be processed differently + for idx in range(f0_values.shape[0]): + temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :] + temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :] + # stores the accumulation of i.phase within + # each voiced segments + tmp_cumsum[idx, :, :] = 0 + tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum + + # rad_values - tmp_cumsum: remove the accumulation of i.phase + # within the previous voiced segment. + i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1) + + # get the sines + sines = torch.cos(i_phase * 2 * np.pi) + return sines + + def forward(self, f0): + """ sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, + device=f0.device) + # fundamental component + fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device)) + + # generate sine waveforms + sine_waves = self._f02sine(fn) * self.sine_amp + + # generate uv signal + # uv = torch.ones(f0.shape) + # uv = uv * (f0 > self.voiced_threshold) + uv = self._f02uv(f0) + + # noise: for unvoiced should be similar to sine_amp + # std = self.sine_amp/3 -> max value ~ self.sine_amp + # . 
for voiced regions is self.noise_std + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + + # first: set the unvoiced part to 0 by uv + # then: additive noise + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise + + +class SourceModuleHnNSF(torch.nn.Module): + """ SourceModule for hn-nsf + SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + harmonic_num: number of harmonic above F0 (default: 0) + sine_amp: amplitude of sine source signal (default: 0.1) + add_noise_std: std of additive Gaussian noise (default: 0.003) + note that amplitude of noise in unvoiced is decided + by sine_amp + voiced_threshold: threhold to set U/V given F0 (default: 0) + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + uv (batchsize, length, 1) + """ + + def __init__(self, sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0): + super(SourceModuleHnNSF, self).__init__() + + self.sine_amp = sine_amp + self.noise_std = add_noise_std + + # to produce sine waveforms + self.l_sin_gen = SineGen(sampling_rate, upsample_scale, harmonic_num, + sine_amp, add_noise_std, voiced_threshod) + + # to merge source harmonics into a single excitation + self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) + self.l_tanh = torch.nn.Tanh() + + def forward(self, x): + """ + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + """ + # source for harmonic branch + with torch.no_grad(): + sine_wavs, uv, _ = self.l_sin_gen(x) + sine_merge = self.l_tanh(self.l_linear(sine_wavs)) + + # source for noise branch, in the same shape as uv + noise = torch.randn_like(uv) * self.sine_amp / 3 + return sine_merge, noise, uv +def padDiff(x): + return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0) + + +class Generator(torch.nn.Module): + def __init__(self, style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel, resblock_dilation_sizes, upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size): + super(Generator, self).__init__() + + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_rates) + resblock = AdaINResBlock1 + + self.m_source = SourceModuleHnNSF( + sampling_rate=24000, + upsample_scale=np.prod(upsample_rates) * gen_istft_hop_size, + harmonic_num=8, voiced_threshod=10) + self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates) * gen_istft_hop_size) + self.noise_convs = nn.ModuleList() + self.noise_res = nn.ModuleList() + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): + self.ups.append(weight_norm( + ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)), + k, u, padding=(k-u)//2))) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel//(2**(i+1)) + for j, (k, d) in enumerate(zip(resblock_kernel_sizes,resblock_dilation_sizes)): + self.resblocks.append(resblock(ch, k, d, style_dim)) + + c_cur = upsample_initial_channel // (2 ** (i + 1)) + + if i + 1 < len(upsample_rates): # + stride_f0 = np.prod(upsample_rates[i + 1:]) + self.noise_convs.append(Conv1d( + gen_istft_n_fft + 2, 
c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2)) + self.noise_res.append(resblock(c_cur, 7, [1,3,5], style_dim)) + else: + self.noise_convs.append(Conv1d(gen_istft_n_fft + 2, c_cur, kernel_size=1)) + self.noise_res.append(resblock(c_cur, 11, [1,3,5], style_dim)) + + + self.post_n_fft = gen_istft_n_fft + self.conv_post = weight_norm(Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3)) + self.ups.apply(init_weights) + self.conv_post.apply(init_weights) + self.reflection_pad = torch.nn.ReflectionPad1d((1, 0)) + self.stft = TorchSTFT(filter_length=gen_istft_n_fft, hop_length=gen_istft_hop_size, win_length=gen_istft_n_fft) + + + def forward(self, x, s, f0): + with torch.no_grad(): + f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2) # bs,n,t + + har_source, noi_source, uv = self.m_source(f0) + har_source = har_source.transpose(1, 2).squeeze(1) + har_spec, har_phase = self.stft.transform(har_source) + har = torch.cat([har_spec, har_phase], dim=1) + + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x_source = self.noise_convs[i](har) + x_source = self.noise_res[i](x_source, s) + + x = self.ups[i](x) + if i == self.num_upsamples - 1: + x = self.reflection_pad(x) + + x = x + x_source + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x, s) + else: + xs += self.resblocks[i*self.num_kernels+j](x, s) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :]) + phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :]) + return self.stft.inverse(spec, phase) + + def fw_phase(self, x, s): + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x = self.ups[i](x) + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x, s) + else: + xs += self.resblocks[i*self.num_kernels+j](x, s) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.reflection_pad(x) + x = self.conv_post(x) + spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :]) + phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :]) + return spec, phase + + def remove_weight_norm(self): + print('Removing weight norm...') + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) + + +class AdainResBlk1d(nn.Module): + def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2), + upsample='none', dropout_p=0.0): + super().__init__() + self.actv = actv + self.upsample_type = upsample + self.upsample = UpSample1d(upsample) + self.learned_sc = dim_in != dim_out + self._build_weights(dim_in, dim_out, style_dim) + self.dropout = nn.Dropout(dropout_p) + + if upsample == 'none': + self.pool = nn.Identity() + else: + self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1)) + + + def _build_weights(self, dim_in, dim_out, style_dim): + self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1)) + self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1)) + self.norm1 = AdaIN1d(style_dim, dim_in) + self.norm2 = AdaIN1d(style_dim, dim_out) + if self.learned_sc: + self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False)) + + def _shortcut(self, x): + x = self.upsample(x) + if self.learned_sc: + x = self.conv1x1(x) + return x + + def _residual(self, x, s): + x = self.norm1(x, s) + x = self.actv(x) + x = 
self.pool(x) + x = self.conv1(self.dropout(x)) + x = self.norm2(x, s) + x = self.actv(x) + x = self.conv2(self.dropout(x)) + return x + + def forward(self, x, s): + out = self._residual(x, s) + out = (out + self._shortcut(x)) / np.sqrt(2) + return out + +class UpSample1d(nn.Module): + def __init__(self, layer_type): + super().__init__() + self.layer_type = layer_type + + def forward(self, x): + if self.layer_type == 'none': + return x + else: + return F.interpolate(x, scale_factor=2, mode='nearest') + +class Decoder(nn.Module): + def __init__(self, dim_in=512, F0_channel=512, style_dim=64, dim_out=80, + resblock_kernel_sizes = [3,7,11], + upsample_rates = [10, 6], + upsample_initial_channel=512, + resblock_dilation_sizes=[[1,3,5], [1,3,5], [1,3,5]], + upsample_kernel_sizes=[20, 12], + gen_istft_n_fft=20, gen_istft_hop_size=5): + super().__init__() + + self.decode = nn.ModuleList() + + self.encode = AdainResBlk1d(dim_in + 2, 1024, style_dim) + + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 512, style_dim, upsample=True)) + + self.F0_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1)) + + self.N_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1)) + + self.asr_res = nn.Sequential( + weight_norm(nn.Conv1d(512, 64, kernel_size=1)), + ) + + + self.generator = Generator(style_dim, resblock_kernel_sizes, upsample_rates, + upsample_initial_channel, resblock_dilation_sizes, + upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size) + + def forward(self, asr, F0_curve, N, s): + F0 = self.F0_conv(F0_curve.unsqueeze(1)) + N = self.N_conv(N.unsqueeze(1)) + + x = torch.cat([asr, F0, N], axis=1) + x = self.encode(x, s) + + asr_res = self.asr_res(asr) + + res = True + for block in self.decode: + if res: + x = torch.cat([x, asr_res, F0, N], axis=1) + x = block(x, s) + if block.upsample_type != "none": + res = False + + x = self.generator(x, s, F0_curve) + return x diff --git a/backend/python/kokoro/kokoro.py b/backend/python/kokoro/kokoro.py new file mode 100644 index 00000000..3a0df7f5 --- /dev/null +++ b/backend/python/kokoro/kokoro.py @@ -0,0 +1,166 @@ +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/kokoro.py +import phonemizer +import re +import torch +import numpy as np + +def split_num(num): + num = num.group() + if '.' in num: + return num + elif ':' in num: + h, m = [int(n) for n in num.split(':')] + if m == 0: + return f"{h} o'clock" + elif m < 10: + return f'{h} oh {m}' + return f'{h} {m}' + year = int(num[:4]) + if year < 1100 or year % 1000 < 10: + return num + left, right = num[:2], int(num[2:4]) + s = 's' if num.endswith('s') else '' + if 100 <= year % 1000 <= 999: + if right == 0: + return f'{left} hundred{s}' + elif right < 10: + return f'{left} oh {right}{s}' + return f'{left} {right}{s}' + +def flip_money(m): + m = m.group() + bill = 'dollar' if m[0] == '$' else 'pound' + if m[-1].isalpha(): + return f'{m[1:]} {bill}s' + elif '.' 
not in m: + s = '' if m[1:] == '1' else 's' + return f'{m[1:]} {bill}{s}' + b, c = m[1:].split('.') + s = '' if b == '1' else 's' + c = int(c.ljust(2, '0')) + coins = f"cent{'' if c == 1 else 's'}" if m[0] == '$' else ('penny' if c == 1 else 'pence') + return f'{b} {bill}{s} and {c} {coins}' + +def point_num(num): + a, b = num.group().split('.') + return ' point '.join([a, ' '.join(b)]) + +def normalize_text(text): + text = text.replace(chr(8216), "'").replace(chr(8217), "'") + text = text.replace('«', chr(8220)).replace('»', chr(8221)) + text = text.replace(chr(8220), '"').replace(chr(8221), '"') + text = text.replace('(', '«').replace(')', '»') + for a, b in zip('、。!,:;?', ',.!,:;?'): + text = text.replace(a, b+' ') + text = re.sub(r'[^\S \n]', ' ', text) + text = re.sub(r' +', ' ', text) + text = re.sub(r'(?<=\n) +(?=\n)', '', text) + text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text) + text = re.sub(r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister', text) + text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text) + text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text) + text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text) + text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text) + text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(? 510: + tokens = tokens[:510] + print('Truncated to 510 tokens') + ref_s = voicepack[len(tokens)] + out = forward(model, tokens, ref_s, speed) + ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) + return out, ps + +def generate_full(model, text, voicepack, lang='a', speed=1, ps=None): + ps = ps or phonemize(text, lang) + tokens = tokenize(ps) + if not tokens: + return None + outs = [] + loop_count = len(tokens)//510 + (1 if len(tokens) % 510 != 0 else 0) + for i in range(loop_count): + ref_s = voicepack[len(tokens[i*510:(i+1)*510])] + out = forward(model, tokens[i*510:(i+1)*510], ref_s, speed) + outs.append(out) + outs = np.concatenate(outs) + ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) + return outs, ps \ No newline at end of file diff --git a/backend/python/kokoro/models.py b/backend/python/kokoro/models.py new file mode 100644 index 00000000..cf358d9e --- /dev/null +++ b/backend/python/kokoro/models.py @@ -0,0 +1,373 @@ +# https://github.com/yl4579/StyleTTS2/blob/main/models.py +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/models.py +from istftnet import AdaIN1d, Decoder +from munch import Munch +from pathlib import Path +from plbert import load_plbert +from torch.nn.utils import weight_norm, spectral_norm +import json +import numpy as np +import os +import os.path as osp +import torch +import torch.nn as nn +import torch.nn.functional as F + +class LinearNorm(torch.nn.Module): + def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'): + super(LinearNorm, self).__init__() + self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias) + + torch.nn.init.xavier_uniform_( + self.linear_layer.weight, + gain=torch.nn.init.calculate_gain(w_init_gain)) + + def forward(self, x): + return self.linear_layer(x) + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(channels)) + self.beta = nn.Parameter(torch.zeros(channels)) + + def forward(self, x): + x = x.transpose(1, -1) + x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) + return x.transpose(1, -1) + +class TextEncoder(nn.Module): + def __init__(self, channels, kernel_size, depth, n_symbols, actv=nn.LeakyReLU(0.2)): 
+ super().__init__() + self.embedding = nn.Embedding(n_symbols, channels) + + padding = (kernel_size - 1) // 2 + self.cnn = nn.ModuleList() + for _ in range(depth): + self.cnn.append(nn.Sequential( + weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, padding=padding)), + LayerNorm(channels), + actv, + nn.Dropout(0.2), + )) + # self.cnn = nn.Sequential(*self.cnn) + + self.lstm = nn.LSTM(channels, channels//2, 1, batch_first=True, bidirectional=True) + + def forward(self, x, input_lengths, m): + x = self.embedding(x) # [B, T, emb] + x = x.transpose(1, 2) # [B, emb, T] + m = m.to(input_lengths.device).unsqueeze(1) + x.masked_fill_(m, 0.0) + + for c in self.cnn: + x = c(x) + x.masked_fill_(m, 0.0) + + x = x.transpose(1, 2) # [B, T, chn] + + input_lengths = input_lengths.cpu().numpy() + x = nn.utils.rnn.pack_padded_sequence( + x, input_lengths, batch_first=True, enforce_sorted=False) + + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + + x = x.transpose(-1, -2) + x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]]) + + x_pad[:, :, :x.shape[-1]] = x + x = x_pad.to(x.device) + + x.masked_fill_(m, 0.0) + + return x + + def inference(self, x): + x = self.embedding(x) + x = x.transpose(1, 2) + x = self.cnn(x) + x = x.transpose(1, 2) + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + return x + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + + +class UpSample1d(nn.Module): + def __init__(self, layer_type): + super().__init__() + self.layer_type = layer_type + + def forward(self, x): + if self.layer_type == 'none': + return x + else: + return F.interpolate(x, scale_factor=2, mode='nearest') + +class AdainResBlk1d(nn.Module): + def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2), + upsample='none', dropout_p=0.0): + super().__init__() + self.actv = actv + self.upsample_type = upsample + self.upsample = UpSample1d(upsample) + self.learned_sc = dim_in != dim_out + self._build_weights(dim_in, dim_out, style_dim) + self.dropout = nn.Dropout(dropout_p) + + if upsample == 'none': + self.pool = nn.Identity() + else: + self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1)) + + + def _build_weights(self, dim_in, dim_out, style_dim): + self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1)) + self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1)) + self.norm1 = AdaIN1d(style_dim, dim_in) + self.norm2 = AdaIN1d(style_dim, dim_out) + if self.learned_sc: + self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False)) + + def _shortcut(self, x): + x = self.upsample(x) + if self.learned_sc: + x = self.conv1x1(x) + return x + + def _residual(self, x, s): + x = self.norm1(x, s) + x = self.actv(x) + x = self.pool(x) + x = self.conv1(self.dropout(x)) + x = self.norm2(x, s) + x = self.actv(x) + x = self.conv2(self.dropout(x)) + return x + + def forward(self, x, s): + out = self._residual(x, s) + out = (out + self._shortcut(x)) / np.sqrt(2) + return out + +class AdaLayerNorm(nn.Module): + def __init__(self, style_dim, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.fc = nn.Linear(style_dim, channels*2) + + def forward(self, x, s): + x = x.transpose(-1, -2) + x = x.transpose(1, -1) + + h = self.fc(s) + h = 
h.view(h.size(0), h.size(1), 1) + gamma, beta = torch.chunk(h, chunks=2, dim=1) + gamma, beta = gamma.transpose(1, -1), beta.transpose(1, -1) + + + x = F.layer_norm(x, (self.channels,), eps=self.eps) + x = (1 + gamma) * x + beta + return x.transpose(1, -1).transpose(-1, -2) + +class ProsodyPredictor(nn.Module): + + def __init__(self, style_dim, d_hid, nlayers, max_dur=50, dropout=0.1): + super().__init__() + + self.text_encoder = DurationEncoder(sty_dim=style_dim, + d_model=d_hid, + nlayers=nlayers, + dropout=dropout) + + self.lstm = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True) + self.duration_proj = LinearNorm(d_hid, max_dur) + + self.shared = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True) + self.F0 = nn.ModuleList() + self.F0.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout)) + self.F0.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout)) + self.F0.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout)) + + self.N = nn.ModuleList() + self.N.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout)) + self.N.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout)) + self.N.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout)) + + self.F0_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0) + self.N_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0) + + + def forward(self, texts, style, text_lengths, alignment, m): + d = self.text_encoder(texts, style, text_lengths, m) + + batch_size = d.shape[0] + text_size = d.shape[1] + + # predict duration + input_lengths = text_lengths.cpu().numpy() + x = nn.utils.rnn.pack_padded_sequence( + d, input_lengths, batch_first=True, enforce_sorted=False) + + m = m.to(text_lengths.device).unsqueeze(1) + + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + + x_pad = torch.zeros([x.shape[0], m.shape[-1], x.shape[-1]]) + + x_pad[:, :x.shape[1], :] = x + x = x_pad.to(x.device) + + duration = self.duration_proj(nn.functional.dropout(x, 0.5, training=self.training)) + + en = (d.transpose(-1, -2) @ alignment) + + return duration.squeeze(-1), en + + def F0Ntrain(self, x, s): + x, _ = self.shared(x.transpose(-1, -2)) + + F0 = x.transpose(-1, -2) + for block in self.F0: + F0 = block(F0, s) + F0 = self.F0_proj(F0) + + N = x.transpose(-1, -2) + for block in self.N: + N = block(N, s) + N = self.N_proj(N) + + return F0.squeeze(1), N.squeeze(1) + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + +class DurationEncoder(nn.Module): + + def __init__(self, sty_dim, d_model, nlayers, dropout=0.1): + super().__init__() + self.lstms = nn.ModuleList() + for _ in range(nlayers): + self.lstms.append(nn.LSTM(d_model + sty_dim, + d_model // 2, + num_layers=1, + batch_first=True, + bidirectional=True, + dropout=dropout)) + self.lstms.append(AdaLayerNorm(sty_dim, d_model)) + + + self.dropout = dropout + self.d_model = d_model + self.sty_dim = sty_dim + + def forward(self, x, style, text_lengths, m): + masks = m.to(text_lengths.device) + + x = x.permute(2, 0, 1) + s = style.expand(x.shape[0], x.shape[1], -1) + x = torch.cat([x, s], axis=-1) + x.masked_fill_(masks.unsqueeze(-1).transpose(0, 1), 0.0) + + x = x.transpose(0, 1) + input_lengths = text_lengths.cpu().numpy() + x = x.transpose(-1, -2) + + for block 
in self.lstms: + if isinstance(block, AdaLayerNorm): + x = block(x.transpose(-1, -2), style).transpose(-1, -2) + x = torch.cat([x, s.permute(1, -1, 0)], axis=1) + x.masked_fill_(masks.unsqueeze(-1).transpose(-1, -2), 0.0) + else: + x = x.transpose(-1, -2) + x = nn.utils.rnn.pack_padded_sequence( + x, input_lengths, batch_first=True, enforce_sorted=False) + block.flatten_parameters() + x, _ = block(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + x = F.dropout(x, p=self.dropout, training=self.training) + x = x.transpose(-1, -2) + + x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]]) + + x_pad[:, :, :x.shape[-1]] = x + x = x_pad.to(x.device) + + return x.transpose(-1, -2) + + def inference(self, x, style): + x = self.embedding(x.transpose(-1, -2)) * np.sqrt(self.d_model) + style = style.expand(x.shape[0], x.shape[1], -1) + x = torch.cat([x, style], axis=-1) + src = self.pos_encoder(x) + output = self.transformer_encoder(src).transpose(0, 1) + return output + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + +# https://github.com/yl4579/StyleTTS2/blob/main/utils.py +def recursive_munch(d): + if isinstance(d, dict): + return Munch((k, recursive_munch(v)) for k, v in d.items()) + elif isinstance(d, list): + return [recursive_munch(v) for v in d] + else: + return d + +def build_model(path, device): + config = Path(__file__).parent / 'config.json' + assert config.exists(), f'Config path incorrect: config.json not found at {config}' + with open(config, 'r') as r: + args = recursive_munch(json.load(r)) + assert args.decoder.type == 'istftnet', f'Unknown decoder type: {args.decoder.type}' + decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels, + resblock_kernel_sizes = args.decoder.resblock_kernel_sizes, + upsample_rates = args.decoder.upsample_rates, + upsample_initial_channel=args.decoder.upsample_initial_channel, + resblock_dilation_sizes=args.decoder.resblock_dilation_sizes, + upsample_kernel_sizes=args.decoder.upsample_kernel_sizes, + gen_istft_n_fft=args.decoder.gen_istft_n_fft, gen_istft_hop_size=args.decoder.gen_istft_hop_size) + text_encoder = TextEncoder(channels=args.hidden_dim, kernel_size=5, depth=args.n_layer, n_symbols=args.n_token) + predictor = ProsodyPredictor(style_dim=args.style_dim, d_hid=args.hidden_dim, nlayers=args.n_layer, max_dur=args.max_dur, dropout=args.dropout) + bert = load_plbert() + bert_encoder = nn.Linear(bert.config.hidden_size, args.hidden_dim) + for parent in [bert, bert_encoder, predictor, decoder, text_encoder]: + for child in parent.children(): + if isinstance(child, nn.RNNBase): + child.flatten_parameters() + model = Munch( + bert=bert.to(device).eval(), + bert_encoder=bert_encoder.to(device).eval(), + predictor=predictor.to(device).eval(), + decoder=decoder.to(device).eval(), + text_encoder=text_encoder.to(device).eval(), + ) + for key, state_dict in torch.load(path, map_location='cpu', weights_only=True)['net'].items(): + assert key in model, key + try: + model[key].load_state_dict(state_dict) + except: + state_dict = {k[7:]: v for k, v in state_dict.items()} + model[key].load_state_dict(state_dict, strict=False) + return model diff --git a/backend/python/kokoro/plbert.py b/backend/python/kokoro/plbert.py new file mode 100644 index 00000000..bf1dba5a --- /dev/null +++ b/backend/python/kokoro/plbert.py @@ -0,0 +1,16 @@ +# 
https://huggingface.co/hexgrad/Kokoro-82M/blob/main/plbert.py +# https://github.com/yl4579/StyleTTS2/blob/main/Utils/PLBERT/util.py +from transformers import AlbertConfig, AlbertModel + +class CustomAlbert(AlbertModel): + def forward(self, *args, **kwargs): + # Call the original forward method + outputs = super().forward(*args, **kwargs) + # Only return the last_hidden_state + return outputs.last_hidden_state + +def load_plbert(): + plbert_config = {'vocab_size': 178, 'hidden_size': 768, 'num_attention_heads': 12, 'intermediate_size': 2048, 'max_position_embeddings': 512, 'num_hidden_layers': 12, 'dropout': 0.1} + albert_base_configuration = AlbertConfig(**plbert_config) + bert = CustomAlbert(albert_base_configuration) + return bert diff --git a/backend/python/kokoro/protogen.sh b/backend/python/kokoro/protogen.sh new file mode 100644 index 00000000..32f39fbb --- /dev/null +++ b/backend/python/kokoro/protogen.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cpu.txt b/backend/python/kokoro/requirements-cpu.txt new file mode 100644 index 00000000..b4f1261f --- /dev/null +++ b/backend/python/kokoro/requirements-cpu.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/kokoro/requirements-cublas11.txt similarity index 64% rename from backend/python/mamba/requirements-cublas11.txt rename to backend/python/kokoro/requirements-cublas11.txt index 7048a14f..ed0d4df5 100644 --- a/backend/python/mamba/requirements-cublas11.txt +++ b/backend/python/kokoro/requirements-cublas11.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas12.txt b/backend/python/kokoro/requirements-cublas12.txt new file mode 100644 index 00000000..b4f1261f --- /dev/null +++ b/backend/python/kokoro/requirements-cublas12.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +transformers \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt similarity index 64% rename from backend/python/transformers-musicgen/requirements-hipblas.txt rename to backend/python/kokoro/requirements-hipblas.txt index 00f0a946..ec8d0306 100644 --- a/backend/python/transformers-musicgen/requirements-hipblas.txt +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -1,4 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -transformers -accelerate -torch \ No newline at end of file +torch==2.4.1+rocm6.0 +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-intel.txt b/backend/python/kokoro/requirements-intel.txt new file mode 100644 index 00000000..b16448d3 --- /dev/null +++ b/backend/python/kokoro/requirements-intel.txt @@ -0,0 +1,5 @@ +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements.txt b/backend/python/kokoro/requirements.txt new file mode 100644 index 00000000..06e60389 --- /dev/null +++ b/backend/python/kokoro/requirements.txt @@ -0,0 +1,7 @@ +grpcio==1.70.0 +protobuf 
+phonemizer +scipy +munch +setuptools +soundfile \ No newline at end of file diff --git a/backend/python/parler-tts/run.sh b/backend/python/kokoro/run.sh similarity index 100% rename from backend/python/parler-tts/run.sh rename to backend/python/kokoro/run.sh diff --git a/backend/python/parler-tts/test.sh b/backend/python/kokoro/test.sh similarity index 100% rename from backend/python/parler-tts/test.sh rename to backend/python/kokoro/test.sh diff --git a/backend/python/mamba/Makefile b/backend/python/mamba/Makefile deleted file mode 100644 index 52b1c53a..00000000 --- a/backend/python/mamba/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY: mamba -mamba: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running mamba..." - bash run.sh - @echo "mamba run." - -.PHONY: test -test: protogen - @echo "Testing mamba..." - bash test.sh - @echo "mamba tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - $(RM) -r venv __pycache__ \ No newline at end of file diff --git a/backend/python/mamba/README.md b/backend/python/mamba/README.md deleted file mode 100644 index d6ead917..00000000 --- a/backend/python/mamba/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the mamba project - -``` -make mamba -``` \ No newline at end of file diff --git a/backend/python/mamba/backend.py b/backend/python/mamba/backend.py deleted file mode 100644 index 3c15fea7..00000000 --- a/backend/python/mamba/backend.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python3 -from concurrent import futures -import time -import argparse -import signal -import sys -import os - -import backend_pb2 -import backend_pb2_grpc - -import grpc - -import torch -from transformers import AutoTokenizer, AutoModelForCausalLM -from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) -MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1' - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer that implements the Backend service defined in backend.proto. - """ - def generate(self,prompt, max_new_tokens): - """ - Generates text based on the given prompt and maximum number of new tokens. - - Args: - prompt (str): The prompt to generate text from. - max_new_tokens (int): The maximum number of new tokens to generate. - - Returns: - str: The generated text. 
- """ - self.generator.end_beam_search() - - # Tokenizing the input - ids = self.generator.tokenizer.encode(prompt) - - self.generator.gen_begin_reuse(ids) - initial_len = self.generator.sequence[0].shape[0] - has_leading_space = False - decoded_text = '' - for i in range(max_new_tokens): - token = self.generator.gen_single_token() - if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): - has_leading_space = True - - decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:]) - if has_leading_space: - decoded_text = ' ' + decoded_text - - if token.item() == self.generator.tokenizer.eos_token_id: - break - return decoded_text - - def Health(self, request, context): - """ - Returns a health check message. - - Args: - request: The health check request. - context: The gRPC context. - - Returns: - backend_pb2.Reply: The health check reply. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Loads a language model. - - Args: - request: The load model request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The load model result. - """ - try: - tokenizerModel = request.Tokenizer - if tokenizerModel == "": - tokenizerModel = request.Model - - tokenizer = AutoTokenizer.from_pretrained(tokenizerModel) - if MAMBA_CHAT: - tokenizer.eos_token = "<|endoftext|>" - tokenizer.pad_token = tokenizer.eos_token - self.tokenizer = tokenizer - self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def Predict(self, request, context): - """ - Generates text based on the given prompt and sampling parameters. - - Args: - request: The predict request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict result. - """ - if request.TopP == 0: - request.TopP = 0.9 - - max_tokens = request.Tokens - - if request.Tokens == 0: - max_tokens = 2000 - - # encoded_input = self.tokenizer(request.Prompt) - tokens = self.tokenizer(request.Prompt, return_tensors="pt") - input_ids = tokens.input_ids.to(device="cuda") - out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature, - top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id) - - decoded = self.tokenizer.batch_decode(out) - - generated_text = decoded[0] - - # Remove prompt from response if present - if request.Prompt in generated_text: - generated_text = generated_text.replace(request.Prompt, "") - - return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) - - def PredictStream(self, request, context): - """ - Generates text based on the given prompt and sampling parameters, and streams the results. - - Args: - request: The predict stream request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict stream result. - """ - yield self.Predict(request, context) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/mamba/install.sh b/backend/python/mamba/install.sh deleted file mode 100755 index db18eefc..00000000 --- a/backend/python/mamba/install.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -set -e - -LIMIT_TARGETS="cublas" -EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation" - -source $(dirname $0)/../common/libbackend.sh - -installRequirements \ No newline at end of file diff --git a/backend/python/mamba/requirements-after.txt b/backend/python/mamba/requirements-after.txt deleted file mode 100644 index ea6890eb..00000000 --- a/backend/python/mamba/requirements-after.txt +++ /dev/null @@ -1,2 +0,0 @@ -causal-conv1d==1.4.0 -mamba-ssm==2.2.2 \ No newline at end of file diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt deleted file mode 100644 index 39dab0fd..00000000 --- a/backend/python/mamba/requirements-cpu.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt deleted file mode 100644 index 39dab0fd..00000000 --- a/backend/python/mamba/requirements-cublas12.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-install.txt b/backend/python/mamba/requirements-install.txt deleted file mode 100644 index 69d263f0..00000000 --- a/backend/python/mamba/requirements-install.txt +++ /dev/null @@ -1,6 +0,0 @@ -# mabma does not specify it's build dependencies per PEP517, so we need to disable build isolation -# this also means that we need to install the basic build dependencies into the venv ourselves -# https://github.com/Dao-AILab/causal-conv1d/issues/24 -packaging -setuptools -wheel \ No newline at end of file diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt deleted file mode 100644 index 8e1b0195..00000000 --- a/backend/python/mamba/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi \ No newline at end of file diff --git a/backend/python/mamba/run.sh b/backend/python/mamba/run.sh deleted file mode 100755 index 1afc3984..00000000 --- a/backend/python/mamba/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -LIMIT_TARGETS="cublas" - -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/mamba/test.py b/backend/python/mamba/test.py deleted file mode 100644 index 83fb2651..00000000 --- a/backend/python/mamba/test.py +++ /dev/null @@ -1,76 +0,0 @@ -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -import unittest -import subprocess -import time -import grpc -import backend_pb2_grpc -import backend_pb2 - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service. 
- - This class contains methods to test the startup and shutdown of the gRPC service. - """ - def setUp(self): - self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_text(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m")) - self.assertTrue(response.success) - req = backend_pb2.PredictOptions(Prompt="The capital of France is") - resp = stub.Predict(req) - self.assertIsNotNone(resp.message) - except Exception as err: - print(err) - self.fail("text service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/openvoice/backend.py b/backend/python/openvoice/backend.py deleted file mode 100755 index 7dde08cf..00000000 --- a/backend/python/openvoice/backend.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -""" -Extra gRPC server for OpenVoice models. -""" -from concurrent import futures - -import argparse -import signal -import sys -import os -import torch -from openvoice import se_extractor -from openvoice.api import ToneColorConverter -from melo.api import TTS - -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. - """ - def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. - - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. 
- - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model - try: - - self.clonedVoice = False - # Assume directory from request.ModelFile. - # Only if request.LoraAdapter it's not an absolute path - if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): - # get base path of modelFile - modelFileBase = os.path.dirname(request.ModelFile) - request.AudioPath = os.path.join(modelFileBase, request.AudioPath) - if request.AudioPath != "": - self.clonedVoice = True - - self.modelpath = request.ModelFile - self.speaker = request.Type - self.ClonedVoicePath = request.AudioPath - - ckpt_converter = request.Model+'/converter' - device = "cuda:0" if torch.cuda.is_available() else "cpu" - self.device = device - self.tone_color_converter = None - if self.clonedVoice: - self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device) - self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth') - - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def TTS(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - # Speed is adjustable - speed = 1.0 - voice = "EN" - if request.voice: - voice = request.voice - model = TTS(language=voice, device=self.device) - speaker_ids = model.hps.data.spk2id - speaker_key = self.speaker - modelpath = self.modelpath - for s in speaker_ids.keys(): - print(f"Speaker: {s} - ID: {speaker_ids[s]}") - speaker_id = speaker_ids[speaker_key] - speaker_key = speaker_key.lower().replace('_', '-') - source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device) - model.tts_to_file(request.text, speaker_id, request.dst, speed=speed) - if self.clonedVoice: - reference_speaker = self.ClonedVoicePath - target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False) - # Run the tone color converter - encode_message = "@MyShell" - self.tone_color_converter.convert( - audio_src_path=request.dst, - src_se=source_se, - tgt_se=target_se, - output_path=request.dst, - message=encode_message) - - print("[OpenVoice] TTS generated!", file=sys.stderr) - print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("[OpenVoice] Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - print(f"[OpenVoice] startup: {args}", file=sys.stderr) - serve(args.addr) diff --git a/backend/python/openvoice/install.sh b/backend/python/openvoice/install.sh deleted file mode 100755 index 24db146b..00000000 --- a/backend/python/openvoice/install.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. -# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements - -python -m unidic download diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt deleted file mode 100644 index 08ed5eeb..00000000 --- a/backend/python/openvoice/requirements-cpu.txt +++ /dev/null @@ -1 +0,0 @@ -torch \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt deleted file mode 100644 index 6461b696..00000000 --- a/backend/python/openvoice/requirements-cublas11.txt +++ /dev/null @@ -1,2 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt deleted file mode 100644 index 12c6d5d5..00000000 --- a/backend/python/openvoice/requirements-cublas12.txt +++ /dev/null @@ -1 +0,0 @@ -torch diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt deleted file mode 100644 index cea7de0b..00000000 --- a/backend/python/openvoice/requirements-intel.txt +++ /dev/null @@ -1,23 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -optimum[openvino] -grpcio==1.66.1 -protobuf -librosa==0.9.1 -faster-whisper==1.0.3 -pydub==0.25.1 -wavmark==0.0.3 -numpy==1.26.4 -eng_to_ipa==0.0.2 -inflect==7.0.0 -unidecode==1.3.7 -whisper-timestamped==1.15.4 -openai -python-dotenv -pypinyin==0.53.0 -cn2an==0.5.22 -jieba==0.42.1 -gradio==4.38.1 -langid==1.1.6 -git+https://github.com/myshell-ai/MeloTTS.git diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt deleted file mode 100644 index b38805be..00000000 --- a/backend/python/openvoice/requirements.txt +++ /dev/null @@ -1,20 +0,0 @@ -grpcio==1.66.1 -protobuf -librosa -faster-whisper -pydub==0.25.1 -wavmark==0.0.3 -numpy -eng_to_ipa==0.0.2 -inflect -unidecode -whisper-timestamped -openai 
-python-dotenv -pypinyin -cn2an==0.5.22 -jieba==0.42.1 -gradio -langid==1.1.6 -git+https://github.com/myshell-ai/MeloTTS.git -git+https://github.com/myshell-ai/OpenVoice.git diff --git a/backend/python/openvoice/test.py b/backend/python/openvoice/test.py deleted file mode 100644 index 262917b3..00000000 --- a/backend/python/openvoice/test.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2", - Type="en-us")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/openvoice/test.sh b/backend/python/openvoice/test.sh deleted file mode 100755 index 6c0a840f..00000000 --- a/backend/python/openvoice/test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# Download checkpoints if not present -if [ ! 
-d "checkpoints_v2" ]; then - wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip - unzip checkpoints_v2.zip -fi - -runUnittests diff --git a/backend/python/parler-tts/Makefile b/backend/python/parler-tts/Makefile deleted file mode 100644 index c25b2af7..00000000 --- a/backend/python/parler-tts/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -export CONDA_ENV_PATH = "parler.yml" -SKIP_CONDA?=0 -ifeq ($(BUILD_TYPE), cublas) -export CONDA_ENV_PATH = "parler-nvidia.yml" -endif - -# Intel GPU are supposed to have dependencies installed in the main python -# environment, so we skip conda installation for SYCL builds. -# https://github.com/intel/intel-extension-for-pytorch/issues/538 -ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export SKIP_CONDA=1 -endif - -.PHONY: parler-tts -parler-tts: protogen - @echo "Installing $(CONDA_ENV_PATH)..." - bash install.sh $(CONDA_ENV_PATH) - -.PHONY: run -run: protogen - @echo "Running transformers..." - bash run.sh - @echo "transformers run." - -.PHONY: test -test: protogen - @echo "Testing transformers..." - bash test.sh - @echo "transformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - $(RM) -r venv __pycache__ \ No newline at end of file diff --git a/backend/python/parler-tts/install.sh b/backend/python/parler-tts/install.sh deleted file mode 100755 index 002472a2..00000000 --- a/backend/python/parler-tts/install.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. -# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements - -# https://github.com/descriptinc/audiotools/issues/101 -# incompatible protobuf versions. 
-PYDIR=$(ls ${MY_DIR}/venv/lib) -curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py diff --git a/backend/python/parler-tts/requirements-after.txt b/backend/python/parler-tts/requirements-after.txt deleted file mode 100644 index 09811bf4..00000000 --- a/backend/python/parler-tts/requirements-after.txt +++ /dev/null @@ -1,3 +0,0 @@ -git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17 -llvmlite==0.43.0 -numba==0.60.0 diff --git a/backend/python/parler-tts/requirements-cpu.txt b/backend/python/parler-tts/requirements-cpu.txt deleted file mode 100644 index bbcdc8cd..00000000 --- a/backend/python/parler-tts/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt deleted file mode 100644 index 71a6a93f..00000000 --- a/backend/python/parler-tts/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch -torchaudio -transformers -accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt deleted file mode 100644 index 0fa27074..00000000 --- a/backend/python/parler-tts/requirements-cublas12.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -torchaudio -transformers -accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt deleted file mode 100644 index b8758537..00000000 --- a/backend/python/parler-tts/requirements-hipblas.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.3.0+rocm6.0 -torchaudio==2.3.0+rocm6.0 -transformers -accelerate diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt deleted file mode 100644 index c0e4dcaa..00000000 --- a/backend/python/parler-tts/requirements-intel.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio -optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt deleted file mode 100644 index 0da3da13..00000000 --- a/backend/python/parler-tts/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi -llvmlite==0.43.0 \ No newline at end of file diff --git a/backend/python/parler-tts/test.py b/backend/python/parler-tts/test.py deleted file mode 100644 index 639d43a9..00000000 --- a/backend/python/parler-tts/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) 
-> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cpu.txt b/backend/python/rerankers/requirements-cpu.txt index 25a1d8ab..e27a4726 100644 --- a/backend/python/rerankers/requirements-cpu.txt +++ b/backend/python/rerankers/requirements-cpu.txt @@ -1,4 +1,4 @@ transformers accelerate -torch +torch==2.4.1 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt index 06c4b2cf..fef296fe 100644 --- a/backend/python/rerankers/requirements-cublas11.txt +++ b/backend/python/rerankers/requirements-cublas11.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 transformers accelerate -torch +torch==2.4.1+cu118 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt index 25a1d8ab..e27a4726 100644 --- a/backend/python/rerankers/requirements-cublas12.txt +++ b/backend/python/rerankers/requirements-cublas12.txt @@ -1,4 +1,4 @@ transformers accelerate -torch +torch==2.4.1 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 961d150c..b1c8baed 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 transformers accelerate -torch +torch==2.4.1+rocm6.0 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index e6bb4cc7..c071e8fb 100644 --- 
a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -1,8 +1,9 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu transformers accelerate -torch +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu rerankers[transformers] optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 8e1b0195..566fdae0 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile deleted file mode 100644 index 8b18e943..00000000 --- a/backend/python/sentencetransformers/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: sentencetransformers -sentencetransformers: protogen - bash ./install.sh - - -.PHONY: run -run: protogen - @echo "Running sentencetransformers..." - bash run.sh - @echo "sentencetransformers run." - -# It is not working well by using command line. It only6 works with IDE like VSCode. -.PHONY: test -test: protogen - @echo "Testing sentencetransformers..." - bash test.sh - @echo "sentencetransformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/sentencetransformers/README.md b/backend/python/sentencetransformers/README.md deleted file mode 100644 index 829cf0d1..00000000 --- a/backend/python/sentencetransformers/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the sentencetransformers project - -``` -make sentencetransformers -``` \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt deleted file mode 100644 index f88de1e4..00000000 --- a/backend/python/sentencetransformers/requirements-cpu.txt +++ /dev/null @@ -1,6 +0,0 @@ -torch -accelerate -transformers -bitsandbytes -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt deleted file mode 100644 index 57caf1a1..00000000 --- a/backend/python/sentencetransformers/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt deleted file mode 100644 index 834fa6a4..00000000 --- a/backend/python/sentencetransformers/requirements-cublas12.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt 
b/backend/python/sentencetransformers/requirements-hipblas.txt deleted file mode 100644 index 98a0a41b..00000000 --- a/backend/python/sentencetransformers/requirements-hipblas.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt deleted file mode 100644 index 5948910d..00000000 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -optimum[openvino] -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt deleted file mode 100644 index b9cb6061..00000000 --- a/backend/python/sentencetransformers/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi -datasets -einops \ No newline at end of file diff --git a/backend/python/sentencetransformers/run.sh b/backend/python/sentencetransformers/run.sh deleted file mode 100755 index 375c07e5..00000000 --- a/backend/python/sentencetransformers/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.py b/backend/python/sentencetransformers/test.py deleted file mode 100644 index 9df52b14..00000000 --- a/backend/python/sentencetransformers/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.kill() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_embedding(self): - """ - This method tests if the embeddings are generated successfully - """ - 
try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") - embedding_response = stub.Embedding(embedding_request) - self.assertIsNotNone(embedding_response.embeddings) - except Exception as err: - print(err) - self.fail("Embedding service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.sh b/backend/python/sentencetransformers/test.sh deleted file mode 100755 index 6940b066..00000000 --- a/backend/python/sentencetransformers/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/transformers-musicgen/Makefile b/backend/python/transformers-musicgen/Makefile deleted file mode 100644 index 06badf6d..00000000 --- a/backend/python/transformers-musicgen/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY: transformers-musicgen -transformers-musicgen: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running transformers..." - bash run.sh - @echo "transformers run." - -.PHONY: test -test: protogen - @echo "Testing transformers..." - bash test.sh - @echo "transformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/transformers-musicgen/README.md b/backend/python/transformers-musicgen/README.md deleted file mode 100644 index bf7fef84..00000000 --- a/backend/python/transformers-musicgen/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the transformers project - -``` -make transformers-musicgen -``` \ No newline at end of file diff --git a/backend/python/transformers-musicgen/backend.py b/backend/python/transformers-musicgen/backend.py deleted file mode 100644 index b9f1facf..00000000 --- a/backend/python/transformers-musicgen/backend.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -""" -Extra gRPC server for MusicgenForConditionalGeneration models. -""" -from concurrent import futures - -import argparse -import signal -import sys -import os - -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from scipy.io import wavfile -from transformers import AutoProcessor, MusicgenForConditionalGeneration - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. - """ - def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. 
- context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. - - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def SoundGeneration(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - inputs = None - if request.text == "": - inputs = self.model.get_unconditional_inputs(num_samples=1) - elif request.HasField('src'): - # TODO SECURITY CODE GOES HERE LOL - # WHO KNOWS IF THIS WORKS??? - sample_rate, wsamples = wavfile.read('path_to_your_file.wav') - - if request.HasField('src_divisor'): - wsamples = wsamples[: len(wsamples) // request.src_divisor] - - inputs = self.processor( - audio=wsamples, - sampling_rate=sample_rate, - text=[request.text], - padding=True, - return_tensors="pt", - ) - else: - inputs = self.processor( - text=[request.text], - padding=True, - return_tensors="pt", - ) - - tokens = 256 - if request.HasField('duration'): - tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second - guidance = 3.0 - if request.HasField('temperature'): - guidance = request.temperature - dosample = True - if request.HasField('sample'): - dosample = request.sample - audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens) - print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr) - sampling_rate = self.model.config.audio_encoder.sampling_rate - wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) - print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr) - print("[transformers-musicgen] SoundGeneration for", file=sys.stderr) - print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - - -# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons - def TTS(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - inputs = self.processor( - text=[request.text], - padding=True, - return_tensors="pt", - ) - tokens = 512 # No good place to set the "length" in 
TTS, so use 10s as a sane default - audio_values = self.model.generate(**inputs, max_new_tokens=tokens) - print("[transformers-musicgen] TTS generated!", file=sys.stderr) - sampling_rate = self.model.config.audio_encoder.sampling_rate - write_wav(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) - print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr) - print("[transformers-musicgen] TTS for", file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("[transformers-musicgen] Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("[transformers-musicgen] Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - print(f"[transformers-musicgen] startup: {args}", file=sys.stderr) - serve(args.addr) diff --git a/backend/python/transformers-musicgen/requirements-cpu.txt b/backend/python/transformers-musicgen/requirements-cpu.txt deleted file mode 100644 index bbcdc8cd..00000000 --- a/backend/python/transformers-musicgen/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt deleted file mode 100644 index 191a6eef..00000000 --- a/backend/python/transformers-musicgen/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt deleted file mode 100644 index bbcdc8cd..00000000 --- a/backend/python/transformers-musicgen/requirements-cublas12.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt deleted file mode 100644 index 608d6939..00000000 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -transformers -accelerate -torch -optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt deleted file mode 100644 index fb1119a9..00000000 --- 
a/backend/python/transformers-musicgen/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -grpcio==1.66.1 -protobuf -scipy==1.14.0 -certifi \ No newline at end of file diff --git a/backend/python/transformers-musicgen/run.sh b/backend/python/transformers-musicgen/run.sh deleted file mode 100755 index 375c07e5..00000000 --- a/backend/python/transformers-musicgen/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/transformers-musicgen/test.py b/backend/python/transformers-musicgen/test.py deleted file mode 100644 index 295de65e..00000000 --- a/backend/python/transformers-musicgen/test.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if TTS is generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() - - def test_sound_generation(self): - """ - This method tests if SoundGeneration is generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story") - sg_response = stub.SoundGeneration(sg_request) - self.assertIsNotNone(sg_response) - 
except Exception as err: - print(err) - self.fail("SoundGeneration service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/transformers-musicgen/test.sh b/backend/python/transformers-musicgen/test.sh deleted file mode 100755 index 6940b066..00000000 --- a/backend/python/transformers-musicgen/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 6e809f28..b0d5875b 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -21,7 +21,11 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" -from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria +from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM +from transformers import AutoProcessor, MusicgenForConditionalGeneration +from scipy.io import wavfile +import outetts +from sentence_transformers import SentenceTransformer _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -72,7 +76,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): Returns: A Result object that contains the result of the LoadModel operation. """ + model_name = request.Model + + # Check to see if the Model exists in the filesystem already. + if os.path.exists(request.ModelFile): + model_name = request.ModelFile compute = torch.float16 if request.F16Memory == True: @@ -80,10 +89,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.CUDA = torch.cuda.is_available() self.OV=False + self.OuteTTS=False + self.SentenceTransformer = False device_map="cpu" quantization = None + autoTokenizer = True if self.CUDA: from transformers import BitsAndBytesConfig, AutoModelForCausalLM @@ -186,6 +198,57 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): export=True, device=device_map) self.OV = True + elif request.Type == "MusicgenForConditionalGeneration": + autoTokenizer = False + self.processor = AutoProcessor.from_pretrained(model_name) + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + elif request.Type == "OuteTTS": + autoTokenizer = False + options = request.Options + MODELNAME = "OuteAI/OuteTTS-0.3-1B" + TOKENIZER = "OuteAI/OuteTTS-0.3-1B" + VERSION = "0.3" + SPEAKER = "en_male_1" + for opt in options: + if opt.startswith("tokenizer:"): + TOKENIZER = opt.split(":")[1] + break + if opt.startswith("version:"): + VERSION = opt.split(":")[1] + break + if opt.startswith("speaker:"): + SPEAKER = opt.split(":")[1] + break + + if model_name != "": + MODELNAME = model_name + + # Configure the model + model_config = outetts.HFModelConfig_v2( + model_path=MODELNAME, + tokenizer_path=TOKENIZER + ) + # Initialize the interface + self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config) + self.OuteTTS = True + + self.interface.print_default_speakers() + if request.AudioPath: + if os.path.isabs(request.AudioPath): + self.AudioPath = request.AudioPath + else: + self.AudioPath = os.path.join(request.ModelPath, request.AudioPath) + self.speaker = self.interface.create_speaker(audio_path=self.AudioPath) + else: + self.speaker = self.interface.load_default_speaker(name=SPEAKER) + elif request.Type == "SentenceTransformer": + autoTokenizer = False + self.model = SentenceTransformer(model_name, 
trust_remote_code=request.TrustRemoteCode) + self.SentenceTransformer = True + elif request.Type == "Mamba": + autoTokenizer = False + self.tokenizer = AutoTokenizer.from_pretrained(model_name) + self.model = MambaForCausalLM.from_pretrained(model_name) else: print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, @@ -196,19 +259,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): torch_dtype=compute) if request.ContextSize > 0: self.max_tokens = request.ContextSize - else: + elif hasattr(self.model, 'config') and hasattr(self.model.config, 'max_position_embeddings'): self.max_tokens = self.model.config.max_position_embeddings + else: + self.max_tokens = 512 - self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) - self.XPU = False + if autoTokenizer: + self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) + self.XPU = False - if XPU and self.OV == False: - self.XPU = True - try: - print("Optimizing model", model_name, "to XPU.", file=sys.stderr) - self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") - except Exception as err: - print("Not using XPU:", err, file=sys.stderr) + if XPU and self.OV == False: + self.XPU = True + try: + print("Optimizing model", model_name, "to XPU.", file=sys.stderr) + self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") + except Exception as err: + print("Not using XPU:", err, file=sys.stderr) except Exception as err: print("Error:", err, file=sys.stderr) @@ -234,18 +300,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): max_length = 512 if request.Tokens != 0: max_length = request.Tokens - encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Create word embeddings - if self.CUDA: - encoded_input = encoded_input.to("cuda") + embeds = None - with torch.no_grad(): - model_output = self.model(**encoded_input) + if self.SentenceTransformer: + print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) + embeds = self.model.encode(request.Embeddings) + else: + encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence - sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) + # Create word embeddings + if self.CUDA: + encoded_input = encoded_input.to("cuda") + + with torch.no_grad(): + model_output = self.model(**encoded_input) + + # Pool to get sentence embeddings; i.e. 
generate a single 1024-dimensional vector for the entire sentence + sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) + embeds = sentence_embeddings[0] + return backend_pb2.EmbeddingResult(embeddings=embeds) async def _predict(self, request, context, streaming=False): set_seed(request.Seed) @@ -375,6 +449,114 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): finally: await iterations.aclose() + def SoundGeneration(self, request, context): + model_name = request.model + try: + if self.processor is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.processor = AutoProcessor.from_pretrained(model_name) + if self.model is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + inputs = None + if request.text == "": + inputs = self.model.get_unconditional_inputs(num_samples=1) + elif request.HasField('src'): + # TODO: read the source audio from request.src and validate/sanitize it before loading; + # the hardcoded path below is still a placeholder + sample_rate, wsamples = wavfile.read('path_to_your_file.wav') + + if request.HasField('src_divisor'): + wsamples = wsamples[: len(wsamples) // request.src_divisor] + + inputs = self.processor( + audio=wsamples, + sampling_rate=sample_rate, + text=[request.text], + padding=True, + return_tensors="pt", + ) + else: + inputs = self.processor( + text=[request.text], + padding=True, + return_tensors="pt", + ) + + tokens = 256 + if request.HasField('duration'): + tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, i.e. roughly 51.2 tokens per second + guidance = 3.0 + if request.HasField('temperature'): + guidance = request.temperature + dosample = True + if request.HasField('sample'): + dosample = request.sample + audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens) + print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr) + sampling_rate = self.model.config.audio_encoder.sampling_rate + wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) + print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr) + print("[transformers-musicgen] SoundGeneration for", file=sys.stderr) + print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + + def OuteTTSGenerate(self, request, context): + try: + print("[OuteTTS] generating TTS", file=sys.stderr) + gen_cfg = outetts.GenerationConfig( + text=request.text, + temperature=0.1, + repetition_penalty=1.1, + max_length=self.max_tokens, + speaker=self.speaker, + # voice_characteristics="upbeat enthusiasm, friendliness, clarity, professionalism, and trustworthiness" + ) + output = self.interface.generate(config=gen_cfg) + print("[OuteTTS] Generated TTS", file=sys.stderr) + output.save(request.dst) + print("[OuteTTS] TTS done", file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + +# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons + def TTS(self, request, context): + if self.OuteTTS: + return
self.OuteTTSGenerate(request, context) + + model_name = request.model + try: + if self.processor is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.processor = AutoProcessor.from_pretrained(model_name) + if self.model is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + inputs = self.processor( + text=[request.text], + padding=True, + return_tensors="pt", + ) + tokens = self.max_tokens # The TTS request has no explicit length field, so fall back to the configured max_tokens + audio_values = self.model.generate(**inputs, max_new_tokens=tokens) + print("[transformers-musicgen] TTS generated!", file=sys.stderr) + sampling_rate = self.model.config.audio_encoder.sampling_rate + wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) + print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr) + print("[transformers-musicgen] TTS for", file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + async def serve(address): # Start asyncio gRPC server server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index f1e6281b..79863c2b 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -1,4 +1,8 @@ -torch +torch==2.4.1 +llvmlite==0.43.0 +numba==0.60.0 accelerate transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index 0abd72d9..fa9f8953 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -1,5 +1,9 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 +llvmlite==0.43.0 +numba==0.60.0 accelerate transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index f1e6281b..127bfb21 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -1,4 +1,8 @@ -torch +torch==2.4.1 accelerate +llvmlite==0.43.0 +numba==0.60.0 transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index f6900af1..c0ca93ee 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,5 +1,9 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch +torch==2.4.1+rocm6.0 accelerate transformers -bitsandbytes \ No newline at end of file +llvmlite==0.43.0 +numba==0.60.0 +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index
5d9efb71..1418a3c3 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -1,6 +1,11 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] +llvmlite==0.43.0 +numba==0.60.0 intel-extension-for-transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index b19c59c0..c0fa0c0b 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,4 +1,6 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools +scipy==1.15.1 +numpy>=2.0.0 \ No newline at end of file diff --git a/backend/python/transformers/test.py b/backend/python/transformers/test.py index aab3c05e..14efa6a7 100644 --- a/backend/python/transformers/test.py +++ b/backend/python/transformers/test.py @@ -19,6 +19,7 @@ class TestBackendServicer(unittest.TestCase): This method sets up the gRPC service by starting the server """ self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) + time.sleep(10) def tearDown(self) -> None: """ @@ -31,7 +32,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the server starts up successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -48,7 +48,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the model is loaded successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -66,7 +65,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the embeddings are generated successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -80,5 +78,96 @@ class TestBackendServicer(unittest.TestCase): except Exception as err: print(err) self.fail("Embedding service failed") + finally: + self.tearDown() + + def test_audio_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_tts(self): + """ + This method tests if TTS is generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") + tts_response = stub.TTS(tts_request) + self.assertIsNotNone(tts_response) + except Exception as err: + print(err) + self.fail("TTS service failed") + finally: + 
self.tearDown() + + def test_sound_generation(self): + """ + This method tests if SoundGeneration is generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story") + sg_response = stub.SoundGeneration(sg_request) + self.assertIsNotNone(sg_response) + except Exception as err: + print(err) + self.fail("SoundGeneration service failed") + finally: + self.tearDown() + + def test_embed_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_sentencetransformers_embedding(self): + """ + This method tests if the embeddings are generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") + embedding_response = stub.Embedding(embedding_request) + self.assertIsNotNone(embedding_response.embeddings) + except Exception as err: + print(err) + self.fail("Embedding service failed") finally: self.tearDown() \ No newline at end of file diff --git a/backend/python/vall-e-x/.gitignore b/backend/python/vall-e-x/.gitignore deleted file mode 100644 index 1d3a0654..00000000 --- a/backend/python/vall-e-x/.gitignore +++ /dev/null @@ -1 +0,0 @@ -source \ No newline at end of file diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile deleted file mode 100644 index a3ca32a3..00000000 --- a/backend/python/vall-e-x/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export SKIP_CONDA=1 -endif - -.PHONY: ttsvalle -ttsvalle: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running ttsvalle..." - bash run.sh - @echo "ttsvalle run." - -.PHONY: test -test: protogen - @echo "Testing valle..." - bash test.sh - @echo "valle tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf source venv __pycache__ \ No newline at end of file diff --git a/backend/python/vall-e-x/README.md b/backend/python/vall-e-x/README.md deleted file mode 100644 index a3a93361..00000000 --- a/backend/python/vall-e-x/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the ttsvalle project - -``` -make ttsvalle -``` \ No newline at end of file diff --git a/backend/python/vall-e-x/backend.py b/backend/python/vall-e-x/backend.py deleted file mode 100644 index fc9d93bd..00000000 --- a/backend/python/vall-e-x/backend.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 - -from concurrent import futures -import argparse -import signal -import sys -import os -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from utils.generation import SAMPLE_RATE, generate_audio, preload_models -from scipy.io.wavfile import write as write_wav -from utils.prompt_making import make_prompt - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - gRPC servicer for backend services. - """ - def Health(self, request, context): - """ - Health check service. - - Args: - request: A backend_pb2.HealthRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Reply instance with message "OK". - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Load model service. - - Args: - request: A backend_pb2.LoadModelRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Result instance with message "Model loaded successfully" and success=True if successful. - A backend_pb2.Result instance with success=False and error message if unsuccessful. - """ - model_name = request.Model - try: - print("Preparing models, please wait", file=sys.stderr) - # download and load all models - preload_models() - self.clonedVoice = False - # Assume directory from request.ModelFile. - # Only if request.LoraAdapter it's not an absolute path - if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): - # get base path of modelFile - modelFileBase = os.path.dirname(request.ModelFile) - # modify LoraAdapter to be relative to modelFileBase - request.AudioPath = os.path.join(modelFileBase, request.AudioPath) - if request.AudioPath != "": - print("Generating model", file=sys.stderr) - make_prompt(name=model_name, audio_prompt_path=request.AudioPath) - self.clonedVoice = True - ### Use given transcript - ##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav", - ## transcript="Just, what was that? Paimon thought we were gonna get eaten.") - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - # Implement your logic here for the LoadModel service - # Replace this with your desired response - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def TTS(self, request, context): - """ - Text-to-speech service. - - Args: - request: A backend_pb2.TTSRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Result instance with success=True if successful. 
- A backend_pb2.Result instance with success=False and error message if unsuccessful. - """ - model = request.model - print(request, file=sys.stderr) - try: - audio_array = None - if model != "": - if self.clonedVoice: - model = os.path.basename(request.model) - audio_array = generate_audio(request.text, prompt=model) - else: - audio_array = generate_audio(request.text) - print("saving to", request.dst, file=sys.stderr) - # save audio to disk - write_wav(request.dst, SAMPLE_RATE, audio_array) - print("saved to", request.dst, file=sys.stderr) - print("tts for", file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh deleted file mode 100755 index c0cce96a..00000000 --- a/backend/python/vall-e-x/install.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -set -e - -VALL_E_X_VERSION=3faaf8ccadb154d63b38070caf518ce9309ea0f4 - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. 
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements - -git clone https://github.com/Plachtaa/VALL-E-X.git ${MY_DIR}/source -pushd ${MY_DIR}/source && git checkout -b build ${VALL_E_X_VERSION} && popd -uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt - -cp -v ./*py $MY_DIR/source/ diff --git a/backend/python/vall-e-x/requirements-cpu.txt b/backend/python/vall-e-x/requirements-cpu.txt deleted file mode 100644 index 3a3304c0..00000000 --- a/backend/python/vall-e-x/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -accelerate -torch -torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt deleted file mode 100644 index 4e0a151a..00000000 --- a/backend/python/vall-e-x/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -accelerate -torch -torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt deleted file mode 100644 index 3a3304c0..00000000 --- a/backend/python/vall-e-x/requirements-cublas12.txt +++ /dev/null @@ -1,3 +0,0 @@ -accelerate -torch -torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt deleted file mode 100644 index fc43790a..00000000 --- a/backend/python/vall-e-x/requirements-hipblas.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -accelerate -torch==2.3.0+rocm6.0 -torchaudio==2.3.0+rocm6.0 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt deleted file mode 100644 index adbabeac..00000000 --- a/backend/python/vall-e-x/requirements-intel.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -accelerate -torch -torchaudio -optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt deleted file mode 100644 index 8e1b0195..00000000 --- a/backend/python/vall-e-x/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi \ No newline at end of file diff --git a/backend/python/vall-e-x/run.sh b/backend/python/vall-e-x/run.sh deleted file mode 100755 index 4b0682ad..00000000 --- a/backend/python/vall-e-x/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -BACKEND_FILE="${MY_DIR}/source/backend.py" - -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/vall-e-x/test.py b/backend/python/vall-e-x/test.py deleted file mode 100644 index f31a148c..00000000 --- a/backend/python/vall-e-x/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - 
TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/vall-e-x/test.sh b/backend/python/vall-e-x/test.sh deleted file mode 100755 index 57336b39..00000000 --- a/backend/python/vall-e-x/test.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e -TEST_FILE="./source/test.py" - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py index 2cf15c1c..98ac5081 100644 --- a/backend/python/vllm/backend.py +++ b/backend/python/vllm/backend.py @@ -5,6 +5,8 @@ import argparse import signal import sys import os +from typing import List +from PIL import Image import backend_pb2 import backend_pb2_grpc @@ -15,6 +17,10 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid from vllm.transformers_utils.tokenizer import get_tokenizer +from vllm.multimodal.utils import fetch_image +from vllm.assets.video import VideoAsset +import base64 +import io _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -89,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Quantization != "": engine_args.quantization = request.Quantization + if request.LoadFormat != "": + engine_args.load_format = request.LoadFormat if request.GPUMemoryUtilization != 0: engine_args.gpu_memory_utilization = request.GPUMemoryUtilization if request.TrustRemoteCode: @@ -105,6 +113,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): try: self.llm = AsyncLLMEngine.from_engine_args(engine_args) 
except Exception as err: + print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr) return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") try: @@ -117,7 +126,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): ) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - + print("Model loaded successfully", file=sys.stderr) return backend_pb2.Result(message="Model loaded successfully", success=True) async def Predict(self, request, context): @@ -196,15 +205,35 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Seed != 0: sampling_params.seed = request.Seed + # Extract image paths and process images prompt = request.Prompt - - # If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template + + image_paths = request.Images + image_data = [self.load_image(img_path) for img_path in image_paths] + + videos_path = request.Videos + video_data = [self.load_video(video_path) for video_path in videos_path] + + # If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template if not request.Prompt and request.UseTokenizerTemplate and request.Messages: prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) - # Generate text + # Generate text using the LLM engine request_id = random_uuid() - outputs = self.llm.generate(prompt, sampling_params, request_id) + print(f"Generating text with request_id: {request_id}", file=sys.stderr) + multi_modal_data = {} + if image_data: + multi_modal_data["image"] = image_data + if video_data: + multi_modal_data["video"] = video_data + outputs = self.llm.generate( + { + "prompt": prompt, + "multi_modal_data": multi_modal_data if multi_modal_data else None, + }, + sampling_params=sampling_params, + request_id=request_id, + ) # Stream the results generated_text = "" @@ -227,9 +256,57 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if streaming: return + # Remove the image files from /tmp folder + for img_path in image_paths: + try: + os.remove(img_path) + except Exception as e: + print(f"Error removing image file: {img_path}, {e}", file=sys.stderr) + # Sending the final generated text yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) + def load_image(self, image_path: str): + """ + Load an image from the given file path or base64 encoded data. + + Args: + image_path (str): The path to the image file or base64 encoded data. + + Returns: + Image: The loaded image. + """ + try: + + image_data = base64.b64decode(image_path) + image = Image.open(io.BytesIO(image_data)) + return image + except Exception as e: + print(f"Error loading image {image_path}: {e}", file=sys.stderr) + return None + + def load_video(self, video_path: str): + """ + Load a video from the given file path. + + Args: + video_path (str): The path to the image file. + + Returns: + Video: The loaded video. 
+ """ + try: + timestamp = str(int(time.time() * 1000)) # Generate timestamp + p = f"/tmp/vl-{timestamp}.data" # Use timestamp in filename + with open(p, "wb") as f: + f.write(base64.b64decode(video_path)) + video = VideoAsset(name=p).np_ndarrays + os.remove(p) + return video + except Exception as e: + print(f"Error loading video {video_path}: {e}", file=sys.stderr) + return None + async def serve(address): # Start asyncio gRPC server server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh index 78a3d5ba..0183a928 100755 --- a/backend/python/vllm/install.sh +++ b/backend/python/vllm/install.sh @@ -13,4 +13,20 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" fi -installRequirements +# We don't embed this into the images as it is a large dependency and not always needed. +# Besides, the inference speed is not actually usable for production use-cases in its current state. +if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then + ensureVenv + # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html + if [ ! -d vllm ]; then + git clone https://github.com/vllm-project/vllm + fi + pushd vllm + uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.1 protobuf bitsandbytes + uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu + VLLM_TARGET_DEVICE=cpu python setup.py install + popd + rm -rf vllm + else + installRequirements +fi diff --git a/backend/python/vllm/requirements-cpu.txt b/backend/python/vllm/requirements-cpu.txt index 765a1ef5..84058901 100644 --- a/backend/python/vllm/requirements-cpu.txt +++ b/backend/python/vllm/requirements-cpu.txt @@ -1,3 +1,3 @@ accelerate -torch +torch==2.4.1 transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt index 43817727..a6e49c1f 100644 --- a/backend/python/vllm/requirements-cublas11.txt +++ b/backend/python/vllm/requirements-cublas11.txt @@ -1,4 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 accelerate -torch -transformers \ No newline at end of file +torch==2.4.1+cu118 +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas12.txt b/backend/python/vllm/requirements-cublas12.txt index 765a1ef5..2dfc28f9 100644 --- a/backend/python/vllm/requirements-cublas12.txt +++ b/backend/python/vllm/requirements-cublas12.txt @@ -1,3 +1,4 @@ accelerate -torch -transformers \ No newline at end of file +torch==2.4.1 +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt index c73d8141..f580314a 100644 --- a/backend/python/vllm/requirements-hipblas.txt +++ b/backend/python/vllm/requirements-hipblas.txt @@ -1,4 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 accelerate -torch -transformers \ No newline at end of file +torch==2.4.1+rocm6.0 +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 1f82c46e..8955165a 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -1,7 +1,9 @@ --extra-index-url
https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu accelerate -torch +torch==2.3.1+cxx11.abi transformers optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools +bitsandbytes +oneccl_bind_pt==2.3.100+xpu \ No newline at end of file diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index b9c192d5..1f92add8 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi setuptools \ No newline at end of file diff --git a/core/application.go b/core/application.go deleted file mode 100644 index e4efbdd0..00000000 --- a/core/application.go +++ /dev/null @@ -1,38 +0,0 @@ -package core - -import ( - "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/model" -) - -// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy -// Perhaps a proper DI system is worth it in the future, but for now keep things simple. -type Application struct { - - // Application-Level Config - ApplicationConfig *config.ApplicationConfig - // ApplicationState *ApplicationState - - // Core Low-Level Services - BackendConfigLoader *config.BackendConfigLoader - ModelLoader *model.ModelLoader - - // Backend Services - // EmbeddingsBackendService *backend.EmbeddingsBackendService - // ImageGenerationBackendService *backend.ImageGenerationBackendService - // LLMBackendService *backend.LLMBackendService - // TranscriptionBackendService *backend.TranscriptionBackendService - // TextToSpeechBackendService *backend.TextToSpeechBackendService - - // LocalAI System Services - BackendMonitorService *services.BackendMonitorService - GalleryService *services.GalleryService - LocalAIMetricsService *services.LocalAIMetricsService - // OpenAIService *services.OpenAIService -} - -// TODO [NEXT PR?]: Break up ApplicationConfig. 
-// Migrate over stuff that is not set via config at all - especially runtime stuff -type ApplicationState struct { -} diff --git a/core/application/application.go b/core/application/application.go new file mode 100644 index 00000000..6e8d6204 --- /dev/null +++ b/core/application/application.go @@ -0,0 +1,39 @@ +package application + +import ( + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" +) + +type Application struct { + backendLoader *config.BackendConfigLoader + modelLoader *model.ModelLoader + applicationConfig *config.ApplicationConfig + templatesEvaluator *templates.Evaluator +} + +func newApplication(appConfig *config.ApplicationConfig) *Application { + return &Application{ + backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath), + modelLoader: model.NewModelLoader(appConfig.ModelPath), + applicationConfig: appConfig, + templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath), + } +} + +func (a *Application) BackendLoader() *config.BackendConfigLoader { + return a.backendLoader +} + +func (a *Application) ModelLoader() *model.ModelLoader { + return a.modelLoader +} + +func (a *Application) ApplicationConfig() *config.ApplicationConfig { + return a.applicationConfig +} + +func (a *Application) TemplatesEvaluator() *templates.Evaluator { + return a.templatesEvaluator +} diff --git a/core/startup/config_file_watcher.go b/core/application/config_file_watcher.go similarity index 96% rename from core/startup/config_file_watcher.go rename to core/application/config_file_watcher.go index df72483f..46f29b10 100644 --- a/core/startup/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -1,4 +1,4 @@ -package startup +package application import ( "encoding/json" @@ -8,8 +8,8 @@ import ( "path/filepath" "time" - "github.com/fsnotify/fsnotify" "dario.cat/mergo" + "github.com/fsnotify/fsnotify" "github.com/mudler/LocalAI/core/config" "github.com/rs/zerolog/log" ) diff --git a/core/startup/startup.go b/core/application/startup.go similarity index 58% rename from core/startup/startup.go rename to core/application/startup.go index 3565d196..fffcd8bb 100644 --- a/core/startup/startup.go +++ b/core/application/startup.go @@ -1,206 +1,201 @@ -package startup - -import ( - "fmt" - "os" - - "github.com/mudler/LocalAI/core" - "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/internal" - "github.com/mudler/LocalAI/pkg/assets" - "github.com/mudler/LocalAI/pkg/library" - "github.com/mudler/LocalAI/pkg/model" - pkgStartup "github.com/mudler/LocalAI/pkg/startup" - "github.com/mudler/LocalAI/pkg/xsysinfo" - "github.com/rs/zerolog/log" -) - -func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { - options := config.NewApplicationConfig(opts...) 
- - log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) - log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) - caps, err := xsysinfo.CPUCapabilities() - if err == nil { - log.Debug().Msgf("CPU capabilities: %v", caps) - } - gpus, err := xsysinfo.GPUs() - if err == nil { - log.Debug().Msgf("GPU count: %d", len(gpus)) - for _, gpu := range gpus { - log.Debug().Msgf("GPU: %s", gpu.String()) - } - } - - // Make sure directories exists - if options.ModelPath == "" { - return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") - } - err = os.MkdirAll(options.ModelPath, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) - } - if options.ImageDir != "" { - err := os.MkdirAll(options.ImageDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) - } - } - if options.AudioDir != "" { - err := os.MkdirAll(options.AudioDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) - } - } - if options.UploadDir != "" { - err := os.MkdirAll(options.UploadDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) - } - } - - if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { - log.Error().Err(err).Msg("error installing models") - } - - cl := config.NewBackendConfigLoader(options.ModelPath) - ml := model.NewModelLoader(options.ModelPath) - - configLoaderOpts := options.ToConfigLoaderOptions() - - if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { - log.Error().Err(err).Msg("error loading config files") - } - - if options.ConfigFile != "" { - if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { - log.Error().Err(err).Msg("error loading config file") - } - } - - if err := cl.Preload(options.ModelPath); err != nil { - log.Error().Err(err).Msg("error downloading models") - } - - if options.PreloadJSONModels != "" { - if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err - } - } - - if options.PreloadModelsFromPath != "" { - if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err - } - } - - if options.Debug { - for _, v := range cl.GetAllBackendConfigs() { - log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) - } - } - - if options.AssetsDestination != "" { - // Extract files from the embedded FS - err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) - log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) - if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) - } - } - - if options.LibPath != "" { - // If there is a lib directory, set LD_LIBRARY_PATH to include it - err := library.LoadExternal(options.LibPath) - if err != nil { - log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries") - } - } - - // turn off any process that was started by GRPC if the context is canceled - go func() { - 
<-options.Context.Done() - log.Debug().Msgf("Context canceled, shutting down") - err := ml.StopAllGRPC() - if err != nil { - log.Error().Err(err).Msg("error while stopping all grpc backends") - } - }() - - if options.WatchDog { - wd := model.NewWatchDog( - ml, - options.WatchDogBusyTimeout, - options.WatchDogIdleTimeout, - options.WatchDogBusy, - options.WatchDogIdle) - ml.SetWatchDog(wd) - go wd.Run() - go func() { - <-options.Context.Done() - log.Debug().Msgf("Context canceled, shutting down") - wd.Shutdown() - }() - } - - // Watch the configuration directory - startWatcher(options) - - log.Info().Msg("core/startup process completed!") - return cl, ml, options, nil -} - -func startWatcher(options *config.ApplicationConfig) { - if options.DynamicConfigsDir == "" { - // No need to start the watcher if the directory is not set - return - } - - if _, err := os.Stat(options.DynamicConfigsDir); err != nil { - if os.IsNotExist(err) { - // We try to create the directory if it does not exist and was specified - if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil { - log.Error().Err(err).Msg("failed creating DynamicConfigsDir") - } - } else { - // something else happened, we log the error and don't start the watcher - log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started") - return - } - } - - configHandler := newConfigFileHandler(options) - if err := configHandler.Watch(); err != nil { - log.Error().Err(err).Msg("failed creating watcher") - } -} - -// In Lieu of a proper DI framework, this function wires up the Application manually. -// This is in core/startup rather than core/state.go to keep package references clean! -func createApplication(appConfig *config.ApplicationConfig) *core.Application { - app := &core.Application{ - ApplicationConfig: appConfig, - BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath), - ModelLoader: model.NewModelLoader(appConfig.ModelPath), - } - - var err error - - // app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - - app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.GalleryService = services.NewGalleryService(app.ApplicationConfig) - // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) - - app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() - if err != nil { - log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.") - } - - return app -} +package application + +import ( + "fmt" + "os" + + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/internal" + "github.com/mudler/LocalAI/pkg/assets" + + 
"github.com/mudler/LocalAI/pkg/library" + "github.com/mudler/LocalAI/pkg/model" + pkgStartup "github.com/mudler/LocalAI/pkg/startup" + "github.com/mudler/LocalAI/pkg/xsysinfo" + "github.com/rs/zerolog/log" +) + +func New(opts ...config.AppOption) (*Application, error) { + options := config.NewApplicationConfig(opts...) + application := newApplication(options) + + log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) + log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) + caps, err := xsysinfo.CPUCapabilities() + if err == nil { + log.Debug().Msgf("CPU capabilities: %v", caps) + } + gpus, err := xsysinfo.GPUs() + if err == nil { + log.Debug().Msgf("GPU count: %d", len(gpus)) + for _, gpu := range gpus { + log.Debug().Msgf("GPU: %s", gpu.String()) + } + } + + // Make sure directories exists + if options.ModelPath == "" { + return nil, fmt.Errorf("options.ModelPath cannot be empty") + } + err = os.MkdirAll(options.ModelPath, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create ModelPath: %q", err) + } + if options.ImageDir != "" { + err := os.MkdirAll(options.ImageDir, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create ImageDir: %q", err) + } + } + if options.AudioDir != "" { + err := os.MkdirAll(options.AudioDir, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create AudioDir: %q", err) + } + } + if options.UploadDir != "" { + err := os.MkdirAll(options.UploadDir, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create UploadDir: %q", err) + } + } + + if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { + log.Error().Err(err).Msg("error installing models") + } + + configLoaderOpts := options.ToConfigLoaderOptions() + + if err := application.BackendLoader().LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { + log.Error().Err(err).Msg("error loading config files") + } + + if options.ConfigFile != "" { + if err := application.BackendLoader().LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { + log.Error().Err(err).Msg("error loading config file") + } + } + + if err := application.BackendLoader().Preload(options.ModelPath); err != nil { + log.Error().Err(err).Msg("error downloading models") + } + + if options.PreloadJSONModels != "" { + if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { + return nil, err + } + } + + if options.PreloadModelsFromPath != "" { + if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { + return nil, err + } + } + + if options.Debug { + for _, v := range application.BackendLoader().GetAllBackendConfigs() { + log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) + } + } + + if options.AssetsDestination != "" { + // Extract files from the embedded FS + err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) + log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) + if err != nil { + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) + } + } + + if options.LibPath != "" { + // If there is a lib directory, set LD_LIBRARY_PATH to include it + err := 
library.LoadExternal(options.LibPath)
+        if err != nil {
+            log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
+        }
+    }
+
+    // turn off any process that was started by GRPC if the context is canceled
+    go func() {
+        <-options.Context.Done()
+        log.Debug().Msgf("Context canceled, shutting down")
+        err := application.ModelLoader().StopAllGRPC()
+        if err != nil {
+            log.Error().Err(err).Msg("error while stopping all grpc backends")
+        }
+    }()
+
+    if options.WatchDog {
+        wd := model.NewWatchDog(
+            application.ModelLoader(),
+            options.WatchDogBusyTimeout,
+            options.WatchDogIdleTimeout,
+            options.WatchDogBusy,
+            options.WatchDogIdle)
+        application.ModelLoader().SetWatchDog(wd)
+        go wd.Run()
+        go func() {
+            <-options.Context.Done()
+            log.Debug().Msgf("Context canceled, shutting down")
+            wd.Shutdown()
+        }()
+    }
+
+    if options.LoadToMemory != nil {
+        for _, m := range options.LoadToMemory {
+            cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
+                config.LoadOptionDebug(options.Debug),
+                config.LoadOptionThreads(options.Threads),
+                config.LoadOptionContextSize(options.ContextSize),
+                config.LoadOptionF16(options.F16),
+                config.ModelPath(options.ModelPath),
+            )
+            if err != nil {
+                return nil, err
+            }
+
+            log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
+
+            o := backend.ModelOptions(*cfg, options)
+
+            var backendErr error
+            _, backendErr = application.ModelLoader().Load(o...)
+            if backendErr != nil {
+                return nil, backendErr
+            }
+        }
+    }
+
+    // Watch the configuration directory
+    startWatcher(options)
+
+    log.Info().Msg("core/startup process completed!")
+    return application, nil
+}
+
+func startWatcher(options *config.ApplicationConfig) {
+    if options.DynamicConfigsDir == "" {
+        // No need to start the watcher if the directory is not set
+        return
+    }
+
+    if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
+        if os.IsNotExist(err) {
+            // We try to create the directory if it does not exist and was specified
+            if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
+                log.Error().Err(err).Msg("failed creating DynamicConfigsDir")
+            }
+        } else {
+            // something else happened, we log the error and don't start the watcher
+            log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started")
+            return
+        }
+    }
+
+    configHandler := newConfigFileHandler(options)
+    if err := configHandler.Watch(); err != nil {
+        log.Error().Err(err).Msg("failed creating watcher")
+    }
+}
diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go
index 31b10a19..a96e9829 100644
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -10,27 +10,10 @@ import (
 )
 
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
-    modelFile := backendConfig.Model
-    grpcOpts := gRPCModelOpts(backendConfig)
+    opts := ModelOptions(backendConfig, appConfig)
 
-    var inferenceModel interface{}
-    var err error
-
-    opts := modelOpts(backendConfig, appConfig, []model.Option{
-        model.WithLoadGRPCLoadModelOpts(grpcOpts),
-        model.WithThreads(uint32(*backendConfig.Threads)),
-        model.WithAssetDir(appConfig.AssetsDestination),
-        model.WithModel(modelFile),
-        model.WithContext(appConfig.Context),
-    })
-
-    if backendConfig.Backend == "" {
-        inferenceModel, err = loader.GreedyLoader(opts...)
- } else { - opts = append(opts, model.WithBackendString(backendConfig.Backend)) - inferenceModel, err = loader.BackendLoader(opts...) - } + inferenceModel, err := loader.Load(opts...) if err != nil { return nil, err } diff --git a/core/backend/image.go b/core/backend/image.go index 8c3f56b3..38ca4357 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -8,21 +8,9 @@ import ( ) func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { - threads := backendConfig.Threads - if *threads == 0 && appConfig.Threads != 0 { - threads = &appConfig.Threads - } - gRPCOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(backendConfig, appConfig, []model.Option{ - model.WithBackendString(backendConfig.Backend), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithThreads(uint32(*threads)), - model.WithContext(appConfig.Context), - model.WithModel(backendConfig.Model), - model.WithLoadGRPCLoadModelOpts(gRPCOpts), - }) - inferenceModel, err := loader.BackendLoader( + opts := ModelOptions(backendConfig, appConfig) + inferenceModel, err := loader.Load( opts..., ) if err != nil { diff --git a/core/backend/llm.go b/core/backend/llm.go index 2b4564a8..d91ded51 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -2,6 +2,7 @@ package backend import ( "context" + "encoding/json" "fmt" "os" "regexp" @@ -15,7 +16,6 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/pkg/grpc" "github.com/mudler/LocalAI/pkg/grpc/proto" model "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/utils" @@ -27,32 +27,14 @@ type LLMResponse struct { } type TokenUsage struct { - Prompt int - Completion int + Prompt int + Completion int + TimingPromptProcessing float64 + TimingTokenGeneration float64 } -func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { +func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { modelFile := c.Model - threads := c.Threads - if *threads == 0 && o.Threads != 0 { - threads = &o.Threads - } - grpcOpts := gRPCModelOpts(c) - - var inferenceModel grpc.Backend - var err error - - opts := modelOpts(c, o, []model.Option{ - model.WithLoadGRPCLoadModelOpts(grpcOpts), - model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup - model.WithAssetDir(o.AssetsDestination), - model.WithModel(modelFile), - model.WithContext(o.Context), - }) - - if c.Backend != "" { - opts = append(opts, model.WithBackendString(c.Backend)) - } // Check if the modelFile exists, if it doesn't try to load it from the gallery if o.AutoloadGalleries { // experimental @@ -66,12 +48,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im } } - if c.Backend == "" { - inferenceModel, err = loader.GreedyLoader(opts...) - } else { - inferenceModel, err = loader.BackendLoader(opts...) - } - + opts := ModelOptions(c, o) + inferenceModel, err := loader.Load(opts...) 
if err != nil { return nil, err } @@ -88,6 +66,16 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im switch ct := message.Content.(type) { case string: protoMessages[i].Content = ct + case []interface{}: + // If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here + data, _ := json.Marshal(ct) + resultData := []struct { + Text string `json:"text"` + }{} + json.Unmarshal(data, &resultData) + for _, r := range resultData { + protoMessages[i].Content += r.Text + } default: return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct) } @@ -101,6 +89,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im opts.Messages = protoMessages opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate opts.Images = images + opts.Videos = videos + opts.Audios = audios tokenUsage := TokenUsage{} @@ -129,8 +119,14 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im ss := "" var partialRune []byte - err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) { - partialRune = append(partialRune, chars...) + err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) { + msg := reply.Message + partialRune = append(partialRune, msg...) + + tokenUsage.Prompt = int(reply.PromptTokens) + tokenUsage.Completion = int(reply.Tokens) + tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration + tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing for len(partialRune) > 0 { r, size := utf8.DecodeRune(partialRune) @@ -144,6 +140,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im partialRune = partialRune[size:] } + + if len(msg) == 0 { + tokenCallback("", tokenUsage) + } }) return LLMResponse{ Response: ss, @@ -161,6 +161,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im if tokenUsage.Completion == 0 { tokenUsage.Completion = int(reply.Tokens) } + + tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration + tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing + return LLMResponse{ Response: string(reply.Message), Usage: tokenUsage, diff --git a/core/backend/options.go b/core/backend/options.go index d986b8e6..3201142d 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -11,32 +11,65 @@ import ( "github.com/rs/zerolog/log" ) -func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { +func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...model.Option) []model.Option { + name := c.Name + if name == "" { + name = c.Model + } + + defOpts := []model.Option{ + model.WithBackendString(c.Backend), + model.WithModel(c.Model), + model.WithAssetDir(so.AssetsDestination), + model.WithContext(so.Context), + model.WithModelID(name), + } + + threads := 1 + + if c.Threads != nil { + threads = *c.Threads + } + + if so.Threads != 0 { + threads = so.Threads + } + + c.Threads = &threads + + grpcOpts := grpcModelOpts(c) + defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts)) + if so.SingleBackend { - opts = append(opts, model.WithSingleActiveBackend()) + defOpts = append(defOpts, model.WithSingleActiveBackend()) } if so.ParallelBackendRequests { - opts = append(opts, model.EnableParallelRequests) + defOpts = append(defOpts, model.EnableParallelRequests) } if c.GRPC.Attempts != 0 { - opts = append(opts, 
model.WithGRPCAttempts(c.GRPC.Attempts)) + defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts)) } if c.GRPC.AttemptsSleepTime != 0 { - opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) + defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) } for k, v := range so.ExternalGRPCBackends { - opts = append(opts, model.WithExternalBackend(k, v)) + defOpts = append(defOpts, model.WithExternalBackend(k, v)) } - return opts + return append(defOpts, opts...) } func getSeed(c config.BackendConfig) int32 { - seed := int32(*c.Seed) + var seed int32 = config.RAND_SEED + + if c.Seed != nil { + seed = int32(*c.Seed) + } + if seed == config.RAND_SEED { seed = rand.Int31() } @@ -44,32 +77,82 @@ func getSeed(c config.BackendConfig) int32 { return seed } -func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { +func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { b := 512 if c.Batch != 0 { b = c.Batch } + + f16 := false + if c.F16 != nil { + f16 = *c.F16 + } + + embeddings := false + if c.Embeddings != nil { + embeddings = *c.Embeddings + } + + lowVRAM := false + if c.LowVRAM != nil { + lowVRAM = *c.LowVRAM + } + + mmap := false + if c.MMap != nil { + mmap = *c.MMap + } + + ctxSize := 1024 + if c.ContextSize != nil { + ctxSize = *c.ContextSize + } + + mmlock := false + if c.MMlock != nil { + mmlock = *c.MMlock + } + + nGPULayers := 9999999 + if c.NGPULayers != nil { + nGPULayers = *c.NGPULayers + } + + triggers := make([]*pb.GrammarTrigger, 0) + for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers { + triggers = append(triggers, &pb.GrammarTrigger{ + Word: t.Word, + AtStart: t.AtStart, + }) + + } + return &pb.ModelOptions{ CUDA: c.CUDA || c.Diffusers.CUDA, SchedulerType: c.Diffusers.SchedulerType, + GrammarTriggers: triggers, PipelineType: c.Diffusers.PipelineType, - CFGScale: c.Diffusers.CFGScale, + CFGScale: c.CFGScale, LoraAdapter: c.LoraAdapter, LoraScale: c.LoraScale, - F16Memory: *c.F16, + LoraAdapters: c.LoraAdapters, + LoraScales: c.LoraScales, + F16Memory: f16, LoraBase: c.LoraBase, IMG2IMG: c.Diffusers.IMG2IMG, CLIPModel: c.Diffusers.ClipModel, CLIPSubfolder: c.Diffusers.ClipSubFolder, + Options: c.Options, CLIPSkip: int32(c.Diffusers.ClipSkip), ControlNet: c.Diffusers.ControlNet, - ContextSize: int32(*c.ContextSize), + ContextSize: int32(ctxSize), Seed: getSeed(c), NBatch: int32(b), NoMulMatQ: c.NoMulMatQ, DraftModel: c.DraftModel, - AudioPath: c.VallE.AudioPath, + AudioPath: c.AudioPath, Quantization: c.Quantization, + LoadFormat: c.LoadFormat, GPUMemoryUtilization: c.GPUMemoryUtilization, TrustRemoteCode: c.TrustRemoteCode, EnforceEager: c.EnforceEager, @@ -78,6 +161,8 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { TensorParallelSize: int32(c.TensorParallelSize), MMProj: c.MMProj, FlashAttention: c.FlashAttention, + CacheTypeKey: c.CacheTypeK, + CacheTypeValue: c.CacheTypeV, NoKVOffload: c.NoKVOffloading, YarnExtFactor: c.YarnExtFactor, YarnAttnFactor: c.YarnAttnFactor, @@ -85,16 +170,16 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { YarnBetaSlow: c.YarnBetaSlow, NGQA: c.NGQA, RMSNormEps: c.RMSNormEps, - MLock: *c.MMlock, + MLock: mmlock, RopeFreqBase: c.RopeFreqBase, RopeScaling: c.RopeScaling, Type: c.ModelType, RopeFreqScale: c.RopeFreqScale, NUMA: c.NUMA, - Embeddings: *c.Embeddings, - LowVRAM: *c.LowVRAM, - NGPULayers: int32(*c.NGPULayers), - MMap: *c.MMap, + Embeddings: embeddings, + LowVRAM: lowVRAM, + NGPULayers: int32(nGPULayers), + MMap: mmap, MainGPU: 
c.MainGPU, Threads: int32(*c.Threads), TensorSplit: c.TensorSplit, diff --git a/core/backend/rerank.go b/core/backend/rerank.go index 1b718be2..8152ef7f 100644 --- a/core/backend/rerank.go +++ b/core/backend/rerank.go @@ -9,22 +9,10 @@ import ( model "github.com/mudler/LocalAI/pkg/model" ) -func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) { - bb := backend - if bb == "" { - return nil, fmt.Errorf("backend is required") - } +func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) { - grpcOpts := gRPCModelOpts(backendConfig) - - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ - model.WithBackendString(bb), - model.WithModel(modelFile), - model.WithContext(appConfig.Context), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithLoadGRPCLoadModelOpts(grpcOpts), - }) - rerankModel, err := loader.BackendLoader(opts...) + opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile)) + rerankModel, err := loader.Load(opts...) if err != nil { return nil, err } diff --git a/core/backend/soundgeneration.go b/core/backend/soundgeneration.go index abd5221b..a8d46478 100644 --- a/core/backend/soundgeneration.go +++ b/core/backend/soundgeneration.go @@ -13,7 +13,6 @@ import ( ) func SoundGeneration( - backend string, modelFile string, text string, duration *float32, @@ -25,20 +24,9 @@ func SoundGeneration( appConfig *config.ApplicationConfig, backendConfig config.BackendConfig, ) (string, *proto.Result, error) { - if backend == "" { - return "", nil, fmt.Errorf("backend is a required parameter") - } - grpcOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ - model.WithBackendString(backend), - model.WithModel(modelFile), - model.WithContext(appConfig.Context), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithLoadGRPCLoadModelOpts(grpcOpts), - }) - - soundGenModel, err := loader.BackendLoader(opts...) + opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile)) + soundGenModel, err := loader.Load(opts...) if err != nil { return "", nil, err } diff --git a/core/backend/stores.go b/core/backend/stores.go index 1b514584..f5ee9166 100644 --- a/core/backend/stores.go +++ b/core/backend/stores.go @@ -8,16 +8,15 @@ import ( ) func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) { - if storeName == "" { - storeName = "default" - } + if storeName == "" { + storeName = "default" + } - sc := []model.Option{ - model.WithBackendString(model.LocalStoreBackend), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithModel(storeName), - } + sc := []model.Option{ + model.WithBackendString(model.LocalStoreBackend), + model.WithAssetDir(appConfig.AssetsDestination), + model.WithModel(storeName), + } - return sl.BackendLoader(sc...) + return sl.Load(sc...) 
 }
-
diff --git a/core/backend/token_metrics.go b/core/backend/token_metrics.go
new file mode 100644
index 00000000..cc71c868
--- /dev/null
+++ b/core/backend/token_metrics.go
@@ -0,0 +1,31 @@
+package backend
+
+import (
+    "context"
+    "fmt"
+
+    "github.com/mudler/LocalAI/core/config"
+    "github.com/mudler/LocalAI/pkg/grpc/proto"
+    model "github.com/mudler/LocalAI/pkg/model"
+)
+
+func TokenMetrics(
+    modelFile string,
+    loader *model.ModelLoader,
+    appConfig *config.ApplicationConfig,
+    backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
+
+    opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
+    model, err := loader.Load(opts...)
+    if err != nil {
+        return nil, err
+    }
+
+    if model == nil {
+        return nil, fmt.Errorf("could not load model")
+    }
+
+    res, err := model.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
+
+    return res, err
+}
diff --git a/core/backend/tokenize.go b/core/backend/tokenize.go
new file mode 100644
index 00000000..1783083b
--- /dev/null
+++ b/core/backend/tokenize.go
@@ -0,0 +1,41 @@
+package backend
+
+import (
+    "github.com/mudler/LocalAI/core/config"
+    "github.com/mudler/LocalAI/core/schema"
+    "github.com/mudler/LocalAI/pkg/grpc"
+    model "github.com/mudler/LocalAI/pkg/model"
+)
+
+func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
+
+    modelFile := backendConfig.Model
+
+    var inferenceModel grpc.Backend
+    var err error
+
+    opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
+
+    inferenceModel, err = loader.Load(opts...)
+    if err != nil {
+        return schema.TokenizeResponse{}, err
+    }
+
+    predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
+    predictOptions.Prompt = s
+
+    // tokenize the string
+    resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
+    if err != nil {
+        return schema.TokenizeResponse{}, err
+    }
+
+    if resp.Tokens == nil {
+        resp.Tokens = make([]int32, 0)
+    }
+
+    return schema.TokenizeResponse{
+        Tokens: resp.Tokens,
+    }, nil
+
+}
diff --git a/core/backend/transcript.go b/core/backend/transcript.go
index 6ebc7c10..372f6984 100644
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -14,15 +14,13 @@ import (
 
 func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
-    opts := modelOpts(backendConfig, appConfig, []model.Option{
-        model.WithBackendString(model.WhisperBackend),
-        model.WithModel(backendConfig.Model),
-        model.WithContext(appConfig.Context),
-        model.WithThreads(uint32(*backendConfig.Threads)),
-        model.WithAssetDir(appConfig.AssetsDestination),
-    })
+    if backendConfig.Backend == "" {
+        backendConfig.Backend = model.WhisperBackend
+    }
 
-    transcriptionModel, err := ml.BackendLoader(opts...)
+    opts := ModelOptions(backendConfig, appConfig)
+
+    transcriptionModel, err := ml.Load(opts...)
if err != nil { return nil, err } diff --git a/core/backend/tts.go b/core/backend/tts.go index 258882ae..f9be6955 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -28,16 +28,8 @@ func ModelTTS( bb = model.PiperBackend } - grpcOpts := gRPCModelOpts(backendConfig) - - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ - model.WithBackendString(bb), - model.WithModel(modelFile), - model.WithContext(appConfig.Context), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithLoadGRPCLoadModelOpts(grpcOpts), - }) - ttsModel, err := loader.BackendLoader(opts...) + opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile)) + ttsModel, err := loader.Load(opts...) if err != nil { return "", nil, err } diff --git a/core/cli/models.go b/core/cli/models.go index 56d13fc7..28b2944f 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model") } - err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) + err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) if err != nil { return err } diff --git a/core/cli/run.go b/core/cli/run.go index afb7204c..3162ef14 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -6,12 +6,12 @@ import ( "strings" "time" + "github.com/mudler/LocalAI/core/application" cli_api "github.com/mudler/LocalAI/core/cli/api" cliContext "github.com/mudler/LocalAI/core/cli/context" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/http" "github.com/mudler/LocalAI/core/p2p" - "github.com/mudler/LocalAI/core/startup" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -32,7 +32,6 @@ type RunCMD struct { Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` - RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"` PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"` @@ -53,6 +52,7 @@ type RunCMD struct { OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"` DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. 
This should only be enabled in secure testing environments" group:"hardening"` + DisableMetricsEndpoint bool `env:"LOCALAI_DISABLE_METRICS_ENDPOINT,DISABLE_METRICS_ENDPOINT" default:"false" help:"Disable the /metrics endpoint" group:"api"` HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"` Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"` @@ -69,6 +69,8 @@ type RunCMD struct { WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` + MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` + LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` } func (r *RunCMD) Run(ctx *cliContext.Context) error { @@ -87,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), - config.WithModelLibraryURL(r.RemoteLibrary), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithCsrf(r.CSRF), @@ -104,6 +105,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet), config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints), config.WithP2PNetworkID(r.Peer2PeerNetworkID), + config.WithLoadToMemory(r.LoadToMemory), + config.WithMachineTag(r.MachineTag), + } + + if r.DisableMetricsEndpoint { + opts = append(opts, config.DisableMetricsEndpoint) } token := "" @@ -179,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } if r.PreloadBackendOnly { - _, _, _, err := startup.Startup(opts...) + _, err := application.New(opts...) return err } - cl, ml, options, err := startup.Startup(opts...) + app, err := application.New(opts...) 
if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) } - appHTTP, err := http.App(cl, ml, options) + appHTTP, err := http.API(app) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") return err diff --git a/core/cli/soundgeneration.go b/core/cli/soundgeneration.go index 5711b199..82bc0346 100644 --- a/core/cli/soundgeneration.go +++ b/core/cli/soundgeneration.go @@ -85,13 +85,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error { options := config.BackendConfig{} options.SetDefaults() + options.Backend = t.Backend var inputFile *string if t.InputFile != "" { inputFile = &t.InputFile } - filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text, + filePath, _, err := backend.SoundGeneration(t.Model, text, parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample, inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options) diff --git a/core/cli/util.go b/core/cli/util.go index b3e545d8..57b8ad9e 100644 --- a/core/cli/util.go +++ b/core/cli/util.go @@ -15,8 +15,9 @@ import ( ) type UtilCMD struct { - GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"` - HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"` + GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"` + HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"` + UsecaseHeuristic UsecaseHeuristicCMD `cmd:"" name:"usecase-heuristic" help:"Checks a specific model config and prints what usecase LocalAI will offer for it."` } type GGUFInfoCMD struct { @@ -30,6 +31,11 @@ type HFScanCMD struct { ToScan []string `arg:""` } +type UsecaseHeuristicCMD struct { + ConfigName string `name:"The config file to check"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` +} + func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error { if u.Args == nil || len(u.Args) == 0 { return fmt.Errorf("no GGUF file provided") @@ -99,3 +105,31 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error { return nil } } + +func (uhcmd *UsecaseHeuristicCMD) Run(ctx *cliContext.Context) error { + if len(uhcmd.ConfigName) == 0 { + log.Error().Msg("ConfigName is a required parameter") + return fmt.Errorf("config name is a required parameter") + } + if len(uhcmd.ModelsPath) == 0 { + log.Error().Msg("ModelsPath is a required parameter") + return fmt.Errorf("model path is a required parameter") + } + bcl := config.NewBackendConfigLoader(uhcmd.ModelsPath) + err := bcl.LoadBackendConfig(uhcmd.ConfigName) + if err != nil { + log.Error().Err(err).Str("ConfigName", uhcmd.ConfigName).Msg("error while loading backend") + return err + } + bc, exists := bcl.GetBackendConfig(uhcmd.ConfigName) + if !exists { + log.Error().Str("ConfigName", uhcmd.ConfigName).Msg("ConfigName not found") + } + for name, uc := range config.GetAllBackendConfigUsecases() { + if bc.HasUsecases(uc) { + log.Info().Str("Usecase", name) + } + } + log.Info().Msg("---") + return nil +} diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 6275481b..aa7a8f1a 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -76,8 +76,14 @@ 
func (r *P2P) Run(ctx *cliContext.Context) error { "util", "llama-cpp-rpc-server", ) - extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ") + var extraArgs []string + + if r.ExtraLLamaCPPArgs != "" { + extraArgs = strings.Split(r.ExtraLLamaCPPArgs, " ") + } args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...) + log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args)) + args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess) cmd := exec.Command( diff --git a/core/config/application_config.go b/core/config/application_config.go index afbf325f..2cc9b01b 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -39,10 +39,10 @@ type ApplicationConfig struct { OpaqueErrors bool UseSubtleKeyComparison bool DisableApiKeyRequirementForHttpGet bool + DisableMetrics bool HttpGetExemptedEndpoints []*regexp.Regexp DisableGalleryEndpoint bool - - ModelLibraryURL string + LoadToMemory []string Galleries []Gallery @@ -63,6 +63,8 @@ type ApplicationConfig struct { ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + + MachineTag string } type AppOption func(*ApplicationConfig) @@ -92,6 +94,12 @@ func WithModelPath(path string) AppOption { } } +func WithMachineTag(tag string) AppOption { + return func(o *ApplicationConfig) { + o.MachineTag = tag + } +} + func WithCors(b bool) AppOption { return func(o *ApplicationConfig) { o.CORS = b @@ -116,12 +124,6 @@ func WithP2PToken(s string) AppOption { } } -func WithModelLibraryURL(url string) AppOption { - return func(o *ApplicationConfig) { - o.ModelLibraryURL = url - } -} - func WithLibPath(path string) AppOption { return func(o *ApplicationConfig) { o.LibPath = path @@ -331,6 +333,12 @@ func WithOpaqueErrors(opaque bool) AppOption { } } +func WithLoadToMemory(models []string) AppOption { + return func(o *ApplicationConfig) { + o.LoadToMemory = models + } +} + func WithSubtleKeyComparison(subtle bool) AppOption { return func(o *ApplicationConfig) { o.UseSubtleKeyComparison = subtle @@ -343,6 +351,10 @@ func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption { } } +var DisableMetricsEndpoint AppOption = func(o *ApplicationConfig) { + o.DisableMetrics = true +} + func WithHttpGetExemptedEndpoints(endpoints []string) AppOption { return func(o *ApplicationConfig) { o.HttpGetExemptedEndpoints = []*regexp.Regexp{} diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 5662f1ca..2b130ec8 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -3,11 +3,13 @@ package config import ( "os" "regexp" + "slices" "strings" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" + "gopkg.in/yaml.v3" ) const ( @@ -19,21 +21,22 @@ type TTSConfig struct { // Voice wav path or id Voice string `yaml:"voice"` - // Vall-e-x - VallE VallE `yaml:"vall-e"` + AudioPath string `yaml:"audio_path"` } type BackendConfig struct { schema.PredictionOptions `yaml:"parameters"` Name string `yaml:"name"` - F16 *bool `yaml:"f16"` - Threads *int `yaml:"threads"` - Debug *bool `yaml:"debug"` - Roles map[string]string `yaml:"roles"` - Embeddings *bool `yaml:"embeddings"` - Backend string `yaml:"backend"` - TemplateConfig TemplateConfig `yaml:"template"` + F16 *bool `yaml:"f16"` + Threads *int `yaml:"threads"` + Debug *bool `yaml:"debug"` + Roles map[string]string `yaml:"roles"` + Embeddings *bool 
`yaml:"embeddings"` + Backend string `yaml:"backend"` + TemplateConfig TemplateConfig `yaml:"template"` + KnownUsecaseStrings []string `yaml:"known_usecases"` + KnownUsecases *BackendConfigUsecases `yaml:"-"` PromptStrings, InputStrings []string `yaml:"-"` InputToken [][]int `yaml:"-"` @@ -68,6 +71,8 @@ type BackendConfig struct { Description string `yaml:"description"` Usage string `yaml:"usage"` + + Options []string `yaml:"options"` } type File struct { @@ -76,10 +81,6 @@ type File struct { URI downloader.URI `yaml:"uri" json:"uri"` } -type VallE struct { - AudioPath string `yaml:"audio_path"` -} - type FeatureFlag map[string]*bool func (ff FeatureFlag) Enabled(s string) bool { @@ -93,16 +94,15 @@ type GRPC struct { } type Diffusers struct { - CUDA bool `yaml:"cuda"` - PipelineType string `yaml:"pipeline_type"` - SchedulerType string `yaml:"scheduler_type"` - EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify - CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale - IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser - ClipSkip int `yaml:"clip_skip"` // Skip every N frames - ClipModel string `yaml:"clip_model"` // Clip model to use - ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model - ControlNet string `yaml:"control_net"` + CUDA bool `yaml:"cuda"` + PipelineType string `yaml:"pipeline_type"` + SchedulerType string `yaml:"scheduler_type"` + EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify + IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser + ClipSkip int `yaml:"clip_skip"` // Skip every N frames + ClipModel string `yaml:"clip_model"` // Clip model to use + ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model + ControlNet string `yaml:"control_net"` } // LLMConfig is a struct that holds the configuration that are @@ -130,25 +130,30 @@ type LLMConfig struct { TrimSpace []string `yaml:"trimspace"` TrimSuffix []string `yaml:"trimsuffix"` - ContextSize *int `yaml:"context_size"` - NUMA bool `yaml:"numa"` - LoraAdapter string `yaml:"lora_adapter"` - LoraBase string `yaml:"lora_base"` - LoraScale float32 `yaml:"lora_scale"` - NoMulMatQ bool `yaml:"no_mulmatq"` - DraftModel string `yaml:"draft_model"` - NDraft int32 `yaml:"n_draft"` - Quantization string `yaml:"quantization"` - GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM - TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM - EnforceEager bool `yaml:"enforce_eager"` // vLLM - SwapSpace int `yaml:"swap_space"` // vLLM - MaxModelLen int `yaml:"max_model_len"` // vLLM - TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM - MMProj string `yaml:"mmproj"` + ContextSize *int `yaml:"context_size"` + NUMA bool `yaml:"numa"` + LoraAdapter string `yaml:"lora_adapter"` + LoraBase string `yaml:"lora_base"` + LoraAdapters []string `yaml:"lora_adapters"` + LoraScales []float32 `yaml:"lora_scales"` + LoraScale float32 `yaml:"lora_scale"` + NoMulMatQ bool `yaml:"no_mulmatq"` + DraftModel string `yaml:"draft_model"` + NDraft int32 `yaml:"n_draft"` + Quantization string `yaml:"quantization"` + LoadFormat string `yaml:"load_format"` + GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM + TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM + EnforceEager bool `yaml:"enforce_eager"` // vLLM + SwapSpace int `yaml:"swap_space"` // vLLM + MaxModelLen int `yaml:"max_model_len"` // vLLM + TensorParallelSize int 
`yaml:"tensor_parallel_size"` // vLLM + MMProj string `yaml:"mmproj"` - FlashAttention bool `yaml:"flash_attention"` - NoKVOffloading bool `yaml:"no_kv_offloading"` + FlashAttention bool `yaml:"flash_attention"` + NoKVOffloading bool `yaml:"no_kv_offloading"` + CacheTypeK string `yaml:"cache_type_k"` + CacheTypeV string `yaml:"cache_type_v"` RopeScaling string `yaml:"rope_scaling"` ModelType string `yaml:"type"` @@ -157,6 +162,8 @@ type LLMConfig struct { YarnAttnFactor float32 `yaml:"yarn_attn_factor"` YarnBetaFast float32 `yaml:"yarn_beta_fast"` YarnBetaSlow float32 `yaml:"yarn_beta_slow"` + + CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale } // AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend @@ -192,6 +199,21 @@ type TemplateConfig struct { // JoinChatMessagesByCharacter is a string that will be used to join chat messages together. // It defaults to \n JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"` + + Multimodal string `yaml:"multimodal"` + + JinjaTemplate bool `yaml:"jinja_template"` +} + +func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error { + type BCAlias BackendConfig + var aux BCAlias + if err := value.Decode(&aux); err != nil { + return err + } + *c = BackendConfig(aux) + c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings) + return nil } func (c *BackendConfig) SetFunctionCallString(s string) { @@ -411,3 +433,121 @@ func (c *BackendConfig) Validate() bool { func (c *BackendConfig) HasTemplate() bool { return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != "" } + +type BackendConfigUsecases int + +const ( + FLAG_ANY BackendConfigUsecases = 0b000000000 + FLAG_CHAT BackendConfigUsecases = 0b000000001 + FLAG_COMPLETION BackendConfigUsecases = 0b000000010 + FLAG_EDIT BackendConfigUsecases = 0b000000100 + FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000 + FLAG_RERANK BackendConfigUsecases = 0b000010000 + FLAG_IMAGE BackendConfigUsecases = 0b000100000 + FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000 + FLAG_TTS BackendConfigUsecases = 0b010000000 + FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000 + + // Common Subsets + FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT +) + +func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases { + return map[string]BackendConfigUsecases{ + "FLAG_ANY": FLAG_ANY, + "FLAG_CHAT": FLAG_CHAT, + "FLAG_COMPLETION": FLAG_COMPLETION, + "FLAG_EDIT": FLAG_EDIT, + "FLAG_EMBEDDINGS": FLAG_EMBEDDINGS, + "FLAG_RERANK": FLAG_RERANK, + "FLAG_IMAGE": FLAG_IMAGE, + "FLAG_TRANSCRIPT": FLAG_TRANSCRIPT, + "FLAG_TTS": FLAG_TTS, + "FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION, + "FLAG_LLM": FLAG_LLM, + } +} + +func GetUsecasesFromYAML(input []string) *BackendConfigUsecases { + if len(input) == 0 { + return nil + } + result := FLAG_ANY + flags := GetAllBackendConfigUsecases() + for _, str := range input { + flag, exists := flags["FLAG_"+strings.ToUpper(str)] + if exists { + result |= flag + } + } + return &result +} + +// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success. 
+func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool { + if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) { + return true + } + return c.GuessUsecases(u) +} + +// GuessUsecases is a **heuristic based** function, as the backend in question may not be loaded yet, and the config may not record what it's useful at. +// In its current state, this function should ideally check for properties of the config like templates, rather than the direct backend name checks for the lower half. +// This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently. +func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool { + if (u & FLAG_CHAT) == FLAG_CHAT { + if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" { + return false + } + } + if (u & FLAG_COMPLETION) == FLAG_COMPLETION { + if c.TemplateConfig.Completion == "" { + return false + } + } + if (u & FLAG_EDIT) == FLAG_EDIT { + if c.TemplateConfig.Edit == "" { + return false + } + } + if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS { + if c.Embeddings == nil || !*c.Embeddings { + return false + } + } + if (u & FLAG_IMAGE) == FLAG_IMAGE { + imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"} + if !slices.Contains(imageBackends, c.Backend) { + return false + } + + if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" { + return false + } + + } + if (u & FLAG_RERANK) == FLAG_RERANK { + if c.Backend != "rerankers" { + return false + } + } + if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT { + if c.Backend != "whisper" { + return false + } + } + if (u & FLAG_TTS) == FLAG_TTS { + ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"} + if !slices.Contains(ttsBackends, c.Backend) { + return false + } + } + + if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION { + if c.Backend != "transformers-musicgen" { + return false + } + } + + return true +} diff --git a/core/config/backend_config_filter.go b/core/config/backend_config_filter.go new file mode 100644 index 00000000..f1eb2488 --- /dev/null +++ b/core/config/backend_config_filter.go @@ -0,0 +1,35 @@ +package config + +import "regexp" + +type BackendConfigFilterFn func(string, *BackendConfig) bool + +func NoFilterFn(_ string, _ *BackendConfig) bool { return true } + +func BuildNameFilterFn(filter string) (BackendConfigFilterFn, error) { + if filter == "" { + return NoFilterFn, nil + } + rxp, err := regexp.Compile(filter) + if err != nil { + return nil, err + } + return func(name string, config *BackendConfig) bool { + if config != nil { + return rxp.MatchString(config.Name) + } + return rxp.MatchString(name) + }, nil +} + +func BuildUsecaseFilterFn(usecases BackendConfigUsecases) BackendConfigFilterFn { + if usecases == FLAG_ANY { + return NoFilterFn + } + return func(name string, config *BackendConfig) bool { + if config == nil { + return false // TODO: Potentially make this a param, for now, no known usecase to include + } + return config.HasUsecases(usecases) + } +} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go index 45fe259e..7fe77d42 100644 --- a/core/config/backend_config_loader.go +++ b/core/config/backend_config_loader.go @@ -140,7 +140,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath } } - cfg.SetDefaults(opts...) + cfg.SetDefaults(append(opts, ModelPath(modelPath))...) 
return cfg, nil } @@ -201,6 +201,26 @@ func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { return res } +func (bcl *BackendConfigLoader) GetBackendConfigsByFilter(filter BackendConfigFilterFn) []BackendConfig { + bcl.Lock() + defer bcl.Unlock() + var res []BackendConfig + + if filter == nil { + filter = NoFilterFn + } + + for n, v := range bcl.configs { + if filter(n, &v) { + res = append(res, v) + } + } + + // TODO: I don't think this one needs to Sort on name... but we'll see what breaks. + + return res +} + func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) { bcl.Lock() defer bcl.Unlock() diff --git a/core/config/backend_config_test.go b/core/config/backend_config_test.go index da245933..e6a54b89 100644 --- a/core/config/backend_config_test.go +++ b/core/config/backend_config_test.go @@ -19,12 +19,17 @@ var _ = Describe("Test cases for config related functions", func() { `backend: "../foo-bar" name: "foo" parameters: - model: "foo-bar"`) + model: "foo-bar" +known_usecases: +- chat +- COMPLETION +`) Expect(err).ToNot(HaveOccurred()) config, err := readBackendConfigFromFile(tmp.Name()) Expect(err).To(BeNil()) Expect(config).ToNot(BeNil()) Expect(config.Validate()).To(BeFalse()) + Expect(config.KnownUsecases).ToNot(BeNil()) }) It("Test Validate", func() { tmp, err := os.CreateTemp("", "config.yaml") @@ -43,9 +48,9 @@ parameters: Expect(config.Name).To(Equal("bar-baz")) Expect(config.Validate()).To(BeTrue()) - // download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml + // download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml httpClient := http.Client{} - resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml") + resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml") Expect(err).To(BeNil()) defer resp.Body.Close() tmp, err = os.CreateTemp("", "config.yaml") @@ -61,4 +66,99 @@ parameters: Expect(config.Validate()).To(BeTrue()) }) }) + It("Properly handles backend usecase matching", func() { + + a := BackendConfig{ + Name: "a", + } + Expect(a.HasUsecases(FLAG_ANY)).To(BeTrue()) // FLAG_ANY just means the config _exists_ essentially. 
+ + b := BackendConfig{ + Name: "b", + Backend: "stablediffusion", + } + Expect(b.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(b.HasUsecases(FLAG_IMAGE)).To(BeTrue()) + Expect(b.HasUsecases(FLAG_CHAT)).To(BeFalse()) + + c := BackendConfig{ + Name: "c", + Backend: "llama-cpp", + TemplateConfig: TemplateConfig{ + Chat: "chat", + }, + } + Expect(c.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(c.HasUsecases(FLAG_IMAGE)).To(BeFalse()) + Expect(c.HasUsecases(FLAG_COMPLETION)).To(BeFalse()) + Expect(c.HasUsecases(FLAG_CHAT)).To(BeTrue()) + + d := BackendConfig{ + Name: "d", + Backend: "llama-cpp", + TemplateConfig: TemplateConfig{ + Chat: "chat", + Completion: "completion", + }, + } + Expect(d.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(d.HasUsecases(FLAG_IMAGE)).To(BeFalse()) + Expect(d.HasUsecases(FLAG_COMPLETION)).To(BeTrue()) + Expect(d.HasUsecases(FLAG_CHAT)).To(BeTrue()) + + trueValue := true + e := BackendConfig{ + Name: "e", + Backend: "llama-cpp", + TemplateConfig: TemplateConfig{ + Completion: "completion", + }, + Embeddings: &trueValue, + } + + Expect(e.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(e.HasUsecases(FLAG_IMAGE)).To(BeFalse()) + Expect(e.HasUsecases(FLAG_COMPLETION)).To(BeTrue()) + Expect(e.HasUsecases(FLAG_CHAT)).To(BeFalse()) + Expect(e.HasUsecases(FLAG_EMBEDDINGS)).To(BeTrue()) + + f := BackendConfig{ + Name: "f", + Backend: "piper", + } + Expect(f.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(f.HasUsecases(FLAG_TTS)).To(BeTrue()) + Expect(f.HasUsecases(FLAG_CHAT)).To(BeFalse()) + + g := BackendConfig{ + Name: "g", + Backend: "whisper", + } + Expect(g.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(g.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue()) + Expect(g.HasUsecases(FLAG_TTS)).To(BeFalse()) + + h := BackendConfig{ + Name: "h", + Backend: "transformers-musicgen", + } + Expect(h.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(h.HasUsecases(FLAG_TRANSCRIPT)).To(BeFalse()) + Expect(h.HasUsecases(FLAG_TTS)).To(BeTrue()) + Expect(h.HasUsecases(FLAG_SOUND_GENERATION)).To(BeTrue()) + + knownUsecases := FLAG_CHAT | FLAG_COMPLETION + i := BackendConfig{ + Name: "i", + Backend: "whisper", + // Earlier test checks parsing, this just needs to set final values + KnownUsecases: &knownUsecases, + } + Expect(i.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(i.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue()) + Expect(i.HasUsecases(FLAG_TTS)).To(BeFalse()) + Expect(i.HasUsecases(FLAG_COMPLETION)).To(BeTrue()) + Expect(i.HasUsecases(FLAG_CHAT)).To(BeTrue()) + + }) }) diff --git a/core/config/config_test.go b/core/config/config_test.go index 5122c907..85f18eae 100644 --- a/core/config/config_test.go +++ b/core/config/config_test.go @@ -48,5 +48,66 @@ var _ = Describe("Test cases for config related functions", func() { // config should includes whisper-1 models's api.config Expect(loadedModelNames).To(ContainElements("whisper-1")) }) + + It("Test new loadconfig", func() { + + bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH")) + err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH")) + Expect(err).To(BeNil()) + configs := bcl.GetAllBackendConfigs() + loadedModelNames := []string{} + for _, v := range configs { + loadedModelNames = append(loadedModelNames, v.Name) + } + Expect(configs).ToNot(BeNil()) + totalModels := len(loadedModelNames) + + Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001")) + + // config should includes text-embedding-ada-002 models's api.config + Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002")) + + // config should includes rwkv_test 
models's api.config + Expect(loadedModelNames).To(ContainElements("rwkv_test")) + + // config should includes whisper-1 models's api.config + Expect(loadedModelNames).To(ContainElements("whisper-1")) + + // create a temp directory and store a temporary model + tmpdir, err := os.MkdirTemp("", "test") + Expect(err).ToNot(HaveOccurred()) + defer os.RemoveAll(tmpdir) + + // create a temporary model + model := `name: "test-model" +description: "test model" +options: +- foo +- bar +- baz +` + modelFile := tmpdir + "/test-model.yaml" + err = os.WriteFile(modelFile, []byte(model), 0644) + Expect(err).ToNot(HaveOccurred()) + + err = bcl.LoadBackendConfigsFromPath(tmpdir) + Expect(err).ToNot(HaveOccurred()) + + configs = bcl.GetAllBackendConfigs() + Expect(len(configs)).ToNot(Equal(totalModels)) + + loadedModelNames = []string{} + var testModel BackendConfig + for _, v := range configs { + loadedModelNames = append(loadedModelNames, v.Name) + if v.Name == "test-model" { + testModel = v + } + } + Expect(loadedModelNames).To(ContainElements("test-model")) + Expect(testModel.Description).To(Equal("test model")) + Expect(testModel.Options).To(ContainElements("foo", "bar", "baz")) + + }) }) }) diff --git a/core/config/guesser.go b/core/config/guesser.go index b63dd051..f5627461 100644 --- a/core/config/guesser.go +++ b/core/config/guesser.go @@ -26,14 +26,14 @@ const ( type settingsConfig struct { StopWords []string TemplateConfig TemplateConfig - RepeatPenalty float64 + RepeatPenalty float64 } // default settings to adopt with a given model family var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{ Gemma: { RepeatPenalty: 1.0, - StopWords: []string{"<|im_end|>", "", ""}, + StopWords: []string{"<|im_end|>", "", ""}, TemplateConfig: TemplateConfig{ Chat: "{{.Input }}\nmodel\n", ChatMessage: "{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}", @@ -200,6 +200,18 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) { } else { log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family") } + + if cfg.HasTemplate() { + return + } + + // identify from well known templates first, otherwise use the raw jinja template + chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template") + if found { + // try to use the jinja template + cfg.TemplateConfig.JinjaTemplate = true + cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString() + } } func identifyFamily(f *gguf.GGUFFile) familyType { diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go index 6ced6244..3a60e618 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -132,7 +132,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { var refFile string uri := downloader.URI(url) - err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { + err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -156,7 +156,7 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, } uri := downloader.URI(gallery.URL) - err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { + err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err 
!= nil { diff --git a/core/gallery/models.go b/core/gallery/models.go index dec6312e..58f1963a 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -69,7 +69,7 @@ type PromptTemplate struct { func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { var config Config uri := downloader.URI(url) - err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { + err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { diff --git a/core/gallery/models_test.go b/core/gallery/models_test.go index 5217253f..ef4faed8 100644 --- a/core/gallery/models_test.go +++ b/core/gallery/models_test.go @@ -12,6 +12,8 @@ import ( "gopkg.in/yaml.v3" ) +const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` + var _ = Describe("Model test", func() { Context("Downloading", func() { @@ -46,8 +48,10 @@ var _ = Describe("Model test", func() { defer os.RemoveAll(tempdir) gallery := []GalleryModel{{ - Name: "bert", - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + Metadata: Metadata{ + Name: "bert", + URL: bertEmbeddingsURL, + }, }} out, err := yaml.Marshal(gallery) Expect(err).ToNot(HaveOccurred()) @@ -66,7 +70,7 @@ var _ = Describe("Model test", func() { Expect(err).ToNot(HaveOccurred()) Expect(len(models)).To(Equal(1)) Expect(models[0].Name).To(Equal("bert")) - Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml")) + Expect(models[0].URL).To(Equal(bertEmbeddingsURL)) Expect(models[0].Installed).To(BeFalse()) err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true) @@ -78,7 +82,7 @@ var _ = Describe("Model test", func() { content := map[string]interface{}{} err = yaml.Unmarshal(dat, &content) Expect(err).ToNot(HaveOccurred()) - Expect(content["backend"]).To(Equal("bert-embeddings")) + Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) models, err = AvailableGalleryModels(galleries, tempdir) Expect(err).ToNot(HaveOccurred()) diff --git a/core/gallery/request.go b/core/gallery/request.go index eec764c1..72d078a1 100644 --- a/core/gallery/request.go +++ b/core/gallery/request.go @@ -11,6 +11,14 @@ import ( // It is used to install the model by resolving the URL and downloading the files. // The other fields are used to override the configuration of the model. type GalleryModel struct { + Metadata `json:",inline" yaml:",inline"` + // config_file is read in the situation where URL is blank - and therefore this is a base config. 
+ ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"` + // Overrides are used to override the configuration of the model located at URL + Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"` +} + +type Metadata struct { URL string `json:"url,omitempty" yaml:"url,omitempty"` Name string `json:"name,omitempty" yaml:"name,omitempty"` Description string `json:"description,omitempty" yaml:"description,omitempty"` @@ -18,10 +26,6 @@ type GalleryModel struct { URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"` Icon string `json:"icon,omitempty" yaml:"icon,omitempty"` Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"` - // config_file is read in the situation where URL is blank - and therefore this is a base config. - ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"` - // Overrides are used to override the configuration of the model located at URL - Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"` // AdditionalFiles are used to add additional files to the model AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"` // Gallery is a reference to the gallery which contains the model diff --git a/core/gallery/request_test.go b/core/gallery/request_test.go index 23281cc6..ed07f474 100644 --- a/core/gallery/request_test.go +++ b/core/gallery/request_test.go @@ -9,7 +9,11 @@ import ( var _ = Describe("Gallery API tests", func() { Context("requests", func() { It("parses github with a branch", func() { - req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"} + req := GalleryModel{ + Metadata: Metadata{ + URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main", + }, + } e, err := GetGalleryConfigFromURL(req.URL, "") Expect(err).ToNot(HaveOccurred()) Expect(e.Name).To(Equal("gpt4all-j")) diff --git a/core/http/app.go b/core/http/app.go index fa9cd866..d1e80f8d 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -14,10 +14,9 @@ import ( "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/http/routes" - "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/model" "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" @@ -31,24 +30,6 @@ import ( "github.com/rs/zerolog/log" ) -func readAuthHeader(c *fiber.Ctx) string { - authHeader := c.Get("Authorization") - - // elevenlabs - xApiKey := c.Get("xi-api-key") - if xApiKey != "" { - authHeader = "Bearer " + xApiKey - } - - // anthropic - xApiKey = c.Get("x-api-key") - if xApiKey != "" { - authHeader = "Bearer " + xApiKey - } - - return authHeader -} - // Embed a directory // //go:embed static/* @@ -67,18 +48,18 @@ var embedDirStatic embed.FS // @in header // @name Authorization -func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { +func API(application *application.Application) (*fiber.App, error) { fiberCfg := fiber.Config{ Views: renderEngine(), - BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB // We disable the Fiber startup message as it does not conform to structured logging. 
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though DisableStartupMessage: true, // Override default error handler } - if !appConfig.OpaqueErrors { + if !application.ApplicationConfig().OpaqueErrors { // Normally, return errors as JSON responses fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -104,9 +85,19 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi } } - app := fiber.New(fiberCfg) + router := fiber.New(fiberCfg) - app.Hooks().OnListen(func(listenData fiber.ListenData) error { + router.Use(middleware.StripPathPrefix()) + + if application.ApplicationConfig().MachineTag != "" { + router.Use(func(c *fiber.Ctx) error { + c.Response().Header.Set("Machine-Tag", application.ApplicationConfig().MachineTag) + + return c.Next() + }) + } + + router.Hooks().OnListen(func(listenData fiber.ListenData) error { scheme := "http" if listenData.TLS { scheme = "https" @@ -117,77 +108,82 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Have Fiber use zerolog like the rest of the application rather than it's built-in logger logger := log.Logger - app.Use(fiberzerolog.New(fiberzerolog.Config{ + router.Use(fiberzerolog.New(fiberzerolog.Config{ Logger: &logger, })) // Default middleware config - if !appConfig.Debug { - app.Use(recover.New()) + if !application.ApplicationConfig().Debug { + router.Use(recover.New()) } - metricsService, err := services.NewLocalAIMetricsService() - if err != nil { - return nil, err - } + if !application.ApplicationConfig().DisableMetrics { + metricsService, err := services.NewLocalAIMetricsService() + if err != nil { + return nil, err + } - if metricsService != nil { - app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService)) - app.Hooks().OnShutdown(func() error { - return metricsService.Shutdown() - }) - } + if metricsService != nil { + router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService)) + router.Hooks().OnShutdown(func() error { + return metricsService.Shutdown() + }) + } - kaConfig, err := middleware.GetKeyAuthConfig(appConfig) + } + // Health Checks should always be exempt from auth, so register these first + routes.HealthRoutes(router) + + kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig()) if err != nil || kaConfig == nil { return nil, fmt.Errorf("failed to create key auth config: %w", err) } // Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration - app.Use(v2keyauth.New(*kaConfig)) + router.Use(v2keyauth.New(*kaConfig)) - if appConfig.CORS { + if application.ApplicationConfig().CORS { var c func(ctx *fiber.Ctx) error - if appConfig.CORSAllowOrigins == "" { + if application.ApplicationConfig().CORSAllowOrigins == "" { c = cors.New() } else { - c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins}) + c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins}) } - app.Use(c) + router.Use(c) } - if appConfig.CSRF { + if application.ApplicationConfig().CSRF { log.Debug().Msg("Enabling CSRF middleware. 
Tokens are now required for state-modifying requests") - app.Use(csrf.New()) + router.Use(csrf.New()) } // Load config jsons - utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) + utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) + utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) + utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - galleryService := services.NewGalleryService(appConfig) - galleryService.Start(appConfig.Context, cl) + galleryService := services.NewGalleryService(application.ApplicationConfig()) + galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader()) - routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig) - routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService) - routes.RegisterOpenAIRoutes(app, cl, ml, appConfig) - if !appConfig.DisableWebUI { - routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService) + routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) + routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService) + routes.RegisterOpenAIRoutes(router, application) + if !application.ApplicationConfig().DisableWebUI { + routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService) } - routes.RegisterJINARoutes(app, cl, ml, appConfig) + routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) httpFS := http.FS(embedDirStatic) - app.Use(favicon.New(favicon.Config{ + router.Use(favicon.New(favicon.Config{ URL: "/favicon.ico", FileSystem: httpFS, File: "static/favicon.ico", })) - app.Use("/static", filesystem.New(filesystem.Config{ + router.Use("/static", filesystem.New(filesystem.Config{ Root: httpFS, PathPrefix: "static", Browse: true, @@ -195,7 +191,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Define a custom 404 handler // Note: keep this at the bottom! - app.Use(notFoundHandler) + router.Use(notFoundHandler) - return app, nil + return router, nil } diff --git a/core/http/app_test.go b/core/http/app_test.go index 86fe7fdd..ca7a2eaa 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -5,7 +5,6 @@ import ( "context" "embed" "encoding/json" - "errors" "fmt" "io" "net/http" @@ -13,15 +12,14 @@ import ( "path/filepath" "runtime" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/config" . "github.com/mudler/LocalAI/core/http" "github.com/mudler/LocalAI/core/schema" - "github.com/mudler/LocalAI/core/startup" "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/pkg/downloader" - "github.com/mudler/LocalAI/pkg/model" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "gopkg.in/yaml.v3" @@ -31,6 +29,9 @@ import ( "github.com/sashabaranov/go-openai/jsonschema" ) +const apiKey = "joshua" +const bearerKey = "Bearer " + apiKey + const testPrompt = `### System: You are an AI assistant that follows instruction extremely well. Help as much as you can. @@ -50,11 +51,19 @@ type modelApplyRequest struct { func getModelStatus(url string) (response map[string]interface{}) { // Create the HTTP request - resp, err := http.Get(url) + req, err := http.NewRequest("GET", url, nil) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) if err != nil { fmt.Println("Error creating request:", err) return } + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + fmt.Println("Error sending request:", err) + return + } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) @@ -72,14 +81,15 @@ func getModelStatus(url string) (response map[string]interface{}) { return } -func getModels(url string) (response []gallery.GalleryModel) { +func getModels(url string) ([]gallery.GalleryModel, error) { + response := []gallery.GalleryModel{} uri := downloader.URI(url) // TODO: No tests currently seem to exercise file:// urls. Fix? - uri.DownloadAndUnmarshal("", func(url string, i []byte) error { + err := uri.DownloadWithAuthorizationAndCallback("", bearerKey, func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) - return + return response, err } func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) { @@ -101,6 +111,7 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[ return } req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) // Make the request client := &http.Client{} @@ -140,6 +151,7 @@ func postRequestJSON[B any](url string, bodyJson *B) error { } req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) client := &http.Client{} resp, err := client.Do(req) @@ -175,6 +187,7 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson * } req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) client := &http.Client{} resp, err := client.Do(req) @@ -195,6 +208,62 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson * return json.Unmarshal(body, respJson) } +func postInvalidRequest(url string) (error, int) { + + req, err := http.NewRequest("POST", url, bytes.NewBufferString("invalid request")) + if err != nil { + return err, -1 + } + + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err, -1 + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err, -1 + } + + if resp.StatusCode < 200 || resp.StatusCode >= 400 { + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)), resp.StatusCode + } + + return nil, resp.StatusCode +} + +func getRequest(url string, header http.Header) (error, int, []byte) { + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return err, -1, nil + } + + req.Header = header + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err, -1, nil + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err, -1, nil + } + + return 
nil, resp.StatusCode, body +} + +const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` + //go:embed backend-assets/* var backendAssets embed.FS @@ -207,9 +276,6 @@ var _ = Describe("API test", func() { var cancel context.CancelFunc var tmpdir string var modelDir string - var bcl *config.BackendConfigLoader - var ml *model.ModelLoader - var applicationConfig *config.ApplicationConfig commonOpts := []config.AppOption{ config.WithDebug(true), @@ -233,14 +299,18 @@ var _ = Describe("API test", func() { g := []gallery.GalleryModel{ { - Name: "bert", - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + Metadata: gallery.Metadata{ + Name: "bert", + URL: bertEmbeddingsURL, + }, }, { - Name: "bert2", - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", - Overrides: map[string]interface{}{"foo": "bar"}, - AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}}, + Metadata: gallery.Metadata{ + Name: "bert2", + URL: bertEmbeddingsURL, + AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}}, + }, + Overrides: map[string]interface{}{"foo": "bar"}, }, } out, err := yaml.Marshal(g) @@ -255,21 +325,22 @@ var _ = Describe("API test", func() { }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithGalleries(galleries), config.WithModelPath(modelDir), + config.WithApiKeys([]string{apiKey}), config.WithBackendAssets(backendAssets), config.WithBackendAssetsOutput(backendAssetsDir))...) 
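A note on the bootstrap path used by the updated test setup above: the (BackendConfigLoader, ModelLoader, ApplicationConfig) triplet returned by startup.Startup is replaced by a single *application.Application that is handed to http.API. A minimal sketch of that wiring, using only option names that appear in this diff (the model path, listen address and API key are illustrative):

```go
package main

import (
	"context"
	"log"

	"github.com/mudler/LocalAI/core/application"
	"github.com/mudler/LocalAI/core/config"
	localaihttp "github.com/mudler/LocalAI/core/http"
)

func main() {
	ctx := context.Background()

	// Build the application container once; it owns the backend/model loaders
	// that the HTTP layer previously received as separate arguments.
	app, err := application.New(
		config.WithContext(ctx),
		config.WithModelPath("./models"),              // illustrative path
		config.WithApiKeys([]string{"my-secret-key"}), // enables the bearer auth the tests now exercise
	)
	if err != nil {
		log.Fatal(err)
	}

	// API wires auth, metrics, CORS/CSRF and the new path-prefix middleware
	// around a Fiber router.
	router, err := localaihttp.API(app)
	if err != nil {
		log.Fatal(err)
	}

	log.Fatal(router.Listen("127.0.0.1:9090"))
}
```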
Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") - defaultConfig := openai.DefaultConfig("") + defaultConfig := openai.DefaultConfig(apiKey) defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" client2 = openaigo.NewClient("") @@ -295,10 +366,46 @@ var _ = Describe("API test", func() { Expect(err).To(HaveOccurred()) }) + Context("Auth Tests", func() { + It("Should fail if the api key is missing", func() { + err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available") + Expect(err).ToNot(BeNil()) + Expect(sc).To(Equal(401)) + }) + }) + + Context("URL routing Tests", func() { + It("Should support reverse-proxy when unauthenticated", func() { + + err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{ + "X-Forwarded-Proto": {"https"}, + "X-Forwarded-Host": {"example.org"}, + "X-Forwarded-Prefix": {"/myprefix/"}, + }) + Expect(err).To(BeNil(), "error") + Expect(sc).To(Equal(401), "status code") + Expect(string(body)).To(ContainSubstring(``), "body") + }) + + It("Should support reverse-proxy when authenticated", func() { + + err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{ + "Authorization": {bearerKey}, + "X-Forwarded-Proto": {"https"}, + "X-Forwarded-Host": {"example.org"}, + "X-Forwarded-Prefix": {"/myprefix/"}, + }) + Expect(err).To(BeNil(), "error") + Expect(sc).To(Equal(200), "status code") + Expect(string(body)).To(ContainSubstring(``), "body") + }) + }) + Context("Applying models", func() { It("applies models from a gallery", func() { - models := getModels("http://127.0.0.1:9090/models/available") + models, err := getModels("http://127.0.0.1:9090/models/available") + Expect(err).To(BeNil()) Expect(len(models)).To(Equal(2), fmt.Sprint(models)) Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models)) Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models)) @@ -328,10 +435,11 @@ var _ = Describe("API test", func() { content := map[string]interface{}{} err = yaml.Unmarshal(dat, &content) Expect(err).ToNot(HaveOccurred()) - Expect(content["backend"]).To(Equal("bert-embeddings")) + Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) Expect(content["foo"]).To(Equal("bar")) - models = getModels("http://127.0.0.1:9090/models/available") + models, err = getModels("http://127.0.0.1:9090/models/available") + Expect(err).To(BeNil()) Expect(len(models)).To(Equal(2), fmt.Sprint(models)) Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2"))) Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2"))) @@ -346,7 +454,7 @@ var _ = Describe("API test", func() { It("overrides models", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + URL: bertEmbeddingsURL, Name: "bert", Overrides: map[string]interface{}{ "backend": "llama", @@ -372,7 +480,7 @@ var _ = Describe("API test", func() { }) It("apply models from config", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -382,7 +490,7 @@ var _ 
= Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "900s", "10s").Should(Equal(true)) Eventually(func() []string { models, _ := client.ListModels(context.TODO()) @@ -395,7 +503,7 @@ var _ = Describe("API test", func() { }) It("apply models without overrides", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + URL: bertEmbeddingsURL, Name: "bert", Overrides: map[string]interface{}{}, }) @@ -415,7 +523,7 @@ var _ = Describe("API test", func() { content := map[string]interface{}{} err = yaml.Unmarshal(dat, &content) Expect(err).ToNot(HaveOccurred()) - Expect(content["backend"]).To(Equal("bert-embeddings")) + Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) }) It("runs openllama(llama-ggml backend)", Label("llama"), func() { @@ -483,7 +591,7 @@ var _ = Describe("API test", func() { var res map[string]string err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) + Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) @@ -496,7 +604,7 @@ var _ = Describe("API test", func() { modelName := "hermes-2-pro-mistral" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -506,7 +614,7 @@ var _ = Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "900s", "10s").Should(Equal(true)) By("testing chat") resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ @@ -583,9 +691,13 @@ var _ = Describe("API test", func() { Name: "model-gallery", URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml", }, + { + Name: "localai", + URL: "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml", + }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithAudioDir(tmpdir), @@ -596,7 +708,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -652,7 +764,7 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat))) - Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav")) + 
Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave"))) }) It("installs and is capable to generate images", Label("stablediffusion"), func() { if runtime.GOOS != "linux" { @@ -660,10 +772,8 @@ var _ = Describe("API test", func() { } response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ID: "model-gallery@stablediffusion", - Overrides: map[string]interface{}{ - "parameters": map[string]interface{}{"model": "stablediffusion_assets"}, - }, + ID: "localai@sd-1.5-ggml", + Name: "stablediffusion", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -674,14 +784,14 @@ var _ = Describe("API test", func() { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) fmt.Println(response) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "1200s", "10s").Should(Equal(true)) resp, err := http.Post( "http://127.0.0.1:9090/v1/images/generations", "application/json", bytes.NewBuffer([]byte(`{ - "prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text", - "mode": 2, "seed":9000, + "prompt": "a lovely cat", + "step": 1, "seed":9000, "size": "256x256", "n":2}`))) // The response should contain an URL Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) @@ -690,6 +800,7 @@ var _ = Describe("API test", func() { imgUrlResp := &schema.OpenAIResponse{} err = json.Unmarshal(dat, imgUrlResp) + Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat)) Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero())) imgUrl := imgUrlResp.Data[0].URL Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl) @@ -716,14 +827,14 @@ var _ = Describe("API test", func() { var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, - config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), + config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -750,7 +861,7 @@ var _ = Describe("API test", func() { It("returns the models list", func() { models, err := client.ListModels(context.TODO()) Expect(err).ToNot(HaveOccurred()) - Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? + Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8? 
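For reference, the slimmed-down image request used in the test above can be reproduced against a running instance. A rough sketch that posts the same JSON body shown in the diff (the address and API key are placeholders):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"

	"github.com/mudler/LocalAI/core/schema"
)

func main() {
	body := []byte(`{
	  "prompt": "a lovely cat",
	  "step": 1, "seed": 9000,
	  "size": "256x256", "n": 2}`)

	req, err := http.NewRequest("POST", "http://127.0.0.1:9090/v1/images/generations", bytes.NewBuffer(body))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer my-secret-key") // same bearer scheme the test helpers now set

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// The response follows the OpenAI schema; the test asserts that Data[0].URL
	// points back at the LocalAI instance.
	var out schema.OpenAIResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	if len(out.Data) > 0 {
		fmt.Println("image URL:", out.Data[0].URL)
	}
}
```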
}) It("can generate completions via ggml", func() { resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) @@ -810,8 +921,8 @@ var _ = Describe("API test", func() { }, ) Expect(err).ToNot(HaveOccurred(), err) - Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384)) - Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384)) + Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048)) + Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048)) sunEmbedding := resp.Data[0].Embedding resp2, err := client.CreateEmbeddings( @@ -855,71 +966,6 @@ var _ = Describe("API test", func() { }) }) - Context("backends", func() { - It("runs rwkv completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices) > 0).To(BeTrue()) - Expect(resp.Choices[0].Text).To(ContainSubstring("five")) - - stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{ - Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true, - }) - Expect(err).ToNot(HaveOccurred()) - defer stream.Close() - - tokens := 0 - text := "" - for { - response, err := stream.Recv() - if errors.Is(err, io.EOF) { - break - } - - Expect(err).ToNot(HaveOccurred()) - text += response.Choices[0].Text - tokens++ - } - Expect(text).ToNot(BeEmpty()) - Expect(text).To(ContainSubstring("five")) - Expect(tokens).ToNot(Or(Equal(1), Equal(0))) - }) - It("runs rwkv chat completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - resp, err := client.CreateChatCompletion(context.TODO(), - openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices) > 0).To(BeTrue()) - Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five"))) - - stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) - Expect(err).ToNot(HaveOccurred()) - defer stream.Close() - - tokens := 0 - text := "" - for { - response, err := stream.Recv() - if errors.Is(err, io.EOF) { - break - } - - Expect(err).ToNot(HaveOccurred()) - text += response.Choices[0].Delta.Content - tokens++ - } - Expect(text).ToNot(BeEmpty()) - Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five"))) - - Expect(tokens).ToNot(Or(Equal(1), Equal(0))) - }) - }) - // See tests/integration/stores_test Context("Stores", Label("stores"), func() { @@ -999,14 +1045,14 @@ var _ = Describe("API test", func() { c, cancel = context.WithCancel(context.Background()) var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithModelPath(modelPath), config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index 94059847..254f0704 100644 
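The core/http/ctx/fiber.go hunk that follows teaches ModelFromContext to also honour a ?model= query parameter and to list models through the new config.NoFilterFn / services.SKIP_IF_CONFIGURED arguments. A simplified, dependency-free sketch of the resulting precedence; the helper below is purely illustrative and not the real signature, and the model names are taken from the test fixtures:

```go
package main

import "fmt"

// resolveModel mirrors the order ModelFromContext now applies: the path parameter
// is taken first, a ?model= query parameter overrides it, a bearer token that names
// a model on disk is used when nothing else was given, and otherwise the first
// configured model is picked. The real code works on *fiber.Ctx and the loaders.
func resolveModel(pathParam, queryParam, bearer string, bearerIsModel bool, configured []string) string {
	model := pathParam
	if queryParam != "" {
		model = queryParam
	}
	if model == "" && bearerIsModel {
		return bearer
	}
	if model == "" && len(configured) > 0 {
		return configured[0]
	}
	return model
}

func main() {
	fmt.Println(resolveModel("", "bert", "", false, []string{"testmodel.ggml"})) // query parameter wins: bert
	fmt.Println(resolveModel("", "", "", false, []string{"testmodel.ggml"}))     // falls back to the first configured model
}
```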
--- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -19,14 +19,16 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo if ctx.Params("model") != "" { modelInput = ctx.Params("model") } - + if ctx.Query("model") != "" { + modelInput = ctx.Query("model") + } // Set model from bearer token, if available - bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ") + bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) // If no model was specified, take the first available if modelInput == "" && !bearerExists && firstModel { - models, _ := services.ListModels(cl, loader, "", true) + models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if len(models) > 0 { modelInput = models[0] log.Debug().Msgf("No model specified, using: %s", modelInput) diff --git a/core/http/elements/buttons.go b/core/http/elements/buttons.go new file mode 100644 index 00000000..2364a0b3 --- /dev/null +++ b/core/http/elements/buttons.go @@ -0,0 +1,97 @@ +package elements + +import ( + "strings" + + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/mudler/LocalAI/core/gallery" +) + +func installButton(galleryName string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "browse/install/model/" + galleryName, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-download pr-2", + }, + ), + elem.Text("Install"), + ) +} + +func reInstallButton(galleryName string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryName), + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "browse/install/model/" + galleryName, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-arrow-rotate-right pr-2", + }, + ), + elem.Text("Reinstall"), + ) +} + +func infoButton(m *gallery.GalleryModel) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-left inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 
focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "data-modal-target": modalName(m), + "data-modal-toggle": modalName(m), + }, + elem.P( + attrs.Props{ + "class": "flex items-center", + }, + elem.I( + attrs.Props{ + "class": "fas fa-info-circle pr-2", + }, + ), + elem.Text("Info"), + ), + ) +} + +func deleteButton(galleryID string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "hx-confirm": "Are you sure you wish to delete the model?", + "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryID), + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "browse/delete/model/" + galleryID, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-cancel pr-2", + }, + ), + elem.Text("Delete"), + ) +} + +// Javascript/HTMX doesn't like weird IDs +func dropBadChars(s string) string { + return strings.ReplaceAll(s, "@", "__") +} diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 91a12310..5ab68508 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -2,12 +2,11 @@ package elements import ( "fmt" - "strings" "github.com/chasefleming/elem-go" "github.com/chasefleming/elem-go/attrs" + "github.com/microcosm-cc/bluemonday" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" ) @@ -15,231 +14,6 @@ const ( noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" ) -func renderElements(n []elem.Node) string { - render := "" - for _, r := range n { - render += r.Render() - } - return render -} - -func DoneProgress(galleryID, text string, showDelete bool) string { - var modelName = galleryID - // Split by @ and grab the name - if strings.Contains(galleryID, "@") { - modelName = strings.Split(galleryID, "@")[1] - } - - return elem.Div( - attrs.Props{ - "id": "action-div-" + dropBadChars(galleryID), - }, - elem.H3( - attrs.Props{ - "role": "status", - "id": "pblabel", - "tabindex": "-1", - "autofocus": "", - }, - elem.Text(text), - ), - elem.If(showDelete, deleteButton(galleryID, modelName), reInstallButton(galleryID)), - ).Render() -} - -func ErrorProgress(err, galleryName string) string { - return elem.Div( - attrs.Props{}, - elem.H3( - attrs.Props{ - "role": "status", - "id": "pblabel", - "tabindex": "-1", - "autofocus": "", - }, - elem.Text("Error "+err), - ), - installButton(galleryName), - ).Render() -} - -func ProgressBar(progress string) string { - return elem.Div(attrs.Props{ - "class": "progress", - "role": "progressbar", - "aria-valuemin": "0", - "aria-valuemax": "100", - "aria-valuenow": "0", - "aria-labelledby": "pblabel", - }, - elem.Div(attrs.Props{ - "id": "pb", - "class": "progress-bar", - "style": "width:" + progress + "%", - }), - ).Render() -} - -func P2PNodeStats(nodes []p2p.NodeData) string { - /* -
-

Total Workers Detected: {{ len .Nodes }}

- {{ $online := 0 }} - {{ range .Nodes }} - {{ if .IsOnline }} - {{ $online = add $online 1 }} - {{ end }} - {{ end }} -

Total Online Workers: {{$online}}

-
- */ - - online := 0 - for _, n := range nodes { - if n.IsOnline() { - online++ - } - } - - class := "text-green-500" - if online == 0 { - class = "text-red-500" - } - /* - - */ - circle := elem.I(attrs.Props{ - "class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1", - }) - nodesElements := []elem.Node{ - elem.Span( - attrs.Props{ - "class": class, - }, - circle, - elem.Text(fmt.Sprintf("%d", online)), - ), - elem.Span( - attrs.Props{ - "class": "text-gray-200", - }, - elem.Text(fmt.Sprintf("/%d", len(nodes))), - ), - } - - return renderElements(nodesElements) -} - -func P2PNodeBoxes(nodes []p2p.NodeData) string { - /* -
-
- - {{.ID}} -
-

- Status: - - - {{ if .IsOnline }}Online{{ else }}Offline{{ end }} - -

-
- */ - - nodesElements := []elem.Node{} - - for _, n := range nodes { - - nodesElements = append(nodesElements, - elem.Div( - attrs.Props{ - "class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left", - }, - elem.P( - attrs.Props{ - "class": "text-sm text-gray-400 mt-2 flex", - }, - elem.I( - attrs.Props{ - "class": "fas fa-desktop text-gray-400 mr-2", - }, - ), - elem.Text("Name: "), - elem.Span( - attrs.Props{ - "class": "text-gray-200 font-semibold ml-2 mr-1", - }, - elem.Text(n.ID), - ), - elem.Text("Status: "), - elem.If( - n.IsOnline(), - elem.I( - attrs.Props{ - "class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1", - }, - ), - elem.I( - attrs.Props{ - "class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1", - }, - ), - ), - elem.If( - n.IsOnline(), - elem.Span( - attrs.Props{ - "class": "text-green-400", - }, - - elem.Text("Online"), - ), - elem.Span( - attrs.Props{ - "class": "text-red-400", - }, - elem.Text("Offline"), - ), - ), - ), - )) - } - - return renderElements(nodesElements) -} - -func StartProgressBar(uid, progress, text string) string { - if progress == "" { - progress = "0" - } - return elem.Div( - attrs.Props{ - "hx-trigger": "done", - "hx-get": "/browse/job/" + uid, - "hx-swap": "outerHTML", - "hx-target": "this", - }, - elem.H3( - attrs.Props{ - "role": "status", - "id": "pblabel", - "tabindex": "-1", - "autofocus": "", - }, - elem.Text(text), - elem.Div(attrs.Props{ - "hx-get": "/browse/job/progress/" + uid, - "hx-trigger": "every 600ms", - "hx-target": "this", - "hx-swap": "innerHTML", - }, - elem.Raw(ProgressBar(progress)), - ), - ), - ).Render() -} - func cardSpan(text, icon string) elem.Node { return elem.Span( attrs.Props{ @@ -249,9 +23,7 @@ func cardSpan(text, icon string) elem.Node { "class": icon + " pr-2", }), - elem.Text(text), - - //elem.Text(text), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ) } @@ -269,14 +41,13 @@ func searchableElement(text, icon string) elem.Node { attrs.Props{ "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2", }, - elem.A( attrs.Props{ // "name": "search", // "value": text, //"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", "href": "#!", - "hx-post": "/browse/search/models", + "hx-post": "browse/search/models", "hx-target": "#search-results", // TODO: this doesn't work // "hx-vals": `{ \"search\": \"` + text + `\" }`, @@ -285,15 +56,14 @@ func searchableElement(text, icon string) elem.Node { elem.I(attrs.Props{ "class": icon + " pr-2", }), - elem.Text(text), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ), ), - - //elem.Text(text), ) } -func link(text, url string) elem.Node { +/* +func buttonLink(text, url string) elem.Node { return elem.A( attrs.Props{ "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2", @@ -303,166 +73,258 @@ func link(text, url string) elem.Node { elem.I(attrs.Props{ "class": "fas fa-link pr-2", }), - elem.Text(text), - ) -} -func installButton(galleryName string) elem.Node { - return elem.Button( - attrs.Props{ - "data-twe-ripple-init": "", - "data-twe-ripple-color": "light", - "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 
focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", - "hx-swap": "outerHTML", - // post the Model ID as param - "hx-post": "/browse/install/model/" + galleryName, - }, - elem.I( - attrs.Props{ - "class": "fa-solid fa-download pr-2", - }, - ), - elem.Text("Install"), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ) } +*/ -func reInstallButton(galleryName string) elem.Node { - return elem.Button( +func link(text, url string) elem.Node { + return elem.A( attrs.Props{ - "data-twe-ripple-init": "", - "data-twe-ripple-color": "light", - "class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", - "hx-target": "#action-div-" + dropBadChars(galleryName), - "hx-swap": "outerHTML", - // post the Model ID as param - "hx-post": "/browse/install/model/" + galleryName, + "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", + "href": url, + "target": "_blank", }, - elem.I( - attrs.Props{ - "class": "fa-solid fa-arrow-rotate-right pr-2", - }, - ), - elem.Text("Reinstall"), + elem.I(attrs.Props{ + "class": "fas fa-link pr-2", + }), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ) } -func deleteButton(galleryID, modelName string) elem.Node { - return elem.Button( - attrs.Props{ - "data-twe-ripple-init": "", - "data-twe-ripple-color": "light", - "hx-confirm": "Are you sure you wish to delete the model?", - "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", - "hx-target": "#action-div-" + dropBadChars(galleryID), - "hx-swap": "outerHTML", - // post the Model ID as param - "hx-post": "/browse/delete/model/" + galleryID, - }, - elem.I( - attrs.Props{ - "class": "fa-solid fa-cancel pr-2", - }, - ), - elem.Text("Delete"), - ) -} - -// Javascript/HTMX doesn't like weird IDs -func dropBadChars(s string) string { - return strings.ReplaceAll(s, "@", "__") -} - type ProcessTracker interface { Exists(string) bool Get(string) string } -func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string { - modelsElements := []elem.Node{} - descriptionDiv := func(m *gallery.GalleryModel) elem.Node { - return elem.Div( - attrs.Props{ - "class": "p-6 text-surface dark:text-white", - }, - elem.H5( - attrs.Props{ - "class": "mb-2 text-xl font-bold leading-tight", - }, - elem.Text(m.Name), - ), - elem.P( - attrs.Props{ - "class": "mb-4 text-sm [&:not(:hover)]:truncate text-base", - }, - elem.Text(m.Description), - ), +func modalName(m *gallery.GalleryModel) string { + return m.Name + "-modal" +} + +func modelDescription(m *gallery.GalleryModel) 
elem.Node { + urls := []elem.Node{} + for _, url := range m.URLs { + urls = append(urls, + elem.Li(attrs.Props{}, link(url, url)), ) } - actionDiv := func(m *gallery.GalleryModel) elem.Node { - galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) - currentlyProcessing := processTracker.Exists(galleryID) - jobID := "" - isDeletionOp := false - if currentlyProcessing { - status := galleryService.GetStatus(galleryID) - if status != nil && status.Deletion { - isDeletionOp = true - } - jobID = processTracker.Get(galleryID) - // TODO: - // case not handled, if status == nil : "Waiting" - } - - nodes := []elem.Node{ - cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), - } - - if m.License != "" { - nodes = append(nodes, - cardSpan("License: "+m.License, "fas fa-book"), - ) - } - - tagsNodes := []elem.Node{} - for _, tag := range m.Tags { - tagsNodes = append(tagsNodes, - searchableElement(tag, "fas fa-tag"), - ) - } - - nodes = append(nodes, - elem.Div( - attrs.Props{ - "class": "flex flex-row flex-wrap content-center", - }, - tagsNodes..., - ), + tagsNodes := []elem.Node{} + for _, tag := range m.Tags { + tagsNodes = append(tagsNodes, + searchableElement(tag, "fas fa-tag"), ) + } - for i, url := range m.URLs { - nodes = append(nodes, - link("Link #"+fmt.Sprintf("%d", i+1), url), - ) - } - - progressMessage := "Installation" - if isDeletionOp { - progressMessage = "Deletion" - } - - return elem.Div( + return elem.Div( + attrs.Props{ + "class": "p-6 text-surface dark:text-white", + }, + elem.H5( attrs.Props{ - "class": "px-6 pt-4 pb-2", + "class": "mb-2 text-xl font-bold leading-tight", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)), + ), + elem.Div( // small description + attrs.Props{ + "class": "mb-4 text-sm truncate text-base", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)), + ), + + elem.Div( + attrs.Props{ + "id": modalName(m), + "tabindex": "-1", + "aria-hidden": "true", + "class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full", }, - elem.P( - attrs.Props{ - "class": "mb-4 text-base", - }, - nodes..., - ), elem.Div( attrs.Props{ - "id": "action-div-" + dropBadChars(galleryID), + "class": "relative p-4 w-full max-w-2xl max-h-full", + }, + elem.Div( + attrs.Props{ + "class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700", + }, + // header + elem.Div( + attrs.Props{ + "class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600", + }, + elem.H3( + attrs.Props{ + "class": "text-xl font-semibold text-gray-900 dark:text-white", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)), + ), + elem.Button( // close button + attrs.Props{ + "class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", + "data-modal-hide": modalName(m), + }, + elem.Raw( + ``, + ), + elem.Span( + attrs.Props{ + "class": "sr-only", + }, + elem.Text("Close modal"), + ), + ), + ), + // body + elem.Div( + attrs.Props{ + "class": "p-4 md:p-5 space-y-4", + }, + elem.Div( + attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.Img(attrs.Props{ + // "class": "rounded-t-lg object-fit object-center h-96", + "class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded", + "src": m.Icon, + "loading": "lazy", + }), + ), + elem.P( 
+ attrs.Props{ + "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)), + ), + elem.Hr( + attrs.Props{}, + ), + elem.P( + attrs.Props{ + "class": "text-sm font-semibold text-gray-900 dark:text-white", + }, + elem.Text("Links"), + ), + elem.Ul( + attrs.Props{}, + urls..., + ), + elem.If( + len(m.Tags) > 0, + elem.Div( + attrs.Props{}, + elem.P( + attrs.Props{ + "class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white", + }, + elem.Text("Tags"), + ), + elem.Div( + attrs.Props{ + "class": "flex flex-row flex-wrap content-center", + }, + tagsNodes..., + ), + ), + elem.Div(attrs.Props{}), + ), + ), + // Footer + elem.Div( + attrs.Props{ + "class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600", + }, + elem.Button( + attrs.Props{ + "data-modal-hide": modalName(m), + "class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700", + }, + elem.Text("Close"), + ), + ), + ), + ), + ), + ) +} + +func modelActionItems(m *gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) elem.Node { + galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) + currentlyProcessing := processTracker.Exists(galleryID) + jobID := "" + isDeletionOp := false + if currentlyProcessing { + status := galleryService.GetStatus(galleryID) + if status != nil && status.Deletion { + isDeletionOp = true + } + jobID = processTracker.Get(galleryID) + // TODO: + // case not handled, if status == nil : "Waiting" + } + + nodes := []elem.Node{ + cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), + } + + if m.License != "" { + nodes = append(nodes, + cardSpan("License: "+m.License, "fas fa-book"), + ) + } + /* + tagsNodes := []elem.Node{} + + for _, tag := range m.Tags { + tagsNodes = append(tagsNodes, + searchableElement(tag, "fas fa-tag"), + ) + } + + + nodes = append(nodes, + elem.Div( + attrs.Props{ + "class": "flex flex-row flex-wrap content-center", + }, + tagsNodes..., + ), + ) + + for i, url := range m.URLs { + nodes = append(nodes, + buttonLink("Link #"+fmt.Sprintf("%d", i+1), url), + ) + } + */ + + progressMessage := "Installation" + if isDeletionOp { + progressMessage = "Deletion" + } + + return elem.Div( + attrs.Props{ + "class": "px-6 pt-4 pb-2", + }, + elem.P( + attrs.Props{ + "class": "mb-4 text-base", + }, + nodes..., + ), + elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + "class": "flow-root", // To order buttons left and right + }, + infoButton(m), + elem.Div( + attrs.Props{ + "class": "float-right", }, elem.If( currentlyProcessing, @@ -473,14 +335,18 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g elem.Node(elem.Div( attrs.Props{}, reInstallButton(m.ID()), - deleteButton(m.ID(), m.Name), + deleteButton(m.ID()), )), installButton(m.ID()), ), ), ), - ) - } + ), + ) +} + +func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string { + modelsElements := []elem.Node{} for _, m := range models { elems := []elem.Node{} @@ -524,7 +390,10 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g )) } - elems = append(elems, 
descriptionDiv(m), actionDiv(m)) + elems = append(elems, + modelDescription(m), + modelActionItems(m, processTracker, galleryService), + ) modelsElements = append(modelsElements, elem.Div( attrs.Props{ diff --git a/core/http/elements/p2p.go b/core/http/elements/p2p.go new file mode 100644 index 00000000..7eb10df5 --- /dev/null +++ b/core/http/elements/p2p.go @@ -0,0 +1,147 @@ +package elements + +import ( + "fmt" + + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/microcosm-cc/bluemonday" + "github.com/mudler/LocalAI/core/p2p" +) + +func renderElements(n []elem.Node) string { + render := "" + for _, r := range n { + render += r.Render() + } + return render +} + +func P2PNodeStats(nodes []p2p.NodeData) string { + /* +
+

Total Workers Detected: {{ len .Nodes }}

+ {{ $online := 0 }} + {{ range .Nodes }} + {{ if .IsOnline }} + {{ $online = add $online 1 }} + {{ end }} + {{ end }} +

Total Online Workers: {{$online}}

+
+ */ + + online := 0 + for _, n := range nodes { + if n.IsOnline() { + online++ + } + } + + class := "text-green-500" + if online == 0 { + class = "text-red-500" + } + /* + + */ + circle := elem.I(attrs.Props{ + "class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1", + }) + nodesElements := []elem.Node{ + elem.Span( + attrs.Props{ + "class": class, + }, + circle, + elem.Text(fmt.Sprintf("%d", online)), + ), + elem.Span( + attrs.Props{ + "class": "text-gray-200", + }, + elem.Text(fmt.Sprintf("/%d", len(nodes))), + ), + } + + return renderElements(nodesElements) +} + +func P2PNodeBoxes(nodes []p2p.NodeData) string { + /* +
+
+ + {{.ID}} +
+

+ Status: + + + {{ if .IsOnline }}Online{{ else }}Offline{{ end }} + +

+
+ */ + + nodesElements := []elem.Node{} + + for _, n := range nodes { + + nodesElements = append(nodesElements, + elem.Div( + attrs.Props{ + "class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left", + }, + elem.P( + attrs.Props{ + "class": "text-sm text-gray-400 mt-2 flex", + }, + elem.I( + attrs.Props{ + "class": "fas fa-desktop text-gray-400 mr-2", + }, + ), + elem.Text("Name: "), + elem.Span( + attrs.Props{ + "class": "text-gray-200 font-semibold ml-2 mr-1", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(n.ID)), + ), + elem.Text("Status: "), + elem.If( + n.IsOnline(), + elem.I( + attrs.Props{ + "class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1", + }, + ), + elem.I( + attrs.Props{ + "class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1", + }, + ), + ), + elem.If( + n.IsOnline(), + elem.Span( + attrs.Props{ + "class": "text-green-400", + }, + + elem.Text("Online"), + ), + elem.Span( + attrs.Props{ + "class": "text-red-400", + }, + elem.Text("Offline"), + ), + ), + ), + )) + } + + return renderElements(nodesElements) +} diff --git a/core/http/elements/progressbar.go b/core/http/elements/progressbar.go new file mode 100644 index 00000000..7dc340b2 --- /dev/null +++ b/core/http/elements/progressbar.go @@ -0,0 +1,89 @@ +package elements + +import ( + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/microcosm-cc/bluemonday" +) + +func DoneProgress(galleryID, text string, showDelete bool) string { + return elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + }, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), + ), + elem.If(showDelete, deleteButton(galleryID), reInstallButton(galleryID)), + ).Render() +} + +func ErrorProgress(err, galleryName string) string { + return elem.Div( + attrs.Props{}, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Error "+bluemonday.StrictPolicy().Sanitize(err)), + ), + installButton(galleryName), + ).Render() +} + +func ProgressBar(progress string) string { + return elem.Div(attrs.Props{ + "class": "progress", + "role": "progressbar", + "aria-valuemin": "0", + "aria-valuemax": "100", + "aria-valuenow": "0", + "aria-labelledby": "pblabel", + }, + elem.Div(attrs.Props{ + "id": "pb", + "class": "progress-bar", + "style": "width:" + progress + "%", + }), + ).Render() +} + +func StartProgressBar(uid, progress, text string) string { + if progress == "" { + progress = "0" + } + return elem.Div( + attrs.Props{ + "hx-trigger": "done", + "hx-get": "browse/job/" + uid, + "hx-swap": "outerHTML", + "hx-target": "this", + }, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive + elem.Div(attrs.Props{ + "hx-get": "browse/job/progress/" + uid, + "hx-trigger": "every 600ms", + "hx-target": "this", + "hx-swap": "innerHTML", + }, + elem.Raw(ProgressBar(progress)), + ), + ), + ).Render() +} diff --git a/core/http/endpoints/elevenlabs/soundgeneration.go b/core/http/endpoints/elevenlabs/soundgeneration.go index 619544d8..345df35b 100644 --- a/core/http/endpoints/elevenlabs/soundgeneration.go +++ b/core/http/endpoints/elevenlabs/soundgeneration.go @@ -55,7 +55,7 @@ func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad } // TODO: Support uploading files? 
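The element helpers above now pass every gallery-supplied string (model names, descriptions, tags, progress labels, error text) through bluemonday's StrictPolicy before it reaches elem.Text. A small standalone illustration of what that policy does to untrusted metadata (the sample string is invented):

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	// StrictPolicy strips all HTML, so remotely-sourced gallery metadata cannot
	// inject markup or script into the HTMX views.
	policy := bluemonday.StrictPolicy()

	untrusted := `My <b>shiny</b> model <img src=x onerror=alert(1)>`
	fmt.Println(policy.Sanitize(untrusted))
}
```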
- filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg) + filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/explorer/dashboard.go b/core/http/endpoints/explorer/dashboard.go index 9c731d9a..3c896681 100644 --- a/core/http/endpoints/explorer/dashboard.go +++ b/core/http/endpoints/explorer/dashboard.go @@ -6,6 +6,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/internal" ) @@ -14,6 +15,7 @@ func Dashboard() func(*fiber.Ctx) error { summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), + "BaseURL": utils.BaseURL(c), } if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go index 04fdf031..58c3972d 100644 --- a/core/http/endpoints/jina/rerank.go +++ b/core/http/endpoints/jina/rerank.go @@ -45,13 +45,13 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a config.LoadOptionContextSize(appConfig.ContextSize), config.LoadOptionF16(appConfig.F16), ) - if err != nil { modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } else { modelFile = cfg.Model } + log.Debug().Msgf("Request for model: %s", modelFile) if input.Backend != "" { @@ -64,7 +64,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a Documents: req.Documents, } - results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg) + results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index fa11b5c3..a1b93ac3 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -28,7 +28,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct } } -// BackendMonitorEndpoint shuts down the specified backend +// BackendShutdownEndpoint shuts down the specified backend // @Summary Backend monitor endpoint // @Param request body schema.BackendMonitorRequest true "Backend statistics request" // @Router /backend/shutdown [post] diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 23c5d4b8..9dc99f5d 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -9,6 +9,7 @@ import ( "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" "github.com/rs/zerolog/log" @@ -82,7 +83,8 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe Galleries: mgs.galleries, ConfigURL: input.ConfigURL, } - return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + + return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) } } @@ -105,7 
+107,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib return err } - return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) } } @@ -115,19 +117,25 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib // @Router /models/available [get] func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries) models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath) if err != nil { return err } - log.Debug().Msgf("Models found from galleries: %+v", models) - for _, m := range models { - log.Debug().Msgf("Model found from galleries: %+v", m) + + log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries)) + + m := []gallery.Metadata{} + + for _, mm := range models { + m = append(m, mm.Metadata) } - dat, err := json.Marshal(models) + + log.Debug().Msgf("Models %#v", m) + + dat, err := json.Marshal(m) if err != nil { - return err + return fmt.Errorf("could not marshal models: %w", err) } return c.Send(dat) } diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go new file mode 100644 index 00000000..e0e6943f --- /dev/null +++ b/core/http/endpoints/localai/get_token_metrics.go @@ -0,0 +1,60 @@ +package localai + +import ( + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + fiberContext "github.com/mudler/LocalAI/core/http/ctx" + "github.com/mudler/LocalAI/core/schema" + "github.com/rs/zerolog/log" + + "github.com/mudler/LocalAI/pkg/model" +) + +// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID +// +// @Summary Get TokenMetrics for Active Slot. 
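+// The metrics are read from the backend that serves the requested model and returned as a JSON object.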
+// @Accept json
+// @Produce json
+// @Success 200 {object} map[string]interface{} "Response"
+// @Router /v1/tokenMetrics [get]
+// @Router /tokenMetrics [get]
+func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+
+		input := new(schema.TokenMetricsRequest)
+
+		// Get input data from the request body
+		if err := c.BodyParser(input); err != nil {
+			return err
+		}
+
+		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		}
+
+		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+			config.LoadOptionDebug(appConfig.Debug),
+			config.LoadOptionThreads(appConfig.Threads),
+			config.LoadOptionContextSize(appConfig.ContextSize),
+			config.LoadOptionF16(appConfig.F16),
+		)
+
+		if err != nil {
+			log.Err(err)
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		} else {
+			modelFile = cfg.Model
+		}
+		log.Debug().Msgf("Token Metrics for model: %s", modelFile)
+
+		response, err := backend.TokenMetrics(modelFile, ml, appConfig, *cfg)
+		if err != nil {
+			return err
+		}
+		return c.JSON(response)
+	}
+}
diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go
index 11704933..92d80a3a 100644
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@@ -17,12 +17,19 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
 	if err != nil {
 		return err
 	}
+	loadedModels := ml.ListModels()
 	for b := range appConfig.ExternalGRPCBackends {
 		availableBackends = append(availableBackends, b)
 	}
+
+	sysmodels := []schema.SysInfoModel{}
+	for _, m := range loadedModels {
+		sysmodels = append(sysmodels, schema.SysInfoModel{ID: m.ID})
+	}
 	return c.JSON(
 		schema.SystemInformationResponse{
 			Backends: availableBackends,
+			Models: sysmodels,
 		},
 	)
 }
diff --git a/core/http/endpoints/localai/tokenize.go b/core/http/endpoints/localai/tokenize.go
new file mode 100644
index 00000000..faa8a0a4
--- /dev/null
+++ b/core/http/endpoints/localai/tokenize.go
@@ -0,0 +1,57 @@
+package localai
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
+)
+
+// TokenizeEndpoint exposes a REST API to tokenize the content
+// @Summary Tokenize the input.
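+// A minimal request is a JSON body with "model" and "content" fields, e.g. (illustrative values,
+// assuming the default port and an installed model named "my-model"):
+//
+//	curl http://localhost:8080/v1/tokenize -H "Content-Type: application/json" \
+//	  -d '{"model": "my-model", "content": "Hello, world"}'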
+// @Param request body schema.TokenizeRequest true "Request" +// @Success 200 {object} schema.TokenizeResponse "Response" +// @Router /v1/tokenize [post] +func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + + input := new(schema.TokenizeRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + log.Err(err) + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model + } + log.Debug().Msgf("Request for model: %s", modelFile) + + tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig) + if err != nil { + return err + } + + return c.JSON(tokenResponse) + } +} diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index ca3f58bd..9116f9fa 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -9,19 +9,21 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/schema" "github.com/rs/zerolog/log" + + "github.com/mudler/LocalAI/pkg/utils" ) // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech -// @Summary Generates audio from the input text. -// @Accept json -// @Produce audio/x-wav -// @Param request body schema.TTSRequest true "query params" -// @Success 200 {string} binary "generated audio/wav file" -// @Router /v1/audio/speech [post] -// @Router /tts [post] +// +// @Summary Generates audio from the input text. 
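+// The generated file is passed through utils.AudioConvert to honour the requested output format
+// (input.Format) before it is returned to the caller (see the handler body below).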
+// @Accept json +// @Produce audio/x-wav +// @Param request body schema.TTSRequest true "query params" +// @Success 200 {string} binary "generated audio/wav file" +// @Router /v1/audio/speech [post] +// @Router /tts [post] func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - input := new(schema.TTSRequest) // Get input data from the request body @@ -67,6 +69,13 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi if err != nil { return err } + + // Convert generated file to target format + filePath, err = utils.AudioConvert(filePath, input.Format) + if err != nil { + return err + } + return c.Download(filePath) } } diff --git a/core/http/endpoints/localai/vad.go b/core/http/endpoints/localai/vad.go new file mode 100644 index 00000000..2ed6125c --- /dev/null +++ b/core/http/endpoints/localai/vad.go @@ -0,0 +1,67 @@ +package localai + +import ( + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + fiberContext "github.com/mudler/LocalAI/core/http/ctx" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/pkg/grpc/proto" + "github.com/mudler/LocalAI/pkg/model" + "github.com/rs/zerolog/log" +) + +// VADEndpoint is Voice-Activation-Detection endpoint +// @Summary Detect voice fragments in an audio stream +// @Accept json +// @Param request body schema.VADRequest true "query params" +// @Success 200 {object} proto.VADResponse "Response" +// @Router /vad [post] +func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(schema.VADRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + log.Err(err) + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model + } + log.Debug().Msgf("Request for model: %s", modelFile) + + opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile)) + + vadModel, err := ml.Load(opts...) 
+ if err != nil { + return err + } + req := proto.VADRequest{ + Audio: input.Audio, + } + resp, err := vadModel.VAD(c.Context(), &req) + if err != nil { + return err + } + + return c.JSON(resp) + } +} diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 396c4084..57cf8809 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -4,6 +4,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" @@ -13,15 +14,10 @@ import ( func WelcomeEndpoint(appConfig *config.ApplicationConfig, cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, _ := services.ListModels(cl, ml, "", true) backendConfigs := cl.GetAllBackendConfigs() - galleryConfigs := map[string]*gallery.Config{} - modelsWithBackendConfig := map[string]interface{}{} for _, m := range backendConfigs { - modelsWithBackendConfig[m.Name] = nil - cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) if err != nil { continue @@ -29,20 +25,15 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, galleryConfigs[m.Name] = cfg } + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) + // Get model statuses to display in the UI the operation in progress processingModels, taskTypes := modelStatus() - modelsWithoutConfig := []string{} - - for _, m := range models { - if _, ok := modelsWithBackendConfig[m]; !ok { - modelsWithoutConfig = append(modelsWithoutConfig, m) - } - } - summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), + "BaseURL": utils.BaseURL(c), "Models": modelsWithoutConfig, "ModelsConfig": backendConfigs, "GalleryConfig": galleryConfigs, diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index ff218730..1d83066a 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -10,6 +10,7 @@ import ( "time" "github.com/gofiber/fiber/v2" + "github.com/microcosm-cc/bluemonday" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" @@ -83,7 +84,7 @@ func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad if !modelExists(cl, ml, request.Model) { log.Warn().Msgf("Model: %s was not found in list of models.", request.Model) - return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found") + return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Model %q not found", request.Model))) } if request.Tools == nil { @@ -147,7 +148,7 @@ func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoade // Convert string limit to integer limit, err := strconv.Atoi(limitQuery) if err != nil { - return c.Status(http.StatusBadRequest).SendString(fmt.Sprintf("Invalid limit query value: %s", limitQuery)) + return c.Status(http.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Invalid limit query value: %s", limitQuery))) } // Sort assistants @@ -225,7 +226,7 @@ func filterAssistantsAfterID(assistants []Assistant, 
id string) []Assistant { func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) { found = false - models, err := services.ListModels(cl, ml, "", true) + models, err := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if err != nil { return } @@ -288,7 +289,7 @@ func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))) } } @@ -337,11 +338,11 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find file_id: %s", request.FileID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find file_id: %s", request.FileID))) } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find %q", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find %q", assistantID))) } } @@ -442,7 +443,7 @@ func ModifyAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad return c.Status(fiber.StatusOK).JSON(newAssistant) } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))) } } @@ -513,9 +514,9 @@ func GetAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoa if assistantFile.ID == fileId { return c.Status(fiber.StatusOK).JSON(assistantFile) } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId))) } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID))) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 8144bdcd..3b8d3056 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -14,6 +14,8 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/templates" + model "github.com/mudler/LocalAI/pkg/model" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" @@ -24,11 +26,11 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] -func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { +func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { var id, textContentToReturn string var created 
int - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { initialMessage := schema.OpenAIResponse{ ID: id, Created: created, @@ -38,18 +40,24 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } responses <- initialMessage - ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } + resp := schema.OpenAIResponse{ ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, + Usage: usage, } responses <- resp @@ -57,7 +65,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup }) close(responses) } - processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { result := "" _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { result += s @@ -88,6 +96,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup log.Error().Err(err).Msg("error handling question") return } + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := schema.OpenAIResponse{ ID: id, @@ -95,11 +112,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, + Usage: usage, } responses <- resp @@ -161,6 +174,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup textContentToReturn = "" id = uuid.New().String() created = int(time.Now().Unix()) + // Set CorrelationID + correlationID := c.Get("X-Correlation-ID") + if len(strings.TrimSpace(correlationID)) == 0 { + correlationID = id + } + c.Set("X-Correlation-ID", correlationID) + + // Opt-in extra usage flag + extraUsage := c.Get("Extra-Usage", "") != "" modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { @@ -288,148 +310,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // If we are using the tokenizer template, we don't need to process the messages // unless we are processing functions if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn { - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range input.Messages { - var content string - role := i.Role - - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := config.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := config.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" - - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if config.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: config.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(input.Messages) - 1), - Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. 
Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf - } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage - } - } - - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. - if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAnyRole(i.ToolCalls) - } - } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true - } - } - - mess = append(mess, content) - } - - joinCharacter := "\n" - if config.TemplateConfig.JoinChatMessagesByCharacter != nil { - joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter - } - - predInput = strings.Join(mess, joinCharacter) - log.Debug().Msgf("Prompt (before templating): %s", predInput) - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Chat != "" && !shouldUseFn { - templateFile = config.TemplateConfig.Chat - } - - if config.TemplateConfig.Functions != "" && shouldUseFn { - templateFile = config.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } + predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn) log.Debug().Msgf("Prompt (after templating): %s", predInput) - if shouldUseFn && config.Grammar != "" { + if config.Grammar != "" { log.Debug().Msgf("Grammar: %+v", config.Grammar) } } @@ -444,13 +328,14 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + c.Set("X-Correlation-ID", id) responses := make(chan schema.OpenAIResponse) if !shouldUseFn { - go process(predInput, input, config, ml, responses) + go process(predInput, input, config, ml, responses, extraUsage) } else { - go processTools(noActionName, predInput, input, config, ml, responses) + go processTools(noActionName, predInput, input, config, ml, responses, extraUsage) } 
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -578,6 +463,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if err != nil { return err } + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := &schema.OpenAIResponse{ ID: id, @@ -585,11 +479,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "chat.completion", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, + Usage: usage, } respData, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", respData) @@ -640,8 +530,16 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m for _, m := range input.Messages { images = append(images, m.StringImages...) } + videos := []string{} + for _, m := range input.Messages { + videos = append(videos, m.StringVideos...) + } + audios := []string{} + for _, m := range input.Messages { + audios = append(audios, m.StringAudios...) + } - predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) + predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil) if err != nil { log.Error().Err(err).Msg("model inference failed") return "", err diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index b087cc5f..a353a0a1 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -16,6 +16,7 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" model "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -25,12 +26,21 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] -func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { id := uuid.New().String() created := int(time.Now().Unix()) - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { + ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + 
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := schema.OpenAIResponse{ ID: id, Created: created, @@ -42,11 +52,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a }, }, Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, + Usage: usage, } log.Debug().Msgf("Sending goroutine: %s", s) @@ -57,6 +63,12 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a } return func(c *fiber.Ctx) error { + // Add Correlation + c.Set("X-Correlation-ID", id) + + // Opt-in extra usage flag + extraUsage := c.Get("Extra-Usage", "") != "" + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -92,17 +104,6 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a c.Set("Transfer-Encoding", "chunked") } - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Completion != "" { - templateFile = config.TemplateConfig.Completion - } - if input.Stream { if len(config.PromptStrings) > 1 { return errors.New("cannot handle more than 1 `PromptStrings` when Streaming") @@ -110,20 +111,18 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a predInput := config.PromptStrings[0] - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - Input: predInput, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{ + Input: predInput, + SystemPrompt: config.SystemPrompt, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) } responses := make(chan schema.OpenAIResponse) - go process(predInput, input, config, ml, responses) + go process(predInput, input, config, ml, responses, extraUsage) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -163,16 +162,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a totalTokenUsage := backend.TokenUsage{} for k, i := range config.PromptStrings { - if templateFile != "" { - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - Input: i, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + Input: i, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) } r, tokenUsage, err 
:= ComputeChoices( @@ -183,11 +179,20 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a return err } - totalTokenUsage.Prompt += tokenUsage.Prompt - totalTokenUsage.Completion += tokenUsage.Completion + totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration + totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing result = append(result, r...) } + usage := schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing + } resp := &schema.OpenAIResponse{ ID: id, @@ -195,11 +200,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, + Usage: usage, } jsonResult, _ := json.Marshal(resp) diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index 12fb4035..28a3597c 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" "github.com/mudler/LocalAI/core/schema" model "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/rs/zerolog/log" ) @@ -21,8 +22,12 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/edits [post] -func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + // Opt-in extra usage flag + extraUsage := c.Get("Extra-Usage", "") != "" + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -35,31 +40,18 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf log.Debug().Msgf("Parameter Config: %+v", config) - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Edit != "" { - templateFile = config.TemplateConfig.Edit - } - var result []schema.Choice totalTokenUsage := backend.TokenUsage{} for _, i := range config.InputStrings { - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ - Input: i, - Instruction: input.Instruction, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{ + Input: i, + Instruction: input.Instruction, + SystemPrompt: 
config.SystemPrompt, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) } r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { @@ -72,8 +64,20 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf totalTokenUsage.Prompt += tokenUsage.Prompt totalTokenUsage.Completion += tokenUsage.Completion + totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration + totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing + result = append(result, r...) } + usage := schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing + } id := uuid.New().String() created := int(time.Now().Unix()) @@ -83,11 +87,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "edit", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, + Usage: usage, } jsonResult, _ := json.Marshal(resp) diff --git a/core/http/endpoints/openai/files.go b/core/http/endpoints/openai/files.go index 903484b4..bc392e73 100644 --- a/core/http/endpoints/openai/files.go +++ b/core/http/endpoints/openai/files.go @@ -8,6 +8,7 @@ import ( "sync/atomic" "time" + "github.com/microcosm-cc/bluemonday" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" @@ -49,7 +50,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli err = c.SaveFile(file, savePath) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + bluemonday.StrictPolicy().Sanitize(err.Error())) } f := schema.File{ @@ -121,7 +122,7 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } return c.JSON(file) @@ -143,14 +144,14 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } err = os.Remove(filepath.Join(appConfig.UploadDir, file.Filename)) if err != nil { // If the file doesn't exist then we should just continue to remove it if !errors.Is(err, os.ErrNotExist) { - return c.Status(fiber.StatusInternalServerError).SendString(fmt.Sprintf("Unable to delete file: %s, %v", file.Filename, err)) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to delete file: %s, %v", file.Filename, err))) } } @@ -180,12 +181,12 @@ 
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config. return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } fileContents, err := os.ReadFile(filepath.Join(appConfig.UploadDir, file.Filename)) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } return c.Send(fileContents) diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 6c76ba84..bd3f0987 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon } if m == "" { - m = model.StableDiffusionBackend + m = "stablediffusion" } log.Debug().Msgf("Loading model: %+v", m) @@ -129,11 +129,14 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon switch config.Backend { case "stablediffusion": - config.Backend = model.StableDiffusionBackend - case "tinydream": - config.Backend = model.TinyDreamBackend + config.Backend = model.StableDiffusionGGMLBackend case "": - config.Backend = model.StableDiffusionBackend + config.Backend = model.StableDiffusionGGMLBackend + } + + if !strings.Contains(input.Size, "x") { + input.Size = "512x512" + log.Warn().Msgf("Invalid size, using default 512x512") } sizeParts := strings.Split(input.Size, "x") diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go index 4950ce20..f59e3b60 100644 --- a/core/http/endpoints/openai/inference.go +++ b/core/http/endpoints/openai/inference.go @@ -27,9 +27,17 @@ func ComputeChoices( for _, m := range req.Messages { images = append(images, m.StringImages...) } + videos := []string{} + for _, m := range req.Messages { + videos = append(videos, m.StringVideos...) + } + audios := []string{} + for _, m := range req.Messages { + audios = append(audios, m.StringAudios...) + } // get the model function to call for the result - predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) + predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback) if err != nil { return result, backend.TokenUsage{}, err } @@ -44,6 +52,8 @@ func ComputeChoices( tokenUsage.Prompt += prediction.Usage.Prompt tokenUsage.Completion += prediction.Usage.Completion + tokenUsage.TimingPromptProcessing += prediction.Usage.TimingPromptProcessing + tokenUsage.TimingTokenGeneration += prediction.Usage.TimingTokenGeneration finetunedResponse := backend.Finetune(*config, predInput, prediction.Response) cb(finetunedResponse, &result) diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index d446b100..9d21f8fe 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -12,38 +12,38 @@ import ( // @Summary List and describe the various models available in the API. 
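+// Example (illustrative): GET /v1/models?filter=llama&excludeConfigured=false also returns loose
+// model files that are not referenced by any configuration file.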
// @Success 200 {object} schema.ModelsDataResponse "Response" // @Router /v1/models [get] -func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { // If blank, no filter is applied. filter := c.Query("filter") // By default, exclude any loose files that are already referenced by a configuration file. - excludeConfigured := c.QueryBool("excludeConfigured", true) + var policy services.LooseFilePolicy + if c.QueryBool("excludeConfigured", true) { + policy = services.SKIP_IF_CONFIGURED + } else { + policy = services.ALWAYS_INCLUDE // This replicates current behavior. TODO: give more options to the user? + } - dataModels, err := modelList(bcl, ml, filter, excludeConfigured) + filterFn, err := config.BuildNameFilterFn(filter) if err != nil { return err } + + modelNames, err := services.ListModels(bcl, ml, filterFn, policy) + if err != nil { + return err + } + + // Map from a slice of names to a slice of OpenAIModel response objects + dataModels := []schema.OpenAIModel{} + for _, m := range modelNames { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } + return c.JSON(schema.ModelsDataResponse{ Object: "list", Data: dataModels, }) } } - -func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { - - models, err := services.ListModels(bcl, ml, filter, excludeConfigured) - if err != nil { - return nil, err - } - - dataModels := []schema.OpenAIModel{} - - // Then iterate through the loose files: - for _, m := range models { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } - - return dataModels, nil -} diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index a99ebea2..4eaeec24 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -4,17 +4,25 @@ import ( "context" "encoding/json" "fmt" + "strconv" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" fiberContext "github.com/mudler/LocalAI/core/http/ctx" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/mudler/LocalAI/pkg/utils" "github.com/rs/zerolog/log" ) +type correlationIDKeyType string + +// CorrelationIDKey to track request across process boundary +const CorrelationIDKey correlationIDKeyType = "correlationID" + func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { input := new(schema.OpenAIRequest) @@ -24,9 +32,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo } received, _ := json.Marshal(input) + // Extract or generate the correlation ID + correlationID := c.Get("X-Correlation-ID", uuid.New().String()) ctx, cancel := context.WithCancel(o.Context) - input.Context = ctx + // Add the correlation ID to the new context + ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID) + + input.Context = ctxWithCorrelationID input.Cancel = cancel log.Debug().Msgf("Request received: %s", string(received)) @@ -135,8 +148,12 @@ func updateRequestConfig(config 
*config.BackendConfig, input *schema.OpenAIReque } // Decode each request's message content - index := 0 + imgIndex, vidIndex, audioIndex := 0, 0, 0 for i, m := range input.Messages { + nrOfImgsInMessage := 0 + nrOfVideosInMessage := 0 + nrOfAudiosInMessage := 0 + switch content := m.Content.(type) { case string: input.Messages[i].StringContent = content @@ -144,22 +161,59 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque dat, _ := json.Marshal(content) c := []schema.Content{} json.Unmarshal(dat, &c) + + textContent := "" + // we will template this at the end + + CONTENT: for _, pp := range c { - if pp.Type == "text" { - input.Messages[i].StringContent = pp.Text - } else if pp.Type == "image_url" { - // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64: - base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL) - if err == nil { - input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff - // set a placeholder for each image - input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent - index++ - } else { - log.Error().Msgf("Failed encoding image: %s", err) + switch pp.Type { + case "text": + textContent += pp.Text + //input.Messages[i].StringContent = pp.Text + case "video", "video_url": + // Decode content as base64 either if it's an URL or base64 text + base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL) + if err != nil { + log.Error().Msgf("Failed encoding video: %s", err) + continue CONTENT } + input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff + vidIndex++ + nrOfVideosInMessage++ + case "audio_url", "audio": + // Decode content as base64 either if it's an URL or base64 text + base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL) + if err != nil { + log.Error().Msgf("Failed encoding image: %s", err) + continue CONTENT + } + input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff + audioIndex++ + nrOfAudiosInMessage++ + case "image_url", "image": + // Decode content as base64 either if it's an URL or base64 text + base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL) + if err != nil { + log.Error().Msgf("Failed encoding image: %s", err) + continue CONTENT + } + + input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff + + imgIndex++ + nrOfImgsInMessage++ } } + + input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{ + TotalImages: imgIndex, + TotalVideos: vidIndex, + TotalAudios: audioIndex, + ImagesInMessage: nrOfImgsInMessage, + VideosInMessage: nrOfVideosInMessage, + AudiosInMessage: nrOfAudiosInMessage, + }, textContent) } } @@ -243,6 +297,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque } } } + + // If a quality was defined as number, convert it to step + if input.Quality != "" { + q, err := strconv.Atoi(input.Quality) + if err == nil { + config.Step = q + } + } } func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) { @@ -251,7 +313,6 @@ func 
mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c config.LoadOptionThreads(threads), config.LoadOptionContextSize(ctx), config.LoadOptionF16(f16), - config.ModelPath(loader.ModelPath), ) // Set the parameters for the language model prediction diff --git a/core/http/explorer.go b/core/http/explorer.go index bdcb93b1..36609add 100644 --- a/core/http/explorer.go +++ b/core/http/explorer.go @@ -7,6 +7,7 @@ import ( "github.com/gofiber/fiber/v2/middleware/favicon" "github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/http/routes" ) @@ -22,6 +23,7 @@ func Explorer(db *explorer.Database) *fiber.App { app := fiber.New(fiberCfg) + app.Use(middleware.StripPathPrefix()) routes.RegisterExplorerRoutes(app, db) httpFS := http.FS(embedDirStatic) diff --git a/core/http/middleware/auth.go b/core/http/middleware/auth.go index bc8bcf80..23141d4c 100644 --- a/core/http/middleware/auth.go +++ b/core/http/middleware/auth.go @@ -1,93 +1,98 @@ -package middleware - -import ( - "crypto/subtle" - "errors" - - "github.com/dave-gray101/v2keyauth" - "github.com/gofiber/fiber/v2" - "github.com/gofiber/fiber/v2/middleware/keyauth" - "github.com/mudler/LocalAI/core/config" -) - -// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware -// Currently this requires an upstream patch - and feature patches are no longer accepted to v2 -// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate. - -func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) { - customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme) - if err != nil { - return nil, err - } - - return &v2keyauth.Config{ - CustomKeyLookup: customLookup, - Next: getApiKeyRequiredFilterFunction(applicationConfig), - Validator: getApiKeyValidationFunction(applicationConfig), - ErrorHandler: getApiKeyErrorHandler(applicationConfig), - AuthScheme: "Bearer", - }, nil -} - -func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler { - return func(ctx *fiber.Ctx, err error) error { - if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) { - if len(applicationConfig.ApiKeys) == 0 { - return ctx.Next() // if no keys are set up, any error we get here is not an error. 
- } - if applicationConfig.OpaqueErrors { - return ctx.SendStatus(403) - } - } - if applicationConfig.OpaqueErrors { - return ctx.SendStatus(500) - } - return err - } -} - -func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) { - - if applicationConfig.UseSubtleKeyComparison { - return func(ctx *fiber.Ctx, apiKey string) (bool, error) { - if len(applicationConfig.ApiKeys) == 0 { - return true, nil // If no keys are setup, accept everything - } - for _, validKey := range applicationConfig.ApiKeys { - if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 { - return true, nil - } - } - return false, v2keyauth.ErrMissingOrMalformedAPIKey - } - } - - return func(ctx *fiber.Ctx, apiKey string) (bool, error) { - if len(applicationConfig.ApiKeys) == 0 { - return true, nil // If no keys are setup, accept everything - } - for _, validKey := range applicationConfig.ApiKeys { - if apiKey == validKey { - return true, nil - } - } - return false, v2keyauth.ErrMissingOrMalformedAPIKey - } -} - -func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool { - if applicationConfig.DisableApiKeyRequirementForHttpGet { - return func(c *fiber.Ctx) bool { - if c.Method() != "GET" { - return false - } - for _, rx := range applicationConfig.HttpGetExemptedEndpoints { - if rx.MatchString(c.Path()) { - return true - } - } - return false - } - } - return func(c *fiber.Ctx) bool { return false } -} \ No newline at end of file +package middleware + +import ( + "crypto/subtle" + "errors" + + "github.com/dave-gray101/v2keyauth" + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/middleware/keyauth" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/http/utils" +) + +// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware +// Currently this requires an upstream patch - and feature patches are no longer accepted to v2 +// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate. + +func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) { + customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key", "cookie:token"}, keyauth.ConfigDefault.AuthScheme) + if err != nil { + return nil, err + } + + return &v2keyauth.Config{ + CustomKeyLookup: customLookup, + Next: getApiKeyRequiredFilterFunction(applicationConfig), + Validator: getApiKeyValidationFunction(applicationConfig), + ErrorHandler: getApiKeyErrorHandler(applicationConfig), + AuthScheme: "Bearer", + }, nil +} + +func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler { + return func(ctx *fiber.Ctx, err error) error { + if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) { + if len(applicationConfig.ApiKeys) == 0 { + return ctx.Next() // if no keys are set up, any error we get here is not an error. 
+ } + ctx.Set("WWW-Authenticate", "Bearer") + if applicationConfig.OpaqueErrors { + return ctx.SendStatus(401) + } + return ctx.Status(401).Render("views/login", fiber.Map{ + "BaseURL": utils.BaseURL(ctx), + }) + } + if applicationConfig.OpaqueErrors { + return ctx.SendStatus(500) + } + return err + } +} + +func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) { + + if applicationConfig.UseSubtleKeyComparison { + return func(ctx *fiber.Ctx, apiKey string) (bool, error) { + if len(applicationConfig.ApiKeys) == 0 { + return true, nil // If no keys are setup, accept everything + } + for _, validKey := range applicationConfig.ApiKeys { + if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 { + return true, nil + } + } + return false, v2keyauth.ErrMissingOrMalformedAPIKey + } + } + + return func(ctx *fiber.Ctx, apiKey string) (bool, error) { + if len(applicationConfig.ApiKeys) == 0 { + return true, nil // If no keys are setup, accept everything + } + for _, validKey := range applicationConfig.ApiKeys { + if apiKey == validKey { + return true, nil + } + } + return false, v2keyauth.ErrMissingOrMalformedAPIKey + } +} + +func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool { + if applicationConfig.DisableApiKeyRequirementForHttpGet { + return func(c *fiber.Ctx) bool { + if c.Method() != "GET" { + return false + } + for _, rx := range applicationConfig.HttpGetExemptedEndpoints { + if rx.MatchString(c.Path()) { + return true + } + } + return false + } + } + return func(c *fiber.Ctx) bool { return false } +} diff --git a/core/http/middleware/strippathprefix.go b/core/http/middleware/strippathprefix.go new file mode 100644 index 00000000..5c45d55d --- /dev/null +++ b/core/http/middleware/strippathprefix.go @@ -0,0 +1,36 @@ +package middleware + +import ( + "strings" + + "github.com/gofiber/fiber/v2" +) + +// StripPathPrefix returns a middleware that strips a path prefix from the request path. +// The path prefix is obtained from the X-Forwarded-Prefix HTTP request header. 
+func StripPathPrefix() fiber.Handler { + return func(c *fiber.Ctx) error { + for _, prefix := range c.GetReqHeaders()["X-Forwarded-Prefix"] { + if prefix != "" { + path := c.Path() + pos := len(prefix) + + if prefix[pos-1] == '/' { + pos-- + } else { + prefix += "/" + } + + if strings.HasPrefix(path, prefix) { + c.Path(path[pos:]) + break + } else if prefix[:pos] == path { + c.Redirect(prefix) + return nil + } + } + } + + return c.Next() + } +} diff --git a/core/http/middleware/strippathprefix_test.go b/core/http/middleware/strippathprefix_test.go new file mode 100644 index 00000000..529f815f --- /dev/null +++ b/core/http/middleware/strippathprefix_test.go @@ -0,0 +1,121 @@ +package middleware + +import ( + "net/http/httptest" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/require" +) + +func TestStripPathPrefix(t *testing.T) { + var actualPath string + + app := fiber.New() + + app.Use(StripPathPrefix()) + + app.Get("/hello/world", func(c *fiber.Ctx) error { + actualPath = c.Path() + return nil + }) + + app.Get("/", func(c *fiber.Ctx) error { + actualPath = c.Path() + return nil + }) + + for _, tc := range []struct { + name string + path string + prefixHeader []string + expectStatus int + expectPath string + }{ + { + name: "without prefix and header", + path: "/hello/world", + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "without prefix and headers on root path", + path: "/", + expectStatus: 200, + expectPath: "/", + }, + { + name: "without prefix but header", + path: "/hello/world", + prefixHeader: []string{"/otherprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix but non-matching header", + path: "/prefix/hello/world", + prefixHeader: []string{"/otherprefix/"}, + expectStatus: 404, + }, + { + name: "with prefix and matching header", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and 1st header matching", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix/", "/otherprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and 2nd header matching", + path: "/myprefix/hello/world", + prefixHeader: []string{"/otherprefix/", "/myprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and header not ending with slash", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and non-matching header not ending with slash", + path: "/myprefix-suffix/hello/world", + prefixHeader: []string{"/myprefix"}, + expectStatus: 404, + }, + { + name: "redirect when prefix does not end with a slash", + path: "/myprefix", + prefixHeader: []string{"/myprefix"}, + expectStatus: 302, + expectPath: "/myprefix/", + }, + } { + t.Run(tc.name, func(t *testing.T) { + actualPath = "" + req := httptest.NewRequest("GET", tc.path, nil) + if tc.prefixHeader != nil { + req.Header["X-Forwarded-Prefix"] = tc.prefixHeader + } + + resp, err := app.Test(req, -1) + + require.NoError(t, err) + require.Equal(t, tc.expectStatus, resp.StatusCode, "response status code") + + if tc.expectStatus == 200 { + require.Equal(t, tc.expectPath, actualPath, "rewritten path") + } else if tc.expectStatus == 302 { + require.Equal(t, tc.expectPath, resp.Header.Get("Location"), "redirect location") + } + }) + } +} diff --git a/core/http/render.go b/core/http/render.go 
index 205f7ca3..2f889f57 100644 --- a/core/http/render.go +++ b/core/http/render.go @@ -10,6 +10,7 @@ import ( "github.com/gofiber/fiber/v2" fiberhtml "github.com/gofiber/template/html/v2" "github.com/microcosm-cc/bluemonday" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/schema" "github.com/russross/blackfriday" ) @@ -26,7 +27,9 @@ func notFoundHandler(c *fiber.Ctx) error { }) } else { // The client expects an HTML response - return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{}) + return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{ + "BaseURL": utils.BaseURL(c), + }) } } diff --git a/core/http/routes/health.go b/core/http/routes/health.go new file mode 100644 index 00000000..f5a08e9b --- /dev/null +++ b/core/http/routes/health.go @@ -0,0 +1,13 @@ +package routes + +import "github.com/gofiber/fiber/v2" + +func HealthRoutes(app *fiber.App) { + // Service health checks + ok := func(c *fiber.Ctx) error { + return c.SendStatus(200) + } + + app.Get("/healthz", ok) + app.Get("/readyz", ok) +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 29fef378..2ea9896a 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -11,64 +11,62 @@ import ( "github.com/mudler/LocalAI/pkg/model" ) -func RegisterLocalAIRoutes(app *fiber.App, +func RegisterLocalAIRoutes(router *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) { - app.Get("/swagger/*", swagger.HandlerDefault) // default + router.Get("/swagger/*", swagger.HandlerDefault) // default // LocalAI API endpoints if !appConfig.DisableGalleryEndpoint { modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) - app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) - app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) + router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) - app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) - app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint()) - app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) - app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) - app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) - app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) + router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) + router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint()) + router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) + router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) + router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) + router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) } - app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig)) + router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig)) + router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig)) // Stores sl := model.NewModelLoader("") - app.Post("/stores/set", localai.StoresSetEndpoint(sl, 
appConfig)) - app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig)) - app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig)) - app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig)) + router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig)) + router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig)) + router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig)) + router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig)) - // Kubernetes health checks - ok := func(c *fiber.Ctx) error { - return c.SendStatus(200) + if !appConfig.DisableMetrics { + router.Get("/metrics", localai.LocalAIMetricsEndpoint()) } - app.Get("/healthz", ok) - app.Get("/readyz", ok) - - app.Get("/metrics", localai.LocalAIMetricsEndpoint()) - // Experimental Backend Statistics Module backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now - app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService)) - app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) + router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService)) + router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) // p2p if p2p.IsP2PEnabled() { - app.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) - app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) + router.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) + router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) } - app.Get("/version", func(c *fiber.Ctx) error { + router.Get("/version", func(c *fiber.Ctx) error { return c.JSON(struct { Version string `json:"version"` }{Version: internal.PrintableVersion()}) }) - app.Get("/system", auth, localai.SystemInformations(ml, appConfig)) + router.Get("/system", localai.SystemInformations(ml, appConfig)) + + // misc + router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig)) } diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index 081daf70..a48ced65 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -2,84 +2,134 @@ package routes import ( "github.com/gofiber/fiber/v2" - "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/openai" - "github.com/mudler/LocalAI/pkg/model" ) func RegisterOpenAIRoutes(app *fiber.App, - cl *config.BackendConfigLoader, - ml *model.ModelLoader, - appConfig *config.ApplicationConfig) { + application *application.Application) { // openAI compatible API endpoint // chat - app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) - app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/v1/chat/completions", + openai.ChatEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/chat/completions", + openai.ChatEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // edit - app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig)) - app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/v1/edits", + openai.EditEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + 
application.ApplicationConfig(), + ), + ) + + app.Post("/edits", + openai.EditEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // assistant - app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + 
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // files - app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig)) - app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig)) - app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/files", openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) - app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) - app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) - app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) - app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig())) // completion - app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/completions", 
openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/v1/engines/:model/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // embeddings - app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // audio - app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // images - app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig)) + app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) - if appConfig.ImageDir != "" { - app.Static("/generated-images", appConfig.ImageDir) + if application.ApplicationConfig().ImageDir != "" { + app.Static("/generated-images", application.ApplicationConfig().ImageDir) } - if appConfig.AudioDir != "" { - app.Static("/generated-audio", appConfig.AudioDir) + if application.ApplicationConfig().AudioDir != "" { + app.Static("/generated-audio", application.ApplicationConfig().AudioDir) } // List models - app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", openai.ListModelsEndpoint(cl, ml)) + app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 7b2c6ae7..92d20544 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -10,15 +10,17 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/elements" "github.com/mudler/LocalAI/core/http/endpoints/localai" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/xsync" - "github.com/rs/zerolog/log" "github.com/gofiber/fiber/v2" "github.com/google/uuid" + 
"github.com/microcosm-cc/bluemonday" + "github.com/rs/zerolog/log" ) type modelOpCache struct { @@ -90,6 +92,7 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/p2p", func(c *fiber.Ctx) error { summary := fiber.Map{ "Title": "LocalAI - P2P dashboard", + "BaseURL": utils.BaseURL(c), "Version": internal.PrintableVersion(), //"Nodes": p2p.GetAvailableNodes(""), //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), @@ -148,6 +151,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Models", + "BaseURL": utils.BaseURL(c), "Version": internal.PrintableVersion(), "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), "Repositories": appConfig.Galleries, @@ -171,7 +175,7 @@ func RegisterUIRoutes(app *fiber.App, Search string `form:"search"` }{} if err := c.BodyParser(&form); err != nil { - return c.Status(fiber.StatusBadRequest).SendString(err.Error()) + return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) @@ -303,10 +307,11 @@ func RegisterUIRoutes(app *fiber.App, // Show the Chat page app.Get("/chat/:model", func(c *fiber.Ctx) error { - backendConfigs, _ := services.ListModels(cl, ml, "", true) + backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) summary := fiber.Map{ "Title": "LocalAI - Chat with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": c.Params("model"), "Version": internal.PrintableVersion(), @@ -318,15 +323,16 @@ func RegisterUIRoutes(app *fiber.App, }) app.Get("/talk/", func(c *fiber.Ctx) error { - backendConfigs, _ := services.ListModels(cl, ml, "", true) + backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Talk", + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": backendConfigs[0], "IsP2PEnabled": p2p.IsP2PEnabled(), @@ -339,15 +345,16 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/chat/", func(c *fiber.Ctx) error { - backendConfigs, _ := services.ListModels(cl, ml, "", true) + backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Chat with " + backendConfigs[0], + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": backendConfigs[0], "Version": internal.PrintableVersion(), @@ -363,6 +370,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Generate images with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": c.Params("model"), "Version": internal.PrintableVersion(), @@ -379,11 +387,12 @@ func RegisterUIRoutes(app *fiber.App, if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Generate images with " + backendConfigs[0].Name, + "BaseURL": utils.BaseURL(c), "ModelsConfig": 
backendConfigs, "Model": backendConfigs[0].Name, "Version": internal.PrintableVersion(), @@ -399,6 +408,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Generate images with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": c.Params("model"), "Version": internal.PrintableVersion(), @@ -415,11 +425,12 @@ func RegisterUIRoutes(app *fiber.App, if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Generate audio with " + backendConfigs[0].Name, + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": backendConfigs[0].Name, "IsP2PEnabled": p2p.IsP2PEnabled(), diff --git a/core/http/static/assets/flowbite.min.js b/core/http/static/assets/flowbite.min.js new file mode 100644 index 00000000..e2c52c2c --- /dev/null +++ b/core/http/static/assets/flowbite.min.js @@ -0,0 +1,2 @@ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define("Flowbite",[],e):"object"==typeof exports?exports.Flowbite=e():t.Flowbite=e()}(self,(function(){return function(){"use strict";var t={647:function(t,e,i){i.r(e)},853:function(t,e,i){i.r(e),i.d(e,{afterMain:function(){return w},afterRead:function(){return y},afterWrite:function(){return O},applyStyles:function(){return P},arrow:function(){return Q},auto:function(){return a},basePlacements:function(){return c},beforeMain:function(){return b},beforeRead:function(){return _},beforeWrite:function(){return L},bottom:function(){return o},clippingParents:function(){return u},computeStyles:function(){return it},createPopper:function(){return Pt},createPopperBase:function(){return Ht},createPopperLite:function(){return St},detectOverflow:function(){return mt},end:function(){return l},eventListeners:function(){return ot},flip:function(){return yt},hide:function(){return wt},left:function(){return s},main:function(){return E},modifierPhases:function(){return k},offset:function(){return Lt},placements:function(){return g},popper:function(){return h},popperGenerator:function(){return Tt},popperOffsets:function(){return It},preventOverflow:function(){return Ot},read:function(){return m},reference:function(){return f},right:function(){return r},start:function(){return d},top:function(){return n},variationPlacements:function(){return v},viewport:function(){return p},write:function(){return I}});var n="top",o="bottom",r="right",s="left",a="auto",c=[n,o,r,s],d="start",l="end",u="clippingParents",p="viewport",h="popper",f="reference",v=c.reduce((function(t,e){return t.concat([e+"-"+d,e+"-"+l])}),[]),g=[].concat(c,[a]).reduce((function(t,e){return t.concat([e,e+"-"+d,e+"-"+l])}),[]),_="beforeRead",m="read",y="afterRead",b="beforeMain",E="main",w="afterMain",L="beforeWrite",I="write",O="afterWrite",k=[_,m,y,b,E,w,L,I,O];function x(t){return t?(t.nodeName||"").toLowerCase():null}function A(t){if(null==t)return window;if("[object Window]"!==t.toString()){var e=t.ownerDocument;return e&&e.defaultView||window}return t}function C(t){return t instanceof A(t).Element||t instanceof Element}function T(t){return t instanceof A(t).HTMLElement||t instanceof HTMLElement}function H(t){return"undefined"!=typeof ShadowRoot&&(t instanceof A(t).ShadowRoot||t instanceof ShadowRoot)}var P={name:"applyStyles",enabled:!0,phase:"write",fn:function(t){var 
[... remainder of the minified two-line Flowbite bundle omitted; the vendored asset is added as a new file at core/http/static/assets/flowbite.min.js ...]
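To make the effect of the new strippathprefix.go middleware concrete, here is a minimal, self-contained Go sketch (not part of the diff) that exercises it the same way the accompanying test file does; the /localai/ prefix and the /v1/models route are illustrative values, and the import path assumes the package location introduced above.

```go
package main

import (
	"fmt"
	"net/http/httptest"

	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/http/middleware"
)

func main() {
	app := fiber.New()
	app.Use(middleware.StripPathPrefix())

	// Routes are registered without any prefix; the handler only ever sees
	// the path with the forwarded prefix already removed.
	app.Get("/v1/models", func(c *fiber.Ctx) error {
		return c.SendString("handled as " + c.Path())
	})

	// Simulate a request that reached LocalAI through a reverse proxy
	// mounted at /localai/ which forwards the original prefix.
	req := httptest.NewRequest("GET", "/localai/v1/models", nil)
	req.Header.Set("X-Forwarded-Prefix", "/localai/")

	resp, err := app.Test(req)
	if err != nil {
		panic(err)
	}
	fmt.Println("status:", resp.StatusCode) // 200: the path was rewritten to /v1/models
}
```

Note the redirect branch as well: when the incoming path equals the prefix without its trailing slash, the middleware answers with a 302 to the slash-terminated prefix, which the last test case above exercises.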

[view template fragment; surrounding markup not recoverable] Welcome to your LocalAI instance!
diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 67d40bfd..b0f11281 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -28,7 +28,7 @@ SOFTWARE. {{template "views/partials/head" .}} [markup changes not recoverable]
diff --git a/core/http/views/partials/inprogress.html b/core/http/views/partials/inprogress.html index 51c3a70c..48da66d7 100644 --- a/core/http/views/partials/inprogress.html +++ b/core/http/views/partials/inprogress.html @@ -17,13 +17,13 @@ - {{$modelName}} {{if $repository}} (from the '{{$repository}}' repository) {{end}} {{$op}} [remaining markup not recoverable]
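Stepping back to the reworked auth.go earlier in this diff: the key lookup now consults several sources, including a token cookie alongside the existing headers. A small client-side sketch (not part of the change; the key value and the address are placeholders, with the default LocalAI listen port assumed) of where a credential may be supplied:

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	apiKey := "sk-example-key" // placeholder; use one of the configured API keys

	req, err := http.NewRequest("GET", "http://localhost:8080/v1/models", nil)
	if err != nil {
		panic(err)
	}

	// Any single one of these is sufficient; they mirror the sources passed
	// to v2keyauth.MultipleKeySourceLookup in the middleware above.
	req.Header.Set("Authorization", "Bearer "+apiKey)         // header:Authorization
	req.Header.Set("x-api-key", apiKey)                       // header:x-api-key
	req.Header.Set("xi-api-key", apiKey)                      // header:xi-api-key
	req.AddCookie(&http.Cookie{Name: "token", Value: apiKey}) // cookie:token (new in this change)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.StatusCode)
}
```

When the key is missing or malformed, the handler now sets WWW-Authenticate: Bearer and returns 401, rendering views/login unless opaque errors are enabled, instead of the earlier blanket 403.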
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index 9bf5b96a..3a057cd8 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -3,8 +3,8 @@
- LocalAI Logo - LocalAI + LocalAI Logo + LocalAI
@@ -14,33 +14,33 @@
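The Kubernetes-style probes that used to be registered inline in localai.go now live in the dedicated routes.HealthRoutes registration shown earlier. A quick way to poke them from Go (the port assumes LocalAI's default listen address):

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	for _, path := range []string{"/healthz", "/readyz"} {
		resp, err := http.Get("http://localhost:8080" + path) // default listen address assumed
		if err != nil {
			fmt.Println(path, "error:", err)
			continue
		}
		resp.Body.Close()
		fmt.Println(path, "->", resp.Status) // both respond 200 OK while the service is up
	}
}
```

As the same hunk shows, /metrics is now registered only when metrics are enabled in the application config; the probes do not depend on it.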
diff --git a/core/http/views/partials/navbar_explorer.html b/core/http/views/partials/navbar_explorer.html index ffc6c4d5..ef10c76d 100644 --- a/core/http/views/partials/navbar_explorer.html +++ b/core/http/views/partials/navbar_explorer.html @@ -3,8 +3,8 @@
- LocalAI Logo - LocalAI + LocalAI Logo + LocalAI
@@ -14,7 +14,7 @@
@@ -22,7 +22,7 @@
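Taken together, StripPathPrefix, the BaseURL values handed to the templates, and the navbar updates above appear aimed at serving LocalAI from a sub-path behind a reverse proxy. As a closing sketch (assumptions: the upstream address, the /localai/ prefix, and that links built from BaseURL keep that prefix), a tiny Go proxy that sets the header the middleware consumes:

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Assumed upstream: a LocalAI instance on its default listen address.
	upstream, err := url.Parse("http://127.0.0.1:8080")
	if err != nil {
		log.Fatal(err)
	}
	proxy := httputil.NewSingleHostReverseProxy(upstream)

	// Publish the instance under /localai/ and forward the prefix so that
	// StripPathPrefix can rewrite /localai/v1/... back to /v1/... upstream.
	http.HandleFunc("/localai/", func(w http.ResponseWriter, r *http.Request) {
		r.Header.Set("X-Forwarded-Prefix", "/localai/")
		proxy.ServeHTTP(w, r)
	})

	log.Fatal(http.ListenAndServe(":9000", nil))
}
```

With this in place, a call such as GET /localai/v1/models reaches the upstream handlers unchanged, and the rendered pages can presumably use BaseURL to emit links that stay under /localai/.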