diff --git a/.github/workflows/checksum_checker.yaml b/.github/workflows/checksum_checker.yaml
index b76b7aff..4f95a4e2 100644
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -41,7 +41,7 @@ jobs:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
-          title: 'models(gallery): :arrow_up: update checksum'
+          title: 'chore(model-gallery): :arrow_up: update checksum'
           branch: "update/checksum"
           body: Updating checksums in gallery/index.yaml
           signoff: true
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 290f8793..8ebaa1b2 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -47,7 +47,7 @@ jobs:
           #   makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
           #   makeflags: "--jobs=3 --output-sync=target"
           # - build-type: 'cublas'
           #   cuda-major-version: "12"
-          #   cuda-minor-version: "4"
+          #   cuda-minor-version: "0"
           #   platforms: 'linux/amd64'
           #   tag-latest: 'false'
           #   tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 73899e15..395d7761 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -75,7 +75,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 92e07326..5c883db4 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -4,6 +4,8 @@ on:
   push:
     branches:
       - master
+    tags:
+      - 'v*'
   pull_request:
 
 env:
@@ -29,11 +31,10 @@ jobs:
         with:
           go-version: '1.21.x'
           cache: false
-
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
           sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
       - name: Install CUDA Dependencies
         run: |
@@ -149,7 +150,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
       - name: Intel Dependencies
         run: |
           wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -250,7 +251,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
+          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
           go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
           go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
       - name: Build stablediffusion
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 084d016d..e6efe77f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -70,7 +70,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential curl ffmpeg
+          sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
           sudo apt-get install -y libgmock-dev
           curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
              sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
diff --git a/Dockerfile b/Dockerfile
index 78ed4cd3..a0feadd9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -24,7 +24,7 @@ RUN apt-get update && \
         cmake \
         curl \
         git \
-        unzip && \
+        unzip upx-ucl && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=4
+ARG CUDA_MINOR_VERSION=0
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
diff --git a/Makefile b/Makefile
index df13cbfb..51893868 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=705b7ecf60e667ced57c15d67aa86865e3cc7aa7
+CPPLLAMA_VERSION?=01245f5b1629075543bc4478418c7d72a0b4b3c7
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
 
 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
-LD_FLAGS?=
+LD_FLAGS?=-s -w
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
 
@@ -72,6 +72,14 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)
 
+UPX?=
+# check if upx exists
+ifeq (, $(shell which upx))
+	UPX=
+else
+	UPX=$(shell which upx)
+endif
+
 # Default Docker bridge IP
 E2E_BRIDGE_IP?=172.17.0.1
 
@@ -377,6 +385,7 @@ build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
+	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
 ifneq ($(BACKEND_LIBS),)
 	$(MAKE) backend-assets/lib
 	cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -421,7 +430,7 @@ else
 endif
 
 dist-cross-linux-arm64:
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
 	STATIC=true $(MAKE) build
 	mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
@@ -471,7 +480,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -733,13 +742,22 @@ backend-assets/grpc: protogen-go replace
 backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/bert-embeddings
+endif
 
 backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/gpt4all
+endif
 
 backend-assets/grpc/huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/huggingface
+endif
 
 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -765,6 +783,9 @@ else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
 endif
+ifneq ($(UPX),)
+	$(UPX) backend/cpp/${VARIANT}/grpc-server
+endif
 
 # This target is for manually building a variant with-auto detected flags
 backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -837,33 +858,57 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
 backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
 	mkdir -p backend-assets/util/
 	cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
+ifneq ($(UPX),)
+	$(UPX) backend-assets/util/llama-cpp-rpc-server
+endif
 
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/llama-ggml
+endif
 
 backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
 	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/piper
+endif
 
 backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/rwkv
+endif
 
 backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/stablediffusion
+endif
 
 backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/tinydream
+endif
 
 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/whisper
+endif
 
 backend-assets/grpc/local-store: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/local-store
+endif
 
 grpcs: prepare $(GRPC_BACKENDS)
 
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index 1cff6b8a..cb5c85f1 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -2259,7 +2259,6 @@ static void params_parse(const backend::ModelOptions* request,
      // get the directory of modelfile
      std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
      params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
-     params.lora_base  =  model_dir + "/"+request->lorabase();
     }
     params.use_mlock = request->mlock();
     params.use_mmap = request->mmap();
diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt
index e416adb2..7a1bf85f 100644
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 certifi
diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt
index 215b3d35..d3f9f52b 100644
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt
index c762c4d6..8d1e3151 100644
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
\ No newline at end of file
diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt
index d7dd07e4..e1cddaa3 100644
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
index c607187e..6f04d677 100644
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -3,7 +3,7 @@ accelerate
 compel
 peft
 diffusers
-grpcio==1.65.0
+grpcio==1.65.1
 opencv-python
 pillow
 protobuf
diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
index 62c7117a..6aae273c 100644
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 torch
diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt
index e431ddfe..2aac2cda 100644
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,6 +1,6 @@
 causal-conv1d==1.4.0
 mamba-ssm==2.2.2
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt
index b0551187..bad088a9 100644
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.64.1
+grpcio==1.65.1
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3
diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt
index 07ba879a..86d16ec2 100644
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 librosa
 faster-whisper
diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt
index c3706051..147cad9a 100644
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt
index 1b437654..8b2ad4d0 100644
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt
index 635b4c31..95d4848c 100644
--- a/backend/python/sentencetransformers/requirements-intel.txt
+++ b/backend/python/sentencetransformers/requirements-intel.txt
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt
index ac21d449..4ef4a28b 100644
--- a/backend/python/sentencetransformers/requirements.txt
+++ b/backend/python/sentencetransformers/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 sentence-transformers==3.0.1
 transformers
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt
index 635b4c31..95d4848c 100644
--- a/backend/python/transformers-musicgen/requirements-intel.txt
+++ b/backend/python/transformers-musicgen/requirements-intel.txt
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt
index 8a969c34..8ffa3c31 100644
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 transformers
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 scipy==1.14.0
diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt
index 635b4c31..8fc18a0e 100644
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -2,4 +2,3 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index 76066f50..55925b32 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,9 +1,9 @@
 accelerate
 transformers
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 certifi
 intel-extension-for-transformers
 bitsandbytes
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt
index ac891fe7..d1d0583e 100644
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt
index 986a4d55..7c612a2f 100644
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index f63a9913..86b75601 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -226,9 +226,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 
 			// Update input grammar
 			jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
-			config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
+			g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
+			if err == nil {
+				config.Grammar = g
+			}
 		case input.JSONFunctionGrammarObject != nil:
-			config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
+			g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...)
+			if err == nil {
+				config.Grammar = g
+			}
 		default:
 			// Force picking one of the functions by the request
 			if config.FunctionToCall() != "" {
diff --git a/core/http/views/p2p.html b/core/http/views/p2p.html
index 0396924e..a8c51310 100644
--- a/core/http/views/p2p.html
+++ b/core/http/views/p2p.html
@@ -16,7 +16,16 @@
                 </a> 
             </h2> 
             <h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
-            
+            <!-- Warning box if p2p token is empty and p2p is enabled -->
+            {{ if and .IsP2PEnabled (eq .P2PToken "") }}
+            <div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
+                <p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
+                <p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
+                    Check out the documentation for more information.
+                </a> </p>
+            </div>
+            {{ else }}
+
             <!-- Federation Box -->
             <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
 
@@ -128,7 +137,8 @@
                     </div>
                 </div>
             </div>
-            <!-- Llama.cpp Box END -->       
+            <!-- Llama.cpp Box END -->    
+            {{ end }}   
         </div>
     </div>
 
diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index abe34373..2de7ae3c 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -5,17 +5,65 @@ weight = 15
 url = "/features/distribute/"
 +++
 
+
+This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p by using a shared token which makes sure the communication is secure and private between the nodes of the network.
+
+LocalAI supports two modes of distributed inferencing via p2p:
+
+- **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
+- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers, each of which contributes to the final inference result (by sharing the model weights).
+
+## Usage
+
+Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances. That is all you need to create AI clusters, eliminating the need for intricate network setups.
+
+Simply navigate to the "Swarm" section in the WebUI and follow the on-screen instructions.
+
+For fully shared instances, start LocalAI with `--p2p --federated` and follow the guidance in the Swarm section. This feature is still experimental and should be considered a tech preview.
+
+### Federated mode
+
+Federated mode allows you to launch multiple LocalAI instances and connect them together in a federated network. This mode is useful when you want to distribute the inference load across multiple nodes while keeping a single point of entry for the API. In the Swarm section of the WebUI, you can see the instructions to connect multiple instances together.
+
+![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/19ebd44a-20ff-412c-b92f-cfb8efbe4b21)
+
+To start a LocalAI server in federated mode, run:
+
+```bash
+local-ai run --p2p --federated
+```
+
+This will generate a token that you, or others, can use to connect other LocalAI instances to the network. If you already have a token, you can specify it using the `TOKEN` environment variable.
+
+To start a load-balanced server that routes requests to the network, run the following with the `TOKEN` environment variable set:
+
+```bash
+local-ai federated
+```
+
+To see all the available options, run `local-ai federated --help`.
+
+The instructions are displayed in the "Swarm" section of the WebUI, guiding you through the process of connecting multiple instances.
+
+### Workers mode
+
 {{% alert note %}}
 This feature is available exclusively with llama-cpp compatible models.
 
 This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829).
 {{% /alert %}}
 
-This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance.
+To connect multiple workers to a single LocalAI instance, first start a server in p2p mode:
 
-## Usage
+```bash
+local-ai run --p2p
+```
 
-### Starting Workers
+Then navigate to the "Swarm" section of the WebUI to see the instructions for connecting multiple workers to the network.
+
+![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/b8cadddf-a467-49cf-a1ed-8850de95366d)
+
+### Without P2P
 
 To start workers for distributing the computational load, run:
 
@@ -23,48 +71,27 @@ To start workers for distributing the computational load, run:
 local-ai worker llama-cpp-rpc <listening_address> <listening_port>
 ```
 
-Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.
-
-### Starting LocalAI
-
-To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable:
+Then specify the addresses of the workers when starting LocalAI using the `LLAMACPP_GRPC_SERVERS` environment variable:
 
 ```bash
 LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run
 ```
-
 The workload on the LocalAI server will then be distributed across the specified nodes.
 
-## Peer-to-Peer Networking
+Alternatively, you can build the RPC workers/server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.
 
-![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
+## Manual example (worker)
 
-Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner.
-
-A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local network (using mDNS discovery) and DHT for communication across different networks.
-
-The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument.
-
-A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect.
-
-When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343).
-
-### Usage
+Use the WebUI to guide you in the process of starting new workers. This example shows the manual steps to highlight the process.
 
 1. Start the server with `--p2p`:
 
 ```bash
 ./local-ai run --p2p
-# 1:02AM INF loading environment variables from file envFile=.env
-# 1:02AM INF Setting logging to info
-# 1:02AM INF P2P mode enabled
-# 1:02AM INF No token provided, generating one
-# 1:02AM INF Generated Token:
-# XXXXXXXXXXX
-# 1:02AM INF Press a button to proceed
+# Get the token in the Swarm section of the WebUI
 ```
 
-Copy the displayed token and press Enter.
+Copy the token from the WebUI or via API call (e.g., `curl http://localhost:8000/p2p/token`) and save it for later use.
 
 To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`.
 
@@ -93,11 +120,7 @@ The server logs should indicate that new workers are being discovered.
 
 3. Start inference as usual on the server initiated in step 1.
 
-## Notes
-
-- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
-- Only a single model is supported currently.
-- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
+![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
 
 
 ## Environment Variables
@@ -109,3 +132,20 @@ There are options that can be tweaked or parameters that can be set using enviro
 | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
 | **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
 | **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
+
+## Architecture
+
+LocalAI uses [go-libp2p](https://github.com/libp2p/go-libp2p) under the hood, the same project powering IPFS. Unlike other frameworks, LocalAI uses peer-to-peer networking without a single master server; instead, it relies on sub/gossip and ledger functionalities to achieve consensus across different peers.
+
+[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token, to ease automatic discovery and provide separate, private peer-to-peer networks.
+
+The weights are split proportionally to the memory when running in worker mode; in federation mode, each request is split to every node, each of which has to load the model fully.
+
+## Notes
+
+- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
+- Only a single model is supported currently.
+- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
+- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
+
+
diff --git a/docs/data/version.json b/docs/data/version.json
index 30b4b614..efda370f 100644
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v2.18.1"
+  "version": "v2.19.2"
 }
diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn
index 1b2e1395..7aec99b3 160000
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
@@ -1 +1 @@
-Subproject commit 1b2e139512106f8074ac7d4a884135d159720cc4
+Subproject commit 7aec99b38dc2668c6139bf71855535ace41c123c
diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt
index 116b7b61..13415f11 100644
--- a/examples/chainlit/requirements.txt
+++ b/examples/chainlit/requirements.txt
@@ -1,6 +1,6 @@
-llama_index==0.10.55
+llama_index==0.10.56
 requests==2.32.3
-weaviate_client==4.6.5
+weaviate_client==4.6.7
 transformers
 torch
 chainlit
diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt
index 481af898..f8afacdc 100644
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
-langchain==0.2.8
-openai==1.35.13
+langchain==0.2.10
+openai==1.37.0
diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt
index 0e6d8c4d..89ca2db7 100644
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
-langchain==0.2.8
-openai==1.35.13
+langchain==0.2.10
+openai==1.37.0
 chromadb==0.5.4
-llama-index==0.10.55
\ No newline at end of file
+llama-index==0.10.56
\ No newline at end of file
diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 01a75d46..0e03d543 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -10,21 +10,21 @@ debugpy==1.8.2
 frozenlist==1.4.1
 greenlet==3.0.3
 idna==3.7
-langchain==0.2.8
-langchain-community==0.2.7
+langchain==0.2.10
+langchain-community==0.2.9
 marshmallow==3.21.3
 marshmallow-enum==1.5.1
 multidict==6.0.5
 mypy-extensions==1.0.0
 numexpr==2.10.1
-numpy==1.26.4
-openai==1.35.13
+numpy==2.0.1
+openai==1.37.0
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.8.2
 PyYAML==6.0.1
 requests==2.32.3
-SQLAlchemy==2.0.30
+SQLAlchemy==2.0.31
 tenacity==8.5.0
 tqdm==4.66.4
 typing-inspect==0.9.0
diff --git a/gallery/index.yaml b/gallery/index.yaml
index c130c570..713eb21f 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,6 +1,89 @@
 ---
-## Deepseek
+## LLama3.1
+- &llama31
+  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
+  name: "meta-llama-3.1-8b-instruct"
+  license: llama3.1
+  description: |
+    The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
+
+    Model developer: Meta
+
+    Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+  urls:
+    - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
+    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
+  tags:
+    - llm
+    - gguf
+    - gpu
+    - cpu
+    - llama3.1
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+  files:
+    - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+      sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "meta-llama-3.1-70b-instruct"
+  urls:
+    - https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct
+    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+  files:
+    - filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+      sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "meta-llama-3.1-8b-claude-imat"
+  urls:
+    - https://huggingface.co/Undi95/Meta-Llama-3.1-8B-Claude
+    - https://huggingface.co/InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF
+  description: |
+    Meta-Llama-3.1-8B-Claude-iMat-GGUF: Quantized from Meta-Llama-3.1-8B-Claude fp16. Weighted quantizations were created using fp16 GGUF and groups_merged.txt in 88 chunks and n_ctx=512. Static fp16 will also be included in repo. For a brief rundown of iMatrix quant performance, please see this PR. All quants are verified working prior to uploading to repo for your safety and convenience.
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
+  files:
+    - filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
+      sha256: 8de80021b9438f0925a41ae73f77cb73fcfa30090e03a0919ce23d2b9818e9c7
+      uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1"
+  icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png
+  urls:
+    - https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored
+    - https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF
+  description: |
+    The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
+
+        Saving money(LLama 3.1)
+        only test en.
+        Input Models input text only. Output Models generate text and code only.
+        Uncensored
+        Quick response
+        A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
+        DarkIdol:Roles that you can imagine and those that you cannot imagine.
+        Roleplay
+        Specialized in various role-playing scenarios
+
+    How To
+
+        System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script."
+  overrides:
+    parameters:
+      model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
+  files:
+    - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
+      sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f
+      uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
 - &deepseek
+  ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
   name: "deepseek-coder-v2-lite-instruct"
   icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
@@ -24,6 +107,33 @@
     - filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
       sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
       uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
+- name: "archangel_sft_pythia2-8b"
+  url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
+  icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
+  license: apache-2.0
+  urls:
+    - https://huggingface.co/ContextualAI/archangel_sft_pythia2-8b
+    - https://huggingface.co/RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf
+    - https://github.com/ContextualAI/HALOs
+  description: |
+    datasets:
+    - stanfordnlp/SHP
+    - Anthropic/hh-rlhf
+    - OpenAssistant/oasst1
+
+    This repo contains the model checkpoints for:
+    - model family pythia2-8b
+    - optimized with the loss SFT
+    - aligned using the SHP, Anthropic HH and Open Assistant datasets.
+
+    Please refer to our [code repository](https://github.com/ContextualAI/HALOs) or [blog](https://contextual.ai/better-cheaper-faster-llm-alignment-with-kto/) which contains instructions for training your own HALOs and links to our model cards.
+  overrides:
+    parameters:
+      model: archangel_sft_pythia2-8b.Q4_K_M.gguf
+  files:
+    - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
+      sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
+      uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
 - &qwen2
   ## Start QWEN2
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -220,6 +330,36 @@
     - filename: Qwen2-Wukong-7B-Q4_K_M.gguf
       sha256: 6b8ca6649c33fc84d4892ebcff1214f0b34697aced784f0d6d32e284a15943ad
       uri: huggingface://bartowski/Qwen2-Wukong-7B-GGUF/Qwen2-Wukong-7B-Q4_K_M.gguf
+- !!merge <<: *qwen2
+  name: "calme-2.8-qwen2-7b"
+  icon: https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
+  urls:
+    - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b
+    - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b-GGUF
+  description: |
+    This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
+  overrides:
+    parameters:
+      model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
+  files:
+    - filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
+      sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
+      uri: huggingface://MaziyarPanahi/calme-2.8-qwen2-7b-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
+- !!merge <<: *qwen2
+  name: "stellardong-72b-i1"
+  icon: https://huggingface.co/smelborp/StellarDong-72b/resolve/main/stellardong.png
+  urls:
+    - https://huggingface.co/smelborp/StellarDong-72b
+    - https://huggingface.co/mradermacher/StellarDong-72b-i1-GGUF
+  description: |
+    Magnum + Nova = you won't believe how stellar this dong is!!
+  overrides:
+    parameters:
+      model: StellarDong-72b.i1-Q4_K_M.gguf
+  files:
+    - filename: StellarDong-72b.i1-Q4_K_M.gguf
+      sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
+      uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
 - &mistral03
   ## START Mistral
   url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -294,12 +434,7 @@
     - gpu
     - mistral
     - cpu
-  description: |
-    🔬 Einstein-v4-7B
-
-    This model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.
-
-    This model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.
+  description: "\U0001F52C Einstein-v4-7B\n\nThis model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.\n\nThis model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.\n"
   overrides:
     parameters:
       model: Einstein-v4-7B.Q4_K_M.gguf
@@ -707,6 +842,21 @@
     - filename: EMO-2B.Q4_K_M.gguf
       sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5
       uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf
+- !!merge <<: *gemma
+  name: "gemmoy-9b-g2-mk.3-i1"
+  icon: https://huggingface.co/Hastagaras/G2-Gemmoy-9B-MK.3-RP/resolve/main/gemmoy.jpg
+  urls:
+    - https://huggingface.co/Hastagaras/Gemmoy-9B-G2-MK.3
+    - https://huggingface.co/mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF
+  description: |
+    The Gemmoy-9B-G2-MK.3 model is a large language model trained on a variety of datasets, including grimulkan/LimaRP-augmented, LDJnr/Capybara, TheSkullery/C2logs_Filtered_Sharegpt_Merged, abacusai/SystemChat-1.1, and Hastagaras/FTTS-Stories-Sharegpt.
+  overrides:
+    parameters:
+      model: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
+  files:
+    - filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
+      sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1
+      uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
 - &llama3
   url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
   icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -915,6 +1065,36 @@
     - filename: llama-3-stheno-mahou-8b-q4_k_m.gguf
       sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11
       uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf
+- !!merge <<: *llama3
+  name: "l3-8b-stheno-horny-v3.3-32k-q5_k_m"
+  urls:
+    - https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
+    - https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF
+  description: |
+    This was an experiment to see if aligning other models via LORA is possible. Yes it is. We aligned it to be always horny.
+
+    We took V3.3 Stheno weights from here
+
+    And applied our lora at Alpha = 768
+
+    Thank you to Sao10K for the amazing model.
+
+    This is not legal advice. I don't put any extra licensing on my own lora.
+
+    LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0.
+
+    LLaMA 3 license can be found here
+
+    If you want to host a model using our lora, you have our permission, but you might consider getting Sao's permission if you want to host their model.
+
+    Again, not legal advice.
+  overrides:
+    parameters:
+      model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
+  files:
+    - filename: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
+      sha256: 8d934f80ca6dbaa4852846108da92446a26715fbd5f6fc3859568850edf05262
+      uri: huggingface://Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF/l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
 - !!merge <<: *llama3
   name: "llama-3-8b-openhermes-dpo"
   urls:
@@ -2966,7 +3146,6 @@
     - filename: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
       sha256: 71fef02915c606b438ccff2cae6b7760bbb54a558d5f2d39c2421d97b6682fea
       uri: huggingface://QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF/ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
-
 - !!merge <<: *llama3
   name: "llama-3-ezo-8b-common-it"
   icon: https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
@@ -2974,11 +3153,11 @@
     - https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
     - https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF
   description: |
-      Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
+    Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
 
-      This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
+    This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
 
-      このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
+    このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
   overrides:
     parameters:
       model: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf
@@ -3107,7 +3286,6 @@
     - filename: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
       sha256: ecbd57783006f1a027f8a7f5a5d551dc8b3568912825f566d79fd34a804e8970
       uri: huggingface://mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF/L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
-
 - !!merge <<: *llama3
   name: "l3-15b-etherealmaid-t0.0001-i1"
   icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/FwYXt2h_FdmlL0Z6qYufz.png
@@ -3146,6 +3324,89 @@
     - filename: L3-8B-Celeste-v1-Q4_K_M.gguf
       sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317
       uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "l3-8b-celeste-v1.2"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
+  urls:
+    - https://huggingface.co/mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF
+  description: |
+    Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct and c2 logs cleaned.
+
+    This is a roleplay model; any instruction-following capabilities outside roleplay contexts are coincidental.
+  overrides:
+    parameters:
+      model: l3-8b-celeste-v1.2-q4_k_m.gguf
+  files:
+    - filename: l3-8b-celeste-v1.2-q4_k_m.gguf
+      sha256: 7752204c0e9f627ff5726eb69bb6114974cafbc934a993ad019abfba62002783
+      uri: huggingface://mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF/l3-8b-celeste-v1.2-q4_k_m.gguf
+- !!merge <<: *llama3
+  name: "llama-3-tulu-2-8b-i1"
+  icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
+  urls:
+    - https://huggingface.co/allenai/llama-3-tulu-2-8b
+    - https://huggingface.co/mradermacher/llama-3-tulu-2-8b-i1-GGUF
+  description: |
+    Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
+  overrides:
+    parameters:
+      model: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
+  files:
+    - filename: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
+      sha256: f859c22bfa64f461e9ffd973dc7ad6a78bb98b1dda6f49abfa416a4022b7e333
+      uri: huggingface://mradermacher/llama-3-tulu-2-8b-i1-GGUF/llama-3-tulu-2-8b.i1-Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "llama-3-tulu-2-dpo-70b-i1"
+  icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
+  urls:
+    - https://huggingface.co/allenai/llama-3-tulu-2-dpo-70b
+    - https://huggingface.co/mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF
+  description: |
+    Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 DPO 70B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
+  overrides:
+    parameters:
+      model: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
+  files:
+    - filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
+      sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5
+      uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
+- !!merge <<: *llama3
+  license: cc-by-nc-4.0
+  name: "suzume-llama-3-8b-multilingual-orpo-borda-top25"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kWQSu02YfgYdUQqv4s5lq.png
+  urls:
+    - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25
+    - https://huggingface.co/RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf
+  description: |
+    This is Suzume ORPO, an ORPO trained fine-tune of the lightblue/suzume-llama-3-8B-multilingual model using our lightblue/mitsu dataset.
+
+    We have trained several versions of this model using ORPO and so recommend that you use the best performing model from our tests, lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half.
+
+    Note that this model has a non-commercial license as we used the Command R and Command R+ models to generate our training data for this model (lightblue/mitsu).
+
+    We are currently working on developing a commercially usable model, so stay tuned for that!
+  overrides:
+    parameters:
+      model: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
+  files:
+    - filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
+      sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388
+      uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "calme-2.4-llama3-70b"
+  icon: https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b/resolve/main/llama-3-merges.webp
+  urls:
+    - https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b
+    - https://huggingface.co/mradermacher/calme-2.4-llama3-70b-GGUF
+  description: |
+    This model is a fine-tune (DPO) of meta-llama/Meta-Llama-3-70B-Instruct model.
+  overrides:
+    parameters:
+      model: calme-2.4-llama3-70b.Q4_K_M.gguf
+  files:
+    - filename: calme-2.4-llama3-70b.Q4_K_M.gguf
+      sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
+      uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
 - &command-R
   ### START Command-r
   url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -3388,8 +3649,8 @@
       model: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
   files:
     - filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
-      sha256: 39458b227a4be763b7eb39d306d240c3d45205e3f8b474ec7bdca7bba0158e69
       uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
+      sha256: d6d25bf078321bea4a079c727b273cb0b5a2e0b4cf3add0f7a2c8e43075c414f
 - !!merge <<: *phi-3
   name: "phillama-3.8b-v0.1"
   icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png
@@ -3405,7 +3666,23 @@
     - filename: phillama-3.8b-v0.1.Q4_K_M.gguf
       sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2
       uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf
+- !!merge <<: *phi-3
+  name: "calme-2.3-phi3-4b"
+  icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp
+  urls:
+    - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b
+    - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b-GGUF
+  description: |
+    MaziyarPanahi/calme-2.3-phi3-4b
 
+    This model is a fine-tune (DPO) of microsoft/Phi-3-mini-4k-instruct model.
+  overrides:
+    parameters:
+      model: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
+  files:
+    - filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
+      sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127
+      uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
 - &hermes-2-pro-mistral
   ### START Hermes
   url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
diff --git a/gallery/llama3.1-instruct.yaml b/gallery/llama3.1-instruct.yaml
new file mode 100644
index 00000000..66c9ce97
--- /dev/null
+++ b/gallery/llama3.1-instruct.yaml
@@ -0,0 +1,62 @@
+---
+name: "llama3-instruct"
+
+config_file: |
+  mmap: true
+  function:
+    disable_no_action: true
+    grammar:
+      disable: true
+    response_regex:
+    - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+  template:
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+
+      You have access to the following functions:
+
+      {{range .Functions}}
+      Use the function '{{.Name}}' to '{{.Description}}'
+      {{toJson .Parameters}}
+      {{end}}
+
+      Think very carefully before calling functions.
+      If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
+
+      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
+
+      Reminder:
+      - If looking for real time information use relevant functions before falling back to searching on internet
+      - Function calls MUST follow the specified format, start with <function= and end with </function>
+      - Required parameters MUST be specified
+      - Only call one function at a time
+      - Put the entire function call reply on one line
+      <|eot_id|>
+      {{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    completion: |
+      {{.Input}}
+  context_size: 8192
+  f16: true
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "<|eot_id|>"
+  - <|end_of_text|>
diff --git a/gallery/tuluv2.yaml b/gallery/tuluv2.yaml
new file mode 100644
index 00000000..ca2785a2
--- /dev/null
+++ b/gallery/tuluv2.yaml
@@ -0,0 +1,43 @@
+---
+name: "tuluv2"
+
+config_file: |
+  mmap: true
+  template:
+    chat_message: |
+      <|{{ .RoleName }}|>
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content }}
+      {{ end -}}
+      {{ if .FunctionCall -}}
+      {{toJson .FunctionCall}}
+      {{ end -}}
+    function: |
+      <|{{ .RoleName }}|>
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content }}
+      {{ end -}}
+      {{ if .FunctionCall -}}
+      {{toJson .FunctionCall}}
+      {{ end -}}
+    chat: |
+      {{.Input -}}
+      <|assistant|>
+    completion: |
+      {{.Input}}
+  context_size: 4096
+  f16: true
+  stopwords:
+  - '<|im_end|>'
+  - '<dummy32000>'
+  - '<|endoftext|>'
diff --git a/pkg/functions/function_structure.go b/pkg/functions/function_structure.go
new file mode 100644
index 00000000..c4337d67
--- /dev/null
+++ b/pkg/functions/function_structure.go
@@ -0,0 +1,43 @@
+package functions
+
+import (
+	"encoding/json"
+
+	"github.com/mudler/LocalAI/pkg/functions/grammars"
+)
+
+type Item struct {
+	Type       string                 `json:"type"`
+	Properties map[string]interface{} `json:"properties"`
+}
+
+type JSONFunctionStructure struct {
+	OneOf []Item                 `json:"oneOf,omitempty"`
+	AnyOf []Item                 `json:"anyOf,omitempty"`
+	Defs  map[string]interface{} `json:"$defs,omitempty"`
+}
+
+func (j JSONFunctionStructure) Grammar(options ...func(*grammars.GrammarOption)) (string, error) {
+	grammarOpts := &grammars.GrammarOption{}
+	grammarOpts.Apply(options...)
+
+	dat, err := json.Marshal(j)
+	if err != nil {
+		return "", err
+	}
+
+	converter := NewSchemaConverter(*grammarOpts)
+	return converter.GrammarFromBytes(dat, options...)
+}
+
+type SchemaConverter interface {
+	GrammarFromBytes([]byte, ...func(*grammars.GrammarOption)) (string, error)
+}
+
+func NewSchemaConverter(opt grammars.GrammarOption) SchemaConverter {
+	switch {
+	case opt.SchemaType == grammars.LLama31Schema:
+		return grammars.NewLLama31SchemaConverter(opt.FunctionName)
+	}
+	return grammars.NewJSONSchemaConverter(opt.PropOrder)
+}
diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go
index 49e9fc93..19012d53 100644
--- a/pkg/functions/functions.go
+++ b/pkg/functions/functions.go
@@ -18,6 +18,15 @@ type Function struct {
 }
 type Functions []Function
 
+type FunctionName struct {
+	Const string `json:"const"`
+}
+
+type Argument struct {
+	Type       string                 `json:"type"`
+	Properties map[string]interface{} `json:"properties"`
+}
+
 type Tool struct {
 	Type     string   `json:"type"`
 	Function Function `json:"function,omitempty"`
diff --git a/pkg/functions/functions_suite_test.go b/pkg/functions/functions_suite_test.go
index 8964b1c8..ab743609 100644
--- a/pkg/functions/functions_suite_test.go
+++ b/pkg/functions/functions_suite_test.go
@@ -1,4 +1,4 @@
-package functions
+package functions_test
 
 import (
 	"testing"
@@ -7,7 +7,7 @@ import (
 	. "github.com/onsi/gomega"
 )
 
-func TestGrammar(t *testing.T) {
+func TestFunctions(t *testing.T) {
 	RegisterFailHandler(Fail)
-	RunSpecs(t, "Grammar test suite")
+	RunSpecs(t, "Functions test suite")
 }
diff --git a/pkg/functions/grammar_json_schema.go b/pkg/functions/grammar_json_schema.go
deleted file mode 100644
index 7356d01d..00000000
--- a/pkg/functions/grammar_json_schema.go
+++ /dev/null
@@ -1,378 +0,0 @@
-package functions
-
-// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
-
-import (
-	"encoding/json"
-	"fmt"
-	"regexp"
-	"sort"
-	"strings"
-
-	"github.com/mudler/LocalAI/pkg/utils"
-)
-
-const (
-	JSONBNF = `root   ::= object
-value  ::= object | array | string | number | ("true" | "false" | "null") ws
-
-object ::=
-  "{" ws (
-            string ":" ws value
-    ("," ws string ":" ws value)*
-  )? "}" ws
-
-array  ::=
-  "[" ws (
-            value
-    ("," ws value)*
-  )? "]" ws
-
-string ::=
-  "\"" (
-    [^"\\] |
-    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
-  )* "\"" ws
-
-number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
-
-ws ::= ([ \t\n] ws)?`
-)
-
-var (
-	SPACE_RULE = `" "?`
-
-	PRIMITIVE_RULES = map[string]string{
-		"boolean": `("true" | "false") space`,
-		"number":  `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
-		"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
-		"string": `"\"" (
-			[^"\\] |
-			"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
-		  )* "\"" space`,
-		// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
-		// however, if we don't have it, the grammar will be ambiguous and
-		// empirically results are way worse.
-		"freestring": `(
-			[^\x00] |
-			"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
-		  )* space`,
-		"null": `"null" space`,
-	}
-
-	INVALID_RULE_CHARS_RE     = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
-	GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
-	GRAMMAR_LITERAL_ESCAPES   = map[string]string{
-		"\r": `\r`,
-		"\n": `\n`,
-		`"`:  `\"`,
-	}
-)
-
-type JSONSchemaConverter struct {
-	propOrder map[string]int
-	rules     map[string]string
-}
-
-func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
-	propOrderSlice := strings.Split(propOrder, ",")
-	propOrderMap := make(map[string]int)
-	for idx, name := range propOrderSlice {
-		propOrderMap[name] = idx
-	}
-
-	rules := make(map[string]string)
-	rules["space"] = SPACE_RULE
-
-	return &JSONSchemaConverter{
-		propOrder: propOrderMap,
-		rules:     rules,
-	}
-}
-
-func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) string {
-	escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jsonString(literal), func(match string) string {
-		return GRAMMAR_LITERAL_ESCAPES[match]
-	})
-	return fmt.Sprintf(`"%s"`, escaped)
-}
-
-func (sc *JSONSchemaConverter) addRule(name, rule string) string {
-	escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
-	key := escName
-	if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
-		i := 0
-		for {
-			key = fmt.Sprintf("%s%d", escName, i)
-			if _, ok := sc.rules[key]; !ok {
-				break
-			}
-			i++
-		}
-	}
-	sc.rules[key] = rule
-	return key
-}
-
-const arrayNewLines = `arr  ::=
-  "[\n"  (
-		realvalue
-    (",\n"  realvalue)*
-  )? "]"`
-
-const array = `arr  ::=
-  "["  (
-		realvalue
-    (","  realvalue)*
-  )? "]"`
-
-func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
-
-	grammarOpts := &GrammarOption{}
-	grammarOpts.Apply(options...)
-
-	prefix := grammarOpts.Prefix
-	maybeArray := grammarOpts.MaybeArray
-	disableParallelNewLines := grammarOpts.DisableParallelNewLines
-	maybeString := grammarOpts.MaybeString
-	noMixedFreeString := grammarOpts.NoMixedFreeString
-
-	var lines []string
-
-	swapRoot := maybeArray || maybeString || prefix != ""
-
-	// write down the computed rules.
-	// if maybeArray is true, we need to add the array rule and slightly tweak the root rule
-	for name, rule := range sc.rules {
-		if swapRoot && name == "root" {
-			name = "realvalue"
-		}
-		lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
-	}
-
-	if !swapRoot {
-		return strings.Join(lines, "\n")
-	}
-
-	newRoot := "realvalue"
-	if maybeArray {
-		newRoot = "arr | realvalue"
-	}
-
-	freestringRule := "mixedstring"
-	if noMixedFreeString {
-		freestringRule = "freestring"
-	}
-
-	if prefix != "" {
-		// quote newlines in suffix
-		prefix = utils.EscapeNewLines(prefix)
-
-		if maybeArray && maybeString {
-			newRoot = "(" + newRoot + ")"
-		}
-
-		if maybeString {
-			//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
-			newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
-		} else {
-			newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
-		}
-	} else if maybeString {
-		if maybeArray {
-			//	newRoot = "(" + newRoot + ")"
-		}
-
-		newRoot = freestringRule + " | " + newRoot
-	}
-
-	lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
-	if disableParallelNewLines {
-		lines = append(lines, array)
-	} else {
-		lines = append(lines, arrayNewLines)
-	}
-
-	if maybeArray {
-		if grammarOpts.ExpectStringsAfterJSON {
-			lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
-		} else {
-			lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
-		}
-	} else {
-		if grammarOpts.ExpectStringsAfterJSON {
-			lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
-		} else {
-			lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
-		}
-	}
-
-	return strings.Join(lines, "\n")
-}
-
-func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) string {
-	st, existType := schema["type"]
-	var schemaType string
-	if existType {
-		schemaType = st.(string)
-	}
-	ruleName := name
-	if name == "" {
-		ruleName = "root"
-	}
-	_, oneOfExists := schema["oneOf"]
-	_, anyOfExists := schema["anyOf"]
-	if oneOfExists || anyOfExists {
-		var alternatives []string
-		oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
-		anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
-
-		if oneOfExists {
-			for i, altSchema := range oneOfSchemas {
-				alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
-				alternatives = append(alternatives, alternative)
-			}
-		} else if anyOfExists {
-			for i, altSchema := range anyOfSchemas {
-				alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
-				alternatives = append(alternatives, alternative)
-			}
-		}
-
-		rule := strings.Join(alternatives, " | ")
-		return sc.addRule(ruleName, rule)
-	} else if ref, exists := schema["$ref"].(string); exists {
-		referencedSchema := sc.resolveReference(ref, rootSchema)
-		return sc.visit(referencedSchema, name, rootSchema)
-	} else if constVal, exists := schema["const"]; exists {
-		return sc.addRule(ruleName, sc.formatLiteral(constVal))
-	} else if enumVals, exists := schema["enum"].([]interface{}); exists {
-		var enumRules []string
-		for _, enumVal := range enumVals {
-			enumRule := sc.formatLiteral(enumVal)
-			enumRules = append(enumRules, enumRule)
-		}
-		rule := strings.Join(enumRules, " | ")
-		return sc.addRule(ruleName, rule)
-	} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
-		propOrder := sc.propOrder
-		var propPairs []struct {
-			propName   string
-			propSchema map[string]interface{}
-		}
-
-		for propName, propSchema := range properties {
-			propPairs = append(propPairs, struct {
-				propName   string
-				propSchema map[string]interface{}
-			}{propName: propName, propSchema: propSchema.(map[string]interface{})})
-		}
-
-		sort.Slice(propPairs, func(i, j int) bool {
-			iOrder := propOrder[propPairs[i].propName]
-			jOrder := propOrder[propPairs[j].propName]
-			if iOrder != 0 && jOrder != 0 {
-				return iOrder < jOrder
-			}
-			return propPairs[i].propName < propPairs[j].propName
-		})
-
-		var rule strings.Builder
-		rule.WriteString(`"{" space`)
-
-		for i, propPair := range propPairs {
-			propName := propPair.propName
-			propSchema := propPair.propSchema
-			propRuleName := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
-
-			if i > 0 {
-				rule.WriteString(` "," space`)
-			}
-
-			rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, sc.formatLiteral(propName), propRuleName))
-		}
-
-		rule.WriteString(` "}" space`)
-		return sc.addRule(ruleName, rule.String())
-	} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
-		itemRuleName := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
-		rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
-		return sc.addRule(ruleName, rule)
-	} else {
-		primitiveRule, exists := PRIMITIVE_RULES[schemaType]
-		if !exists {
-			panic(fmt.Sprintf("Unrecognized schema: %v", schema))
-		}
-		if ruleName == "root" {
-			schemaType = "root"
-		}
-		return sc.addRule(schemaType, primitiveRule)
-	}
-}
-func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) map[string]interface{} {
-	if !strings.HasPrefix(ref, "#/$defs/") {
-		panic(fmt.Sprintf("Invalid reference format: %s", ref))
-	}
-
-	defKey := strings.TrimPrefix(ref, "#/$defs/")
-	definitions, exists := rootSchema["$defs"].(map[string]interface{})
-	if !exists {
-		fmt.Println(rootSchema)
-
-		panic("No definitions found in the schema")
-	}
-
-	def, exists := definitions[defKey].(map[string]interface{})
-	if !exists {
-		fmt.Println(definitions)
-
-		panic(fmt.Sprintf("Definition not found: %s", defKey))
-	}
-
-	return def
-}
-
-func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string {
-	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
-	sc.visit(schema, "", schema)
-	return sc.finalizeGrammar(options...)
-}
-
-func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) string {
-	var schema map[string]interface{}
-	_ = json.Unmarshal(b, &schema)
-	return sc.Grammar(schema, options...)
-}
-
-func jsonString(v interface{}) string {
-	b, _ := json.Marshal(v)
-	return string(b)
-}
-
-type FunctionName struct {
-	Const string `json:"const"`
-}
-
-type Argument struct {
-	Type       string                 `json:"type"`
-	Properties map[string]interface{} `json:"properties"`
-}
-
-type Item struct {
-	Type       string                 `json:"type"`
-	Properties map[string]interface{} `json:"properties"`
-}
-
-type JSONFunctionStructure struct {
-	OneOf []Item                 `json:"oneOf,omitempty"`
-	AnyOf []Item                 `json:"anyOf,omitempty"`
-	Defs  map[string]interface{} `json:"$defs,omitempty"`
-}
-
-func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
-	grammarOpts := &GrammarOption{}
-	grammarOpts.Apply(options...)
-
-	dat, _ := json.Marshal(j)
-	return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
-}
diff --git a/pkg/functions/grammars/bnf_rules.go b/pkg/functions/grammars/bnf_rules.go
new file mode 100644
index 00000000..469e187a
--- /dev/null
+++ b/pkg/functions/grammars/bnf_rules.go
@@ -0,0 +1,58 @@
+package grammars
+
+import (
+	"encoding/json"
+	"regexp"
+)
+
+var (
+	PRIMITIVE_RULES = map[string]string{
+		"boolean": `("true" | "false") space`,
+		"number":  `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
+		"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
+		"string": `"\"" (
+			[^"\\] |
+			"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+		  )* "\"" space`,
+		// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
+		// however, if we don't have it, the grammar will be ambiguous and
+		// empirically results are way worse.
+		"freestring": `(
+			[^\x00] |
+			"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+		  )* space`,
+		"null": `"null" space`,
+	}
+
+	INVALID_RULE_CHARS_RE     = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
+	GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
+	GRAMMAR_LITERAL_ESCAPES   = map[string]string{
+		"\r": `\r`,
+		"\n": `\n`,
+		`"`:  `\"`,
+	}
+)
+
+const (
+	SPACE_RULE = `" "?`
+
+	arrayNewLines = `arr  ::=
+  "[\n"  (
+		realvalue
+    (",\n"  realvalue)*
+  )? "]"`
+
+	array = `arr  ::=
+  "["  (
+		realvalue
+    (","  realvalue)*
+  )? "]"`
+)
+
+func jsonString(v interface{}) (string, error) {
+	encoded, err := json.Marshal(v) // JSON text of v; literals are derived from it
+	if err == nil {
+		return string(encoded), nil
+	}
+	return "", err
+}
diff --git a/pkg/functions/grammars/grammars_suite_test.go b/pkg/functions/grammars/grammars_suite_test.go
new file mode 100644
index 00000000..5ac02bc1
--- /dev/null
+++ b/pkg/functions/grammars/grammars_suite_test.go
@@ -0,0 +1,25 @@
+package grammars_test
+
+import (
+	"testing"
+
+	. "github.com/mudler/LocalAI/pkg/functions"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestGrammar(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "Grammar test suite")
+}
+
+// createFunction builds a schema fragment with two entries: field1 holds the
+// constant function name and field2 an object-typed argument description
+// carrying the given properties.
+func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
+	return map[string]interface{}{
+		field1: FunctionName{Const: name},
+		field2: Argument{Type: "object", Properties: properties},
+	}
+}
diff --git a/pkg/functions/grammars/json_schema.go b/pkg/functions/grammars/json_schema.go
new file mode 100644
index 00000000..df4ca6a1
--- /dev/null
+++ b/pkg/functions/grammars/json_schema.go
@@ -0,0 +1,220 @@
+package grammars
+
+// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
+
+import (
+	"encoding/json"
+	"fmt"
+	"sort"
+	"strings"
+)
+
+type JSONSchemaConverter struct {
+	propOrder map[string]int
+	rules     Rules
+}
+
+// NewJSONSchemaConverter creates a converter seeded with the "space" rule.
+// propOrder is a comma-separated list of property names; each name is stored
+// with its zero-based position in that list.
+func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
+	order := make(map[string]int)
+	for position, propName := range strings.Split(propOrder, ",") {
+		// NOTE(review): the first name gets position 0, which visit's sort treats like a missing entry.
+		order[propName] = position
+	}
+
+	return &JSONSchemaConverter{
+		propOrder: order,
+		rules:     map[string]string{"space": SPACE_RULE},
+	}
+}
+
+// formatLiteral JSON-encodes literal, escapes CR, LF and double quotes for the
+// grammar, and wraps the result in double quotes.
+func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) (string, error) {
+	encoded, err := jsonString(literal)
+	if err != nil {
+		return "", err
+	}
+	lookup := func(ch string) string { return GRAMMAR_LITERAL_ESCAPES[ch] }
+	return `"` + GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(encoded, lookup) + `"`, nil
+}
+
+// addRule stores rule under a sanitized form of name and returns the key used.
+// If that key already holds a different rule, the first free "<key><n>" (n = 0, 1, ...) is used instead.
+func (sc *JSONSchemaConverter) addRule(name, rule string) string {
+	base := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
+	key := base
+	if current, taken := sc.rules[base]; taken && current != rule {
+		for suffix := 0; ; suffix++ {
+			key = fmt.Sprintf("%s%d", base, suffix)
+			if _, used := sc.rules[key]; !used {
+				break
+			}
+		}
+	}
+	sc.rules[key] = rule
+	return key
+}
+
+// visit converts schema into grammar rules stored on sc and returns the name
+// of the rule that matches it. An empty name designates the "root" rule;
+// rootSchema is the document against which "$ref" entries are resolved.
+// Malformed nodes (a non-string "type", non-object alternatives or properties)
+// are reported as errors instead of panicking on a failed type assertion.
+func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
+	var schemaType string
+	if st, existType := schema["type"]; existType {
+		typeName, ok := st.(string)
+		if !ok {
+			return "", fmt.Errorf("unsupported schema type %v in schema: %v", st, schema)
+		}
+		schemaType = typeName
+	}
+	ruleName := name
+	if name == "" {
+		ruleName = "root"
+	}
+	_, oneOfExists := schema["oneOf"]
+	_, anyOfExists := schema["anyOf"]
+	if oneOfExists || anyOfExists {
+		// A usable "oneOf" list takes precedence over "anyOf".
+		variants, ok := schema["oneOf"].([]interface{})
+		if !ok {
+			variants, _ = schema["anyOf"].([]interface{})
+		}
+		var alternatives []string
+		for i, altSchema := range variants {
+			alt, ok := altSchema.(map[string]interface{})
+			if !ok {
+				return "", fmt.Errorf("alternative %d of %s is not a schema object: %v", i, ruleName, altSchema)
+			}
+			alternative, err := sc.visit(alt, fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
+			if err != nil {
+				return "", err
+			}
+			alternatives = append(alternatives, alternative)
+		}
+
+		rule := strings.Join(alternatives, " | ")
+		return sc.addRule(ruleName, rule), nil
+	} else if ref, exists := schema["$ref"].(string); exists {
+		referencedSchema, err := sc.resolveReference(ref, rootSchema)
+		if err != nil {
+			return "", err
+		}
+		return sc.visit(referencedSchema, name, rootSchema)
+	} else if constVal, exists := schema["const"]; exists {
+		literal, err := sc.formatLiteral(constVal)
+		if err != nil {
+			return "", err
+		}
+		return sc.addRule(ruleName, literal), nil
+	} else if enumVals, exists := schema["enum"].([]interface{}); exists {
+		var enumRules []string
+		for _, enumVal := range enumVals {
+			enumRule, err := sc.formatLiteral(enumVal)
+			if err != nil {
+				return "", err
+			}
+			enumRules = append(enumRules, enumRule)
+		}
+		rule := strings.Join(enumRules, " | ")
+		return sc.addRule(ruleName, rule), nil
+	} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
+		propOrder := sc.propOrder
+		type property struct {
+			propName   string
+			propSchema map[string]interface{}
+		}
+		var propPairs []property
+		for propName, rawSchema := range properties {
+			propSchema, ok := rawSchema.(map[string]interface{})
+			if !ok {
+				return "", fmt.Errorf("property %q of %s is not a schema object: %v", propName, ruleName, rawSchema)
+			}
+			propPairs = append(propPairs, property{propName: propName, propSchema: propSchema})
+		}
+
+		sort.Slice(propPairs, func(i, j int) bool {
+			iOrder := propOrder[propPairs[i].propName]
+			jOrder := propOrder[propPairs[j].propName]
+			if iOrder != 0 && jOrder != 0 {
+				return iOrder < jOrder
+			}
+			return propPairs[i].propName < propPairs[j].propName
+		})
+
+		var rule strings.Builder
+		rule.WriteString(`"{" space`)
+		for i, prop := range propPairs {
+			propRuleName, err := sc.visit(prop.propSchema, fmt.Sprintf("%s-%s", ruleName, prop.propName), rootSchema)
+			if err != nil {
+				return "", err
+			}
+			lPropName, err := sc.formatLiteral(prop.propName)
+			if err != nil {
+				return "", err
+			}
+			if i > 0 {
+				rule.WriteString(` "," space`)
+			}
+			rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
+		}
+		rule.WriteString(` "}" space`)
+		return sc.addRule(ruleName, rule.String()), nil
+	} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
+		itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
+		if err != nil {
+			return "", err
+		}
+		rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
+		return sc.addRule(ruleName, rule), nil
+	} else {
+		primitiveRule, exists := PRIMITIVE_RULES[schemaType]
+		if !exists {
+			return "", fmt.Errorf("unrecognized schema: %v", schema)
+		}
+		if ruleName == "root" {
+			schemaType = "root"
+		}
+		return sc.addRule(schemaType, primitiveRule), nil
+	}
+}
+// resolveReference returns the schema that ref points to inside
+// rootSchema["$defs"]. Only references of the form "#/$defs/<key>" are supported.
+func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
+	if !strings.HasPrefix(ref, "#/$defs/") {
+		return nil, fmt.Errorf("invalid reference format: %s", ref)
+	}
+
+	defKey := strings.TrimPrefix(ref, "#/$defs/")
+	definitions, exists := rootSchema["$defs"].(map[string]interface{})
+	if !exists {
+		return nil, fmt.Errorf("no definitions found in the schema: %v", rootSchema)
+	}
+	def, exists := definitions[defKey].(map[string]interface{})
+	if !exists {
+		return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
+	}
+	return def, nil
+}
+
+// Grammar registers the freestring rule, walks schema from the root and renders the collected rules.
+func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
+	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
+	if _, err := sc.visit(schema, "", schema); err != nil {
+		return "", err
+	}
+	return sc.rules.ToGrammar(options...), nil
+}
+
+// GrammarFromBytes decodes b as a JSON schema object and converts it with Grammar.
+func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
+	var schema map[string]interface{}
+	if err := json.Unmarshal(b, &schema); err != nil {
+		return "", err
+	}
+	return sc.Grammar(schema, options...)
+}
diff --git a/pkg/functions/grammar_json_schema_test.go b/pkg/functions/grammars/json_schema_test.go
similarity index 89%
rename from pkg/functions/grammar_json_schema_test.go
rename to pkg/functions/grammars/json_schema_test.go
index bf52bd8d..5fc4a602 100644
--- a/pkg/functions/grammar_json_schema_test.go
+++ b/pkg/functions/grammars/json_schema_test.go
@@ -1,24 +1,14 @@
-package functions_test
+package grammars_test
 
 import (
 	"strings"
 
-	"github.com/mudler/LocalAI/pkg/functions"
 	. "github.com/mudler/LocalAI/pkg/functions"
+	. "github.com/mudler/LocalAI/pkg/functions/grammars"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
 
-func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
-	property := map[string]interface{}{}
-	property[field1] = FunctionName{Const: name}
-	property[field2] = Argument{
-		Type:       "object",
-		Properties: properties,
-	}
-	return property
-}
-
 var testFunctions = []Item{
 	{
 		Type: "object",
@@ -245,7 +235,8 @@ root-1-name ::= "\"search\""`
 var _ = Describe("JSON schema grammar tests", func() {
 	Context("JSON", func() {
 		It("generates a valid grammar from JSON schema", func() {
-			grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
+			grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
+			Expect(err).To(BeNil())
 			results := strings.Split(inputResult1, "\n")
 			for _, r := range results {
 				if r != "" {
@@ -255,7 +246,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
 		})
 		It("generates a valid grammar from JSON schema", func() {
-			grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
+			grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
+			Expect(err).To(BeNil())
 			results := strings.Split(inputResult3, "\n")
 			for _, r := range results {
 				if r != "" {
@@ -269,7 +261,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctions}
 
-			grammar := structuredGrammar.Grammar()
+			grammar, err := structuredGrammar.Grammar()
+			Expect(err).To(BeNil())
 			results := strings.Split(inputResult1, "\n")
 			for _, r := range results {
 				if r != "" {
@@ -283,7 +276,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctions}
 
-			grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
+			grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					inputResult2,
@@ -301,7 +295,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
+			grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					inputResult4,
@@ -319,10 +314,11 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar(
-				functions.SetPrefix("suffix"),
-				functions.EnableMaybeArray,
+			grammar, err := structuredGrammar.Grammar(
+				SetPrefix("suffix"),
+				EnableMaybeArray,
 			)
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					rootResult(`"suffix" arr | realvalue`),
@@ -339,7 +335,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"))
+			grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"))
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					rootResult(`"suffix" realvalue`),
@@ -356,7 +353,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString)
+			grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString)
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					rootResult(`( "suffix" realvalue | mixedstring )`),
@@ -373,7 +371,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString, functions.EnableMaybeArray)
+			grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString, EnableMaybeArray)
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					rootResult(`( "suffix" (arr | realvalue) | mixedstring )`),
@@ -392,7 +391,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray)
+			grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray)
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					rootResult(`mixedstring | arr | realvalue`),
@@ -410,7 +410,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructure{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.NoMixedFreeString)
+			grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, NoMixedFreeString)
+			Expect(err).To(BeNil())
 			results := strings.Split(
 				strings.Join([]string{
 					rootResult(`freestring | arr | realvalue`),
@@ -432,7 +433,8 @@ var _ = Describe("JSON schema grammar tests", func() {
 realvalue
 (","  realvalue)*
 )? "]"`
-			grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.DisableParallelNewLines)
+			grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, DisableParallelNewLines)
+			Expect(err).To(BeNil())
 			results := strings.Split(content, "\n")
 			for _, r := range results {
 				if r != "" {
diff --git a/pkg/functions/grammars/llama31_schema.go b/pkg/functions/grammars/llama31_schema.go
new file mode 100644
index 00000000..04b74aa5
--- /dev/null
+++ b/pkg/functions/grammars/llama31_schema.go
@@ -0,0 +1,281 @@
+package grammars
+
+import (
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"sort"
+	"strings"
+)
+
+type LLama31SchemaConverter struct {
+	fnName string
+	rules  Rules
+}
+
+// NewLLama31SchemaConverter creates a converter seeded with the "space" rule.
+// fnName is the property holding the function name; "" falls back to "name".
+func NewLLama31SchemaConverter(fnName string) *LLama31SchemaConverter {
+	converter := &LLama31SchemaConverter{
+		fnName: "name",
+		rules:  map[string]string{"space": SPACE_RULE},
+	}
+	if fnName != "" {
+		converter.fnName = fnName
+	}
+	return converter
+}
+
+var GRAMMAR_LITERAL_ESCAPESLlama = map[string]string{
+	"\r": `\r`,
+	"\n": `\n`,
+}
+
+var GRAMMAR_LITERAL_ESCAPE_RELlama = regexp.MustCompile(`[\r\n]`)
+
+// formatLiteral JSON-encodes literal and escapes CR and LF for the grammar.
+// Unlike formatLiteralQuoted it neither escapes quotes nor adds surrounding ones.
+func (sc *LLama31SchemaConverter) formatLiteral(literal interface{}) (string, error) {
+	encoded, err := jsonString(literal)
+	if err != nil {
+		return "", err
+	}
+	lookup := func(ch string) string { return GRAMMAR_LITERAL_ESCAPESLlama[ch] }
+	return GRAMMAR_LITERAL_ESCAPE_RELlama.ReplaceAllStringFunc(encoded, lookup), nil
+}
+
+// formatLiteralQuoted JSON-encodes literal, escapes CR, LF and double quotes
+// for the grammar, and wraps the result in double quotes.
+func (sc *LLama31SchemaConverter) formatLiteralQuoted(literal interface{}) (string, error) {
+	encoded, err := jsonString(literal)
+	if err != nil {
+		return "", err
+	}
+	lookup := func(ch string) string { return GRAMMAR_LITERAL_ESCAPES[ch] }
+	return `"` + GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(encoded, lookup) + `"`, nil
+}
+
+// addRule stores rule under a sanitized form of name and returns the key used.
+// If that key already holds a different rule, the first free "<key><n>" (n = 0, 1, ...) is used instead.
+func (sc *LLama31SchemaConverter) addRule(name, rule string) string {
+	base := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
+	key := base
+	if current, taken := sc.rules[base]; taken && current != rule {
+		for suffix := 0; ; suffix++ {
+			key = fmt.Sprintf("%s%d", base, suffix)
+			if _, used := sc.rules[key]; !used {
+				break
+			}
+		}
+	}
+	sc.rules[key] = rule
+	return key
+}
+
+// visit converts schema into grammar rules stored on sc and returns the name
+// of the rule that matches it. An empty name designates the "root" rule;
+// rootSchema is the document against which "$ref" entries are resolved.
+//
+// An object whose name has exactly two "-"-separated parts (such as "root-0")
+// is rendered as `"<function=" NAME ">{" ARGS "}</function>"`, where NAME is
+// the rule of the property called sc.fnName; any other object is rendered as
+// a JSON object with its properties in alphabetical order.
+//
+// Malformed nodes (a non-string "type", non-object alternatives or properties)
+// are reported as errors instead of panicking on a failed type assertion.
+func (sc *LLama31SchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
+	var schemaType string
+	if st, existType := schema["type"]; existType {
+		typeName, ok := st.(string)
+		if !ok {
+			return "", fmt.Errorf("unsupported schema type %v in schema: %v", st, schema)
+		}
+		schemaType = typeName
+	}
+	ruleName := name
+	if name == "" {
+		ruleName = "root"
+	}
+	_, oneOfExists := schema["oneOf"]
+	_, anyOfExists := schema["anyOf"]
+	if oneOfExists || anyOfExists {
+		// A usable "oneOf" list takes precedence over "anyOf".
+		variants, ok := schema["oneOf"].([]interface{})
+		if !ok {
+			variants, _ = schema["anyOf"].([]interface{})
+		}
+		var alternatives []string
+		for i, altSchema := range variants {
+			alt, ok := altSchema.(map[string]interface{})
+			if !ok {
+				return "", fmt.Errorf("alternative %d of %s is not a schema object: %v", i, ruleName, altSchema)
+			}
+			alternative, err := sc.visit(alt, fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
+			if err != nil {
+				return "", err
+			}
+			alternatives = append(alternatives, alternative)
+		}
+
+		rule := strings.Join(alternatives, " | ")
+		return sc.addRule(ruleName, rule), nil
+	} else if ref, exists := schema["$ref"].(string); exists {
+		referencedSchema, err := sc.resolveReference(ref, rootSchema)
+		if err != nil {
+			return "", err
+		}
+		return sc.visit(referencedSchema, name, rootSchema)
+	} else if constVal, exists := schema["const"]; exists {
+		// const values go through formatLiteral, which adds no grammar quoting of its own.
+		literal, err := sc.formatLiteral(constVal)
+		if err != nil {
+			return "", err
+		}
+		return sc.addRule(ruleName, literal), nil
+	} else if enumVals, exists := schema["enum"].([]interface{}); exists {
+		var enumRules []string
+		for _, enumVal := range enumVals {
+			enumRule, err := sc.formatLiteralQuoted(enumVal)
+			if err != nil {
+				return "", err
+			}
+			enumRules = append(enumRules, enumRule)
+		}
+		rule := strings.Join(enumRules, " | ")
+		return sc.addRule(ruleName, rule), nil
+	} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
+		// A name such as "root-0" (two "-"-separated parts) marks a function object.
+		baseProperty := len(strings.Split(name, "-")) == 2
+
+		type property struct {
+			propName   string
+			propSchema map[string]interface{}
+		}
+		var propPairs []property
+		for propName, rawSchema := range properties {
+			propSchema, ok := rawSchema.(map[string]interface{})
+			if !ok {
+				return "", fmt.Errorf("property %q of %s is not a schema object: %v", propName, ruleName, rawSchema)
+			}
+			propPairs = append(propPairs, property{propName: propName, propSchema: propSchema})
+		}
+
+		sort.Slice(propPairs, func(i, j int) bool {
+			return propPairs[i].propName < propPairs[j].propName
+		})
+
+		var rule strings.Builder
+		if baseProperty {
+			rule.WriteString(`"<function="`)
+
+			// Locate the property that carries the function name.
+			nameIdx := -1
+			for i, prop := range propPairs {
+				if prop.propName == sc.fnName {
+					nameIdx = i
+					break
+				}
+			}
+			if nameIdx < 0 {
+				return "", fmt.Errorf("no function name found in the schema: %v", schema)
+			}
+			nameSchema := propPairs[nameIdx].propSchema
+			// Remove it so that only the remaining properties are rendered as arguments.
+			propPairs = append(propPairs[:nameIdx], propPairs[nameIdx+1:]...)
+
+			propRuleName, err := sc.visit(nameSchema, fmt.Sprintf("%s-%s", ruleName, sc.fnName), rootSchema)
+			if err != nil {
+				return "", err
+			}
+			rule.WriteString(fmt.Sprintf(` %s ">{" `, propRuleName))
+
+			for i, prop := range propPairs {
+				argRuleName, err := sc.visit(prop.propSchema, fmt.Sprintf("%s-%s", ruleName, prop.propName), rootSchema)
+				if err != nil {
+					return "", err
+				}
+				if i > 0 {
+					// Keep consecutive rule names apart; glued together they would name an undefined rule.
+					rule.WriteString(" ")
+				}
+				rule.WriteString(argRuleName)
+			}
+
+			rule.WriteString(` "}</function>"`)
+		} else {
+			rule.WriteString(`"{" space`)
+
+			for i, prop := range propPairs {
+				propRuleName, err := sc.visit(prop.propSchema, fmt.Sprintf("%s-%s", ruleName, prop.propName), rootSchema)
+				if err != nil {
+					return "", err
+				}
+				lPropName, err := sc.formatLiteralQuoted(prop.propName)
+				if err != nil {
+					return "", err
+				}
+				if i > 0 {
+					rule.WriteString(` "," space`)
+				}
+
+				rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
+			}
+
+			rule.WriteString(` "}" space`)
+		}
+
+		return sc.addRule(ruleName, rule.String()), nil
+	} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
+		itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
+		if err != nil {
+			return "", err
+		}
+		rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
+		return sc.addRule(ruleName, rule), nil
+	} else {
+		primitiveRule, exists := PRIMITIVE_RULES[schemaType]
+		if !exists {
+			return "", fmt.Errorf("unrecognized schema: %v", schema)
+		}
+		if ruleName == "root" {
+			schemaType = "root"
+		}
+		return sc.addRule(schemaType, primitiveRule), nil
+	}
+}
+// resolveReference returns the schema that ref points to inside
+// rootSchema["$defs"]. Only references of the form "#/$defs/<key>" are supported.
+func (sc *LLama31SchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
+	if !strings.HasPrefix(ref, "#/$defs/") {
+		return nil, fmt.Errorf("invalid reference format: %s", ref)
+	}
+
+	defKey := strings.TrimPrefix(ref, "#/$defs/")
+	definitions, exists := rootSchema["$defs"].(map[string]interface{})
+	if !exists {
+		return nil, fmt.Errorf("no definitions found in the schema: %v", rootSchema)
+	}
+	def, exists := definitions[defKey].(map[string]interface{})
+	if !exists {
+		return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
+	}
+	return def, nil
+}
+
+// Grammar registers the freestring rule, walks schema from the root and renders the collected rules.
+func (sc *LLama31SchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
+	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
+	if _, err := sc.visit(schema, "", schema); err != nil {
+		return "", err
+	}
+	return sc.rules.ToGrammar(options...), nil
+}
+
+// GrammarFromBytes decodes b as a JSON schema object and converts it with Grammar.
+func (sc *LLama31SchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
+	var schema map[string]interface{}
+	if err := json.Unmarshal(b, &schema); err != nil {
+		return "", err
+	}
+	return sc.Grammar(schema, options...)
+}
diff --git a/pkg/functions/grammars/llama31_schema_test.go b/pkg/functions/grammars/llama31_schema_test.go
new file mode 100644
index 00000000..84d09bd5
--- /dev/null
+++ b/pkg/functions/grammars/llama31_schema_test.go
@@ -0,0 +1,76 @@
+package grammars_test
+
+import (
+	"strings"
+
+	. "github.com/mudler/LocalAI/pkg/functions/grammars"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+const (
+	testllama31Input1 = `
+	{
+		"oneOf": [
+			{
+				"type": "object",
+				"properties": {
+					"function": {"const": "create_event"},
+					"arguments": {
+						"type": "object",
+						"properties": {
+							"title": {"type": "string"},
+							"date": {"type": "string"},
+							"time": {"type": "string"}
+						}
+					}
+				}
+			},
+			{
+				"type": "object",
+				"properties": {
+					"function": {"const": "search"},
+					"arguments": {
+						"type": "object",
+						"properties": {
+							"query": {"type": "string"}
+						}
+					}
+				}
+			}
+		]
+	}`
+	// Expected grammar. Each alternative has the shape <function=example_function_name>{{"example_name": "example_value"}}</function>
+	testllama31inputResult1 = `root-0-function ::= "create_event"
+freestring ::= (
+		[^"\\] |
+		"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+  )* space
+root-0 ::= "<function=" root-0-function ">{" root-0-arguments "}</function>"
+root-1-arguments ::= "{" space "\"query\"" space ":" space string "}" space
+root ::= root-0 | root-1
+space ::= " "?
+root-0-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
+root-1 ::= "<function=" root-1-function ">{" root-1-arguments "}</function>"
+string ::= "\"" (
+	[^"\\] |
+	"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+)* "\"" space
+root-1-function ::= "search"`
+)
+
+// Rules are matched line by line because Rules.ToGrammar emits them in map iteration order.
+var _ = Describe("JSON schema grammar tests", func() {
+	Context("JSON", func() {
+		It("generates a valid grammar from JSON schema", func() {
+			grammar, err := NewLLama31SchemaConverter("function").GrammarFromBytes([]byte(testllama31Input1))
+			Expect(err).ToNot(HaveOccurred())
+			for _, line := range strings.Split(testllama31inputResult1, "\n") {
+				if line != "" {
+					Expect(grammar).To(ContainSubstring(line))
+				}
+			}
+			Expect(strings.Count(testllama31inputResult1, "\n")).To(Equal(strings.Count(grammar, "\n")))
+		})
+	})
+})
diff --git a/pkg/functions/options.go b/pkg/functions/grammars/options.go
similarity index 76%
rename from pkg/functions/options.go
rename to pkg/functions/grammars/options.go
index 3a341a43..07c6c951 100644
--- a/pkg/functions/options.go
+++ b/pkg/functions/grammars/options.go
@@ -1,4 +1,4 @@
-package functions
+package grammars
 
 type GrammarOption struct {
 	PropOrder               string
@@ -8,6 +8,9 @@ type GrammarOption struct {
 	MaybeString             bool
 	NoMixedFreeString       bool
 	ExpectStringsAfterJSON  bool
+
+	FunctionName string
+	SchemaType   SchemaConverterType
 }
 
 func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
@@ -48,3 +51,15 @@ func SetPropOrder(order string) func(*GrammarOption) {
 		o.PropOrder = order
 	}
 }
+
+// WithSchemaType returns an option that stores schemaType in
+// GrammarOption.SchemaType.
+func WithSchemaType(schemaType SchemaConverterType) func(*GrammarOption) {
+	return func(opt *GrammarOption) { opt.SchemaType = schemaType }
+}
+
+// WithFunctionName returns an option that stores name in
+// GrammarOption.FunctionName.
+func WithFunctionName(name string) func(*GrammarOption) {
+	return func(opt *GrammarOption) { opt.FunctionName = name }
+}
diff --git a/pkg/functions/grammars/rules.go b/pkg/functions/grammars/rules.go
new file mode 100644
index 00000000..84fc8a25
--- /dev/null
+++ b/pkg/functions/grammars/rules.go
@@ -0,0 +1,93 @@
+package grammars
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/mudler/LocalAI/pkg/utils"
+)
+
+// Rules maps a grammar rule name to the body of that rule.
+type Rules map[string]string
+
+// ToGrammar renders every rule as a "name ::= body" line and joins the lines with
+// newlines. Rules are written in map iteration order, so the line order is not stable.
+//
+// If a prefix, MaybeArray or MaybeString is set, the computed "root" rule is written
+// as "realvalue" and a new "root" wrapping it is appended, followed by the array
+// (or arrayNewLines) rule text and a "mixedstring" rule.
+func (rules Rules) ToGrammar(options ...func(*GrammarOption)) string {
+	opts := &GrammarOption{}
+	opts.Apply(options...)
+
+	// Any of these options requires a synthetic root around the computed one.
+	wrapRoot := opts.MaybeArray || opts.MaybeString || opts.Prefix != ""
+
+	var out []string
+	for name, body := range rules {
+		if wrapRoot && name == "root" {
+			name = "realvalue"
+		}
+		out = append(out, fmt.Sprintf("%s ::= %s", name, body))
+	}
+
+	if !wrapRoot {
+		return strings.Join(out, "\n")
+	}
+
+	root := "realvalue"
+	if opts.MaybeArray {
+		root = "arr | realvalue"
+	}
+
+	// Rule offered as the free-text alternative when MaybeString is set;
+	// NoMixedFreeString swaps "mixedstring" for plain "freestring".
+	textRule := "mixedstring"
+	if opts.NoMixedFreeString {
+		textRule = "freestring"
+	}
+
+	switch {
+	case opts.Prefix != "":
+		// The prefix ends up inside a quoted grammar literal, so it goes through
+		// utils.EscapeNewLines first.
+		literal := "\"" + utils.EscapeNewLines(opts.Prefix) + "\""
+		if opts.MaybeString {
+			if opts.MaybeArray {
+				// Group "arr | realvalue" so the prefix applies to both alternatives.
+				root = "(" + root + ")"
+			}
+			root = "( " + literal + " " + root + " | " + textRule + " ) "
+		} else {
+			// NOTE(review): root is not parenthesised here, so with MaybeArray the
+			// prefix seems to bind only to "arr" - confirm this is intended.
+			root = literal + " " + root
+		}
+	case opts.MaybeString:
+		root = textRule + " | " + root
+	}
+	out = append(out, "root ::= "+root)
+
+	// The array rule is appended whenever the root is wrapped, even without MaybeArray.
+	if opts.DisableParallelNewLines {
+		out = append(out, array)
+	} else {
+		out = append(out, arrayNewLines)
+	}
+
+	// Pick the "mixedstring" rule matching MaybeArray and ExpectStringsAfterJSON.
+	var mixed string
+	switch {
+	case opts.MaybeArray && opts.ExpectStringsAfterJSON:
+		mixed = `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`
+	case opts.MaybeArray:
+		mixed = `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`
+	case opts.ExpectStringsAfterJSON:
+		mixed = `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`
+	default:
+		mixed = `mixedstring ::= freestring | freestring realvalue | realvalue`
+	}
+	out = append(out, mixed)
+
+	return strings.Join(out, "\n")
+}
diff --git a/pkg/functions/grammars/types.go b/pkg/functions/grammars/types.go
new file mode 100644
index 00000000..1fe6444a
--- /dev/null
+++ b/pkg/functions/grammars/types.go
@@ -0,0 +1,33 @@
+package grammars
+
+// SchemaConverterType identifies a schema converter flavour (JSON schema or LLama 3.1).
+type SchemaConverterType int
+
+const (
+	JSONSchema SchemaConverterType = iota
+	LLama31Schema
+)
+
+const (
+	LlamaType string = "llama3.1"
+	JSONType  string = "json"
+)
+
+// String returns the name of s (JSONType or LlamaType), or "unknown" otherwise.
+func (s SchemaConverterType) String() string {
+	if s == JSONSchema {
+		return JSONType
+	}
+	if s == LLama31Schema {
+		return LlamaType
+	}
+	return "unknown"
+}
+
+// NewType maps a name to its SchemaConverterType; unrecognized names yield JSONSchema.
+func NewType(t string) SchemaConverterType {
+	if t == LlamaType {
+		return LLama31Schema
+	}
+	return JSONSchema
+}
diff --git a/pkg/functions/json_mode.go b/pkg/functions/json_mode.go
new file mode 100644
index 00000000..46361b74
--- /dev/null
+++ b/pkg/functions/json_mode.go
@@ -0,0 +1,28 @@
+package functions
+
+// JSONBNF is a BNF-style grammar for JSON text; its root rule accepts a JSON
+// object only.
+const JSONBNF = `root   ::= object
+value  ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+            string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array  ::=
+  "[" ws (
+            value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+ws ::= ([ \t\n] ws)?`
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index 8e848a60..f5593690 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -7,6 +7,7 @@ import (
 	"regexp"
 	"strings"
 
+	"github.com/mudler/LocalAI/pkg/functions/grammars"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 )
@@ -22,7 +23,9 @@ type GrammarConfig struct {
 	MixedMode bool `yaml:"mixed_mode"`
 
 	// NoMixedFreeString disables the mixed mode for free strings
-	// In this way if the LLM selects a free string, it won't be mixed necessarly with JSON objects
+	// In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects.
+	// For example, if enabled, the LLM returns either a JSON object or a free string, but not a mix of both.
+	// If disabled (default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but it's not going to be strict.
 	NoMixedFreeString bool `yaml:"no_mixed_free_string"`
 
 	// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
@@ -39,6 +42,10 @@ type GrammarConfig struct {
 	// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
 	// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
 	PropOrder string `yaml:"properties_order"`
+
+	// SchemaType can be configured to use a specific schema type to force the grammar
+	// available : json, llama3.1
+	SchemaType string `yaml:"schema_type"`
 }
 
 // FunctionsConfig is the configuration for the tool/function call.
@@ -92,28 +99,36 @@ type FuncCallResults struct {
 	Arguments string
 }
 
-func (g GrammarConfig) Options() []func(o *GrammarOption) {
-	opts := []func(o *GrammarOption){}
-	if g.MixedMode {
-		opts = append(opts, EnableMaybeString)
+// GrammarOptions converts the grammar settings and FunctionNameKey of g into a list of grammars options.
+func (g FunctionsConfig) GrammarOptions() []func(o *grammars.GrammarOption) {
+	opts := []func(o *grammars.GrammarOption){}
+	if g.GrammarConfig.MixedMode {
+		opts = append(opts, grammars.EnableMaybeString)
 	}
-	if g.ParallelCalls {
-		opts = append(opts, EnableMaybeArray)
+	if g.GrammarConfig.ParallelCalls {
+		opts = append(opts, grammars.EnableMaybeArray)
 	}
-	if g.DisableParallelNewLines {
-		opts = append(opts, DisableParallelNewLines)
+	if g.GrammarConfig.DisableParallelNewLines {
+		opts = append(opts, grammars.DisableParallelNewLines)
 	}
-	if g.Prefix != "" {
-		opts = append(opts, SetPrefix(g.Prefix))
+	if g.GrammarConfig.Prefix != "" {
+		opts = append(opts, grammars.SetPrefix(g.GrammarConfig.Prefix))
 	}
-	if g.NoMixedFreeString {
-		opts = append(opts, NoMixedFreeString)
+	if g.GrammarConfig.NoMixedFreeString {
+		opts = append(opts, grammars.NoMixedFreeString)
 	}
-	if g.ExpectStringsAfterJSON {
-		opts = append(opts, ExpectStringsAfterJSON)
+	if g.GrammarConfig.ExpectStringsAfterJSON {
+		opts = append(opts, grammars.ExpectStringsAfterJSON)
 	}
 
-	opts = append(opts, SetPropOrder(g.PropOrder))
+	if g.GrammarConfig.SchemaType != "" {
+		opts = append(opts, grammars.WithSchemaType(grammars.NewType(g.GrammarConfig.SchemaType)))
+	}
+	if g.FunctionNameKey != "" {
+		opts = append(opts, grammars.WithFunctionName(g.FunctionNameKey))
+	}
+
+	opts = append(opts, grammars.SetPropOrder(g.GrammarConfig.PropOrder))
 	return opts
 }
 
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 901b4d99..88a08f28 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 					grpcProcess = p
 					foundCUDA = true
 				} else {
-					log.Info().Msgf("GPU device found but no CUDA backend present")
+					log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
 				}
 			}
 			if strings.Contains(gpu.String(), "amd") {
@@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 					grpcProcess = p
 					foundAMDGPU = true
 				} else {
-					log.Info().Msgf("GPU device found but no HIPBLAS backend present")
+					log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
 				}
 			}
 			if strings.Contains(gpu.String(), "intel") {
@@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 					grpcProcess = p
 					foundIntelGPU = true
 				} else {
-					log.Info().Msgf("GPU device found but no Intel backend present")
+					log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
 				}
 			}
 		}