Mirror of https://github.com/mudler/LocalAI.git (synced 2025-06-27 13:15:00 +00:00)

Merge branch 'master' into default_miro

Commit 3eb1c1c689: 197 changed files with 4171 additions and 1305 deletions
.devcontainer-scripts/postcreate.sh (new file, 17 lines)
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+cd /workspace
+
+# Get the files into the volume without a bind mount
+if [ ! -d ".git" ]; then
+    git clone https://github.com/mudler/LocalAI.git .
+else
+    git fetch
+fi
+
+echo "Standard Post-Create script completed."
+
+if [ -f "/devcontainer-customization/postcreate.sh" ]; then
+    echo "Launching customization postcreate.sh"
+    bash "/devcontainer-customization/postcreate.sh"
+fi
.devcontainer-scripts/poststart.sh (new file, 16 lines)
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+cd /workspace
+
+# Grab the pre-stashed backend assets to avoid build issues
+cp -r /build/backend-assets /workspace/backend-assets
+
+# Ensures generated source files are present upon load
+make prepare
+
+echo "Standard Post-Start script completed."
+
+if [ -f "/devcontainer-customization/poststart.sh" ]; then
+    echo "Launching customization poststart.sh"
+    bash "/devcontainer-customization/poststart.sh"
+fi
.devcontainer-scripts/utils.sh (new file, 49 lines)
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# This file contains some really simple functions that are useful when building up customization scripts.
+
+
+# Checks if the git config has a user registered - and sets it up if not.
+#
+# Param 1: name
+# Param 2: email
+#
+config_user() {
+    local gcn=$(git config --global user.name)
+    if [ -z "${gcn}" ]; then
+        echo "Setting up git user / remote"
+        git config --global user.name "$1"
+        git config --global user.email "$2"
+
+    fi
+}
+
+# Checks if the git remote is configured - and sets it up if not. Fetches either way.
+#
+# Param 1: remote name
+# Param 2: remote url
+#
+config_remote() {
+    local gr=$(git remote -v | grep $1)
+    if [ -z "${gr}" ]; then
+        git remote add $1 $2
+    fi
+    git fetch $1
+}
+
+# Setup special .ssh files
+#
+# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
+setup_ssh() {
+    local files=("$@")
+    for file in "${files[@]}"; do
+        local cfile="/devcontainer-customization/${file}"
+        local hfile="${HOME}/.ssh/${file}"
+        if [ ! -f "${hfile}" ]; then
+            echo "copying ${file}"
+            cp "${cfile}" "${hfile}"
+            chmod 600 "${hfile}"
+        fi
+    done
+    ls ~/.ssh
+}
.devcontainer/customization/README.md (new file, 25 lines)
@@ -0,0 +1,25 @@
+Place any additional resources your environment requires in this directory
+
+Script hooks are currently called for:
+`postcreate.sh` and `poststart.sh`
+
+If files with those names exist here, they will be called at the end of the normal script.
+
+This is a good place to set things like `git config --global user.name`, and to handle any other files that are mounted via this directory.
+
+To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
+
+```
+#!/bin/bash
+
+source "/.devcontainer-scripts/utils.sh"
+
+sshfiles=("config" "key.pub")
+
+setup_ssh "${sshfiles[@]}"
+
+config_user "YOUR NAME" "YOUR EMAIL"
+
+config_remote "REMOTE NAME" "REMOTE URL"
+
+```
.devcontainer/devcontainer.json (new file, 24 lines)
@@ -0,0 +1,24 @@
+{
+    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
+    "name": "LocalAI",
+    "workspaceFolder": "/workspace",
+    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
+    "service": "api",
+    "shutdownAction": "stopCompose",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "golang.go",
+                "ms-vscode.makefile-tools",
+                "ms-azuretools.vscode-docker",
+                "ms-python.python",
+                "ms-python.debugpy",
+                "wayou.vscode-todo-highlight",
+                "waderyan.gitblame"
+            ]
+        }
+    },
+    "forwardPorts": [8080, 3000],
+    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
+    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
+}
.devcontainer/docker-compose-devcontainer.yml (new file, 48 lines)
@@ -0,0 +1,48 @@
+services:
+  api:
+    build:
+      context: ..
+      dockerfile: Dockerfile
+      target: devcontainer
+      args:
+        - FFMPEG=true
+        - IMAGE_TYPE=extras
+        - GO_TAGS=stablediffusion p2p tts
+    env_file:
+      - ../.env
+    ports:
+      - 8080:8080
+    volumes:
+      - localai_workspace:/workspace
+      - ../models:/host-models
+      - ./customization:/devcontainer-customization
+    command: /bin/sh -c "while sleep 1000; do :; done"
+    cap_add:
+      - SYS_PTRACE
+    security_opt:
+      - seccomp:unconfined
+  prometheus:
+    image: prom/prometheus
+    container_name: prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+    ports:
+      - 9090:9090
+    restart: unless-stopped
+    volumes:
+      - ./prometheus:/etc/prometheus
+      - prom_data:/prometheus
+  grafana:
+    image: grafana/grafana
+    container_name: grafana
+    ports:
+      - 3000:3000
+    restart: unless-stopped
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      - GF_SECURITY_ADMIN_PASSWORD=grafana
+    volumes:
+      - ./grafana:/etc/grafana/provisioning/datasources
+volumes:
+  prom_data:
+  localai_workspace:
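The api service deliberately idles (`while sleep 1000; do :; done`) so the editor can attach and the developer drives builds interactively inside the container. A minimal, hedged sketch of driving the same stack by hand from the repository root, without VS Code (these exact commands are not part of this change):

```
# Sketch: bring the devcontainer stack up manually and open a shell in the api service.
docker compose -f .devcontainer/docker-compose-devcontainer.yml up -d --build
docker compose -f .devcontainer/docker-compose-devcontainer.yml exec api bash
```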
.devcontainer/grafana/datasource.yml (new file, 10 lines)
@@ -0,0 +1,10 @@
+
+apiVersion: 1
+
+datasources:
+- name: Prometheus
+  type: prometheus
+  url: http://prometheus:9090
+  isDefault: true
+  access: proxy
+  editable: true
.devcontainer/prometheus/prometheus.yml (new file, 21 lines)
@@ -0,0 +1,21 @@
+global:
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  evaluation_interval: 15s
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets: []
+      scheme: http
+      timeout: 10s
+      api_version: v1
+scrape_configs:
+  - job_name: prometheus
+    honor_timestamps: true
+    scrape_interval: 15s
+    scrape_timeout: 10s
+    metrics_path: /metrics
+    scheme: http
+    static_configs:
+      - targets:
+          - localhost:9090
@@ -1,6 +1,7 @@
 .idea
 .github
 .vscode
+.devcontainer
 models
 examples/chatbot-ui/models
 examples/rwkv/models
.env (3 changed lines)
@@ -79,6 +79,9 @@
 ### Enable to run parallel requests
 # LOCALAI_PARALLEL_REQUESTS=true
 
+# Enable to allow p2p mode
+# LOCALAI_P2P=true
+
 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time
.github/bump_deps.sh (vendored, 13 changed lines)
@@ -6,4 +6,17 @@ VAR=$3
 
 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
 
+# Read $VAR from Makefile (only first match)
+set +e
+CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
+set -e
+
 sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
+
+if [ -z "$CURRENT_COMMIT" ]; then
+    echo "Could not find $VAR in Makefile."
+    exit 0
+fi
+
+echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
+echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
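For reference, a hedged example of invoking the updated script by hand. The workflow below calls it with matrix values; the repository, branch, and Makefile variable here are illustrative choices matching pins this repository already carries:

```
# Sketch: refresh the pinned whisper.cpp commit the same way the workflow does.
# Side effects: rewrites WHISPER_CPP_VERSION?= in the Makefile and writes
# WHISPER_CPP_VERSION_message.txt and WHISPER_CPP_VERSION_commit.txt.
bash .github/bump_deps.sh ggerganov/whisper.cpp master WHISPER_CPP_VERSION
```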
.github/dependabot.yml (vendored, 4 changed lines)
@@ -67,10 +67,6 @@ updates:
     directory: "/backend/python/parler-tts"
     schedule:
       interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/petals"
-    schedule:
-      interval: "weekly"
   - package-ecosystem: "pip"
     directory: "/backend/python/rerankers"
     schedule:
.github/workflows/bump_deps.yaml (vendored, 17 changed lines)
@@ -40,17 +40,30 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Bump dependencies 🔧
+      id: bump
      run: |
         bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
+        {
+          echo 'message<<EOF'
+          cat "${{ matrix.variable }}_message.txt"
+          echo EOF
+        } >> "$GITHUB_OUTPUT"
+        {
+          echo 'commit<<EOF'
+          cat "${{ matrix.variable }}_commit.txt"
+          echo EOF
+        } >> "$GITHUB_OUTPUT"
+        rm -rfv ${{ matrix.variable }}_message.txt
+        rm -rfv ${{ matrix.variable }}_commit.txt
     - name: Create Pull Request
       uses: peter-evans/create-pull-request@v6
       with:
         token: ${{ secrets.UPDATE_BOT_TOKEN }}
         push-to-fork: ci-forks/LocalAI
         commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-        title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
+        title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
         branch: "update/${{ matrix.variable }}"
-        body: Bump of ${{ matrix.repository }} version
+        body: ${{ steps.bump.outputs.message }}
         signoff: true
.github/workflows/deploy-explorer.yaml (vendored, new file, 64 lines)
@@ -0,0 +1,64 @@
+name: Explorer deployment
+
+on:
+  push:
+    branches:
+      - master
+    tags:
+      - 'v*'
+
+concurrency:
+  group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
+
+jobs:
+  build-linux:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          make protogen-go
+      - name: Build api
+        run: |
+          CGO_ENABLED=0 make build-api
+      - name: rm
+        uses: appleboy/ssh-action@v1.0.3
+        with:
+          host: ${{ secrets.EXPLORER_SSH_HOST }}
+          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
+          key: ${{ secrets.EXPLORER_SSH_KEY }}
+          port: ${{ secrets.EXPLORER_SSH_PORT }}
+          script: |
+            sudo rm -rf local-ai/ || true
+      - name: copy file via ssh
+        uses: appleboy/scp-action@v0.1.7
+        with:
+          host: ${{ secrets.EXPLORER_SSH_HOST }}
+          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
+          key: ${{ secrets.EXPLORER_SSH_KEY }}
+          port: ${{ secrets.EXPLORER_SSH_PORT }}
+          source: "local-ai"
+          overwrite: true
+          rm: true
+          target: ./local-ai
+      - name: restarting
+        uses: appleboy/ssh-action@v1.0.3
+        with:
+          host: ${{ secrets.EXPLORER_SSH_HOST }}
+          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
+          key: ${{ secrets.EXPLORER_SSH_KEY }}
+          port: ${{ secrets.EXPLORER_SSH_PORT }}
+          script: |
+            sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
+            sudo systemctl restart local-ai
.github/workflows/test-extra.yml (vendored, 26 changed lines)
@@ -168,32 +168,6 @@ jobs:
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
 
-
-
-  # tests-petals:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v4
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-  #     - name: Test petals
-  #       run: |
-  #         make --jobs=5 --output-sync=target -C backend/python/petals
-  #         make --jobs=5 --output-sync=target -C backend/python/petals test
-
-
-
   # tests-bark:
   #   runs-on: ubuntu-latest
   #   steps:
.gitignore (vendored, 3 changed lines)
@@ -54,3 +54,6 @@ docs/static/gallery.html
 
 # backend virtual environments
 **/venv
+
+# per-developer customization files for the development container
+.devcontainer/customization/*
.vscode/launch.json (vendored, 21 changed lines)
@@ -3,12 +3,12 @@
     "configurations": [
         {
             "name": "Python: Current File",
-            "type": "python",
+            "type": "debugpy",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
             "justMyCode": false,
-            "cwd": "${workspaceFolder}/examples/langchain-chroma",
+            "cwd": "${fileDirname}",
             "env": {
                 "OPENAI_API_BASE": "http://localhost:8080/v1",
                 "OPENAI_API_KEY": "abc"
@@ -19,15 +19,16 @@
             "type": "go",
             "request": "launch",
             "mode": "debug",
-            "program": "${workspaceFolder}/main.go",
-            "args": [
-                "api"
-            ],
+            "program": "${workspaceRoot}",
+            "args": [],
             "env": {
-                "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
-                "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
-                "DEBUG": "true"
-            }
+                "LOCALAI_LOG_LEVEL": "debug",
+                "LOCALAI_P2P": "true",
+                "LOCALAI_FEDERATED": "true"
+            },
+            "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
+            "envFile": "${workspaceFolder}/.env",
+            "cwd": "${workspaceRoot}"
         }
     ]
 }
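The reworked Go launch configuration now builds the whole module with the `stablediffusion p2p tts` tags and sets the new LOCALAI_* variables. A rough, hedged command-line equivalent (without the debugger, and assuming sources and generated files are already in place, e.g. via `make prepare`):

```
# Sketch: approximate the updated Go debug configuration from a plain terminal.
LOCALAI_LOG_LEVEL=debug LOCALAI_P2P=true LOCALAI_FEDERATED=true \
  go run -tags "stablediffusion p2p tts" .
```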
Dockerfile (94 changed lines)
@@ -8,12 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core
 
 USER root
 
-ARG GO_VERSION=1.22.5
+ARG GO_VERSION=1.22.6
 ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 
 RUN apt-get update && \
@@ -30,7 +30,7 @@ RUN apt-get update && \
 
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
+ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
 
 # Install grpc compilers
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
@@ -39,15 +39,18 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
 
+RUN test -n "$TARGETARCH" \
+    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
+
 # Use the variables in subsequent instructions
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"
 
 # Cuda
-ENV PATH /usr/local/cuda/bin:${PATH}
+ENV PATH=/usr/local/cuda/bin:${PATH}
 
 # HipBLAS requirements
-ENV PATH /opt/rocm/bin:${PATH}
+ENV PATH=/opt/rocm/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
 RUN apt-get update && \
@@ -62,9 +65,6 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 
 WORKDIR /build
 
-RUN test -n "$TARGETARCH" \
-    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
-
 ###################################
 ###################################
@@ -81,7 +81,7 @@ RUN apt-get update && \
         espeak \
         python3-pip \
         python-is-python3 \
-        python3-dev \
+        python3-dev llvm \
         python3-venv && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/* && \
@@ -217,13 +217,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 ###################################
 ###################################
 
-# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
-# Adjustments to the build process should likely be made here.
-FROM requirements-drivers AS builder
+# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
+FROM requirements-drivers AS builder-base
 
 ARG GO_TAGS="stablediffusion tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
+ARG LD_FLAGS="-s -w"
 
 ENV GRPC_BACKENDS=${GRPC_BACKENDS}
 ENV GO_TAGS=${GO_TAGS}
@@ -231,14 +232,12 @@ ENV MAKEFLAGS=${MAKEFLAGS}
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
+ENV LD_FLAGS=${LD_FLAGS}
+
+RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
 
 WORKDIR /build
 
-COPY . .
-COPY .git .
-RUN echo "GO_TAGS: $GO_TAGS"
-
-RUN make prepare
-
 # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
 # but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
@@ -256,9 +255,30 @@ RUN <<EOT bash
     fi
 EOT
 
+###################################
+###################################
+
+# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
+# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
+FROM builder-base AS builder-sd
+
+COPY . .
+COPY .git .
+
+RUN make prepare
+
 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 
+###################################
+###################################
+
+# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
+# Adjustments to the build process should likely be made here.
+FROM builder-sd AS builder
+
 # Install the pre-built GRPC
 COPY --from=grpc /opt/grpc /usr/local
@@ -276,6 +296,41 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
 ###################################
 ###################################
 
+# The devcontainer target is not used on CI. It is a target for developers to use locally -
+# rather than copying files it mounts them locally and leaves building to the developer
+
+FROM builder-base AS devcontainer
+
+ARG FFMPEG
+
+COPY --from=grpc /opt/grpc /usr/local
+
+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
+
+COPY .devcontainer-scripts /.devcontainer-scripts
+
+# Add FFmpeg
+RUN if [ "${FFMPEG}" = "true" ]; then \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ffmpeg && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ssh less && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN go install github.com/go-delve/delve/cmd/dlv@latest
+
+RUN go install github.com/mikefarah/yq/v4@latest
+
+###################################
+###################################
+
 # This is the final target. The result of this target will be the image uploaded to the registry.
 # If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
 FROM requirements-drivers
@@ -326,7 +381,7 @@ COPY --from=builder /build/local-ai ./
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 
 # do not let stablediffusion rebuild (requires an older version of absl)
-COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
 
 # Change the shell to bash so we can use [[ tests below
 SHELL ["/bin/bash", "-c"]
@@ -356,9 +411,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
     if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/openvoice \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/petals \
-    ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/sentencetransformers \
     ; fi && \
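With this change the build is staged as builder-base, then builder-sd, then builder, plus a separate devcontainer target that the compose file above consumes. A hedged sketch of building only the devcontainer stage by hand (the image tag is illustrative; BuildKit is assumed so TARGETARCH is populated automatically):

```
# Sketch: build just the devcontainer stage manually; docker compose normally does this.
docker buildx build \
  --target devcontainer \
  --build-arg FFMPEG=true \
  --build-arg IMAGE_TYPE=extras \
  --build-arg GO_TAGS="stablediffusion p2p tts" \
  -t localai-devcontainer:latest .
```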
69
Makefile
69
Makefile
|
@ -8,11 +8,7 @@ DETECT_LIBS?=true
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=01245f5b1629075543bc4478418c7d72a0b4b3c7
|
CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
|
||||||
|
|
||||||
# gpt4all version
|
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
|
||||||
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
|
||||||
|
|
||||||
# go-rwkv version
|
# go-rwkv version
|
||||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
|
@ -20,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
|
WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
|
@ -190,7 +186,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||||
|
@ -253,18 +248,6 @@ sources/go-piper:
|
||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
## GPT4ALL
|
|
||||||
sources/gpt4all:
|
|
||||||
mkdir -p sources/gpt4all
|
|
||||||
cd sources/gpt4all && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(GPT4ALL_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(GPT4ALL_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
|
||||||
|
|
||||||
## RWKV
|
## RWKV
|
||||||
sources/go-rwkv.cpp:
|
sources/go-rwkv.cpp:
|
||||||
|
@ -318,7 +301,7 @@ sources/whisper.cpp:
|
||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||||
|
@ -328,7 +311,6 @@ replace:
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
|
@ -339,7 +321,6 @@ dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
|
@ -349,7 +330,6 @@ prepare-sources: get-sources replace
|
||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
|
||||||
$(MAKE) -C sources/go-rwkv.cpp clean
|
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
$(MAKE) -C sources/go-stable-diffusion clean
|
||||||
|
@ -396,7 +376,7 @@ build-minimal:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
build-api:
|
build-api:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
backend-assets/lib:
|
backend-assets/lib:
|
||||||
mkdir -p backend-assets/lib
|
mkdir -p backend-assets/lib
|
||||||
|
@ -407,7 +387,7 @@ ifeq ($(DETECT_LIBS),true)
|
||||||
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
||||||
endif
|
endif
|
||||||
ifeq ($(OS),Darwin)
|
ifeq ($(OS),Darwin)
|
||||||
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
|
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
|
||||||
else
|
else
|
||||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||||
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
@ -469,8 +449,7 @@ test: prepare test-models/testmodel.ggml grpcs
|
||||||
export GO_TAGS="tts stablediffusion debug"
|
export GO_TAGS="tts stablediffusion debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
$(MAKE) test-gpt4all
|
|
||||||
$(MAKE) test-llama
|
$(MAKE) test-llama
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
|
@ -500,10 +479,6 @@ teardown-e2e:
|
||||||
rm -rf $(TEST_DIR) || true
|
rm -rf $(TEST_DIR) || true
|
||||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
test-gpt4all: prepare-test
|
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
|
|
||||||
|
|
||||||
test-llama: prepare-test
|
test-llama: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||||
|
@ -559,10 +534,10 @@ protogen-go-clean:
|
||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
.PHONY: autogptq-protogen
|
||||||
autogptq-protogen:
|
autogptq-protogen:
|
||||||
|
@ -620,14 +595,6 @@ mamba-protogen:
|
||||||
mamba-protogen-clean:
|
mamba-protogen-clean:
|
||||||
$(MAKE) -C backend/python/mamba protogen-clean
|
$(MAKE) -C backend/python/mamba protogen-clean
|
||||||
|
|
||||||
.PHONY: petals-protogen
|
|
||||||
petals-protogen:
|
|
||||||
$(MAKE) -C backend/python/petals protogen
|
|
||||||
|
|
||||||
.PHONY: petals-protogen-clean
|
|
||||||
petals-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/petals protogen-clean
|
|
||||||
|
|
||||||
.PHONY: rerankers-protogen
|
.PHONY: rerankers-protogen
|
||||||
rerankers-protogen:
|
rerankers-protogen:
|
||||||
$(MAKE) -C backend/python/rerankers protogen
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
|
@ -709,7 +676,6 @@ prepare-extra-conda-environments: protogen-python
|
||||||
$(MAKE) -C backend/python/vall-e-x
|
$(MAKE) -C backend/python/vall-e-x
|
||||||
$(MAKE) -C backend/python/openvoice
|
$(MAKE) -C backend/python/openvoice
|
||||||
$(MAKE) -C backend/python/exllama
|
$(MAKE) -C backend/python/exllama
|
||||||
$(MAKE) -C backend/python/petals
|
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
|
@ -730,12 +696,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
||||||
mkdir -p backend-assets/espeak-ng-data
|
mkdir -p backend-assets/espeak-ng-data
|
||||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||||
|
|
||||||
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
|
||||||
mkdir -p backend-assets/gpt4all
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
|
||||||
|
|
||||||
backend-assets/grpc: protogen-go replace
|
backend-assets/grpc: protogen-go replace
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
|
@ -746,13 +706,6 @@ ifneq ($(UPX),)
|
||||||
$(UPX) backend-assets/grpc/bert-embeddings
|
$(UPX) backend-assets/grpc/bert-embeddings
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/gpt4all
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
|
@ -783,9 +736,6 @@ else
|
||||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||||
endif
|
endif
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend/cpp/${VARIANT}/grpc-server
|
|
||||||
endif
|
|
||||||
|
|
||||||
# This target is for manually building a variant with-auto detected flags
|
# This target is for manually building a variant with-auto detected flags
|
||||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
@ -858,9 +808,6 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
|
||||||
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||||
mkdir -p backend-assets/util/
|
mkdir -p backend-assets/util/
|
||||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/util/llama-cpp-rpc-server
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||||
|
|
|
@ -84,6 +84,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||||
|
|
||||||
Hot topics (looking for contributors):
|
Hot topics (looking for contributors):
|
||||||
|
|
||||||
|
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||||
|
@ -150,6 +151,7 @@ Other:
|
||||||
|
|
||||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||||
|
|
||||||
|
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
|
||||||
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
||||||
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
||||||
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
||||||
|
|
|
@ -458,7 +458,9 @@ struct llama_server_context
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
llama_init_result llama_init = llama_init_from_gpt_params(params);
|
||||||
|
model = llama_init.model;
|
||||||
|
ctx = llama_init.context;
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
LOG_ERROR("unable to load model", {{"model", params.model}});
|
||||||
|
@ -478,7 +480,7 @@ struct llama_server_context
|
||||||
|
|
||||||
n_ctx = llama_n_ctx(ctx);
|
n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
add_bos_token = llama_should_add_bos_token(model);
|
add_bos_token = llama_add_bos_token(model);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -2258,7 +2260,7 @@ static void params_parse(const backend::ModelOptions* request,
|
||||||
}
|
}
|
||||||
// get the directory of modelfile
|
// get the directory of modelfile
|
||||||
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||||
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
|
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
|
||||||
}
|
}
|
||||||
params.use_mlock = request->mlock();
|
params.use_mlock = request->mlock();
|
||||||
params.use_mmap = request->mmap();
|
params.use_mmap = request->mmap();
|
||||||
|
|
|
@ -1,62 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
|
||||||
)
|
|
||||||
|
|
||||||
type LLM struct {
|
|
||||||
base.SingleThread
|
|
||||||
|
|
||||||
gpt4all *gpt4all.Model
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|
||||||
model, err := gpt4all.New(opts.ModelFile,
|
|
||||||
gpt4all.SetThreads(int(opts.Threads)),
|
|
||||||
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
|
|
||||||
llm.gpt4all = model
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
|
|
||||||
predictOptions := []gpt4all.PredictOption{
|
|
||||||
gpt4all.SetTemperature(float64(opts.Temperature)),
|
|
||||||
gpt4all.SetTopP(float64(opts.TopP)),
|
|
||||||
gpt4all.SetTopK(int(opts.TopK)),
|
|
||||||
gpt4all.SetTokens(int(opts.Tokens)),
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
|
|
||||||
}
|
|
||||||
return predictOptions
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|
||||||
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
|
||||||
predictOptions := buildPredictOptions(opts)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
llm.gpt4all.SetTokenCallback(func(token string) bool {
|
|
||||||
results <- token
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("err: ", err)
|
|
||||||
}
|
|
||||||
llm.gpt4all.SetTokenCallback(nil)
|
|
||||||
close(results)
|
|
||||||
}()
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
|
@ -1,21 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
2
backend/python/autogptq/requirements-cublas11.txt
Normal file
2
backend/python/autogptq/requirements-cublas11.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
|
torch
|
1
backend/python/autogptq/requirements-cublas12.txt
Normal file
1
backend/python/autogptq/requirements-cublas12.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
torch
|
|
@ -2,4 +2,4 @@
|
||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
|
@ -1,7 +1,6 @@
|
||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.65.1
|
grpcio==1.65.4
|
||||||
protobuf
|
protobuf
|
||||||
torch
|
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
4  backend/python/bark/requirements-cpu.txt  Normal file
@ -0,0 +1,4 @@
transformers
accelerate
torch
torchaudio

5  backend/python/bark/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

4  backend/python/bark/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate

@ -1,3 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
 torchaudio
+transformers
+accelerate

@ -4,3 +4,5 @@ torch
 torchaudio
 optimum[openvino]
 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+transformers
+accelerate

@ -1,6 +1,4 @@
-accelerate
 bark==0.1.5
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
-transformers
@ -18,10 +18,23 @@
 # source $(dirname $0)/../common/libbackend.sh
 #
 function init() {
+    # Name of the backend (directory name)
     BACKEND_NAME=${PWD##*/}
+
+    # Path where all backends files are
     MY_DIR=$(realpath `dirname $0`)
+
+    # Build type
     BUILD_PROFILE=$(getBuildProfile)
 
+    # Environment directory
+    EDIR=${MY_DIR}
+
+    # Allow to specify a custom env dir for shared environments
+    if [ "x${ENV_DIR}" != "x" ]; then
+        EDIR=${ENV_DIR}
+    fi
+
     # If a backend has defined a list of valid build profiles...
     if [ ! -z "${LIMIT_TARGETS}" ]; then
         isValidTarget=$(checkTargets ${LIMIT_TARGETS})
@ -74,13 +87,14 @@ function getBuildProfile() {
 # This function is idempotent, so you can call it as many times as you want and it will
 # always result in an activated virtual environment
 function ensureVenv() {
-    if [ ! -d "${MY_DIR}/venv" ]; then
-        uv venv ${MY_DIR}/venv
+    if [ ! -d "${EDIR}/venv" ]; then
+        uv venv ${EDIR}/venv
         echo "virtualenv created"
     fi
 
-    if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
-        source ${MY_DIR}/venv/bin/activate
+    # Source if we are not already in a Virtual env
+    if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then
+        source ${EDIR}/venv/bin/activate
         echo "virtualenv activated"
     fi
 
@ -113,13 +127,24 @@ function installRequirements() {
 
     # These are the requirements files we will attempt to install, in order
     declare -a requirementFiles=(
-        "${MY_DIR}/requirements-install.txt"
-        "${MY_DIR}/requirements.txt"
-        "${MY_DIR}/requirements-${BUILD_TYPE}.txt"
+        "${EDIR}/requirements-install.txt"
+        "${EDIR}/requirements.txt"
+        "${EDIR}/requirements-${BUILD_TYPE}.txt"
     )
 
     if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
-        requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
+        requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
+    fi
+
+    # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
+    if [ "x${BUILD_TYPE}" == "x" ]; then
+        requirementFiles+=("${EDIR}/requirements-cpu.txt")
+    fi
+
+    requirementFiles+=("${EDIR}/requirements-after.txt")
+
+    if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
+        requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
     fi
 
     for reqFile in ${requirementFiles[@]}; do
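For readers who find the resolution order easier to follow as data, here is a small illustrative re-expression of the new logic in Python. This is not part of the repository (the real logic is the bash hunk above); the final existence filter is an assumption about how the install loop treats missing files.

import os

def requirement_files(edir: str, build_type: str, build_profile: str) -> list[str]:
    # Mirrors the order in which libbackend.sh now collects requirement files.
    # EDIR defaults to the backend directory and can be overridden via ENV_DIR for shared envs.
    files = [
        f"{edir}/requirements-install.txt",
        f"{edir}/requirements.txt",
        f"{edir}/requirements-{build_type}.txt",
    ]
    if build_type != build_profile:
        files.append(f"{edir}/requirements-{build_profile}.txt")
    if build_type == "":  # an empty BUILD_TYPE means a CPU build
        files.append(f"{edir}/requirements-cpu.txt")
    files.append(f"{edir}/requirements-after.txt")
    if build_type != build_profile:
        files.append(f"{edir}/requirements-{build_profile}-after.txt")
    # Assumption: the shell loop only installs files that actually exist.
    return [f for f in files if os.path.exists(f)]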
@ -1,2 +1,2 @@
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf

3  backend/python/coqui/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

5  backend/python/coqui/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

4  backend/python/coqui/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate

@ -1,3 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
 torchaudio
+transformers
+accelerate

@ -3,4 +3,6 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+transformers
+accelerate

@ -1,6 +1,4 @@
-accelerate
 TTS==0.22.0
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
-transformers
@ -18,13 +18,13 @@ import backend_pb2_grpc
 import grpc
 
 from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
-    EulerAncestralDiscreteScheduler
+    EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image, export_to_video
 from compel import Compel, ReturnedEmbeddingsType
-from transformers import CLIPTextModel
+from optimum.quanto import freeze, qfloat8, quantize
+from transformers import CLIPTextModel, T5EncoderModel
 from safetensors.torch import load_file
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@ -163,6 +163,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         modelFile = request.Model
 
         self.cfg_scale = 7
+        self.PipelineType = request.PipelineType
 
         if request.CFGScale != 0:
             self.cfg_scale = request.CFGScale
 
@ -244,6 +246,30 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 torch_dtype=torchType,
                 use_safetensors=True,
                 variant=variant)
+        elif request.PipelineType == "FluxPipeline":
+            self.pipe = FluxPipeline.from_pretrained(
+                request.Model,
+                torch_dtype=torch.bfloat16)
+            if request.LowVRAM:
+                self.pipe.enable_model_cpu_offload()
+        elif request.PipelineType == "FluxTransformer2DModel":
+            dtype = torch.bfloat16
+            # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
+            bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
+
+            transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
+            quantize(transformer, weights=qfloat8)
+            freeze(transformer)
+            text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
+            quantize(text_encoder_2, weights=qfloat8)
+            freeze(text_encoder_2)
+
+            self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
+            self.pipe.transformer = transformer
+            self.pipe.text_encoder_2 = text_encoder_2
+
+            if request.LowVRAM:
+                self.pipe.enable_model_cpu_offload()
 
         if CLIPSKIP and request.CLIPSkip != 0:
             self.clip_skip = request.CLIPSkip
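The new FluxTransformer2DModel branch is the interesting part of this hunk: it quantizes the Flux transformer and the T5 text encoder to qfloat8 with optimum-quanto before wiring them into a FluxPipeline. A minimal standalone sketch of that flow follows; it is not LocalAI code, the checkpoint path and repo id are placeholders, and it assumes optimum-quanto and a recent diffusers are installed.

import torch
from diffusers import FluxPipeline, FluxTransformer2DModel
from optimum.quanto import freeze, qfloat8, quantize
from transformers import T5EncoderModel

dtype = torch.bfloat16
bfl_repo = "ChuckMcSneed/FLUX.1-dev"           # same default repo the backend falls back to
single_file = "/models/flux1-dev.safetensors"  # placeholder path to a single-file checkpoint

# Quantize the two heavy components to 8-bit floats and freeze them so the
# quantized weights are what inference actually uses.
transformer = FluxTransformer2DModel.from_single_file(single_file, torch_dtype=dtype)
quantize(transformer, weights=qfloat8)
freeze(transformer)

text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)

# Build the pipeline without those two components, then plug the quantized ones back in.
pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
pipe.transformer = transformer
pipe.text_encoder_2 = text_encoder_2
pipe.enable_model_cpu_offload()  # what the LowVRAM option toggles in the backend

Quantizing both pieces is what keeps the FLUX.1 weights within reach of smaller GPUs, which is why the branch pairs it with CPU offload when LowVRAM is requested.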
@ -399,6 +425,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 request.seed
             )
 
+        if self.PipelineType == "FluxPipeline":
+            kwargs["max_sequence_length"] = 256
+
+        if self.PipelineType == "FluxTransformer2DModel":
+            kwargs["output_type"] = "pil"
+            kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
+
         if self.img2vid:
             # Load the conditioning image
             image = load_image(request.src)
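For reference, a hedged sketch of how those kwargs end up in the generation call. Only the three keys come from the hunk above; the prompt, step count and the pipe object are placeholders (pipe is assumed to be the FluxPipeline built in the previous sketch), and in the backend max_sequence_length applies to the FluxPipeline path while output_type and generator apply to the FluxTransformer2DModel path.

import torch

generation_kwargs = {
    "max_sequence_length": 256,                          # FluxPipeline: cap the T5 prompt length
    "output_type": "pil",                                # FluxTransformer2DModel path: return PIL images
    "generator": torch.Generator("cpu").manual_seed(0),  # fixed CPU-side seed
}
image = pipe(prompt="an astronaut riding a horse", num_inference_steps=28, **generation_kwargs).images[0]
image.save("flux.png")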
9  backend/python/diffusers/requirements-cpu.txt  Normal file
@ -0,0 +1,9 @@
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
torch
optimum-quanto

10  backend/python/diffusers/requirements-cublas11.txt  Normal file
@ -0,0 +1,10 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

9  backend/python/diffusers/requirements-cublas12.txt  Normal file
@ -0,0 +1,9 @@
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

@ -1,3 +1,11 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchvision
+torch==2.3.1+rocm6.0
+torchvision==0.18.1+rocm6.0
+diffusers
+opencv-python
+transformers
+accelerate
+compel
+peft
+sentencepiece
+optimum-quanto

@ -4,3 +4,11 @@ torch
 torchvision
 optimum[openvino]
 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+diffusers
+opencv-python
+transformers
+accelerate
+compel
+peft
+sentencepiece
+optimum-quanto

@ -1,13 +1,5 @@
 setuptools
-accelerate
-compel
-peft
-diffusers
-grpcio==1.65.1
-opencv-python
+grpcio==1.65.4
 pillow
 protobuf
-sentencepiece
-torch
-transformers
 certifi
3  backend/python/exllama/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

4  backend/python/exllama/requirements-cublas11.txt  Normal file
@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate

3  backend/python/exllama/requirements-cublas12.txt  Normal file
@ -0,0 +1,3 @@
torch
transformers
accelerate

@ -1,6 +1,4 @@
-grpcio==1.65.0
+grpcio==1.65.5
 protobuf
-torch
-transformers
 certifi
 setuptools
3  backend/python/exllama2/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

4  backend/python/exllama2/requirements-cublas11.txt  Normal file
@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate

3  backend/python/exllama2/requirements-cublas12.txt  Normal file
@ -0,0 +1,3 @@
torch
transformers
accelerate

@ -1,7 +1,5 @@
-accelerate
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
-torch
 wheel
 setuptools
2  backend/python/mamba/requirements-after.txt  Normal file
@ -0,0 +1,2 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2

2  backend/python/mamba/requirements-cpu.txt  Normal file
@ -0,0 +1,2 @@
torch
transformers

3  backend/python/mamba/requirements-cublas11.txt  Normal file
@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers

2  backend/python/mamba/requirements-cublas12.txt  Normal file
@ -0,0 +1,2 @@
torch
transformers

@ -4,4 +4,3 @@
 packaging
 setuptools
 wheel
-torch==2.3.1

@ -1,6 +1,3 @@
-causal-conv1d==1.4.0
-mamba-ssm==2.2.2
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
-transformers
1  backend/python/openvoice/requirements-cpu.txt  Normal file
@ -0,0 +1 @@
torch

2  backend/python/openvoice/requirements-cublas11.txt  Normal file
@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

1  backend/python/openvoice/requirements-cublas12.txt  Normal file
@ -0,0 +1 @@
torch

@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3

@ -1,4 +1,4 @@
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 librosa
 faster-whisper
@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
 
 # Download checkpoints if not present
 if [ ! -d "checkpoints_v2" ]; then
-    wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
+    wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
     unzip checkpoints_v2.zip
 fi
 
1  backend/python/parler-tts/requirements-after.txt  Normal file
@ -0,0 +1 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17

3  backend/python/parler-tts/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

5  backend/python/parler-tts/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

4  backend/python/parler-tts/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate

@ -1,3 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
 torchaudio
+transformers
+accelerate

@ -3,4 +3,6 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+transformers
+accelerate

@ -1,7 +1,4 @@
-accelerate
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
-torch
-git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
 certifi
-transformers
+llvmlite==0.43.0
@ -1,31 +0,0 @@
.PHONY: petals
petals: protogen
	@echo "Creating virtual environment..."
	bash install.sh "petals.yml"
	@echo "Virtual environment created."

.PHONY: run
run: protogen
	@echo "Running petals..."
	bash run.sh
	@echo "petals run."

.PHONY: test
test: protogen
	@echo "Testing petals..."
	bash test.sh
	@echo "petals tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__
@ -1,140 +0,0 @@
#!/usr/bin/env python3
from concurrent import futures
import time
import argparse
import signal
import sys
import os

import backend_pb2
import backend_pb2_grpc

import grpc
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer that implements the Backend service defined in backend.proto.
    """
    def Health(self, request, context):
        """
        Returns a health check message.

        Args:
            request: The health check request.
            context: The gRPC context.

        Returns:
            backend_pb2.Reply: The health check reply.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Loads a language model.

        Args:
            request: The load model request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The load model result.
        """
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False)
            self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model)
            self.cuda = False
            if request.CUDA:
                self.model = self.model.cuda()
                self.cuda = True

        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters.

        Args:
            request: The predict request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The predict result.
        """
        inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"]
        if self.cuda:
            inputs = inputs.cuda()

        if request.Tokens == 0:
            # Max to max value if tokens are not specified
            request.Tokens = 8192

        # TODO: kwargs and map all parameters
        outputs = self.model.generate(inputs, max_new_tokens=request.Tokens)

        generated_text = self.tokenizer.decode(outputs[0])
        # Remove prompt from response if present
        if request.Prompt in generated_text:
            generated_text = generated_text.replace(request.Prompt, "")

        return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8'))

    def PredictStream(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters, and streams the results.

        Args:
            request: The predict stream request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The predict stream result.
        """
        # Implement PredictStream RPC
        #for reply in some_data_generator():
        #    yield reply
        # Not implemented yet
        return self.Predict(request, context)

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)
@ -1,14 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements

@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch

@ -1,5 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@ -1,3 +0,0 @@
git+https://github.com/bigscience-workshop/petals
certifi
transformers

@ -1,4 +0,0 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

startBackend $@
@ -1,58 +0,0 @@
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc

import unittest
import subprocess
import time
import grpc
import backend_pb2_grpc
import backend_pb2

class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service.

    This class contains methods to test the startup and shutdown of the gRPC service.
    """
    def setUp(self):
        self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m"))
                print(response)
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()
@ -1,6 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

runUnittests
4  backend/python/rerankers/requirements-cpu.txt  Normal file
@ -0,0 +1,4 @@
transformers
accelerate
torch
rerankers[transformers]

5  backend/python/rerankers/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
transformers
accelerate
torch
rerankers[transformers]

4  backend/python/rerankers/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
transformers
accelerate
torch
rerankers[transformers]

@ -1,2 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
+transformers
+accelerate
 torch
+rerankers[transformers]

@ -1,5 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
+transformers
+accelerate
 torch
+rerankers[transformers]
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

@ -1,6 +1,3 @@
-accelerate
-rerankers[transformers]
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
-transformers
6  backend/python/sentencetransformers/requirements-cpu.txt  Normal file
@ -0,0 +1,6 @@
torch
accelerate
transformers
bitsandbytes
sentence-transformers==3.0.1
transformers

@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
accelerate
sentence-transformers==3.0.1
transformers

@ -0,0 +1,4 @@
torch
accelerate
sentence-transformers==3.0.1
transformers

@ -1,2 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
+accelerate
+sentence-transformers==3.0.1
+transformers

@ -3,3 +3,6 @@ intel-extension-for-pytorch
 torch
 optimum[openvino]
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+accelerate
+sentence-transformers==3.0.1
+transformers

@ -1,6 +1,3 @@
-accelerate
-sentence-transformers==3.0.1
-transformers
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
@ -0,0 +1,3 @@
transformers
accelerate
torch

@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
transformers
accelerate
torch

@ -0,0 +1,3 @@
transformers
accelerate
torch

@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
+transformers
+accelerate
 torch
Some files were not shown because too many files have changed in this diff.