diff --git a/examples/bruno/LocalAI Test Requests/Sound Generation/musicgen.bru b/.bruno/LocalAI Test Requests/Sound Generation/musicgen.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/Sound Generation/musicgen.bru rename to .bruno/LocalAI Test Requests/Sound Generation/musicgen.bru diff --git a/examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru b/.bruno/LocalAI Test Requests/backend monitor/backend monitor.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/backend monitor/backend monitor.bru rename to .bruno/LocalAI Test Requests/backend monitor/backend monitor.bru diff --git a/examples/bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru b/.bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru rename to .bruno/LocalAI Test Requests/backend monitor/backend-shutdown.bru diff --git a/examples/bruno/LocalAI Test Requests/bruno.json b/.bruno/LocalAI Test Requests/bruno.json similarity index 100% rename from examples/bruno/LocalAI Test Requests/bruno.json rename to .bruno/LocalAI Test Requests/bruno.json diff --git a/examples/bruno/LocalAI Test Requests/environments/localhost.bru b/.bruno/LocalAI Test Requests/environments/localhost.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/environments/localhost.bru rename to .bruno/LocalAI Test Requests/environments/localhost.bru diff --git a/examples/bruno/LocalAI Test Requests/get models list.bru b/.bruno/LocalAI Test Requests/get models list.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/get models list.bru rename to .bruno/LocalAI Test Requests/get models list.bru diff --git a/examples/bruno/LocalAI Test Requests/image generation/Generate image.bru b/.bruno/LocalAI Test Requests/image generation/Generate image.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/image generation/Generate image.bru rename to .bruno/LocalAI Test Requests/image generation/Generate image.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/-completions.bru b/.bruno/LocalAI Test Requests/llm text/-completions.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/-completions.bru rename to .bruno/LocalAI Test Requests/llm text/-completions.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/-edits.bru b/.bruno/LocalAI Test Requests/llm text/-edits.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/-edits.bru rename to .bruno/LocalAI Test Requests/llm text/-edits.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/-embeddings.bru b/.bruno/LocalAI Test Requests/llm text/-embeddings.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/-embeddings.bru rename to .bruno/LocalAI Test Requests/llm text/-embeddings.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru b/.bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru rename to .bruno/LocalAI Test Requests/llm text/chat/chat completion -simple- 1 message-.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru b/.bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru 
similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru rename to .bruno/LocalAI Test Requests/llm text/chat/chat-completions -long-.bru diff --git a/examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru b/.bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru rename to .bruno/LocalAI Test Requests/llm text/chat/chat-completions -stream-.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/add model gallery.bru b/.bruno/LocalAI Test Requests/model gallery/add model gallery.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/add model gallery.bru rename to .bruno/LocalAI Test Requests/model gallery/add model gallery.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/delete model gallery.bru b/.bruno/LocalAI Test Requests/model gallery/delete model gallery.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/delete model gallery.bru rename to .bruno/LocalAI Test Requests/model gallery/delete model gallery.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru b/.bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru rename to .bruno/LocalAI Test Requests/model gallery/list MODELS in galleries.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru b/.bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru rename to .bruno/LocalAI Test Requests/model gallery/list model GALLERIES.bru diff --git a/.bruno/LocalAI Test Requests/model gallery/model delete.bru b/.bruno/LocalAI Test Requests/model gallery/model delete.bru new file mode 100644 index 00000000..b320dae3 --- /dev/null +++ b/.bruno/LocalAI Test Requests/model gallery/model delete.bru @@ -0,0 +1,11 @@ +meta { + name: model delete + type: http + seq: 7 +} + +post { + url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries + body: none + auth: none +} diff --git a/examples/bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru b/.bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru rename to .bruno/LocalAI Test Requests/model gallery/model gallery apply -gist-.bru diff --git a/examples/bruno/LocalAI Test Requests/model gallery/model gallery apply.bru b/.bruno/LocalAI Test Requests/model gallery/model gallery apply.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/model gallery/model gallery apply.bru rename to .bruno/LocalAI Test Requests/model gallery/model gallery apply.bru diff --git a/.bruno/LocalAI Test Requests/transcription/gb1.ogg b/.bruno/LocalAI Test Requests/transcription/gb1.ogg new file mode 100644 index 00000000..df22d636 Binary files /dev/null and b/.bruno/LocalAI Test Requests/transcription/gb1.ogg differ diff --git a/.bruno/LocalAI Test Requests/transcription/transcribe.bru b/.bruno/LocalAI Test Requests/transcription/transcribe.bru new file mode 100644 index 00000000..831aad90 --- /dev/null +++ b/.bruno/LocalAI 
Test Requests/transcription/transcribe.bru @@ -0,0 +1,16 @@ +meta { + name: transcribe + type: http + seq: 1 +} + +post { + url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions + body: multipartForm + auth: none +} + +body:multipart-form { + file: @file(transcription/gb1.ogg) + model: whisper-1 +} diff --git a/examples/bruno/LocalAI Test Requests/tts/-tts.bru b/.bruno/LocalAI Test Requests/tts/-tts.bru similarity index 100% rename from examples/bruno/LocalAI Test Requests/tts/-tts.bru rename to .bruno/LocalAI Test Requests/tts/-tts.bru diff --git a/examples/bruno/LocalAI Test Requests/tts/musicgen.bru b/.bruno/LocalAI Test Requests/tts/musicgen.bru similarity index 86% rename from examples/bruno/LocalAI Test Requests/tts/musicgen.bru rename to .bruno/LocalAI Test Requests/tts/musicgen.bru index a720b8b1..900173eb 100644 --- a/examples/bruno/LocalAI Test Requests/tts/musicgen.bru +++ b/.bruno/LocalAI Test Requests/tts/musicgen.bru @@ -16,7 +16,7 @@ headers { body:json { { - "backend": "transformers-musicgen", + "backend": "transformers", "model": "facebook/musicgen-small", "input": "80s Synths playing Jazz" } diff --git a/.devcontainer-scripts/utils.sh b/.devcontainer-scripts/utils.sh index 98ac063c..8416d43d 100644 --- a/.devcontainer-scripts/utils.sh +++ b/.devcontainer-scripts/utils.sh @@ -9,6 +9,7 @@ # Param 2: email # config_user() { + echo "Configuring git for $1 <$2>" local gcn=$(git config --global user.name) if [ -z "${gcn}" ]; then echo "Setting up git user / remote" @@ -24,6 +25,7 @@ config_user() { # Param 2: remote url # config_remote() { + echo "Adding git remote and fetching $2 as $1" local gr=$(git remote -v | grep $1) if [ -z "${gr}" ]; then git remote add $1 $2 diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml index 8795d64d..7ef22099 100644 --- a/.devcontainer/docker-compose-devcontainer.yml +++ b/.devcontainer/docker-compose-devcontainer.yml @@ -7,7 +7,7 @@ services: args: - FFMPEG=true - IMAGE_TYPE=extras - - GO_TAGS=stablediffusion p2p tts + - GO_TAGS=p2p tts env_file: - ../.env ports: diff --git a/.env b/.env index 9e5dbd79..ee8db74e 100644 --- a/.env +++ b/.env @@ -38,12 +38,12 @@ ## Uncomment and set to true to enable rebuilding from source # REBUILD=true -## Enable go tags, available: stablediffusion, tts -## stablediffusion: image generation with stablediffusion +## Enable go tags, available: p2p, tts +## p2p: enable distributed inferencing ## tts: enables text-to-speech with go-piper ## (requires REBUILD=true) # -# GO_TAGS=stablediffusion +# GO_TAGS=p2p ## Path where to store generated images # LOCALAI_IMAGE_PATH=/tmp/generated/images @@ -82,6 +82,15 @@ # Enable to allow p2p mode # LOCALAI_P2P=true +# Enable to use federated mode +# LOCALAI_FEDERATED=true + +# Enable to start federation server +# FEDERATED_SERVER=true + +# Define to use federation token +# TOKEN="" + ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time diff --git a/.gitattributes b/.gitattributes index dfdb8b77..ef774d4c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ *.sh text eol=lf +backend/cpp/llama/*.hpp linguist-vendored \ No newline at end of file diff --git a/.github/check_and_update.py b/.github/check_and_update.py index dcf1d04a..704b658e 100644 --- a/.github/check_and_update.py +++ b/.github/check_and_update.py @@ -29,9 +29,14 @@ def calculate_sha256(file_path): def manual_safety_check_hf(repo_id): scanResponse = requests.get('https://huggingface.co/api/models/' + 
repo_id + "/scan") scan = scanResponse.json() - if scan['hasUnsafeFile']: - return scan - return None + # Check if 'hasUnsafeFile' exists in the response + if 'hasUnsafeFile' in scan: + if scan['hasUnsafeFile']: + return scan + else: + return None + else: + return None download_type, repo_id_or_url = parse_uri(uri) diff --git a/.github/ci/modelslist.go b/.github/ci/modelslist.go index cdc31703..719cd094 100644 --- a/.github/ci/modelslist.go +++ b/.github/ci/modelslist.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "os" + "github.com/microcosm-cc/bluemonday" "gopkg.in/yaml.v3" ) @@ -279,6 +280,12 @@ func main() { return } + // Ensure that all arbitrary text content is sanitized before display + for i, m := range models { + models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name) + models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description) + } + // render the template data := struct { Models []*GalleryModel diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5016ebdb..570ac569 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,6 +9,8 @@ updates: directory: "/" schedule: interval: "weekly" + ignore: + - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto" - package-ecosystem: "github-actions" # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) directory: "/" @@ -79,14 +81,6 @@ updates: directory: "/backend/python/transformers" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/transformers-musicgen" - schedule: - interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/vall-e-x" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/vllm" schedule: diff --git a/.github/labeler.yml b/.github/labeler.yml index 687a90d1..7be4dec9 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,6 +1,15 @@ enhancements: - head-branch: ['^feature', 'feature'] +dependencies: +- any: + - changed-files: + - any-glob-to-any-file: 'Makefile' + - changed-files: + - any-glob-to-any-file: '*.mod' + - changed-files: + - any-glob-to-any-file: '*.sum' + kind/documentation: - any: - changed-files: diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index c94a134d..092110df 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -12,23 +12,14 @@ jobs: - repository: "ggerganov/llama.cpp" variable: "CPPLLAMA_VERSION" branch: "master" - - repository: "go-skynet/go-ggml-transformers.cpp" - variable: "GOGGMLTRANSFORMERS_VERSION" - branch: "master" - - repository: "donomii/go-rwkv.cpp" - variable: "RWKV_VERSION" - branch: "main" - repository: "ggerganov/whisper.cpp" variable: "WHISPER_CPP_VERSION" branch: "master" - - repository: "go-skynet/go-bert.cpp" - variable: "BERT_VERSION" - branch: "master" - - repository: "go-skynet/bloomz.cpp" - variable: "BLOOMZ_VERSION" + - repository: "PABannier/bark.cpp" + variable: "BARKCPP_VERSION" branch: "main" - - repository: "mudler/go-ggllm.cpp" - variable: "GOGGLLM_VERSION" + - repository: "leejet/stable-diffusion.cpp" + variable: "STABLEDIFFUSION_GGML_VERSION" branch: "master" - repository: "mudler/go-stable-diffusion" variable: "STABLEDIFFUSION_VERSION" diff --git a/.github/workflows/checksum_checker.yaml b/.github/workflows/checksum_checker.yaml index 7b85ad35..13244334 100644 --- a/.github/workflows/checksum_checker.yaml +++ 
b/.github/workflows/checksum_checker.yaml @@ -23,7 +23,7 @@ jobs: sudo pip install --upgrade pip pip install huggingface_hub - name: 'Setup yq' - uses: dcarbone/install-yq-action@v1.1.1 + uses: dcarbone/install-yq-action@v1.3.1 with: version: 'v4.44.2' download-compressed: true diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 951e65e1..5bcd84f6 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v2.2.0 + uses: dependabot/fetch-metadata@v2.3.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" skip-commit-verification: true diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml index 7b5c0484..00d51322 100644 --- a/.github/workflows/deploy-explorer.yaml +++ b/.github/workflows/deploy-explorer.yaml @@ -33,7 +33,7 @@ jobs: run: | CGO_ENABLED=0 make build-api - name: rm - uses: appleboy/ssh-action@v1.0.3 + uses: appleboy/ssh-action@v1.2.0 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} @@ -53,7 +53,7 @@ jobs: rm: true target: ./local-ai - name: restarting - uses: appleboy/ssh-action@v1.0.3 + uses: appleboy/ssh-action@v1.2.0 with: host: ${{ secrets.EXPLORER_SSH_HOST }} username: ${{ secrets.EXPLORER_SSH_USERNAME }} diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 0c2a7670..8283964c 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 + - base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 runs-on: 'ubuntu-latest' platforms: 'linux/amd64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 395d7761..722d0f41 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -13,6 +13,78 @@ concurrency: cancel-in-progress: true jobs: + hipblas-jobs: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} + aio: ${{ matrix.aio }} + makeflags: ${{ matrix.makeflags }} + latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + # Pushing with all jobs in parallel + # eats the bandwidth of all the nodes + max-parallel: 2 + matrix: + include: + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-hipblas-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + aio: "-aio-gpu-hipblas" + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + latest-image: 'latest-gpu-hipblas' + latest-image-aio: 'latest-aio-gpu-hipblas' + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 
--output-sync=target" + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas' + ffmpeg: 'false' + image-type: 'extras' + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" + - build-type: 'hipblas' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: '-hipblas-core' + ffmpeg: 'false' + image-type: 'core' + base-image: "rocm/dev-ubuntu-22.04:6.1" + grpc-base-image: "ubuntu:22.04" + runs-on: 'arc-runner-set' + makeflags: "--jobs=3 --output-sync=target" self-hosted-jobs: uses: ./.github/workflows/image_build.yml with: @@ -39,7 +111,7 @@ jobs: strategy: # Pushing with all jobs in parallel # eats the bandwidth of all the nodes - max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }} + max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }} matrix: include: # Extra images @@ -122,29 +194,6 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-hipblas-ffmpeg' - ffmpeg: 'true' - image-type: 'extras' - aio: "-aio-gpu-hipblas" - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - latest-image: 'latest-gpu-hipblas' - latest-image-aio: 'latest-aio-gpu-hipblas' - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas' - ffmpeg: 'false' - image-type: 'extras' - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'auto' @@ -212,26 +261,6 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas-core' - ffmpeg: 'false' - image-type: 'core' - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" core-image-build: uses: ./.github/workflows/image_build.yml @@ -251,6 +280,7 @@ jobs: makeflags: ${{ matrix.makeflags }} latest-image: ${{ matrix.latest-image }} latest-image-aio: ${{ matrix.latest-image-aio }} + skip-drivers: ${{ matrix.skip-drivers }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -272,6 +302,7 @@ jobs: latest-image: 'latest-cpu' latest-image-aio: 'latest-aio-cpu' makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -283,6 +314,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=4 --output-sync=target" 
+ skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -294,6 +326,7 @@ jobs: base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -305,6 +338,7 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'false' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -315,6 +349,7 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - build-type: 'vulkan' platforms: 'linux/amd64' @@ -325,4 +360,45 @@ jobs: image-type: 'core' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" + gh-runner: + uses: ./.github/workflows/image_build.yml + with: + tag-latest: ${{ matrix.tag-latest }} + tag-suffix: ${{ matrix.tag-suffix }} + ffmpeg: ${{ matrix.ffmpeg }} + image-type: ${{ matrix.image-type }} + build-type: ${{ matrix.build-type }} + cuda-major-version: ${{ matrix.cuda-major-version }} + cuda-minor-version: ${{ matrix.cuda-minor-version }} + platforms: ${{ matrix.platforms }} + runs-on: ${{ matrix.runs-on }} + aio: ${{ matrix.aio }} + base-image: ${{ matrix.base-image }} + grpc-base-image: ${{ matrix.grpc-base-image }} + makeflags: ${{ matrix.makeflags }} + latest-image: ${{ matrix.latest-image }} + latest-image-aio: ${{ matrix.latest-image-aio }} + skip-drivers: ${{ matrix.skip-drivers }} + secrets: + dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + strategy: + matrix: + include: + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'false' + tag-suffix: '-nvidia-l4t-arm64-core' + latest-image: 'latest-nvidia-l4t-arm64-core' + ffmpeg: 'true' + image-type: 'core' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + makeflags: "--jobs=4 --output-sync=target" + skip-drivers: 'true' \ No newline at end of file diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 4a5735e5..9ad612b6 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -49,6 +49,10 @@ on: description: 'FFMPEG' default: '' type: string + skip-drivers: + description: 'Skip drivers by default' + default: 'false' + type: string image-type: description: 'Image type' default: '' @@ -234,6 +238,7 @@ jobs: GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} + SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . file: ./Dockerfile cache-from: type=gha @@ -262,6 +267,7 @@ jobs: GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_VERSION=v1.65.0 MAKEFLAGS=${{ inputs.makeflags }} + SKIP_DRIVERS=${{ inputs.skip-drivers }} context: . 
file: ./Dockerfile cache-from: type=gha diff --git a/.github/workflows/notify-models.yaml b/.github/workflows/notify-models.yaml index d6a7b210..b84e10e0 100644 --- a/.github/workflows/notify-models.yaml +++ b/.github/workflows/notify-models.yaml @@ -18,7 +18,7 @@ jobs: with: model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface:///file" # Check the PR diff using the current branch and the base branch of the PR - - uses: GrantBirki/git-diff-action@v2.7.0 + - uses: GrantBirki/git-diff-action@v2.8.0 id: git-diff-action with: json_diff_file_output: diff.json @@ -79,7 +79,7 @@ jobs: args: ${{ steps.summarize.outputs.message }} - name: Setup tmate session if fails if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -99,7 +99,7 @@ jobs: docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done # Check the PR diff using the current branch and the base branch of the PR - - uses: GrantBirki/git-diff-action@v2.7.0 + - uses: GrantBirki/git-diff-action@v2.8.0 id: git-diff-action with: json_diff_file_output: diff.json @@ -161,7 +161,7 @@ jobs: TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} - name: Setup tmate session if fails if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a1318b19..e133ecb6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -123,7 +123,7 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -232,45 +232,12 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 limit-access-to-actor: true - build-stablediffusion: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - uses: actions/setup-go@v5 - with: - go-version: '1.21.x' - cache: false - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - - name: Build stablediffusion - run: | - export PATH=$PATH:$GOPATH/bin - make backend-assets/grpc/stablediffusion - mkdir -p release && cp backend-assets/grpc/stablediffusion release - env: - GO_TAGS: stablediffusion - - uses: actions/upload-artifact@v4 - with: - name: stablediffusion - path: release/ - - name: Release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - with: - files: | - release/* + build-macOS-x86_64: runs-on: macos-13 @@ -308,7 +275,7 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 
@@ -350,7 +317,7 @@ jobs: release/* - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 diff --git a/.github/workflows/secscan.yaml b/.github/workflows/secscan.yaml index 08d7dfc6..228ac1d9 100644 --- a/.github/workflows/secscan.yaml +++ b/.github/workflows/secscan.yaml @@ -18,7 +18,7 @@ jobs: if: ${{ github.actor != 'dependabot[bot]' }} - name: Run Gosec Security Scanner if: ${{ github.actor != 'dependabot[bot]' }} - uses: securego/gosec@v2.21.2 + uses: securego/gosec@v2.22.0 with: # we let the report trigger content trigger a failure using the GitHub Security features. args: '-no-fail -fmt sarif -out results.sarif ./...' diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 8b37b52d..7f2445c8 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -35,30 +35,6 @@ jobs: run: | make --jobs=5 --output-sync=target -C backend/python/transformers make --jobs=5 --output-sync=target -C backend/python/transformers test - - tests-sentencetransformers: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test sentencetransformers - run: | - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers - make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test - - tests-rerankers: runs-on: ubuntu-latest steps: @@ -102,71 +78,27 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/diffusers make --jobs=5 --output-sync=target -C backend/python/diffusers test - tests-parler-tts: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 + # tests-transformers-musicgen: + # runs-on: ubuntu-latest + # steps: + # - name: Clone + # uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Dependencies + # run: | + # sudo apt-get update + # sudo apt-get install build-essential ffmpeg + # # Install UV + # curl -LsSf https://astral.sh/uv/install.sh | sh + # sudo apt-get install -y ca-certificates cmake curl patch python3-pip + # sudo apt-get install -y libopencv-dev + # pip install --user --no-cache-dir grpcio-tools==1.64.1 - - name: Test parler-tts - run: | - make --jobs=5 --output-sync=target -C backend/python/parler-tts - make --jobs=5 --output-sync=target -C backend/python/parler-tts test - - tests-openvoice: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip 
install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test openvoice - run: | - make --jobs=5 --output-sync=target -C backend/python/openvoice - make --jobs=5 --output-sync=target -C backend/python/openvoice test - - tests-transformers-musicgen: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - - name: Test transformers-musicgen - run: | - make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen - make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test + # - name: Test transformers-musicgen + # run: | + # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen + # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test # tests-bark: # runs-on: ubuntu-latest @@ -253,26 +185,6 @@ jobs: # run: | # make --jobs=5 --output-sync=target -C backend/python/vllm # make --jobs=5 --output-sync=target -C backend/python/vllm test - tests-vallex: - runs-on: ubuntu-latest - steps: - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - # Install UV - curl -LsSf https://astral.sh/uv/install.sh | sh - sudo apt-get install -y ca-certificates cmake curl patch python3-pip - sudo apt-get install -y libopencv-dev - pip install --user --no-cache-dir grpcio-tools==1.64.1 - - name: Test vall-e-x - run: | - make --jobs=5 --output-sync=target -C backend/python/vall-e-x - make --jobs=5 --output-sync=target -C backend/python/vall-e-x test tests-coqui: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2af3fd00..444c89fb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,15 +100,12 @@ jobs: # The python3-grpc-tools package in 22.04 is too old pip install --user grpcio-tools - sudo rm -rfv /usr/bin/conda || true - PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers + make -C backend/python/transformers # Pre-build piper before we start tests in order to have shared libraries in place make sources/go-piper && \ GO_TAGS="tts" make -C sources/go-piper piper.o && \ - sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \ - # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn) - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build + sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. 
/usr/lib/ env: CUDA_VERSION: 12-4 - name: Cache grpc @@ -130,10 +127,10 @@ jobs: cd grpc && cd cmake/build && sudo make --jobs 5 install - name: Test run: | - PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test + PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -178,17 +175,26 @@ jobs: uses: actions/checkout@v4 with: submodules: true + - name: Dependencies + run: | + # Install protoc + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + PATH="$PATH:$HOME/go/bin" make protogen-go - name: Build images run: | docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile . BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio - name: Test run: | - LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \ + PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \ make run-e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 @@ -215,7 +221,7 @@ jobs: - name: Dependencies run: | brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm - pip install --user --no-cache-dir grpcio-tools==1.64.1 + pip install --user --no-cache-dir grpcio-tools - name: Test run: | export C_INCLUDE_PATH=/usr/local/include @@ -226,7 +232,7 @@ jobs: BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.18 + uses: mxschmitt/action-tmate@v3.19 with: detached: true connect-timeout-seconds: 180 diff --git a/.gitignore b/.gitignore index 65eb9257..d821c435 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /sources/ __pycache__/ *.a +*.o get-sources prepare-sources /backend/cpp/llama/grpc-server @@ -12,7 +13,6 @@ prepare-sources go-ggml-transformers go-gpt2 -go-rwkv whisper.cpp /bloomz go-bert diff --git a/.vscode/launch.json b/.vscode/launch.json index 50493421..f5e91508 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -26,7 +26,7 @@ "LOCALAI_P2P": "true", "LOCALAI_FEDERATED": "true" }, - "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"], + "buildFlags": ["-tags", "p2p tts", "-v"], "envFile": "${workspaceFolder}/.env", "cwd": "${workspaceRoot}" } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 593ad0ed..9fb20012 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,8 +15,6 @@ Thank you for your interest in contributing to LocalAI! 
We appreciate your time - [Documentation](#documentation) - [Community and Communication](#community-and-communication) - - ## Getting Started ### Prerequisites @@ -54,7 +52,7 @@ If you find a bug, have a feature request, or encounter any issues, please check ## Coding Guidelines -- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here. +- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here. ## Testing @@ -84,5 +82,3 @@ We are welcome the contribution of the documents, please open new PR or create a - You can reach out via the Github issue tracker. - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions) - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy) - ---- diff --git a/Dockerfile b/Dockerfile index f08cb9a0..2f2bcafa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,25 +9,38 @@ FROM ${BASE_IMAGE} AS requirements-core USER root ARG GO_VERSION=1.22.6 +ARG CMAKE_VERSION=3.26.4 +ARG CMAKE_FROM_SOURCE=false ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" - +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh" RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ ccache \ ca-certificates \ - cmake \ - curl \ + curl libssl-dev \ git \ unzip upx-ucl && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Install CMake (the version in 22.04 is too old) +RUN <

diff --git a/README.md b/README.md
+ [Trendshift badge: mudler%2FLocalAI | Trendshift]

+ > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) > -> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) +> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) @@ -56,41 +60,59 @@ curl https://localai.io/install.sh | sh Or run with docker: ```bash +# CPU only image: +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu + +# Nvidia GPU: +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 + +# CPU and GPU image (bigger size): +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest + +# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/) docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu -# Alternative images: -# - if you have an Nvidia GPU: -# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 -# - without preconfigured models -# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest -# - without preconfigured models for Nvidia GPUs -# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 +``` + +To load models: + +```bash +# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io) +local-ai run llama-3.2-1b-instruct:q4_k_m +# Start LocalAI with the phi-2 model directly from huggingface +local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf +# Install and run a model from the Ollama OCI registry +local-ai run ollama://gemma:2b +# Run a model from a configuration file +local-ai run https://gist.githubusercontent.com/.../phi-2.yaml +# Install and run a model from a standard OCI registry (e.g., Docker Hub) +local-ai run oci://localai/phi-2:latest ``` [💻 Getting started](https://localai.io/basics/getting_started/index.html) -## 🔥🔥 Hot topics / Roadmap - 
-[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) +## 📰 Latest project news +- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603 +- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 ) +- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 ) +- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204 +- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples) - Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io) -- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723 -- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io -- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628 +- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 - May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/ -- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334 -- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328 - May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324 -- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222 - April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121 -Hot topics (looking for contributors): +Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) -- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 +## 🔥🔥 Hot topics (looking for help): + +- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729 +- Realtime API https://github.com/mudler/LocalAI/issues/3714 - WebUI improvements: https://github.com/mudler/LocalAI/issues/2156 - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 - Assistant API: https://github.com/mudler/LocalAI/issues/1273 -- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999 - Vulkan: https://github.com/mudler/LocalAI/issues/1647 - Anthropic API: https://github.com/mudler/LocalAI/issues/1808 @@ -98,10 +120,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl ## 🚀 [Features](https://localai.io/features/) -- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table)) +- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... 
[:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table)) - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/) - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`) -- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation) +- 🎨 [Image generation](https://localai.io/features/image-generation) - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) @@ -109,6 +131,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl - 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 📈 [Reranker API](https://localai.io/features/reranker/) - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) +- 🔊 Voice activity detection (Silero-VAD support) - 🌍 Integrated WebUI! ## 💻 Usage @@ -131,6 +154,7 @@ Model galleries Other: - Helm chart https://github.com/go-skynet/helm-charts - VSCode extension https://github.com/badgooooor/localai-vscode-plugin +- Langchain: https://python.langchain.com/docs/integrations/providers/localai/ - Terminal utility https://github.com/djcopley/ShellOracle - Local Smart assistant https://github.com/mudler/LocalAGI - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision @@ -138,6 +162,9 @@ Other: - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot +- Another Telegram Bot https://github.com/JackBekket/Hellper +- Auto-documentation https://github.com/JackBekket/Reflexia +- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper - Github Actions: https://github.com/marketplace/actions/start-localai - Examples: https://github.com/mudler/LocalAI/tree/master/examples/ @@ -212,7 +239,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/antimatter15/alpaca.cpp - https://github.com/EdVince/Stable-Diffusion-NCNN - https://github.com/ggerganov/whisper.cpp -- https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper ## 🤗 Contributors diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml index 8576746f..9aa845b0 100644 --- a/aio/cpu/embeddings.yaml +++ b/aio/cpu/embeddings.yaml @@ -1,7 +1,7 @@ name: text-embedding-ada-002 -backend: bert-embeddings +embeddings: true parameters: - model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin + model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf usage: | You can test this model with curl like this: diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml index 9de88a3f..ef374572 100644 --- a/aio/cpu/image-gen.yaml +++ b/aio/cpu/image-gen.yaml @@ -1,56 +1,17 @@ name: stablediffusion -backend: stablediffusion +backend: stablediffusion-ggml +cfg_scale: 4.5 + +options: +- sampler:euler parameters: - model: stablediffusion_assets - -license: "BSD-3" -urls: -- https://github.com/EdVince/Stable-Diffusion-NCNN -- 
https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE - -description: | - Stable Diffusion in NCNN with c++, supported txt2img and img2img + model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf +step: 25 download_files: -- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" - sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" - sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" -- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param" - sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" -- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" - sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin" -- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin" - sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" - sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin" -- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" - sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" -- filename: "stablediffusion_assets/log_sigmas.bin" - sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" -- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" - sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" - sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" -- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" - sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" - uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" -- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin" - sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" - uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin" -- filename: "stablediffusion_assets/vocab.txt" - sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" - uri: 
"https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" +- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" + sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f" + uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" usage: | curl http://localhost:8080/v1/images/generations \ diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml index 3b466d37..4052fa39 100644 --- a/aio/cpu/vision.yaml +++ b/aio/cpu/vision.yaml @@ -2,7 +2,7 @@ backend: llama-cpp context_size: 4096 f16: true mmap: true -name: gpt-4-vision-preview +name: gpt-4o roles: user: "USER:" diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml index db039279..4f5e10b3 100644 --- a/aio/gpu-8g/vision.yaml +++ b/aio/gpu-8g/vision.yaml @@ -2,7 +2,7 @@ backend: llama-cpp context_size: 4096 f16: true mmap: true -name: gpt-4-vision-preview +name: gpt-4o roles: user: "USER:" diff --git a/aio/intel/vision.yaml b/aio/intel/vision.yaml index 52843162..37067362 100644 --- a/aio/intel/vision.yaml +++ b/aio/intel/vision.yaml @@ -2,7 +2,7 @@ backend: llama-cpp context_size: 4096 mmap: false f16: false -name: gpt-4-vision-preview +name: gpt-4o roles: user: "USER:" diff --git a/backend/backend.proto b/backend/backend.proto index 4a8f31a9..bd75adc5 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -26,6 +26,21 @@ service Backend { rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {} rpc Rerank(RerankRequest) returns (RerankResult) {} + + rpc GetMetrics(MetricsRequest) returns (MetricsResponse); + + rpc VAD(VADRequest) returns (VADResponse) {} +} + +// Define the empty request +message MetricsRequest {} + +message MetricsResponse { + int32 slot_id = 1; + string prompt_json_for_slot = 2; // Stores the prompt as a JSON string. 
+ float tokens_per_second = 3; + int32 tokens_generated = 4; + int32 prompt_tokens_processed = 5; } message RerankRequest { @@ -134,6 +149,9 @@ message PredictOptions { repeated string Images = 42; bool UseTokenizerTemplate = 43; repeated Message Messages = 44; + repeated string Videos = 45; + repeated string Audios = 46; + string CorrelationId = 47; } // The response message containing the result @@ -141,6 +159,13 @@ message Reply { bytes message = 1; int32 tokens = 2; int32 prompt_tokens = 3; + double timing_prompt_processing = 4; + double timing_token_generation = 5; +} + +message GrammarTrigger { + string word = 1; + bool at_start = 2; } message ModelOptions { @@ -203,6 +228,7 @@ message ModelOptions { int32 SwapSpace = 53; int32 MaxModelLen = 54; int32 TensorParallelSize = 55; + string LoadFormat = 58; string MMProj = 41; @@ -216,6 +242,18 @@ message ModelOptions { bool FlashAttention = 56; bool NoKVOffload = 57; + + string ModelPath = 59; + + repeated string LoraAdapters = 60; + repeated float LoraScales = 61; + + repeated string Options = 62; + + string CacheTypeKey = 63; + string CacheTypeValue = 64; + + repeated GrammarTrigger GrammarTriggers = 65; } message Result { @@ -271,6 +309,19 @@ message TTSRequest { optional string language = 5; } +message VADRequest { + repeated float audio = 1; +} + +message VADSegment { + float start = 1; + float end = 2; +} + +message VADResponse { + repeated VADSegment segments = 1; +} + message SoundGenerationRequest { string text = 1; string model = 2; @@ -306,4 +357,4 @@ message StatusResponse { message Message { string role = 1; string content = 2; -} \ No newline at end of file +} diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index 176cace6..17f55003 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas) CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) - CMAKE_ARGS+=-DGGML_HIPBLAS=ON + CMAKE_ARGS+=-DGGML_HIP=ON # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation # But if it's OSX without metal, disable it here else ifeq ($(OS),Darwin) @@ -30,9 +30,7 @@ else ifeq ($(OS),Darwin) CMAKE_ARGS+=-DGGML_METAL=OFF else CMAKE_ARGS+=-DGGML_METAL=ON -# Until this is tested properly, we disable embedded metal file -# as we already embed it as part of the LocalAI assets - CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON TARGET+=--target ggml-metal endif endif diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 56d59d21..4daf84c6 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -113,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end) std::string ret; for (; begin != end; ++begin) { - ret += llama_token_to_piece(ctx, *begin); + ret += common_token_to_piece(ctx, *begin); } return ret; } @@ -121,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end) // format incomplete utf-8 multibyte character for output static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token) { - std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token); + std::string out = token == -1 ? 
"" : common_token_to_piece(ctx, token); // if the size is 1 and first bit is 1, meaning it's a partial character // (size > 1 meaning it's already a known token) if (out.size() == 1 && (out[0] & 0x80) == 0x80) @@ -134,6 +134,32 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c return out; } +// Adds an RPC server +// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6 +static void add_rpc_devices(std::string servers) { + auto rpc_servers = string_split(servers, ','); + if (rpc_servers.empty()) { + throw std::invalid_argument("no RPC servers specified"); + } + ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); + if (!rpc_reg) { + throw std::invalid_argument("failed to find RPC backend"); + } + typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); + ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); + if (!ggml_backend_rpc_add_device_fn) { + throw std::invalid_argument("failed to find RPC device add function"); + } + for (const auto & server : rpc_servers) { + ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); + if (dev) { + ggml_backend_device_register(dev); + } else { + throw std::invalid_argument("failed to register RPC device"); + } + } +} + // convert a vector of completion_token_output to json static json probs_vector_to_json(const llama_context *ctx, const std::vector &probs) { @@ -203,8 +229,8 @@ struct llama_client_slot std::string stopping_word; // sampling - struct gpt_sampler_params sparams; - gpt_sampler *ctx_sampling = nullptr; + struct common_params_sampling sparams; + common_sampler *ctx_sampling = nullptr; int32_t ga_i = 0; // group-attention state int32_t ga_n = 1; // group-attention factor @@ -257,7 +283,7 @@ struct llama_client_slot images.clear(); } - bool has_budget(gpt_params &global_params) { + bool has_budget(common_params &global_params) { if (params.n_predict == -1 && global_params.n_predict == -1) { return true; // limitless @@ -391,14 +417,48 @@ struct llama_metrics { } }; +struct llava_embd_batch { + std::vector pos; + std::vector n_seq_id; + std::vector seq_id_0; + std::vector seq_ids; + std::vector logits; + llama_batch batch; + llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) { + pos .resize(n_tokens); + n_seq_id.resize(n_tokens); + seq_ids .resize(n_tokens + 1); + logits .resize(n_tokens); + seq_id_0.resize(1); + seq_id_0[0] = seq_id; + seq_ids [n_tokens] = nullptr; + batch = { + /*n_tokens =*/ n_tokens, + /*tokens =*/ nullptr, + /*embd =*/ embd, + /*pos =*/ pos.data(), + /*n_seq_id =*/ n_seq_id.data(), + /*seq_id =*/ seq_ids.data(), + /*logits =*/ logits.data(), + }; + for (int i = 0; i < n_tokens; i++) { + batch.pos [i] = pos_0 + i; + batch.n_seq_id[i] = 1; + batch.seq_id [i] = seq_id_0.data(); + batch.logits [i] = false; + } + } +}; + struct llama_server_context { llama_model *model = nullptr; llama_context *ctx = nullptr; + const llama_vocab * vocab = nullptr; clip_ctx *clp_ctx = nullptr; - gpt_params params; + common_params params; llama_batch batch; @@ -406,6 +466,10 @@ struct llama_server_context bool clean_kv_cache = true; bool all_slots_are_idle = false; bool add_bos_token = true; + bool has_eos_token = true; + + bool grammar_lazy = false; + std::vector grammar_trigger_words; int32_t n_ctx; // total context for all clients 
/ slots @@ -441,7 +505,7 @@ struct llama_server_context } } - bool load_model(const gpt_params ¶ms_) + bool load_model(const common_params ¶ms_) { params = params_; if (!params.mmproj.empty()) { @@ -458,9 +522,9 @@ struct llama_server_context } } - llama_init_result llama_init = llama_init_from_gpt_params(params); - model = llama_init.model; - ctx = llama_init.context; + common_init_result common_init = common_init_from_params(params); + model = common_init.model.release(); + ctx = common_init.context.release(); if (model == nullptr) { LOG_ERR("unable to load model: %s", params.model.c_str()); @@ -469,7 +533,7 @@ struct llama_server_context if (multimodal) { const int n_embd_clip = clip_n_mmproj_embd(clp_ctx); - const int n_embd_llm = llama_n_embd(model); + const int n_embd_llm = llama_model_n_embd(model); if (n_embd_clip != n_embd_llm) { LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm); llama_free(ctx); @@ -478,21 +542,23 @@ struct llama_server_context } } + vocab = llama_model_get_vocab(model); n_ctx = llama_n_ctx(ctx); - add_bos_token = llama_add_bos_token(model); + add_bos_token = llama_vocab_get_add_bos(vocab); + has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL; return true; } - void validate_model_chat_template(server_params & sparams) { - llama_chat_message chat[] = {{"user", "test"}}; - std::vector buf(1); - int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size()); - if (res < 0) { - LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__); - sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template + llama_client_slot* get_active_slot() { + for (llama_client_slot& slot : slots) { + // Check if the slot is currently processing + if (slot.is_processing()) { + return &slot; // Return the active slot + } } + return nullptr; // No active slot found } void initialize() { @@ -568,12 +634,12 @@ struct llama_server_context std::vector p; if (first) { - p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); + p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); first = false; } else { - p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL); + p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL); } prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end()); } @@ -590,7 +656,7 @@ struct llama_server_context else { auto s = json_prompt.template get(); - prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); + prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); } return prompt_tokens; @@ -619,7 +685,7 @@ struct llama_server_context bool launch_slot_with_data(llama_client_slot* &slot, json data) { slot_params default_params; - gpt_sampler_params default_sparams; + common_params_sampling default_sparams; slot->params.stream = json_value(data, "stream", false); slot->params.cache_prompt = json_value(data, "cache_prompt", false); @@ -627,7 +693,6 @@ struct llama_server_context slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); - slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); slot->sparams.typ_p = json_value(data, "typical_p", 
default_sparams.typ_p); slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range); @@ -639,12 +704,13 @@ struct llama_server_context slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); slot->sparams.seed = json_value(data, "seed", default_sparams.seed); slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep); + slot->sparams.grammar_trigger_words = grammar_trigger_words; + slot->sparams.grammar_lazy = grammar_lazy; if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) { // Might be better to reject the request with a 400 ? @@ -684,8 +750,8 @@ struct llama_server_context slot->prompt = ""; } - if (json_value(data, "ignore_eos", false)) { - slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY}); + if (json_value(data, "ignore_eos", false) && has_eos_token) { + slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY}); } /* slot->sparams.penalty_prompt_tokens.clear(); @@ -724,13 +790,13 @@ struct llama_server_context } } */ - slot->sparams.logit_bias.clear(); const auto &logit_bias = data.find("logit_bias"); if (logit_bias != data.end() && logit_bias->is_array()) { - const int n_vocab = llama_n_vocab(model); + const llama_vocab * vocab = llama_model_get_vocab(model); + const int n_vocab = llama_vocab_n_tokens(vocab); for (const auto &el : *logit_bias) { if (el.is_array() && el.size() == 2) @@ -759,7 +825,7 @@ struct llama_server_context } else if (el[0].is_string()) { - auto toks = llama_tokenize(model, el[0].get(), false); + auto toks = common_tokenize(vocab, el[0].get(), false); for (auto tok : toks) { slot->sparams.logit_bias.push_back({tok, bias}); @@ -791,7 +857,7 @@ struct llama_server_context sampler_names.emplace_back(name); } } - slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false); + slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false); } else { @@ -875,9 +941,9 @@ struct llama_server_context if (slot->ctx_sampling != nullptr) { - gpt_sampler_free(slot->ctx_sampling); + common_sampler_free(slot->ctx_sampling); } - slot->ctx_sampling = gpt_sampler_init(model, slot->sparams); + slot->ctx_sampling = common_sampler_init(model, slot->sparams); //llama_set_rng_seed(ctx, slot->params.seed); slot->command = LOAD_PROMPT; @@ -904,13 +970,13 @@ struct llama_server_context system_tokens.clear(); if (!system_prompt.empty()) { - system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token); + system_tokens = common_tokenize(ctx, system_prompt, add_bos_token); - llama_batch_clear(batch); + common_batch_clear(batch); for (int i = 0; i < (int)system_tokens.size(); ++i) { - llama_batch_add(batch, system_tokens[i], i, { 0 }, false); + common_batch_add(batch, system_tokens[i], i, { 0 }, false); } for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) @@ -924,7 +990,6 @@ struct llama_server_context batch.n_seq_id + i, 
batch.seq_id + i, batch.logits + i, - 0, 0, 0, // unused }; if (llama_decode(ctx, batch_view) != 0) { @@ -999,7 +1064,7 @@ struct llama_server_context bool process_token(completion_token_output &result, llama_client_slot &slot) { // remember which tokens were sampled - used for repetition penalties during sampling - const std::string token_str = llama_token_to_piece(ctx, result.tok); + const std::string token_str = common_token_to_piece(ctx, result.tok); slot.sampled = result.tok; // search stop word and delete it @@ -1090,7 +1155,7 @@ struct llama_server_context slot.has_next_token = false; } - if (result.tok == llama_token_eos(model)) + if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok)) { slot.stopped_eos = true; slot.has_next_token = false; @@ -1150,7 +1215,7 @@ struct llama_server_context samplers.reserve(slot.sparams.samplers.size()); for (const auto & sampler : slot.sparams.samplers) { - samplers.emplace_back(gpt_sampler_type_to_str(sampler)); + samplers.emplace_back(common_sampler_type_to_str(sampler)); } return json { @@ -1164,7 +1229,6 @@ struct llama_server_context {"top_k", slot.sparams.top_k}, {"top_p", slot.sparams.top_p}, {"min_p", slot.sparams.min_p}, - {"tfs_z", slot.sparams.tfs_z}, {"typical_p", slot.sparams.typ_p}, {"repeat_last_n", slot.sparams.penalty_last_n}, {"repeat_penalty", slot.sparams.penalty_repeat}, @@ -1173,13 +1237,12 @@ struct llama_server_context {"mirostat", slot.sparams.mirostat}, {"mirostat_tau", slot.sparams.mirostat_tau}, {"mirostat_eta", slot.sparams.mirostat_eta}, - {"penalize_nl", slot.sparams.penalize_nl}, {"stop", slot.params.antiprompt}, {"n_predict", slot.params.n_predict}, {"n_keep", params.n_keep}, {"ignore_eos", slot.sparams.ignore_eos}, {"stream", slot.params.stream}, - // {"logit_bias", slot.sparams.logit_bias}, + // {"logit_bias", slot.sparams.logit_bias}, {"n_probs", slot.sparams.n_probs}, {"min_keep", slot.sparams.min_keep}, {"grammar", slot.sparams.grammar}, @@ -1206,7 +1269,7 @@ struct llama_server_context if (slot.sparams.n_probs > 0) { std::vector probs_output = {}; - const std::vector to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false); + const std::vector to_send_toks = common_tokenize(ctx, tkn.text_to_send, false); size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size()); size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size()); if (probs_pos < probs_stop_pos) @@ -1258,7 +1321,7 @@ struct llama_server_context std::vector probs = {}; if (!slot.params.stream && slot.stopped_word) { - const std::vector stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false); + const std::vector stop_word_toks = common_tokenize(ctx, slot.stopping_word, false); probs = std::vector(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size()); } else @@ -1287,7 +1350,7 @@ struct llama_server_context res.error = false; res.stop = true; - const int n_embd = llama_n_embd(model); + const int n_embd = llama_model_n_embd(model); if (!params.embedding) { LOG_WARNING("embedding disabled", { @@ -1369,7 +1432,6 @@ struct llama_server_context batch.n_seq_id + i, batch.seq_id + i, batch.logits + i, - 0, 0, 0, // unused }; if (llama_decode(ctx, batch_view)) { @@ -1387,9 +1449,10 @@ struct llama_server_context n_eval = n_batch; } - const int n_embd = llama_n_embd(model); - llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, 
slot.n_past, 1, 0, }; - if (llama_decode(ctx, batch_img)) + const int n_embd = llama_model_n_embd(model); + float * embd = img.image_embedding + i * n_embd; + llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0); + if (llama_decode(ctx, llava_batch.batch)) { LOG("%s : failed to eval image\n", __func__); return false; @@ -1398,7 +1461,7 @@ struct llama_server_context } image_idx++; - llama_batch_clear(batch); + common_batch_clear(batch); // append prefix of next image const auto json_prompt = (image_idx >= (int) slot.images.size()) ? @@ -1408,7 +1471,7 @@ struct llama_server_context std::vector append_tokens = tokenize(json_prompt, false); // has next image for (int i = 0; i < (int) append_tokens.size(); ++i) { - llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true); + common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true); slot.n_past += 1; } } @@ -1540,7 +1603,7 @@ struct llama_server_context update_system_prompt(); } - llama_batch_clear(batch); + common_batch_clear(batch); if (all_slots_are_idle) { @@ -1618,7 +1681,7 @@ struct llama_server_context // TODO: we always have to take into account the "system_tokens" // this is not great and needs to be improved somehow - llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true); + common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true); slot.n_past += 1; } @@ -1667,11 +1730,11 @@ struct llama_server_context suffix_tokens.erase(suffix_tokens.begin()); } - prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model)); - prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS - prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model)); + prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab)); + prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS + prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab)); prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); - prefix_tokens.push_back(llama_token_middle(model)); + prefix_tokens.push_back(llama_vocab_fim_mid(vocab)); prompt_tokens = prefix_tokens; } else @@ -1712,7 +1775,7 @@ struct llama_server_context if (!slot.params.cache_prompt) { - gpt_sampler_reset(slot.ctx_sampling); + common_sampler_reset(slot.ctx_sampling); slot.n_past = 0; slot.n_past_se = 0; @@ -1724,7 +1787,7 @@ struct llama_server_context // push the prompt into the sampling context (do not apply grammar) for (auto &token : prompt_tokens) { - gpt_sampler_accept(slot.ctx_sampling, token, false); + common_sampler_accept(slot.ctx_sampling, token, false); } slot.n_past = common_part(slot.cache_tokens, prompt_tokens); @@ -1816,7 +1879,7 @@ struct llama_server_context ga_i += ga_w/ga_n; } } - llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false); + common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false); slot_npast++; } @@ -1894,7 +1957,6 @@ struct llama_server_context batch.n_seq_id + i, batch.seq_id + i, batch.logits + i, - 0, 0, 0, // unused }; const int ret = llama_decode(ctx, batch_view); @@ -1933,9 +1995,9 @@ struct llama_server_context } completion_token_output result; - const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i); + const llama_token id = 
common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i); - gpt_sampler_accept(slot.ctx_sampling, id, true); + common_sampler_accept(slot.ctx_sampling, id, true); slot.n_decoded += 1; if (slot.n_decoded == 1) @@ -1946,7 +2008,7 @@ struct llama_server_context } result.tok = id; - const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling); + const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling); for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) { result.probs.push_back({ @@ -1999,7 +2061,7 @@ static json format_partial_response( struct token_translator { llama_context * ctx; - std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); } + std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); } std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); } }; @@ -2064,7 +2126,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); // slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); // slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); - // slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z); // slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); // slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); // slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); @@ -2074,7 +2135,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); // slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); // slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); - // slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); // slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); // slot->params.seed = json_value(data, "seed", default_params.seed); // slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); @@ -2088,7 +2148,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["n_predict"] = predict->tokens() == 0 ? 
-1 : predict->tokens(); data["top_k"] = predict->topk(); data["top_p"] = predict->topp(); - data["tfs_z"] = predict->tailfreesamplingz(); data["typical_p"] = predict->typicalp(); data["temperature"] = predict->temperature(); data["repeat_last_n"] = predict->repeat(); @@ -2098,7 +2157,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["mirostat"] = predict->mirostat(); data["mirostat_tau"] = predict->mirostattau(); data["mirostat_eta"] = predict->mirostateta(); - data["penalize_nl"] = predict->penalizenl(); data["n_keep"] = predict->nkeep(); data["seed"] = predict->seed(); data["grammar"] = predict->grammar(); @@ -2106,6 +2164,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["ignore_eos"] = predict->ignoreeos(); data["embeddings"] = predict->embeddings(); + // Add the correlationid to json data + data["correlation_id"] = predict->correlationid(); + // for each image in the request, add the image data // for (int i = 0; i < predict->images_size(); i++) { @@ -2132,7 +2193,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens(); // llama.params.sparams.top_k = predict->topk(); // llama.params.sparams.top_p = predict->topp(); -// llama.params.sparams.tfs_z = predict->tailfreesamplingz(); // llama.params.sparams.typical_p = predict->typicalp(); // llama.params.sparams.penalty_last_n = predict->repeat(); // llama.params.sparams.temp = predict->temperature(); @@ -2142,7 +2202,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // llama.params.sparams.mirostat = predict->mirostat(); // llama.params.sparams.mirostat_tau = predict->mirostattau(); // llama.params.sparams.mirostat_eta = predict->mirostateta(); -// llama.params.sparams.penalize_nl = predict->penalizenl(); // llama.params.n_keep = predict->nkeep(); // llama.params.seed = predict->seed(); // llama.params.sparams.grammar = predict->grammar(); @@ -2189,8 +2248,37 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama // } // } +const std::vector kv_cache_types = { + GGML_TYPE_F32, + GGML_TYPE_F16, + GGML_TYPE_BF16, + GGML_TYPE_Q8_0, + GGML_TYPE_Q4_0, + GGML_TYPE_Q4_1, + GGML_TYPE_IQ4_NL, + GGML_TYPE_Q5_0, + GGML_TYPE_Q5_1, +}; + +static ggml_type kv_cache_type_from_str(const std::string & s) { + for (const auto & type : kv_cache_types) { + if (ggml_type_name(type) == s) { + return type; + } + } + throw std::runtime_error("Unsupported cache type: " + s); +} + +static std::string get_all_kv_cache_types() { + std::ostringstream msg; + for (const auto & type : kv_cache_types) { + msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", "); + } + return msg.str(); +} + static void params_parse(const backend::ModelOptions* request, - gpt_params & params) { + common_params & params) { // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809 @@ -2202,6 +2290,12 @@ static void params_parse(const backend::ModelOptions* request, } // params.model_alias ?? 
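The server-side parsing just below consumes two of the new ModelOptions fields: CacheTypeKey and CacheTypeValue select the KV-cache quantization and must match one of the ggml type names enumerated in kv_cache_types above (f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1), otherwise kv_cache_type_from_str throws; GrammarTriggers switches the sampler into lazy-grammar mode. A minimal sketch of a LoadModel call that sets these fields through the generated Go stubs; the client setup, model path and trigger word are all illustrative assumptions:

package main

import (
    "context"
    "log"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

func main() {
    conn, err := grpc.Dial("127.0.0.1:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatal(err)
    }
    defer conn.Close()

    client := pb.NewBackendClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) // model loads can be slow
    defer cancel()

    res, err := client.LoadModel(ctx, &pb.ModelOptions{
        ModelFile: "/models/model.gguf", // hypothetical path
        Threads:   8,
        // Quantize the KV cache; both values must be valid ggml type names.
        CacheTypeKey:   "q8_0",
        CacheTypeValue: "q8_0",
        // Lazy grammar: the grammar is only activated once a trigger word appears.
        GrammarTriggers: []*pb.GrammarTrigger{
            {Word: "<tool_call>", AtStart: true},
        },
    })
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("LoadModel result: %v", res)
}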
params.model_alias = request->modelfile(); + if (!request->cachetypekey().empty()) { + params.cache_type_k = kv_cache_type_from_str(request->cachetypekey()); + } + if (!request->cachetypevalue().empty()) { + params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue()); + } params.n_ctx = request->contextsize(); //params.memory_f16 = request->f16memory(); params.cpuparams.n_threads = request->threads(); @@ -2219,7 +2313,7 @@ static void params_parse(const backend::ModelOptions* request, const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS"); if (llama_grpc_servers != NULL) { - params.rpc_servers = std::string(llama_grpc_servers); + add_rpc_devices(std::string(llama_grpc_servers)); } // TODO: Add yarn @@ -2260,6 +2354,7 @@ static void params_parse(const backend::ModelOptions* request, params.use_mmap = request->mmap(); params.flash_attn = request->flashattention(); params.no_kv_offload = request->nokvoffload(); + params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops) params.embedding = request->embeddings(); @@ -2284,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request, if ( request->ropefreqscale() != 0.0f ) { params.rope_freq_scale = request->ropefreqscale(); } + + if (request->grammartriggers_size() > 0) { + LOG_INFO("configuring grammar triggers", {}); + llama.grammar_lazy = true; + for (int i = 0; i < request->grammartriggers_size(); i++) { + common_grammar_trigger trigger; + trigger.word = request->grammartriggers(i).word(); + trigger.at_start = request->grammartriggers(i).at_start(); + llama.grammar_trigger_words.push_back(trigger); + LOG_INFO("grammar trigger", { + { "word", trigger.word }, + { "at_start", trigger.at_start } + }); + } + } } @@ -2298,7 +2408,7 @@ public: grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) { // Implement LoadModel RPC - gpt_params params; + common_params params; params_parse(request, params); llama_backend_init(); @@ -2344,6 +2454,18 @@ public: int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); reply.set_prompt_tokens(tokens_evaluated); + if (result.result_json.contains("timings")) { + double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0); + reply.set_timing_prompt_processing(timing_prompt_processing); + double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0); + reply.set_timing_token_generation(timing_token_generation); + } + + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + // Send the reply writer->Write(reply); @@ -2367,12 +2489,25 @@ public: std::string completion_text; task_result result = llama.queue_results.recv(task_id); if (!result.error && result.stop) { + + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + completion_text = result.result_json.value("content", ""); int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0); int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); reply->set_prompt_tokens(tokens_evaluated); reply->set_tokens(tokens_predicted); reply->set_message(completion_text); + + if (result.result_json.contains("timings")) { + double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0); + reply->set_timing_prompt_processing(timing_prompt_processing); + double 
timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0); + reply->set_timing_token_generation(timing_token_generation); + } } else { @@ -2406,6 +2541,43 @@ public: return grpc::Status::OK; } + + grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){ + json data = parse_options(false, request, llama); + + std::vector tokens = llama.tokenize(data["prompt"],false); + + for (int i=0 ; i< tokens.size(); i++){ + response->add_tokens(tokens[i]); + } + + return grpc::Status::OK; + } + + grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) { + llama_client_slot* active_slot = llama.get_active_slot(); + + if (active_slot != nullptr) { + // Calculate the tokens per second using existing logic + double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded; + + // Populate the response with metrics + response->set_slot_id(active_slot->id); + response->set_prompt_json_for_slot(active_slot->prompt.dump()); + response->set_tokens_per_second(tokens_per_second); + response->set_tokens_generated(active_slot->n_decoded); + response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed); + } else { + // Handle case when no active slot exists + response->set_slot_id(0); + response->set_prompt_json_for_slot(""); + response->set_tokens_per_second(0); + response->set_tokens_generated(0); + response->set_prompt_tokens_processed(0); + } + + return grpc::Status::OK; + } }; void RunServer(const std::string& server_address) { diff --git a/backend/cpp/llama/patches/01-llava.patch b/backend/cpp/llama/patches/01-llava.patch index fa122da2..77124628 100644 --- a/backend/cpp/llama/patches/01-llava.patch +++ b/backend/cpp/llama/patches/01-llava.patch @@ -1,13 +1,13 @@ diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp -index 342042ff..224db9b5 100644 +index 3cd0d2fa..6c5e811a 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp -@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima - struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); - int* patches_data = (int*)malloc(ggml_nbytes(patches)); - for (int i = 0; i < num_patches; i++) { -- patches_data[i] = i + 1; -+ patches_data[i] = i; - } - ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); - free(patches_data); \ No newline at end of file +@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima + struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); + int* patches_data = (int*)malloc(ggml_nbytes(patches)); + for (int i = 0; i < num_patches; i++) { +- patches_data[i] = i + 1; ++ patches_data[i] = i; + } + ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); + free(patches_data); \ No newline at end of file diff --git a/backend/go/bark/Makefile b/backend/go/bark/Makefile new file mode 100644 index 00000000..e8902615 --- /dev/null +++ b/backend/go/bark/Makefile @@ -0,0 +1,25 @@ +INCLUDE_PATH := $(abspath ./) +LIBRARY_PATH := $(abspath ./) + +AR?=ar + +BUILD_TYPE?= +# keep standard at C11 and C++11 +CXXFLAGS = -I. 
-I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC +LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm + +# warnings +CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function + +gobark.o: + $(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS) + +libbark.a: gobark.o + cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./ + $(AR) rcs libbark.a gobark.o + $(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o + $(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o + $(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o + +clean: + rm -f gobark.o libbark.a \ No newline at end of file diff --git a/backend/go/bark/gobark.cpp b/backend/go/bark/gobark.cpp new file mode 100644 index 00000000..b5f414b8 --- /dev/null +++ b/backend/go/bark/gobark.cpp @@ -0,0 +1,85 @@ +#include +#include + +#include "bark.h" +#include "gobark.h" +#include "common.h" +#include "ggml.h" + +struct bark_context *c; + +void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) { + if (step == bark_encoding_step::SEMANTIC) { + printf("\rGenerating semantic tokens... %d%%", progress); + } else if (step == bark_encoding_step::COARSE) { + printf("\rGenerating coarse tokens... %d%%", progress); + } else if (step == bark_encoding_step::FINE) { + printf("\rGenerating fine tokens... %d%%", progress); + } + fflush(stdout); +} + +int load_model(char *model) { + // initialize bark context + struct bark_context_params ctx_params = bark_context_default_params(); + bark_params params; + + params.model_path = model; + + // ctx_params.verbosity = verbosity; + ctx_params.progress_callback = bark_print_progress_callback; + ctx_params.progress_callback_user_data = nullptr; + + struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed); + if (!bctx) { + fprintf(stderr, "%s: Could not load model\n", __func__); + return 1; + } + + c = bctx; + + return 0; +} + +int tts(char *text,int threads, char *dst ) { + + ggml_time_init(); + const int64_t t_main_start_us = ggml_time_us(); + + // generate audio + if (!bark_generate_audio(c, text, threads)) { + fprintf(stderr, "%s: An error occured. 
If the problem persists, feel free to open an issue to report it.\n", __func__); + return 1; + } + + const float *audio_data = bark_get_audio_data(c); + if (audio_data == NULL) { + fprintf(stderr, "%s: Could not get audio data\n", __func__); + return 1; + } + + const int audio_arr_size = bark_get_audio_data_size(c); + + std::vector audio_arr(audio_data, audio_data + audio_arr_size); + + write_wav_on_disk(audio_arr, dst); + + // report timing + { + const int64_t t_main_end_us = ggml_time_us(); + const int64_t t_load_us = bark_get_load_time(c); + const int64_t t_eval_us = bark_get_eval_time(c); + + printf("\n\n"); + printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f); + printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f); + printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f); + } + + return 0; +} + +int unload() { + bark_free(c); +} + diff --git a/backend/go/bark/gobark.go b/backend/go/bark/gobark.go new file mode 100644 index 00000000..133a4a39 --- /dev/null +++ b/backend/go/bark/gobark.go @@ -0,0 +1,52 @@ +package main + +// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers +// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon +// #include +// #include +import "C" + +import ( + "fmt" + "unsafe" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" +) + +type Bark struct { + base.SingleThread + threads int +} + +func (sd *Bark) Load(opts *pb.ModelOptions) error { + + sd.threads = int(opts.Threads) + + modelFile := C.CString(opts.ModelFile) + defer C.free(unsafe.Pointer(modelFile)) + + ret := C.load_model(modelFile) + if ret != 0 { + return fmt.Errorf("inference failed") + } + + return nil +} + +func (sd *Bark) TTS(opts *pb.TTSRequest) error { + t := C.CString(opts.Text) + defer C.free(unsafe.Pointer(t)) + + dst := C.CString(opts.Dst) + defer C.free(unsafe.Pointer(dst)) + + threads := C.int(sd.threads) + + ret := C.tts(t, threads, dst) + if ret != 0 { + return fmt.Errorf("inference failed") + } + + return nil +} diff --git a/backend/go/bark/gobark.h b/backend/go/bark/gobark.h new file mode 100644 index 00000000..06fb965d --- /dev/null +++ b/backend/go/bark/gobark.h @@ -0,0 +1,8 @@ +#ifdef __cplusplus +extern "C" { +#endif +int load_model(char *model); +int tts(char *text,int threads, char *dst ); +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/backend/go/image/stablediffusion/main.go b/backend/go/bark/main.go similarity index 83% rename from backend/go/image/stablediffusion/main.go rename to backend/go/bark/main.go index ae259fa7..840a687d 100644 --- a/backend/go/image/stablediffusion/main.go +++ b/backend/go/bark/main.go @@ -1,7 +1,6 @@ package main // Note: this is started internally by LocalAI and a server is allocated for each model - import ( "flag" @@ -15,7 +14,7 @@ var ( func main() { flag.Parse() - if err := grpc.StartServer(*addr, &Image{}); err != nil { + if err := grpc.StartServer(*addr, &Bark{}); err != nil { panic(err) } } diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile new file mode 100644 index 00000000..f92c3a77 --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/Makefile @@ -0,0 +1,96 @@ +INCLUDE_PATH := $(abspath 
./) +LIBRARY_PATH := $(abspath ./) + +AR?=ar +CMAKE_ARGS?= +BUILD_TYPE?= +ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh +# keep standard at C11 and C++11 +CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC + +# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static +CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF + +# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically +ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DGGML_CUDA=ON +# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +# to CMAKE_ARGS automatically +else ifeq ($(BUILD_TYPE),openblas) + CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +else ifeq ($(BUILD_TYPE),clblas) + CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ +else ifeq ($(BUILD_TYPE),hipblas) + CMAKE_ARGS+=-DGGML_HIP=ON +# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation +# But if it's OSX without metal, disable it here +else ifeq ($(OS),Darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DGGML_METAL=OFF + else + CMAKE_ARGS+=-DGGML_METAL=ON + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON + TARGET+=--target ggml-metal + endif +endif + +# ifeq ($(BUILD_TYPE),sycl_f16) +# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON +# endif + +# ifeq ($(BUILD_TYPE),sycl_f32) +# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON +# endif + +# warnings +CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function + +# Find all .a archives in ARCHIVE_DIR +# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive) +GGML_ARCHIVE_DIR := build/ggml/src/ +ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a') + +# Name of the single merged library +COMBINED_LIB := libggmlall.a + +# Rule to merge all the .a files into one +$(COMBINED_LIB): $(ALL_ARCHIVES) + @echo "Merging all .a into $(COMBINED_LIB)" + rm -f $@ + mkdir -p merge-tmp + for a in $(ALL_ARCHIVES); do \ + ( cd merge-tmp && ar x ../$$a ); \ + done + ( cd merge-tmp && ar rcs ../$@ *.o ) + # Ensure we have a proper index + ranlib $@ + # Clean up + rm -rf merge-tmp + +build/libstable-diffusion.a: + @echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" +ifneq (,$(findstring sycl,$(BUILD_TYPE))) + +bash -c "source $(ONEAPI_VARS); \ + mkdir -p build && \ + cd build && \ + cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \ + cmake --build . --config Release" +else + mkdir -p build && \ + cd build && \ + cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \ + cmake --build . 
--config Release +endif + $(MAKE) $(COMBINED_LIB) + +gosd.o: + $(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c + +libsd.a: gosd.o + cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a + $(AR) rcs libsd.a gosd.o + +clean: + rm -rf gosd.o libsd.a build $(COMBINED_LIB) \ No newline at end of file diff --git a/backend/go/image/stablediffusion-ggml/gosd.cpp b/backend/go/image/stablediffusion-ggml/gosd.cpp new file mode 100644 index 00000000..8653aa1e --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/gosd.cpp @@ -0,0 +1,228 @@ +#include +#include +#include +#include +#include +#include +#include +#include "gosd.h" + +// #include "preprocessing.hpp" +#include "flux.hpp" +#include "stable-diffusion.h" + +#define STB_IMAGE_IMPLEMENTATION +#define STB_IMAGE_STATIC +#include "stb_image.h" + +#define STB_IMAGE_WRITE_IMPLEMENTATION +#define STB_IMAGE_WRITE_STATIC +#include "stb_image_write.h" + +#define STB_IMAGE_RESIZE_IMPLEMENTATION +#define STB_IMAGE_RESIZE_STATIC +#include "stb_image_resize.h" + +// Names of the sampler method, same order as enum sample_method in stable-diffusion.h +const char* sample_method_str[] = { + "euler_a", + "euler", + "heun", + "dpm2", + "dpm++2s_a", + "dpm++2m", + "dpm++2mv2", + "ipndm", + "ipndm_v", + "lcm", +}; + +// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h +const char* schedule_str[] = { + "default", + "discrete", + "karras", + "exponential", + "ays", + "gits", +}; + +sd_ctx_t* sd_c; + +sample_method_t sample_method; + +int load_model(char *model, char* options[], int threads, int diff) { + fprintf (stderr, "Loading model!\n"); + + char *stableDiffusionModel = ""; + if (diff == 1 ) { + stableDiffusionModel = model; + model = ""; + } + + // decode options. Options are in form optname:optvale, or if booleans only optname. + char *clip_l_path = ""; + char *clip_g_path = ""; + char *t5xxl_path = ""; + char *vae_path = ""; + char *scheduler = ""; + char *sampler = ""; + + // If options is not NULL, parse options + for (int i = 0; options[i] != NULL; i++) { + char *optname = strtok(options[i], ":"); + char *optval = strtok(NULL, ":"); + if (optval == NULL) { + optval = "true"; + } + + if (!strcmp(optname, "clip_l_path")) { + clip_l_path = optval; + } + if (!strcmp(optname, "clip_g_path")) { + clip_g_path = optval; + } + if (!strcmp(optname, "t5xxl_path")) { + t5xxl_path = optval; + } + if (!strcmp(optname, "vae_path")) { + vae_path = optval; + } + if (!strcmp(optname, "scheduler")) { + scheduler = optval; + } + if (!strcmp(optname, "sampler")) { + sampler = optval; + } + } + + int sample_method_found = -1; + for (int m = 0; m < N_SAMPLE_METHODS; m++) { + if (!strcmp(sampler, sample_method_str[m])) { + sample_method_found = m; + } + } + if (sample_method_found == -1) { + fprintf(stderr, "Invalid sample method, default to EULER_A!\n"); + sample_method_found = EULER_A; + } + sample_method = (sample_method_t)sample_method_found; + + int schedule_found = -1; + for (int d = 0; d < N_SCHEDULES; d++) { + if (!strcmp(scheduler, schedule_str[d])) { + schedule_found = d; + fprintf (stderr, "Found scheduler: %s\n", scheduler); + + } + } + + if (schedule_found == -1) { + fprintf (stderr, "Invalid scheduler! 
using DEFAULT\n"); + schedule_found = DEFAULT; + } + + schedule_t schedule = (schedule_t)schedule_found; + + fprintf (stderr, "Creating context\n"); + sd_ctx_t* sd_ctx = new_sd_ctx(model, + clip_l_path, + clip_g_path, + t5xxl_path, + stableDiffusionModel, + vae_path, + "", + "", + "", + "", + "", + false, + false, + false, + threads, + SD_TYPE_COUNT, + STD_DEFAULT_RNG, + schedule, + false, + false, + false, + false); + + if (sd_ctx == NULL) { + fprintf (stderr, "failed loading model (generic error)\n"); + return 1; + } + fprintf (stderr, "Created context: OK\n"); + + sd_c = sd_ctx; + + return 0; +} + +int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) { + + sd_image_t* results; + + std::vector skip_layers = {7, 8, 9}; + + fprintf (stderr, "Generating image\n"); + + results = txt2img(sd_c, + text, + negativeText, + -1, //clip_skip + cfg_scale, // sfg_scale + 3.5f, + width, + height, + sample_method, + steps, + seed, + 1, + NULL, + 0.9f, + 20.f, + false, + "", + skip_layers.data(), + skip_layers.size(), + 0, + 0.01, + 0.2); + + if (results == NULL) { + fprintf (stderr, "NO results\n"); + return 1; + } + + if (results[0].data == NULL) { + fprintf (stderr, "Results with no data\n"); + return 1; + } + + fprintf (stderr, "Writing PNG\n"); + + fprintf (stderr, "DST: %s\n", dst); + fprintf (stderr, "Width: %d\n", results[0].width); + fprintf (stderr, "Height: %d\n", results[0].height); + fprintf (stderr, "Channel: %d\n", results[0].channel); + fprintf (stderr, "Data: %p\n", results[0].data); + + stbi_write_png(dst, results[0].width, results[0].height, results[0].channel, + results[0].data, 0, NULL); + fprintf (stderr, "Saved resulting image to '%s'\n", dst); + + // TODO: free results. Why does it crash? 
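gen_image above depends on load_model having parsed the backend options, which reach the C++ side as plain strings of the form optname:optval; a bare optname is read as a boolean flag, and the Go wrapper that follows treats diffusion_model that way and resolves path-valued options relative to the model directory. A hedged sketch of driving this backend over gRPC, again assuming the generated stubs in pkg/grpc/proto; every path and file name here is hypothetical:

package main

import (
    "context"
    "log"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

func main() {
    conn, err := grpc.Dial("127.0.0.1:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatal(err)
    }
    defer conn.Close()

    client := pb.NewBackendClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
    defer cancel()

    // Options use the optname:optval convention parsed by load_model in gosd.cpp.
    if _, err := client.LoadModel(ctx, &pb.ModelOptions{
        ModelFile: "/models/flux1-dev-Q4_0.gguf", // hypothetical
        Threads:   8,
        CFGScale:  3.5,
        Options: []string{
            "diffusion_model",                   // bare flag: load as a diffusion model
            "vae_path:ae.safetensors",           // path options resolve under the model dir
            "clip_l_path:clip_l.safetensors",
            "t5xxl_path:t5xxl_fp16.safetensors",
            "sampler:euler",                     // must match an entry of sample_method_str
            "scheduler:karras",                  // must match an entry of schedule_str
        },
    }); err != nil {
        log.Fatal(err)
    }

    // A single text-to-image request; the result is written to Dst as a PNG.
    if _, err := client.GenerateImage(ctx, &pb.GenerateImageRequest{
        PositivePrompt: "a watercolor painting of a lighthouse",
        NegativePrompt: "blurry, low quality",
        Width:          512,
        Height:         512,
        Step:           20,
        Seed:           42,
        Dst:            "/tmp/out.png",
    }); err != nil {
        log.Fatal(err)
    }
}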
+ + free(results[0].data); + results[0].data = NULL; + free(results); + fprintf (stderr, "gen_image is done", dst); + + return 0; +} + +int unload() { + free_sd_ctx(sd_c); +} + diff --git a/backend/go/image/stablediffusion-ggml/gosd.go b/backend/go/image/stablediffusion-ggml/gosd.go new file mode 100644 index 00000000..8c3bdb90 --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/gosd.go @@ -0,0 +1,96 @@ +package main + +// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include +// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp +// #include +// #include +import "C" + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "unsafe" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + "github.com/mudler/LocalAI/pkg/utils" +) + +type SDGGML struct { + base.SingleThread + threads int + sampleMethod string + cfgScale float32 +} + +func (sd *SDGGML) Load(opts *pb.ModelOptions) error { + + sd.threads = int(opts.Threads) + + modelFile := C.CString(opts.ModelFile) + defer C.free(unsafe.Pointer(modelFile)) + + var options **C.char + // prepare the options array to pass to C + + size := C.size_t(unsafe.Sizeof((*C.char)(nil))) + length := C.size_t(len(opts.Options)) + options = (**C.char)(C.malloc(length * size)) + view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)] + + var diffusionModel int + + var oo []string + for _, op := range opts.Options { + if op == "diffusion_model" { + diffusionModel = 1 + continue + } + + // If it's an option path, we resolve absolute path from the model path + if strings.Contains(op, ":") && strings.Contains(op, "path") { + data := strings.Split(op, ":") + data[1] = filepath.Join(opts.ModelPath, data[1]) + if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil { + oo = append(oo, strings.Join(data, ":")) + } + } else { + oo = append(oo, op) + } + } + + fmt.Fprintf(os.Stderr, "Options: %+v\n", oo) + + for i, x := range oo { + view[i] = C.CString(x) + } + + sd.cfgScale = opts.CFGScale + + ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel)) + if ret != 0 { + return fmt.Errorf("could not load model") + } + + return nil +} + +func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error { + t := C.CString(opts.PositivePrompt) + defer C.free(unsafe.Pointer(t)) + + dst := C.CString(opts.Dst) + defer C.free(unsafe.Pointer(dst)) + + negative := C.CString(opts.NegativePrompt) + defer C.free(unsafe.Pointer(negative)) + + ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale)) + if ret != 0 { + return fmt.Errorf("inference failed") + } + + return nil +} diff --git a/backend/go/image/stablediffusion-ggml/gosd.h b/backend/go/image/stablediffusion-ggml/gosd.h new file mode 100644 index 00000000..5297e871 --- /dev/null +++ b/backend/go/image/stablediffusion-ggml/gosd.h @@ -0,0 +1,8 @@ +#ifdef __cplusplus +extern "C" { +#endif +int load_model(char *model, char* options[], int threads, int diffusionModel); +int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale); +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/backend/go/image/tinydream/main.go b/backend/go/image/stablediffusion-ggml/main.go similarity index 83% rename from 
backend/go/image/tinydream/main.go rename to backend/go/image/stablediffusion-ggml/main.go index ae259fa7..acee74fa 100644 --- a/backend/go/image/tinydream/main.go +++ b/backend/go/image/stablediffusion-ggml/main.go @@ -1,7 +1,6 @@ package main // Note: this is started internally by LocalAI and a server is allocated for each model - import ( "flag" @@ -15,7 +14,7 @@ var ( func main() { flag.Parse() - if err := grpc.StartServer(*addr, &Image{}); err != nil { + if err := grpc.StartServer(*addr, &SDGGML{}); err != nil { panic(err) } } diff --git a/backend/go/image/stablediffusion/stablediffusion.go b/backend/go/image/stablediffusion/stablediffusion.go deleted file mode 100644 index 1733bf99..00000000 --- a/backend/go/image/stablediffusion/stablediffusion.go +++ /dev/null @@ -1,33 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - "github.com/mudler/LocalAI/pkg/stablediffusion" -) - -type Image struct { - base.SingleThread - stablediffusion *stablediffusion.StableDiffusion -} - -func (image *Image) Load(opts *pb.ModelOptions) error { - var err error - // Note: the Model here is a path to a directory containing the model files - image.stablediffusion, err = stablediffusion.New(opts.ModelFile) - return err -} - -func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error { - return image.stablediffusion.GenerateImage( - int(opts.Height), - int(opts.Width), - int(opts.Mode), - int(opts.Step), - int(opts.Seed), - opts.PositivePrompt, - opts.NegativePrompt, - opts.Dst) -} diff --git a/backend/go/image/tinydream/tinydream.go b/backend/go/image/tinydream/tinydream.go deleted file mode 100644 index ad364c47..00000000 --- a/backend/go/image/tinydream/tinydream.go +++ /dev/null @@ -1,32 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - "github.com/mudler/LocalAI/pkg/tinydream" -) - -type Image struct { - base.SingleThread - tinydream *tinydream.TinyDream -} - -func (image *Image) Load(opts *pb.ModelOptions) error { - var err error - // Note: the Model here is a path to a directory containing the model files - image.tinydream, err = tinydream.New(opts.ModelFile) - return err -} - -func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error { - return image.tinydream.GenerateImage( - int(opts.Height), - int(opts.Width), - int(opts.Step), - int(opts.Seed), - opts.PositivePrompt, - opts.NegativePrompt, - opts.Dst) -} diff --git a/backend/go/llm/bert/bert.go b/backend/go/llm/bert/bert.go deleted file mode 100644 index a6a1d1c5..00000000 --- a/backend/go/llm/bert/bert.go +++ /dev/null @@ -1,34 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - bert "github.com/go-skynet/go-bert.cpp" - - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -type Embeddings struct { - base.SingleThread - bert *bert.Bert -} - -func (llm *Embeddings) Load(opts *pb.ModelOptions) error { - model, err 
:= bert.New(opts.ModelFile) - llm.bert = model - return err -} - -func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) { - - if len(opts.EmbeddingTokens) > 0 { - tokens := []int{} - for _, t := range opts.EmbeddingTokens { - tokens = append(tokens, int(t)) - } - return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads))) - } - - return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads))) -} diff --git a/backend/go/llm/bert/main.go b/backend/go/llm/bert/main.go deleted file mode 100644 index 3a022f40..00000000 --- a/backend/go/llm/bert/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &Embeddings{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/rwkv/rwkv.go b/backend/go/llm/rwkv/rwkv.go deleted file mode 100644 index fe9cd815..00000000 --- a/backend/go/llm/rwkv/rwkv.go +++ /dev/null @@ -1,95 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - "path/filepath" - - "github.com/donomii/go-rwkv.cpp" - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -const tokenizerSuffix = ".tokenizer.json" - -type LLM struct { - base.SingleThread - - rwkv *rwkv.RwkvState -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - tokenizerFile := opts.Tokenizer - if tokenizerFile == "" { - modelFile := filepath.Base(opts.ModelFile) - tokenizerFile = modelFile + tokenizerSuffix - } - modelPath := filepath.Dir(opts.ModelFile) - tokenizerPath := filepath.Join(modelPath, tokenizerFile) - - model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads())) - - if model == nil { - return fmt.Errorf("rwkv could not load model") - } - llm.rwkv = model - return nil -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - return "", err - } - - response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil) - - return response, nil -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - fmt.Println("Error processing input: ", err) - return - } - - llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool { - results <- s - return true - }) - close(results) - }() - - return nil -} - -func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) { - tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt) - if err != nil { - return pb.TokenizationResponse{}, err - } - - l := len(tokens) - i32Tokens := make([]int32, l) - - for i, t := range tokens { - i32Tokens[i] = int32(t.ID) - } - - return pb.TokenizationResponse{ - Length: int32(l), - Tokens: i32Tokens, - }, nil -} diff --git 
a/backend/go/stores/store.go b/backend/go/stores/store.go index a4849b57..c8788a9c 100644 --- a/backend/go/stores/store.go +++ b/backend/go/stores/store.go @@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) } func isNormalized(k []float32) bool { - var sum float32 + var sum float64 + for _, v := range k { - sum += v + v64 := float64(v) + sum += v64*v64 } - return sum == 1.0 + s := math.Sqrt(sum) + + return s >= 0.99 && s <= 1.01 } // TODO: This we could replace with handwritten SIMD code @@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 { dot += k1[i] * k2[i] } - assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot)) + assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot)) // 2.0 * (1.0 - dot) would be the Euclidean distance return dot @@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 { sim := float32(dot / (mag1 * math.Sqrt(mag2))) - assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim)) + assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim)) return sim } diff --git a/backend/go/llm/rwkv/main.go b/backend/go/vad/silero/main.go similarity index 83% rename from backend/go/llm/rwkv/main.go rename to backend/go/vad/silero/main.go index acf44087..28f51e49 100644 --- a/backend/go/llm/rwkv/main.go +++ b/backend/go/vad/silero/main.go @@ -15,7 +15,7 @@ var ( func main() { flag.Parse() - if err := grpc.StartServer(*addr, &LLM{}); err != nil { + if err := grpc.StartServer(*addr, &VAD{}); err != nil { panic(err) } } diff --git a/backend/go/vad/silero/vad.go b/backend/go/vad/silero/vad.go new file mode 100644 index 00000000..5a164d2a --- /dev/null +++ b/backend/go/vad/silero/vad.go @@ -0,0 +1,54 @@ +package main + +// This is a wrapper to statisfy the GRPC service interface +// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) +import ( + "fmt" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + "github.com/streamer45/silero-vad-go/speech" +) + +type VAD struct { + base.SingleThread + detector *speech.Detector +} + +func (vad *VAD) Load(opts *pb.ModelOptions) error { + v, err := speech.NewDetector(speech.DetectorConfig{ + ModelPath: opts.ModelFile, + SampleRate: 16000, + //WindowSize: 1024, + Threshold: 0.5, + MinSilenceDurationMs: 0, + SpeechPadMs: 0, + }) + if err != nil { + return fmt.Errorf("create silero detector: %w", err) + } + + vad.detector = v + return err +} + +func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) { + audio := req.Audio + + segments, err := vad.detector.Detect(audio) + if err != nil { + return pb.VADResponse{}, fmt.Errorf("detect: %w", err) + } + + vadSegments := []*pb.VADSegment{} + for _, s := range segments { + vadSegments = append(vadSegments, &pb.VADSegment{ + Start: float32(s.SpeechStartAt), + End: float32(s.SpeechEndAt), + }) + } + + return pb.VADResponse{ + Segments: vadSegments, + }, nil +} diff --git a/backend/python/autogptq/requirements-cublas11.txt b/backend/python/autogptq/requirements-cublas11.txt index 6461b696..cf469472 100644 --- a/backend/python/autogptq/requirements-cublas11.txt +++ b/backend/python/autogptq/requirements-cublas11.txt @@ -1,2 +1,2 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 diff --git a/backend/python/autogptq/requirements-cublas12.txt b/backend/python/autogptq/requirements-cublas12.txt index 12c6d5d5..20f84cf7 100644 --- 
a/backend/python/autogptq/requirements-cublas12.txt +++ b/backend/python/autogptq/requirements-cublas12.txt @@ -1 +1 @@ -torch +torch==2.4.1 \ No newline at end of file diff --git a/backend/python/autogptq/requirements-hipblas.txt b/backend/python/autogptq/requirements-hipblas.txt index 76018445..ecd817dc 100644 --- a/backend/python/autogptq/requirements-hipblas.txt +++ b/backend/python/autogptq/requirements-hipblas.txt @@ -1,2 +1,2 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch==2.4.1+rocm6.0 \ No newline at end of file diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index d5e0173e..07b502eb 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -1,5 +1,6 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index 150fcc1b..af596d9e 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,6 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi transformers \ No newline at end of file diff --git a/backend/python/bark/requirements-cpu.txt b/backend/python/bark/requirements-cpu.txt index 0b2c3bc7..12e376ad 100644 --- a/backend/python/bark/requirements-cpu.txt +++ b/backend/python/bark/requirements-cpu.txt @@ -1,4 +1,4 @@ transformers accelerate -torch -torchaudio \ No newline at end of file +torch==2.4.1 +torchaudio==2.4.1 \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt index 71a6a93f..9f8fe9ff 100644 --- a/backend/python/bark/requirements-cublas11.txt +++ b/backend/python/bark/requirements-cublas11.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch -torchaudio +torch==2.4.1+cu118 +torchaudio==2.4.1+cu118 transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt index 0fa27074..53716949 100644 --- a/backend/python/bark/requirements-cublas12.txt +++ b/backend/python/bark/requirements-cublas12.txt @@ -1,4 +1,4 @@ -torch -torchaudio +torch==2.4.1 +torchaudio==2.4.1 transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt index af9e820e..1d54fb16 100644 --- a/backend/python/bark/requirements-hipblas.txt +++ b/backend/python/bark/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchaudio +torch==2.4.1+rocm6.0 +torchaudio==2.4.1+rocm6.0 transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index c0e4dcaa..f24bd166 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -1,8 +1,9 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio 
+intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools transformers accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index 6404b98e..f4beaec1 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,4 +1,4 @@ bark==0.1.5 -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index 934b1fd3..6013cf76 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -17,6 +17,9 @@ # LIMIT_TARGETS="cublas12" # source $(dirname $0)/../common/libbackend.sh # + +PYTHON_VERSION="3.10" + function init() { # Name of the backend (directory name) BACKEND_NAME=${PWD##*/} @@ -88,7 +91,7 @@ function getBuildProfile() { # always result in an activated virtual environment function ensureVenv() { if [ ! -d "${EDIR}/venv" ]; then - uv venv ${EDIR}/venv + uv venv --python ${PYTHON_VERSION} ${EDIR}/venv echo "virtualenv created" fi diff --git a/backend/python/common/template/Makefile b/backend/python/common/template/Makefile index 6cc45707..c0e5169f 100644 --- a/backend/python/common/template/Makefile +++ b/backend/python/common/template/Makefile @@ -1,8 +1,9 @@ .DEFAULT_GOAL := install .PHONY: install -install: protogen +install: bash install.sh + $(MAKE) protogen .PHONY: protogen protogen: backend_pb2_grpc.py backend_pb2.py @@ -12,7 +13,7 @@ protogen-clean: $(RM) backend_pb2_grpc.py backend_pb2.py backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto + bash protogen.sh .PHONY: clean clean: protogen-clean diff --git a/backend/python/common/template/protogen.sh b/backend/python/common/template/protogen.sh new file mode 100644 index 00000000..32f39fbb --- /dev/null +++ b/backend/python/common/template/protogen.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto \ No newline at end of file diff --git a/backend/python/common/template/requirements-intel.txt b/backend/python/common/template/requirements-intel.txt index 6dc25a10..b5318a13 100644 --- a/backend/python/common/template/requirements-intel.txt +++ b/backend/python/common/template/requirements-intel.txt @@ -1,4 +1,5 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] \ No newline at end of file diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index 21610c1c..125b18dd 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,2 +1,3 @@ -grpcio==1.66.1 -protobuf \ No newline at end of file +grpcio==1.70.0 +protobuf +grpcio-tools \ No newline at end of file diff --git a/backend/python/coqui/requirements-cpu.txt b/backend/python/coqui/requirements-cpu.txt index bbcdc8cd..c5201d62 100644 --- a/backend/python/coqui/requirements-cpu.txt +++ b/backend/python/coqui/requirements-cpu.txt @@ -1,3 +1,4 @@ transformers accelerate -torch \ No newline at end of file +torch==2.4.1 +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt index 71a6a93f..35fd4f42 100644 --- a/backend/python/coqui/requirements-cublas11.txt +++ b/backend/python/coqui/requirements-cublas11.txt @@ -1,5 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch -torchaudio +torch==2.4.1+cu118 +torchaudio==2.4.1+cu118 transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt index 0fa27074..fac719d4 100644 --- a/backend/python/coqui/requirements-cublas12.txt +++ b/backend/python/coqui/requirements-cublas12.txt @@ -1,4 +1,5 @@ -torch -torchaudio +torch==2.4.1 +torchaudio==2.4.1 transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index af9e820e..359e5867 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,5 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchaudio +torch==2.4.1+rocm6.0 +torchaudio==2.4.1+rocm6.0 transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index c0e4dcaa..202dd4ad 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -1,8 +1,10 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchaudio==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools transformers -accelerate \ No newline at end of file +accelerate +coqui-tts \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index d7708363..5ec13b5f 
100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,4 +1,4 @@ -TTS==0.22.0 -grpcio==1.66.1 +grpcio==1.70.0 protobuf -certifi \ No newline at end of file +certifi +packaging==24.1 \ No newline at end of file diff --git a/backend/python/coqui/test.py b/backend/python/coqui/test.py index d1418fa3..e0b1a0bd 100644 --- a/backend/python/coqui/test.py +++ b/backend/python/coqui/test.py @@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase): This method sets up the gRPC service by starting the server """ self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) + time.sleep(30) def tearDown(self) -> None: """ diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index e7ad1cdd..c9aa02bc 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -17,7 +17,7 @@ import backend_pb2_grpc import grpc -from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ +from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker @@ -247,11 +247,16 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): use_safetensors=True, variant=variant) elif request.PipelineType == "FluxPipeline": + if fromSingleFile: + self.pipe = FluxPipeline.from_single_file(modelFile, + torch_dtype=torchType, + use_safetensors=True) + else: self.pipe = FluxPipeline.from_pretrained( request.Model, torch_dtype=torch.bfloat16) - if request.LowVRAM: - self.pipe.enable_model_cpu_offload() + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() elif request.PipelineType == "FluxTransformer2DModel": dtype = torch.bfloat16 # specify from environment or default to "ChuckMcSneed/FLUX.1-dev" @@ -270,6 +275,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.LowVRAM: self.pipe.enable_model_cpu_offload() + elif request.PipelineType == "SanaPipeline": + self.pipe = SanaPipeline.from_pretrained( + request.Model, + variant="bf16", + torch_dtype=torch.bfloat16) + self.pipe.vae.to(torch.bfloat16) + self.pipe.text_encoder.to(torch.bfloat16) if CLIPSKIP and request.CLIPSkip != 0: self.clip_skip = request.CLIPSkip @@ -296,22 +308,34 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.pipe.controlnet = self.controlnet else: self.controlnet = None - # Assume directory from request.ModelFile. 
- # Only if request.LoraAdapter it's not an absolute path - if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter: - # get base path of modelFile - modelFileBase = os.path.dirname(request.ModelFile) + + if request.LoraAdapter and not os.path.isabs(request.LoraAdapter): # modify LoraAdapter to be relative to modelFileBase - request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter) + request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter) + device = "cpu" if not request.CUDA else "cuda" self.device = device if request.LoraAdapter: # Check if its a local file and not a directory ( we load lora differently for a safetensor file ) if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter): - # self.load_lora_weights(request.LoraAdapter, 1, device, torchType) self.pipe.load_lora_weights(request.LoraAdapter) else: self.pipe.unet.load_attn_procs(request.LoraAdapter) + if len(request.LoraAdapters) > 0: + i = 0 + adapters_name = [] + adapters_weights = [] + for adapter in request.LoraAdapters: + if not os.path.isabs(adapter): + adapter = os.path.join(request.ModelPath, adapter) + self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}") + adapters_name.append(f"adapter_{i}") + i += 1 + + for adapters_weight in request.LoraScales: + adapters_weights.append(adapters_weight) + + self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights) if request.CUDA: self.pipe.to('cuda') @@ -392,8 +416,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): # create a dictionary of values for the parameters options = { "negative_prompt": request.negative_prompt, - "width": request.width, - "height": request.height, "num_inference_steps": steps, } @@ -411,13 +433,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): keys = options.keys() if request.EnableParameters != "": - keys = request.EnableParameters.split(",") + keys = [key.strip() for key in request.EnableParameters.split(",")] if request.EnableParameters == "none": keys = [] # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults - kwargs = {key: options[key] for key in keys} + kwargs = {key: options.get(key) for key in keys if key in options} # Set seed if request.seed > 0: @@ -428,6 +450,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if self.PipelineType == "FluxPipeline": kwargs["max_sequence_length"] = 256 + if request.width: + kwargs["width"] = request.width + + if request.height: + kwargs["height"] = request.height + if self.PipelineType == "FluxTransformer2DModel": kwargs["output_type"] = "pil" kwargs["generator"] = torch.Generator("cpu").manual_seed(0) @@ -447,6 +475,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): export_to_video(video_frames, request.dst) return backend_pb2.Result(message="Media generated successfully", success=True) + print(f"Generating image with {kwargs=}", file=sys.stderr) image = {} if COMPEL: conditioning, pooled = self.compel.build_conditioning_tensor(prompt) diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt index 235bb57e..20667cc0 100644 --- a/backend/python/diffusers/requirements-cpu.txt +++ b/backend/python/diffusers/requirements-cpu.txt @@ -5,5 +5,5 @@ accelerate compel peft sentencepiece -torch +torch==2.4.1 optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt 
b/backend/python/diffusers/requirements-cublas11.txt index 40e718cb..19e2d72e 100644 --- a/backend/python/diffusers/requirements-cublas11.txt +++ b/backend/python/diffusers/requirements-cublas11.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 diffusers opencv-python transformers diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt index 3bcc5397..3992b039 100644 --- a/backend/python/diffusers/requirements-cublas12.txt +++ b/backend/python/diffusers/requirements-cublas12.txt @@ -1,4 +1,4 @@ -torch +torch==2.4.1 diffusers opencv-python transformers diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index 566278a8..eb7448b0 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -1,9 +1,10 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchvision +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +torchvision==0.18.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools diffusers opencv-python transformers diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index 043c7aba..8c450dca 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,5 +1,5 @@ setuptools -grpcio==1.66.1 +grpcio==1.70.0 pillow protobuf certifi diff --git a/backend/python/exllama2/requirements-cpu.txt b/backend/python/exllama2/requirements-cpu.txt index bbcdc8cd..2021fc20 100644 --- a/backend/python/exllama2/requirements-cpu.txt +++ b/backend/python/exllama2/requirements-cpu.txt @@ -1,3 +1,3 @@ transformers accelerate -torch \ No newline at end of file +torch==2.4.1 \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt index 1dfb5b98..2d1958c7 100644 --- a/backend/python/exllama2/requirements-cublas11.txt +++ b/backend/python/exllama2/requirements-cublas11.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 transformers accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas12.txt b/backend/python/exllama2/requirements-cublas12.txt index 1ec544cd..93e62c5a 100644 --- a/backend/python/exllama2/requirements-cublas12.txt +++ b/backend/python/exllama2/requirements-cublas12.txt @@ -1,3 +1,3 @@ -torch +torch==2.4.1 transformers accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 6fb018a0..cb622d0c 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi wheel diff --git a/backend/python/openvoice/Makefile b/backend/python/faster-whisper/Makefile similarity index 54% rename from backend/python/openvoice/Makefile rename to backend/python/faster-whisper/Makefile index a187a00f..c0e5169f 100644 --- a/backend/python/openvoice/Makefile +++ b/backend/python/faster-whisper/Makefile @@ -1,8 +1,9 @@ .DEFAULT_GOAL := install .PHONY: install -install: protogen +install: bash install.sh + $(MAKE) protogen .PHONY: protogen protogen: backend_pb2_grpc.py backend_pb2.py 
@@ -12,14 +13,8 @@ protogen-clean: $(RM) backend_pb2_grpc.py backend_pb2.py backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto + bash protogen.sh .PHONY: clean clean: protogen-clean - rm -rf venv __pycache__ - -.PHONY: test -test: protogen - @echo "Testing openvoice..." - bash test.sh - @echo "openvoice tested." \ No newline at end of file + rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/sentencetransformers/backend.py b/backend/python/faster-whisper/backend.py similarity index 51% rename from backend/python/sentencetransformers/backend.py rename to backend/python/faster-whisper/backend.py index 2a20bf60..dbb8b3d9 100755 --- a/backend/python/sentencetransformers/backend.py +++ b/backend/python/faster-whisper/backend.py @@ -1,85 +1,65 @@ #!/usr/bin/env python3 """ -Extra gRPC server for HuggingFace SentenceTransformer models. +This is an extra gRPC server of LocalAI for faster-whisper audio transcription """ from concurrent import futures - +import time import argparse import signal import sys import os - -import time import backend_pb2 import backend_pb2_grpc +from faster_whisper import WhisperModel + import grpc -from sentence_transformers import SentenceTransformer _ONE_DAY_IN_SECONDS = 60 * 60 * 24 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) +COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None) # Implement the BackendServicer class with the service methods class BackendServicer(backend_pb2_grpc.BackendServicer): """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. + BackendServicer is the class that implements the gRPC service """ def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. + device = "cpu" + # Get device + # device = "cuda" if request.CUDA else "cpu" + if request.CUDA: + device = "cuda" - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model try: - self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) + print("Preparing models, please wait", file=sys.stderr) + self.model = WhisperModel(request.Model, device=device, compute_type="float16") except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - # Implement your logic here for the LoadModel service # Replace this with your desired response return backend_pb2.Result(message="Model loaded successfully", success=True) - def Embedding(self, request, context): - """ - A gRPC method that calculates embeddings for a given sentence. - - Args: - request: An EmbeddingRequest object that contains the request parameters.
- context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - An EmbeddingResult object that contains the calculated embeddings. - """ - # Implement your logic here for the Embedding service - # Replace this with your desired response - print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) - sentence_embeddings = self.model.encode(request.Embeddings) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings) + def AudioTranscription(self, request, context): + resultSegments = [] + text = "" + try: + segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False) + id = 0 + for segment in segments: + print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) + resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text)) + text += segment.text + id += 1 + except Exception as err: + print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr) + return backend_pb2.TranscriptResult(segments=resultSegments, text=text) def serve(address): server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/sentencetransformers/install.sh b/backend/python/faster-whisper/install.sh similarity index 100% rename from backend/python/sentencetransformers/install.sh rename to backend/python/faster-whisper/install.sh diff --git a/backend/python/faster-whisper/protogen.sh b/backend/python/faster-whisper/protogen.sh new file mode 100644 index 00000000..32f39fbb --- /dev/null +++ b/backend/python/faster-whisper/protogen.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cpu.txt b/backend/python/faster-whisper/requirements-cpu.txt new file mode 100644 index 00000000..3e03f3ad --- /dev/null +++ b/backend/python/faster-whisper/requirements-cpu.txt @@ -0,0 +1,8 @@ +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +torch==2.4.1 +optimum-quanto \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cublas11.txt b/backend/python/faster-whisper/requirements-cublas11.txt new file mode 100644 index 00000000..b7453295 --- /dev/null +++ b/backend/python/faster-whisper/requirements-cublas11.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.4.1+cu118 +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cublas12.txt b/backend/python/faster-whisper/requirements-cublas12.txt new file mode 100644 index 00000000..8f46fa4a --- /dev/null +++ b/backend/python/faster-whisper/requirements-cublas12.txt @@ -0,0 +1,8 @@ +torch==2.4.1 +faster-whisper +opencv-python +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/openvoice/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt similarity index 74% rename from backend/python/openvoice/requirements-hipblas.txt rename to backend/python/faster-whisper/requirements-hipblas.txt index 76018445..29413f05 100644 --- a/backend/python/openvoice/requirements-hipblas.txt +++ b/backend/python/faster-whisper/requirements-hipblas.txt @@ -1,2 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +faster-whisper \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-intel.txt b/backend/python/faster-whisper/requirements-intel.txt new file mode 100644 index 00000000..417aa0b4 --- /dev/null +++ b/backend/python/faster-whisper/requirements-intel.txt @@ -0,0 +1,6 @@ +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu +optimum[openvino] +faster-whisper \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements.txt b/backend/python/faster-whisper/requirements.txt new file mode 100644 index 00000000..125b18dd --- /dev/null +++ b/backend/python/faster-whisper/requirements.txt @@ -0,0 +1,3 @@ +grpcio==1.70.0 +protobuf +grpcio-tools \ No newline at end of file diff --git a/backend/python/openvoice/run.sh b/backend/python/faster-whisper/run.sh similarity index 100% rename from backend/python/openvoice/run.sh rename to backend/python/faster-whisper/run.sh diff --git a/backend/python/mamba/test.sh b/backend/python/faster-whisper/test.sh similarity index 100% rename from backend/python/mamba/test.sh rename to backend/python/faster-whisper/test.sh diff --git a/backend/python/kokoro/Makefile b/backend/python/kokoro/Makefile new file mode 100644 index 00000000..c0e5169f --- /dev/null +++ b/backend/python/kokoro/Makefile @@ -0,0 +1,20 @@ +.DEFAULT_GOAL := install + +.PHONY: install +install: + bash install.sh + $(MAKE) protogen + +.PHONY: protogen +protogen: backend_pb2_grpc.py backend_pb2.py + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +backend_pb2_grpc.py backend_pb2.py: + bash protogen.sh + +.PHONY: 
clean +clean: protogen-clean + rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/parler-tts/backend.py b/backend/python/kokoro/backend.py old mode 100644 new mode 100755 similarity index 64% rename from backend/python/parler-tts/backend.py rename to backend/python/kokoro/backend.py index 655990d7..1fd1feb9 --- a/backend/python/parler-tts/backend.py +++ b/backend/python/kokoro/backend.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Extra gRPC server for MusicgenForConditionalGeneration models. +Extra gRPC server for Kokoro models. """ from concurrent import futures @@ -8,20 +8,17 @@ import argparse import signal import sys import os - import time import backend_pb2 import backend_pb2_grpc - +import soundfile as sf import grpc -from scipy.io.wavfile import write as write_wav - -from parler_tts import ParlerTTSForConditionalGeneration -from transformers import AutoTokenizer -import soundfile as sf +from models import build_model +from kokoro import generate import torch +SAMPLE_RATE = 22050 _ONE_DAY_IN_SECONDS = 60 * 60 * 24 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1 @@ -59,10 +56,31 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): A Result object that contains the result of the LoadModel operation. """ model_name = request.Model - device = "cuda:0" if torch.cuda.is_available() else "cpu" try: - self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device) - self.tokenizer = AutoTokenizer.from_pretrained(model_name) + device = "cuda:0" if torch.cuda.is_available() else "cpu" + self.MODEL = build_model(request.ModelFile, device) + options = request.Options + # Find the voice from the options, options are a list of strings in this form optname:optvalue: + VOICE_NAME = None + for opt in options: + if opt.startswith("voice:"): + VOICE_NAME = opt.split(":")[1] + break + if VOICE_NAME is None: + return backend_pb2.Result(success=False, message=f"No voice specified in options") + MODELPATH = request.ModelPath + # If voice name contains a plus, split it and load the two models and combine them + if "+" in VOICE_NAME: + voice1, voice2 = VOICE_NAME.split("+") + voice1 = torch.load(f'{MODELPATH}/{voice1}.pt', weights_only=True).to(device) + voice2 = torch.load(f'{MODELPATH}/{voice2}.pt', weights_only=True).to(device) + self.VOICEPACK = torch.mean(torch.stack([voice1, voice2]), dim=0) + else: + self.VOICEPACK = torch.load(f'{MODELPATH}/{VOICE_NAME}.pt', weights_only=True).to(device) + + self.VOICE_NAME = VOICE_NAME + + print(f'Loaded voice: {VOICE_NAME}') except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") @@ -70,38 +88,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): def TTS(self, request, context): model_name = request.model - voice = request.voice - if voice == "": - voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast." 
if model_name == "": return backend_pb2.Result(success=False, message="request.model is required") try: - device = "cuda:0" if torch.cuda.is_available() else "cpu" - input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device) - prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device) - - generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids) - audio_arr = generation.cpu().numpy().squeeze() - print("[parler-tts] TTS generated!", file=sys.stderr) - sf.write(request.dst, audio_arr, self.model.config.sampling_rate) - print("[parler-tts] TTS saved to", request.dst, file=sys.stderr) - print("[parler-tts] TTS for", file=sys.stderr) - print(request, file=sys.stderr) + audio, out_ps = generate(self.MODEL, request.text, self.VOICEPACK, lang=self.VOICE_NAME) + print(out_ps) + sf.write(request.dst, audio, SAMPLE_RATE) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=True) - def serve(address): server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) server.add_insecure_port(address) server.start() - print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr) + print("[Kokoro] Server started. Listening on: " + address, file=sys.stderr) # Define the signal handler function def signal_handler(sig, frame): - print("[parler-tts] Received termination signal. Shutting down...") + print("[Kokoro] Received termination signal. Shutting down...") server.stop(0) sys.exit(0) @@ -121,5 +127,5 @@ if __name__ == "__main__": "--addr", default="localhost:50051", help="The address to bind the server to." 
) args = parser.parse_args() - print(f"[parler-tts] startup: {args}", file=sys.stderr) + print(f"[Kokoro] startup: {args}", file=sys.stderr) serve(args.addr) diff --git a/backend/python/transformers-musicgen/install.sh b/backend/python/kokoro/install.sh similarity index 100% rename from backend/python/transformers-musicgen/install.sh rename to backend/python/kokoro/install.sh diff --git a/backend/python/kokoro/istftnet.py b/backend/python/kokoro/istftnet.py new file mode 100644 index 00000000..818fb912 --- /dev/null +++ b/backend/python/kokoro/istftnet.py @@ -0,0 +1,524 @@ +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/istftnet.py +# https://github.com/yl4579/StyleTTS2/blob/main/Modules/istftnet.py +from scipy.signal import get_window +from torch.nn import Conv1d, ConvTranspose1d +from torch.nn.utils import weight_norm, remove_weight_norm +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +# https://github.com/yl4579/StyleTTS2/blob/main/Modules/utils.py +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + +def get_padding(kernel_size, dilation=1): + return int((kernel_size*dilation - dilation)/2) + +LRELU_SLOPE = 0.1 + +class AdaIN1d(nn.Module): + def __init__(self, style_dim, num_features): + super().__init__() + self.norm = nn.InstanceNorm1d(num_features, affine=False) + self.fc = nn.Linear(style_dim, num_features*2) + + def forward(self, x, s): + h = self.fc(s) + h = h.view(h.size(0), h.size(1), 1) + gamma, beta = torch.chunk(h, chunks=2, dim=1) + return (1 + gamma) * self.norm(x) + beta + +class AdaINResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5), style_dim=64): + super(AdaINResBlock1, self).__init__() + self.convs1 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]))) + ]) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))) + ]) + self.convs2.apply(init_weights) + + self.adain1 = nn.ModuleList([ + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + ]) + + self.adain2 = nn.ModuleList([ + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + AdaIN1d(style_dim, channels), + ]) + + self.alpha1 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs1))]) + self.alpha2 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs2))]) + + + def forward(self, x, s): + for c1, c2, n1, n2, a1, a2 in zip(self.convs1, self.convs2, self.adain1, self.adain2, self.alpha1, self.alpha2): + xt = n1(x, s) + xt = xt + (1 / a1) * (torch.sin(a1 * xt) ** 2) # Snake1D + xt = c1(xt) + xt = n2(xt, s) + xt = xt + (1 / a2) * (torch.sin(a2 * xt) ** 2) # Snake1D + xt = c2(xt) + x = xt + x + return x + + def 
remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + +class TorchSTFT(torch.nn.Module): + def __init__(self, filter_length=800, hop_length=200, win_length=800, window='hann'): + super().__init__() + self.filter_length = filter_length + self.hop_length = hop_length + self.win_length = win_length + self.window = torch.from_numpy(get_window(window, win_length, fftbins=True).astype(np.float32)) + + def transform(self, input_data): + forward_transform = torch.stft( + input_data, + self.filter_length, self.hop_length, self.win_length, window=self.window.to(input_data.device), + return_complex=True) + + return torch.abs(forward_transform), torch.angle(forward_transform) + + def inverse(self, magnitude, phase): + inverse_transform = torch.istft( + magnitude * torch.exp(phase * 1j), + self.filter_length, self.hop_length, self.win_length, window=self.window.to(magnitude.device)) + + return inverse_transform.unsqueeze(-2) # unsqueeze to stay consistent with conv_transpose1d implementation + + def forward(self, input_data): + self.magnitude, self.phase = self.transform(input_data) + reconstruction = self.inverse(self.magnitude, self.phase) + return reconstruction + +class SineGen(torch.nn.Module): + """ Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__(self, samp_rate, upsample_scale, harmonic_num=0, + sine_amp=0.1, noise_std=0.003, + voiced_threshold=0, + flag_for_pulse=False): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + self.flag_for_pulse = flag_for_pulse + self.upsample_scale = upsample_scale + + def _f02uv(self, f0): + # generate uv signal + uv = (f0 > self.voiced_threshold).type(torch.float32) + return uv + + def _f02sine(self, f0_values): + """ f0_values: (batchsize, length, dim) + where dim indicates fundamental tone and overtones + """ + # convert to F0 in rad. The interger part n can be ignored + # because 2 * np.pi * n doesn't affect phase + rad_values = (f0_values / self.sampling_rate) % 1 + + # initial phase noise (no noise for fundamental component) + rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \ + device=f0_values.device) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + + # instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad) + if not self.flag_for_pulse: +# # for normal case + +# # To prevent torch.cumsum numerical overflow, +# # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1. +# # Buffer tmp_over_one_idx indicates the time step to add -1. 
+# # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi +# tmp_over_one = torch.cumsum(rad_values, 1) % 1 +# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0 +# cumsum_shift = torch.zeros_like(rad_values) +# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + +# phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi + rad_values = torch.nn.functional.interpolate(rad_values.transpose(1, 2), + scale_factor=1/self.upsample_scale, + mode="linear").transpose(1, 2) + +# tmp_over_one = torch.cumsum(rad_values, 1) % 1 +# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0 +# cumsum_shift = torch.zeros_like(rad_values) +# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + + phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi + phase = torch.nn.functional.interpolate(phase.transpose(1, 2) * self.upsample_scale, + scale_factor=self.upsample_scale, mode="linear").transpose(1, 2) + sines = torch.sin(phase) + + else: + # If necessary, make sure that the first time step of every + # voiced segments is sin(pi) or cos(0) + # This is used for pulse-train generation + + # identify the last time step in unvoiced segments + uv = self._f02uv(f0_values) + uv_1 = torch.roll(uv, shifts=-1, dims=1) + uv_1[:, -1, :] = 1 + u_loc = (uv < 1) * (uv_1 > 0) + + # get the instantanouse phase + tmp_cumsum = torch.cumsum(rad_values, dim=1) + # different batch needs to be processed differently + for idx in range(f0_values.shape[0]): + temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :] + temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :] + # stores the accumulation of i.phase within + # each voiced segments + tmp_cumsum[idx, :, :] = 0 + tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum + + # rad_values - tmp_cumsum: remove the accumulation of i.phase + # within the previous voiced segment. + i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1) + + # get the sines + sines = torch.cos(i_phase * 2 * np.pi) + return sines + + def forward(self, f0): + """ sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, + device=f0.device) + # fundamental component + fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device)) + + # generate sine waveforms + sine_waves = self._f02sine(fn) * self.sine_amp + + # generate uv signal + # uv = torch.ones(f0.shape) + # uv = uv * (f0 > self.voiced_threshold) + uv = self._f02uv(f0) + + # noise: for unvoiced should be similar to sine_amp + # std = self.sine_amp/3 -> max value ~ self.sine_amp + # . 
for voiced regions is self.noise_std + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + + # first: set the unvoiced part to 0 by uv + # then: additive noise + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise + + +class SourceModuleHnNSF(torch.nn.Module): + """ SourceModule for hn-nsf + SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + harmonic_num: number of harmonic above F0 (default: 0) + sine_amp: amplitude of sine source signal (default: 0.1) + add_noise_std: std of additive Gaussian noise (default: 0.003) + note that amplitude of noise in unvoiced is decided + by sine_amp + voiced_threshold: threhold to set U/V given F0 (default: 0) + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + uv (batchsize, length, 1) + """ + + def __init__(self, sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0): + super(SourceModuleHnNSF, self).__init__() + + self.sine_amp = sine_amp + self.noise_std = add_noise_std + + # to produce sine waveforms + self.l_sin_gen = SineGen(sampling_rate, upsample_scale, harmonic_num, + sine_amp, add_noise_std, voiced_threshod) + + # to merge source harmonics into a single excitation + self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) + self.l_tanh = torch.nn.Tanh() + + def forward(self, x): + """ + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + """ + # source for harmonic branch + with torch.no_grad(): + sine_wavs, uv, _ = self.l_sin_gen(x) + sine_merge = self.l_tanh(self.l_linear(sine_wavs)) + + # source for noise branch, in the same shape as uv + noise = torch.randn_like(uv) * self.sine_amp / 3 + return sine_merge, noise, uv +def padDiff(x): + return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0) + + +class Generator(torch.nn.Module): + def __init__(self, style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel, resblock_dilation_sizes, upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size): + super(Generator, self).__init__() + + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_rates) + resblock = AdaINResBlock1 + + self.m_source = SourceModuleHnNSF( + sampling_rate=24000, + upsample_scale=np.prod(upsample_rates) * gen_istft_hop_size, + harmonic_num=8, voiced_threshod=10) + self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates) * gen_istft_hop_size) + self.noise_convs = nn.ModuleList() + self.noise_res = nn.ModuleList() + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): + self.ups.append(weight_norm( + ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)), + k, u, padding=(k-u)//2))) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel//(2**(i+1)) + for j, (k, d) in enumerate(zip(resblock_kernel_sizes,resblock_dilation_sizes)): + self.resblocks.append(resblock(ch, k, d, style_dim)) + + c_cur = upsample_initial_channel // (2 ** (i + 1)) + + if i + 1 < len(upsample_rates): # + stride_f0 = np.prod(upsample_rates[i + 1:]) + self.noise_convs.append(Conv1d( + gen_istft_n_fft + 2, 
c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2)) + self.noise_res.append(resblock(c_cur, 7, [1,3,5], style_dim)) + else: + self.noise_convs.append(Conv1d(gen_istft_n_fft + 2, c_cur, kernel_size=1)) + self.noise_res.append(resblock(c_cur, 11, [1,3,5], style_dim)) + + + self.post_n_fft = gen_istft_n_fft + self.conv_post = weight_norm(Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3)) + self.ups.apply(init_weights) + self.conv_post.apply(init_weights) + self.reflection_pad = torch.nn.ReflectionPad1d((1, 0)) + self.stft = TorchSTFT(filter_length=gen_istft_n_fft, hop_length=gen_istft_hop_size, win_length=gen_istft_n_fft) + + + def forward(self, x, s, f0): + with torch.no_grad(): + f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2) # bs,n,t + + har_source, noi_source, uv = self.m_source(f0) + har_source = har_source.transpose(1, 2).squeeze(1) + har_spec, har_phase = self.stft.transform(har_source) + har = torch.cat([har_spec, har_phase], dim=1) + + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x_source = self.noise_convs[i](har) + x_source = self.noise_res[i](x_source, s) + + x = self.ups[i](x) + if i == self.num_upsamples - 1: + x = self.reflection_pad(x) + + x = x + x_source + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x, s) + else: + xs += self.resblocks[i*self.num_kernels+j](x, s) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :]) + phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :]) + return self.stft.inverse(spec, phase) + + def fw_phase(self, x, s): + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + x = self.ups[i](x) + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i*self.num_kernels+j](x, s) + else: + xs += self.resblocks[i*self.num_kernels+j](x, s) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.reflection_pad(x) + x = self.conv_post(x) + spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :]) + phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :]) + return spec, phase + + def remove_weight_norm(self): + print('Removing weight norm...') + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) + + +class AdainResBlk1d(nn.Module): + def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2), + upsample='none', dropout_p=0.0): + super().__init__() + self.actv = actv + self.upsample_type = upsample + self.upsample = UpSample1d(upsample) + self.learned_sc = dim_in != dim_out + self._build_weights(dim_in, dim_out, style_dim) + self.dropout = nn.Dropout(dropout_p) + + if upsample == 'none': + self.pool = nn.Identity() + else: + self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1)) + + + def _build_weights(self, dim_in, dim_out, style_dim): + self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1)) + self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1)) + self.norm1 = AdaIN1d(style_dim, dim_in) + self.norm2 = AdaIN1d(style_dim, dim_out) + if self.learned_sc: + self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False)) + + def _shortcut(self, x): + x = self.upsample(x) + if self.learned_sc: + x = self.conv1x1(x) + return x + + def _residual(self, x, s): + x = self.norm1(x, s) + x = self.actv(x) + x = 
self.pool(x) + x = self.conv1(self.dropout(x)) + x = self.norm2(x, s) + x = self.actv(x) + x = self.conv2(self.dropout(x)) + return x + + def forward(self, x, s): + out = self._residual(x, s) + out = (out + self._shortcut(x)) / np.sqrt(2) + return out + +class UpSample1d(nn.Module): + def __init__(self, layer_type): + super().__init__() + self.layer_type = layer_type + + def forward(self, x): + if self.layer_type == 'none': + return x + else: + return F.interpolate(x, scale_factor=2, mode='nearest') + +class Decoder(nn.Module): + def __init__(self, dim_in=512, F0_channel=512, style_dim=64, dim_out=80, + resblock_kernel_sizes = [3,7,11], + upsample_rates = [10, 6], + upsample_initial_channel=512, + resblock_dilation_sizes=[[1,3,5], [1,3,5], [1,3,5]], + upsample_kernel_sizes=[20, 12], + gen_istft_n_fft=20, gen_istft_hop_size=5): + super().__init__() + + self.decode = nn.ModuleList() + + self.encode = AdainResBlk1d(dim_in + 2, 1024, style_dim) + + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim)) + self.decode.append(AdainResBlk1d(1024 + 2 + 64, 512, style_dim, upsample=True)) + + self.F0_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1)) + + self.N_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1)) + + self.asr_res = nn.Sequential( + weight_norm(nn.Conv1d(512, 64, kernel_size=1)), + ) + + + self.generator = Generator(style_dim, resblock_kernel_sizes, upsample_rates, + upsample_initial_channel, resblock_dilation_sizes, + upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size) + + def forward(self, asr, F0_curve, N, s): + F0 = self.F0_conv(F0_curve.unsqueeze(1)) + N = self.N_conv(N.unsqueeze(1)) + + x = torch.cat([asr, F0, N], axis=1) + x = self.encode(x, s) + + asr_res = self.asr_res(asr) + + res = True + for block in self.decode: + if res: + x = torch.cat([x, asr_res, F0, N], axis=1) + x = block(x, s) + if block.upsample_type != "none": + res = False + + x = self.generator(x, s, F0_curve) + return x diff --git a/backend/python/kokoro/kokoro.py b/backend/python/kokoro/kokoro.py new file mode 100644 index 00000000..3a0df7f5 --- /dev/null +++ b/backend/python/kokoro/kokoro.py @@ -0,0 +1,166 @@ +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/kokoro.py +import phonemizer +import re +import torch +import numpy as np + +def split_num(num): + num = num.group() + if '.' in num: + return num + elif ':' in num: + h, m = [int(n) for n in num.split(':')] + if m == 0: + return f"{h} o'clock" + elif m < 10: + return f'{h} oh {m}' + return f'{h} {m}' + year = int(num[:4]) + if year < 1100 or year % 1000 < 10: + return num + left, right = num[:2], int(num[2:4]) + s = 's' if num.endswith('s') else '' + if 100 <= year % 1000 <= 999: + if right == 0: + return f'{left} hundred{s}' + elif right < 10: + return f'{left} oh {right}{s}' + return f'{left} {right}{s}' + +def flip_money(m): + m = m.group() + bill = 'dollar' if m[0] == '$' else 'pound' + if m[-1].isalpha(): + return f'{m[1:]} {bill}s' + elif '.' 
not in m: + s = '' if m[1:] == '1' else 's' + return f'{m[1:]} {bill}{s}' + b, c = m[1:].split('.') + s = '' if b == '1' else 's' + c = int(c.ljust(2, '0')) + coins = f"cent{'' if c == 1 else 's'}" if m[0] == '$' else ('penny' if c == 1 else 'pence') + return f'{b} {bill}{s} and {c} {coins}' + +def point_num(num): + a, b = num.group().split('.') + return ' point '.join([a, ' '.join(b)]) + +def normalize_text(text): + text = text.replace(chr(8216), "'").replace(chr(8217), "'") + text = text.replace('«', chr(8220)).replace('»', chr(8221)) + text = text.replace(chr(8220), '"').replace(chr(8221), '"') + text = text.replace('(', '«').replace(')', '»') + for a, b in zip('、。!,:;?', ',.!,:;?'): + text = text.replace(a, b+' ') + text = re.sub(r'[^\S \n]', ' ', text) + text = re.sub(r' +', ' ', text) + text = re.sub(r'(?<=\n) +(?=\n)', '', text) + text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text) + text = re.sub(r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister', text) + text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text) + text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text) + text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text) + text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text) + text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(? 510: + tokens = tokens[:510] + print('Truncated to 510 tokens') + ref_s = voicepack[len(tokens)] + out = forward(model, tokens, ref_s, speed) + ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) + return out, ps + +def generate_full(model, text, voicepack, lang='a', speed=1, ps=None): + ps = ps or phonemize(text, lang) + tokens = tokenize(ps) + if not tokens: + return None + outs = [] + loop_count = len(tokens)//510 + (1 if len(tokens) % 510 != 0 else 0) + for i in range(loop_count): + ref_s = voicepack[len(tokens[i*510:(i+1)*510])] + out = forward(model, tokens[i*510:(i+1)*510], ref_s, speed) + outs.append(out) + outs = np.concatenate(outs) + ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) + return outs, ps \ No newline at end of file diff --git a/backend/python/kokoro/models.py b/backend/python/kokoro/models.py new file mode 100644 index 00000000..cf358d9e --- /dev/null +++ b/backend/python/kokoro/models.py @@ -0,0 +1,373 @@ +# https://github.com/yl4579/StyleTTS2/blob/main/models.py +# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/models.py +from istftnet import AdaIN1d, Decoder +from munch import Munch +from pathlib import Path +from plbert import load_plbert +from torch.nn.utils import weight_norm, spectral_norm +import json +import numpy as np +import os +import os.path as osp +import torch +import torch.nn as nn +import torch.nn.functional as F + +class LinearNorm(torch.nn.Module): + def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'): + super(LinearNorm, self).__init__() + self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias) + + torch.nn.init.xavier_uniform_( + self.linear_layer.weight, + gain=torch.nn.init.calculate_gain(w_init_gain)) + + def forward(self, x): + return self.linear_layer(x) + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(channels)) + self.beta = nn.Parameter(torch.zeros(channels)) + + def forward(self, x): + x = x.transpose(1, -1) + x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) + return x.transpose(1, -1) + +class TextEncoder(nn.Module): + def __init__(self, channels, kernel_size, depth, n_symbols, actv=nn.LeakyReLU(0.2)): 
+ super().__init__() + self.embedding = nn.Embedding(n_symbols, channels) + + padding = (kernel_size - 1) // 2 + self.cnn = nn.ModuleList() + for _ in range(depth): + self.cnn.append(nn.Sequential( + weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, padding=padding)), + LayerNorm(channels), + actv, + nn.Dropout(0.2), + )) + # self.cnn = nn.Sequential(*self.cnn) + + self.lstm = nn.LSTM(channels, channels//2, 1, batch_first=True, bidirectional=True) + + def forward(self, x, input_lengths, m): + x = self.embedding(x) # [B, T, emb] + x = x.transpose(1, 2) # [B, emb, T] + m = m.to(input_lengths.device).unsqueeze(1) + x.masked_fill_(m, 0.0) + + for c in self.cnn: + x = c(x) + x.masked_fill_(m, 0.0) + + x = x.transpose(1, 2) # [B, T, chn] + + input_lengths = input_lengths.cpu().numpy() + x = nn.utils.rnn.pack_padded_sequence( + x, input_lengths, batch_first=True, enforce_sorted=False) + + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + + x = x.transpose(-1, -2) + x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]]) + + x_pad[:, :, :x.shape[-1]] = x + x = x_pad.to(x.device) + + x.masked_fill_(m, 0.0) + + return x + + def inference(self, x): + x = self.embedding(x) + x = x.transpose(1, 2) + x = self.cnn(x) + x = x.transpose(1, 2) + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + return x + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + + +class UpSample1d(nn.Module): + def __init__(self, layer_type): + super().__init__() + self.layer_type = layer_type + + def forward(self, x): + if self.layer_type == 'none': + return x + else: + return F.interpolate(x, scale_factor=2, mode='nearest') + +class AdainResBlk1d(nn.Module): + def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2), + upsample='none', dropout_p=0.0): + super().__init__() + self.actv = actv + self.upsample_type = upsample + self.upsample = UpSample1d(upsample) + self.learned_sc = dim_in != dim_out + self._build_weights(dim_in, dim_out, style_dim) + self.dropout = nn.Dropout(dropout_p) + + if upsample == 'none': + self.pool = nn.Identity() + else: + self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1)) + + + def _build_weights(self, dim_in, dim_out, style_dim): + self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1)) + self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1)) + self.norm1 = AdaIN1d(style_dim, dim_in) + self.norm2 = AdaIN1d(style_dim, dim_out) + if self.learned_sc: + self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False)) + + def _shortcut(self, x): + x = self.upsample(x) + if self.learned_sc: + x = self.conv1x1(x) + return x + + def _residual(self, x, s): + x = self.norm1(x, s) + x = self.actv(x) + x = self.pool(x) + x = self.conv1(self.dropout(x)) + x = self.norm2(x, s) + x = self.actv(x) + x = self.conv2(self.dropout(x)) + return x + + def forward(self, x, s): + out = self._residual(x, s) + out = (out + self._shortcut(x)) / np.sqrt(2) + return out + +class AdaLayerNorm(nn.Module): + def __init__(self, style_dim, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.fc = nn.Linear(style_dim, channels*2) + + def forward(self, x, s): + x = x.transpose(-1, -2) + x = x.transpose(1, -1) + + h = self.fc(s) + h = 
h.view(h.size(0), h.size(1), 1) + gamma, beta = torch.chunk(h, chunks=2, dim=1) + gamma, beta = gamma.transpose(1, -1), beta.transpose(1, -1) + + + x = F.layer_norm(x, (self.channels,), eps=self.eps) + x = (1 + gamma) * x + beta + return x.transpose(1, -1).transpose(-1, -2) + +class ProsodyPredictor(nn.Module): + + def __init__(self, style_dim, d_hid, nlayers, max_dur=50, dropout=0.1): + super().__init__() + + self.text_encoder = DurationEncoder(sty_dim=style_dim, + d_model=d_hid, + nlayers=nlayers, + dropout=dropout) + + self.lstm = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True) + self.duration_proj = LinearNorm(d_hid, max_dur) + + self.shared = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True) + self.F0 = nn.ModuleList() + self.F0.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout)) + self.F0.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout)) + self.F0.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout)) + + self.N = nn.ModuleList() + self.N.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout)) + self.N.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout)) + self.N.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout)) + + self.F0_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0) + self.N_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0) + + + def forward(self, texts, style, text_lengths, alignment, m): + d = self.text_encoder(texts, style, text_lengths, m) + + batch_size = d.shape[0] + text_size = d.shape[1] + + # predict duration + input_lengths = text_lengths.cpu().numpy() + x = nn.utils.rnn.pack_padded_sequence( + d, input_lengths, batch_first=True, enforce_sorted=False) + + m = m.to(text_lengths.device).unsqueeze(1) + + self.lstm.flatten_parameters() + x, _ = self.lstm(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + + x_pad = torch.zeros([x.shape[0], m.shape[-1], x.shape[-1]]) + + x_pad[:, :x.shape[1], :] = x + x = x_pad.to(x.device) + + duration = self.duration_proj(nn.functional.dropout(x, 0.5, training=self.training)) + + en = (d.transpose(-1, -2) @ alignment) + + return duration.squeeze(-1), en + + def F0Ntrain(self, x, s): + x, _ = self.shared(x.transpose(-1, -2)) + + F0 = x.transpose(-1, -2) + for block in self.F0: + F0 = block(F0, s) + F0 = self.F0_proj(F0) + + N = x.transpose(-1, -2) + for block in self.N: + N = block(N, s) + N = self.N_proj(N) + + return F0.squeeze(1), N.squeeze(1) + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + +class DurationEncoder(nn.Module): + + def __init__(self, sty_dim, d_model, nlayers, dropout=0.1): + super().__init__() + self.lstms = nn.ModuleList() + for _ in range(nlayers): + self.lstms.append(nn.LSTM(d_model + sty_dim, + d_model // 2, + num_layers=1, + batch_first=True, + bidirectional=True, + dropout=dropout)) + self.lstms.append(AdaLayerNorm(sty_dim, d_model)) + + + self.dropout = dropout + self.d_model = d_model + self.sty_dim = sty_dim + + def forward(self, x, style, text_lengths, m): + masks = m.to(text_lengths.device) + + x = x.permute(2, 0, 1) + s = style.expand(x.shape[0], x.shape[1], -1) + x = torch.cat([x, s], axis=-1) + x.masked_fill_(masks.unsqueeze(-1).transpose(0, 1), 0.0) + + x = x.transpose(0, 1) + input_lengths = text_lengths.cpu().numpy() + x = x.transpose(-1, -2) + + for block 
in self.lstms: + if isinstance(block, AdaLayerNorm): + x = block(x.transpose(-1, -2), style).transpose(-1, -2) + x = torch.cat([x, s.permute(1, -1, 0)], axis=1) + x.masked_fill_(masks.unsqueeze(-1).transpose(-1, -2), 0.0) + else: + x = x.transpose(-1, -2) + x = nn.utils.rnn.pack_padded_sequence( + x, input_lengths, batch_first=True, enforce_sorted=False) + block.flatten_parameters() + x, _ = block(x) + x, _ = nn.utils.rnn.pad_packed_sequence( + x, batch_first=True) + x = F.dropout(x, p=self.dropout, training=self.training) + x = x.transpose(-1, -2) + + x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]]) + + x_pad[:, :, :x.shape[-1]] = x + x = x_pad.to(x.device) + + return x.transpose(-1, -2) + + def inference(self, x, style): + x = self.embedding(x.transpose(-1, -2)) * np.sqrt(self.d_model) + style = style.expand(x.shape[0], x.shape[1], -1) + x = torch.cat([x, style], axis=-1) + src = self.pos_encoder(x) + output = self.transformer_encoder(src).transpose(0, 1) + return output + + def length_to_mask(self, lengths): + mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths) + mask = torch.gt(mask+1, lengths.unsqueeze(1)) + return mask + +# https://github.com/yl4579/StyleTTS2/blob/main/utils.py +def recursive_munch(d): + if isinstance(d, dict): + return Munch((k, recursive_munch(v)) for k, v in d.items()) + elif isinstance(d, list): + return [recursive_munch(v) for v in d] + else: + return d + +def build_model(path, device): + config = Path(__file__).parent / 'config.json' + assert config.exists(), f'Config path incorrect: config.json not found at {config}' + with open(config, 'r') as r: + args = recursive_munch(json.load(r)) + assert args.decoder.type == 'istftnet', f'Unknown decoder type: {args.decoder.type}' + decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels, + resblock_kernel_sizes = args.decoder.resblock_kernel_sizes, + upsample_rates = args.decoder.upsample_rates, + upsample_initial_channel=args.decoder.upsample_initial_channel, + resblock_dilation_sizes=args.decoder.resblock_dilation_sizes, + upsample_kernel_sizes=args.decoder.upsample_kernel_sizes, + gen_istft_n_fft=args.decoder.gen_istft_n_fft, gen_istft_hop_size=args.decoder.gen_istft_hop_size) + text_encoder = TextEncoder(channels=args.hidden_dim, kernel_size=5, depth=args.n_layer, n_symbols=args.n_token) + predictor = ProsodyPredictor(style_dim=args.style_dim, d_hid=args.hidden_dim, nlayers=args.n_layer, max_dur=args.max_dur, dropout=args.dropout) + bert = load_plbert() + bert_encoder = nn.Linear(bert.config.hidden_size, args.hidden_dim) + for parent in [bert, bert_encoder, predictor, decoder, text_encoder]: + for child in parent.children(): + if isinstance(child, nn.RNNBase): + child.flatten_parameters() + model = Munch( + bert=bert.to(device).eval(), + bert_encoder=bert_encoder.to(device).eval(), + predictor=predictor.to(device).eval(), + decoder=decoder.to(device).eval(), + text_encoder=text_encoder.to(device).eval(), + ) + for key, state_dict in torch.load(path, map_location='cpu', weights_only=True)['net'].items(): + assert key in model, key + try: + model[key].load_state_dict(state_dict) + except: + state_dict = {k[7:]: v for k, v in state_dict.items()} + model[key].load_state_dict(state_dict, strict=False) + return model diff --git a/backend/python/kokoro/plbert.py b/backend/python/kokoro/plbert.py new file mode 100644 index 00000000..bf1dba5a --- /dev/null +++ b/backend/python/kokoro/plbert.py @@ -0,0 +1,16 @@ +# 
https://huggingface.co/hexgrad/Kokoro-82M/blob/main/plbert.py +# https://github.com/yl4579/StyleTTS2/blob/main/Utils/PLBERT/util.py +from transformers import AlbertConfig, AlbertModel + +class CustomAlbert(AlbertModel): + def forward(self, *args, **kwargs): + # Call the original forward method + outputs = super().forward(*args, **kwargs) + # Only return the last_hidden_state + return outputs.last_hidden_state + +def load_plbert(): + plbert_config = {'vocab_size': 178, 'hidden_size': 768, 'num_attention_heads': 12, 'intermediate_size': 2048, 'max_position_embeddings': 512, 'num_hidden_layers': 12, 'dropout': 0.1} + albert_base_configuration = AlbertConfig(**plbert_config) + bert = CustomAlbert(albert_base_configuration) + return bert diff --git a/backend/python/kokoro/protogen.sh b/backend/python/kokoro/protogen.sh new file mode 100644 index 00000000..32f39fbb --- /dev/null +++ b/backend/python/kokoro/protogen.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +source $(dirname $0)/../common/libbackend.sh + +python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cpu.txt b/backend/python/kokoro/requirements-cpu.txt new file mode 100644 index 00000000..b4f1261f --- /dev/null +++ b/backend/python/kokoro/requirements-cpu.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/kokoro/requirements-cublas11.txt similarity index 64% rename from backend/python/mamba/requirements-cublas11.txt rename to backend/python/kokoro/requirements-cublas11.txt index 7048a14f..ed0d4df5 100644 --- a/backend/python/mamba/requirements-cublas11.txt +++ b/backend/python/kokoro/requirements-cublas11.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas12.txt b/backend/python/kokoro/requirements-cublas12.txt new file mode 100644 index 00000000..b4f1261f --- /dev/null +++ b/backend/python/kokoro/requirements-cublas12.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +transformers \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt similarity index 64% rename from backend/python/transformers-musicgen/requirements-hipblas.txt rename to backend/python/kokoro/requirements-hipblas.txt index 00f0a946..ec8d0306 100644 --- a/backend/python/transformers-musicgen/requirements-hipblas.txt +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -1,4 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -transformers -accelerate -torch \ No newline at end of file +torch==2.4.1+rocm6.0 +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements-intel.txt b/backend/python/kokoro/requirements-intel.txt new file mode 100644 index 00000000..b16448d3 --- /dev/null +++ b/backend/python/kokoro/requirements-intel.txt @@ -0,0 +1,5 @@ +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu +transformers \ No newline at end of file diff --git a/backend/python/kokoro/requirements.txt b/backend/python/kokoro/requirements.txt new file mode 100644 index 00000000..06e60389 --- /dev/null +++ b/backend/python/kokoro/requirements.txt @@ -0,0 +1,7 @@ +grpcio==1.70.0 +protobuf 
+phonemizer +scipy +munch +setuptools +soundfile \ No newline at end of file diff --git a/backend/python/parler-tts/run.sh b/backend/python/kokoro/run.sh similarity index 100% rename from backend/python/parler-tts/run.sh rename to backend/python/kokoro/run.sh diff --git a/backend/python/parler-tts/test.sh b/backend/python/kokoro/test.sh similarity index 100% rename from backend/python/parler-tts/test.sh rename to backend/python/kokoro/test.sh diff --git a/backend/python/mamba/Makefile b/backend/python/mamba/Makefile deleted file mode 100644 index 52b1c53a..00000000 --- a/backend/python/mamba/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY: mamba -mamba: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running mamba..." - bash run.sh - @echo "mamba run." - -.PHONY: test -test: protogen - @echo "Testing mamba..." - bash test.sh - @echo "mamba tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - $(RM) -r venv __pycache__ \ No newline at end of file diff --git a/backend/python/mamba/README.md b/backend/python/mamba/README.md deleted file mode 100644 index d6ead917..00000000 --- a/backend/python/mamba/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the mamba project - -``` -make mamba -``` \ No newline at end of file diff --git a/backend/python/mamba/backend.py b/backend/python/mamba/backend.py deleted file mode 100644 index 3c15fea7..00000000 --- a/backend/python/mamba/backend.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python3 -from concurrent import futures -import time -import argparse -import signal -import sys -import os - -import backend_pb2 -import backend_pb2_grpc - -import grpc - -import torch -from transformers import AutoTokenizer, AutoModelForCausalLM -from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) -MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1' - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer that implements the Backend service defined in backend.proto. - """ - def generate(self,prompt, max_new_tokens): - """ - Generates text based on the given prompt and maximum number of new tokens. - - Args: - prompt (str): The prompt to generate text from. - max_new_tokens (int): The maximum number of new tokens to generate. - - Returns: - str: The generated text. 
- """ - self.generator.end_beam_search() - - # Tokenizing the input - ids = self.generator.tokenizer.encode(prompt) - - self.generator.gen_begin_reuse(ids) - initial_len = self.generator.sequence[0].shape[0] - has_leading_space = False - decoded_text = '' - for i in range(max_new_tokens): - token = self.generator.gen_single_token() - if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): - has_leading_space = True - - decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:]) - if has_leading_space: - decoded_text = ' ' + decoded_text - - if token.item() == self.generator.tokenizer.eos_token_id: - break - return decoded_text - - def Health(self, request, context): - """ - Returns a health check message. - - Args: - request: The health check request. - context: The gRPC context. - - Returns: - backend_pb2.Reply: The health check reply. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Loads a language model. - - Args: - request: The load model request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The load model result. - """ - try: - tokenizerModel = request.Tokenizer - if tokenizerModel == "": - tokenizerModel = request.Model - - tokenizer = AutoTokenizer.from_pretrained(tokenizerModel) - if MAMBA_CHAT: - tokenizer.eos_token = "<|endoftext|>" - tokenizer.pad_token = tokenizer.eos_token - self.tokenizer = tokenizer - self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def Predict(self, request, context): - """ - Generates text based on the given prompt and sampling parameters. - - Args: - request: The predict request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict result. - """ - if request.TopP == 0: - request.TopP = 0.9 - - max_tokens = request.Tokens - - if request.Tokens == 0: - max_tokens = 2000 - - # encoded_input = self.tokenizer(request.Prompt) - tokens = self.tokenizer(request.Prompt, return_tensors="pt") - input_ids = tokens.input_ids.to(device="cuda") - out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature, - top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id) - - decoded = self.tokenizer.batch_decode(out) - - generated_text = decoded[0] - - # Remove prompt from response if present - if request.Prompt in generated_text: - generated_text = generated_text.replace(request.Prompt, "") - - return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) - - def PredictStream(self, request, context): - """ - Generates text based on the given prompt and sampling parameters, and streams the results. - - Args: - request: The predict stream request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict stream result. - """ - yield self.Predict(request, context) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/mamba/install.sh b/backend/python/mamba/install.sh deleted file mode 100755 index db18eefc..00000000 --- a/backend/python/mamba/install.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -set -e - -LIMIT_TARGETS="cublas" -EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation" - -source $(dirname $0)/../common/libbackend.sh - -installRequirements \ No newline at end of file diff --git a/backend/python/mamba/requirements-after.txt b/backend/python/mamba/requirements-after.txt deleted file mode 100644 index ea6890eb..00000000 --- a/backend/python/mamba/requirements-after.txt +++ /dev/null @@ -1,2 +0,0 @@ -causal-conv1d==1.4.0 -mamba-ssm==2.2.2 \ No newline at end of file diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt deleted file mode 100644 index 39dab0fd..00000000 --- a/backend/python/mamba/requirements-cpu.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt deleted file mode 100644 index 39dab0fd..00000000 --- a/backend/python/mamba/requirements-cublas12.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-install.txt b/backend/python/mamba/requirements-install.txt deleted file mode 100644 index 69d263f0..00000000 --- a/backend/python/mamba/requirements-install.txt +++ /dev/null @@ -1,6 +0,0 @@ -# mabma does not specify it's build dependencies per PEP517, so we need to disable build isolation -# this also means that we need to install the basic build dependencies into the venv ourselves -# https://github.com/Dao-AILab/causal-conv1d/issues/24 -packaging -setuptools -wheel \ No newline at end of file diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt deleted file mode 100644 index 8e1b0195..00000000 --- a/backend/python/mamba/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi \ No newline at end of file diff --git a/backend/python/mamba/run.sh b/backend/python/mamba/run.sh deleted file mode 100755 index 1afc3984..00000000 --- a/backend/python/mamba/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -LIMIT_TARGETS="cublas" - -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/mamba/test.py b/backend/python/mamba/test.py deleted file mode 100644 index 83fb2651..00000000 --- a/backend/python/mamba/test.py +++ /dev/null @@ -1,76 +0,0 @@ -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -import unittest -import subprocess -import time -import grpc -import backend_pb2_grpc -import backend_pb2 - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service. 
- - This class contains methods to test the startup and shutdown of the gRPC service. - """ - def setUp(self): - self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_text(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m")) - self.assertTrue(response.success) - req = backend_pb2.PredictOptions(Prompt="The capital of France is") - resp = stub.Predict(req) - self.assertIsNotNone(resp.message) - except Exception as err: - print(err) - self.fail("text service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/openvoice/backend.py b/backend/python/openvoice/backend.py deleted file mode 100755 index 7dde08cf..00000000 --- a/backend/python/openvoice/backend.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -""" -Extra gRPC server for OpenVoice models. -""" -from concurrent import futures - -import argparse -import signal -import sys -import os -import torch -from openvoice import se_extractor -from openvoice.api import ToneColorConverter -from melo.api import TTS - -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. - """ - def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. - - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. 
- - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model - try: - - self.clonedVoice = False - # Assume directory from request.ModelFile. - # Only if request.LoraAdapter it's not an absolute path - if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): - # get base path of modelFile - modelFileBase = os.path.dirname(request.ModelFile) - request.AudioPath = os.path.join(modelFileBase, request.AudioPath) - if request.AudioPath != "": - self.clonedVoice = True - - self.modelpath = request.ModelFile - self.speaker = request.Type - self.ClonedVoicePath = request.AudioPath - - ckpt_converter = request.Model+'/converter' - device = "cuda:0" if torch.cuda.is_available() else "cpu" - self.device = device - self.tone_color_converter = None - if self.clonedVoice: - self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device) - self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth') - - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def TTS(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - # Speed is adjustable - speed = 1.0 - voice = "EN" - if request.voice: - voice = request.voice - model = TTS(language=voice, device=self.device) - speaker_ids = model.hps.data.spk2id - speaker_key = self.speaker - modelpath = self.modelpath - for s in speaker_ids.keys(): - print(f"Speaker: {s} - ID: {speaker_ids[s]}") - speaker_id = speaker_ids[speaker_key] - speaker_key = speaker_key.lower().replace('_', '-') - source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device) - model.tts_to_file(request.text, speaker_id, request.dst, speed=speed) - if self.clonedVoice: - reference_speaker = self.ClonedVoicePath - target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False) - # Run the tone color converter - encode_message = "@MyShell" - self.tone_color_converter.convert( - audio_src_path=request.dst, - src_se=source_se, - tgt_se=target_se, - output_path=request.dst, - message=encode_message) - - print("[OpenVoice] TTS generated!", file=sys.stderr) - print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("[OpenVoice] Received termination signal. 
Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - print(f"[OpenVoice] startup: {args}", file=sys.stderr) - serve(args.addr) diff --git a/backend/python/openvoice/install.sh b/backend/python/openvoice/install.sh deleted file mode 100755 index 24db146b..00000000 --- a/backend/python/openvoice/install.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. -# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements - -python -m unidic download diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt deleted file mode 100644 index 08ed5eeb..00000000 --- a/backend/python/openvoice/requirements-cpu.txt +++ /dev/null @@ -1 +0,0 @@ -torch \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt deleted file mode 100644 index 6461b696..00000000 --- a/backend/python/openvoice/requirements-cublas11.txt +++ /dev/null @@ -1,2 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt deleted file mode 100644 index 12c6d5d5..00000000 --- a/backend/python/openvoice/requirements-cublas12.txt +++ /dev/null @@ -1 +0,0 @@ -torch diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt deleted file mode 100644 index cea7de0b..00000000 --- a/backend/python/openvoice/requirements-intel.txt +++ /dev/null @@ -1,23 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -optimum[openvino] -grpcio==1.66.1 -protobuf -librosa==0.9.1 -faster-whisper==1.0.3 -pydub==0.25.1 -wavmark==0.0.3 -numpy==1.26.4 -eng_to_ipa==0.0.2 -inflect==7.0.0 -unidecode==1.3.7 -whisper-timestamped==1.15.4 -openai -python-dotenv -pypinyin==0.53.0 -cn2an==0.5.22 -jieba==0.42.1 -gradio==4.38.1 -langid==1.1.6 -git+https://github.com/myshell-ai/MeloTTS.git diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt deleted file mode 100644 index b38805be..00000000 --- a/backend/python/openvoice/requirements.txt +++ /dev/null @@ -1,20 +0,0 @@ -grpcio==1.66.1 -protobuf -librosa -faster-whisper -pydub==0.25.1 -wavmark==0.0.3 -numpy -eng_to_ipa==0.0.2 -inflect -unidecode -whisper-timestamped -openai 
-python-dotenv -pypinyin -cn2an==0.5.22 -jieba==0.42.1 -gradio -langid==1.1.6 -git+https://github.com/myshell-ai/MeloTTS.git -git+https://github.com/myshell-ai/OpenVoice.git diff --git a/backend/python/openvoice/test.py b/backend/python/openvoice/test.py deleted file mode 100644 index 262917b3..00000000 --- a/backend/python/openvoice/test.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2", - Type="en-us")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/openvoice/test.sh b/backend/python/openvoice/test.sh deleted file mode 100755 index 6c0a840f..00000000 --- a/backend/python/openvoice/test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# Download checkpoints if not present -if [ ! 
-d "checkpoints_v2" ]; then - wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip - unzip checkpoints_v2.zip -fi - -runUnittests diff --git a/backend/python/parler-tts/Makefile b/backend/python/parler-tts/Makefile deleted file mode 100644 index c25b2af7..00000000 --- a/backend/python/parler-tts/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -export CONDA_ENV_PATH = "parler.yml" -SKIP_CONDA?=0 -ifeq ($(BUILD_TYPE), cublas) -export CONDA_ENV_PATH = "parler-nvidia.yml" -endif - -# Intel GPU are supposed to have dependencies installed in the main python -# environment, so we skip conda installation for SYCL builds. -# https://github.com/intel/intel-extension-for-pytorch/issues/538 -ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export SKIP_CONDA=1 -endif - -.PHONY: parler-tts -parler-tts: protogen - @echo "Installing $(CONDA_ENV_PATH)..." - bash install.sh $(CONDA_ENV_PATH) - -.PHONY: run -run: protogen - @echo "Running transformers..." - bash run.sh - @echo "transformers run." - -.PHONY: test -test: protogen - @echo "Testing transformers..." - bash test.sh - @echo "transformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - $(RM) -r venv __pycache__ \ No newline at end of file diff --git a/backend/python/parler-tts/install.sh b/backend/python/parler-tts/install.sh deleted file mode 100755 index 002472a2..00000000 --- a/backend/python/parler-tts/install.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. -# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements - -# https://github.com/descriptinc/audiotools/issues/101 -# incompatible protobuf versions. 
-PYDIR=$(ls ${MY_DIR}/venv/lib) -curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py diff --git a/backend/python/parler-tts/requirements-after.txt b/backend/python/parler-tts/requirements-after.txt deleted file mode 100644 index 09811bf4..00000000 --- a/backend/python/parler-tts/requirements-after.txt +++ /dev/null @@ -1,3 +0,0 @@ -git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17 -llvmlite==0.43.0 -numba==0.60.0 diff --git a/backend/python/parler-tts/requirements-cpu.txt b/backend/python/parler-tts/requirements-cpu.txt deleted file mode 100644 index bbcdc8cd..00000000 --- a/backend/python/parler-tts/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt deleted file mode 100644 index 71a6a93f..00000000 --- a/backend/python/parler-tts/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch -torchaudio -transformers -accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt deleted file mode 100644 index 0fa27074..00000000 --- a/backend/python/parler-tts/requirements-cublas12.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -torchaudio -transformers -accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt deleted file mode 100644 index b8758537..00000000 --- a/backend/python/parler-tts/requirements-hipblas.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.3.0+rocm6.0 -torchaudio==2.3.0+rocm6.0 -transformers -accelerate diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt deleted file mode 100644 index c0e4dcaa..00000000 --- a/backend/python/parler-tts/requirements-intel.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -torchaudio -optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt deleted file mode 100644 index 0da3da13..00000000 --- a/backend/python/parler-tts/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi -llvmlite==0.43.0 \ No newline at end of file diff --git a/backend/python/parler-tts/test.py b/backend/python/parler-tts/test.py deleted file mode 100644 index 639d43a9..00000000 --- a/backend/python/parler-tts/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) 
-> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cpu.txt b/backend/python/rerankers/requirements-cpu.txt index 25a1d8ab..e27a4726 100644 --- a/backend/python/rerankers/requirements-cpu.txt +++ b/backend/python/rerankers/requirements-cpu.txt @@ -1,4 +1,4 @@ transformers accelerate -torch +torch==2.4.1 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt index 06c4b2cf..fef296fe 100644 --- a/backend/python/rerankers/requirements-cublas11.txt +++ b/backend/python/rerankers/requirements-cublas11.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 transformers accelerate -torch +torch==2.4.1+cu118 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt index 25a1d8ab..e27a4726 100644 --- a/backend/python/rerankers/requirements-cublas12.txt +++ b/backend/python/rerankers/requirements-cublas12.txt @@ -1,4 +1,4 @@ transformers accelerate -torch +torch==2.4.1 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 961d150c..b1c8baed 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 transformers accelerate -torch +torch==2.4.1+rocm6.0 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index e6bb4cc7..c071e8fb 100644 --- 
a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -1,8 +1,9 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu transformers accelerate -torch +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu rerankers[transformers] optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools \ No newline at end of file diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 8e1b0195..566fdae0 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile deleted file mode 100644 index 8b18e943..00000000 --- a/backend/python/sentencetransformers/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: sentencetransformers -sentencetransformers: protogen - bash ./install.sh - - -.PHONY: run -run: protogen - @echo "Running sentencetransformers..." - bash run.sh - @echo "sentencetransformers run." - -# It is not working well by using command line. It only6 works with IDE like VSCode. -.PHONY: test -test: protogen - @echo "Testing sentencetransformers..." - bash test.sh - @echo "sentencetransformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/sentencetransformers/README.md b/backend/python/sentencetransformers/README.md deleted file mode 100644 index 829cf0d1..00000000 --- a/backend/python/sentencetransformers/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the sentencetransformers project - -``` -make sentencetransformers -``` \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt deleted file mode 100644 index f88de1e4..00000000 --- a/backend/python/sentencetransformers/requirements-cpu.txt +++ /dev/null @@ -1,6 +0,0 @@ -torch -accelerate -transformers -bitsandbytes -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt deleted file mode 100644 index 57caf1a1..00000000 --- a/backend/python/sentencetransformers/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt deleted file mode 100644 index 834fa6a4..00000000 --- a/backend/python/sentencetransformers/requirements-cublas12.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt 
b/backend/python/sentencetransformers/requirements-hipblas.txt deleted file mode 100644 index 98a0a41b..00000000 --- a/backend/python/sentencetransformers/requirements-hipblas.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt deleted file mode 100644 index 5948910d..00000000 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -optimum[openvino] -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 -accelerate -sentence-transformers==3.1.0 -transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt deleted file mode 100644 index b9cb6061..00000000 --- a/backend/python/sentencetransformers/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi -datasets -einops \ No newline at end of file diff --git a/backend/python/sentencetransformers/run.sh b/backend/python/sentencetransformers/run.sh deleted file mode 100755 index 375c07e5..00000000 --- a/backend/python/sentencetransformers/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.py b/backend/python/sentencetransformers/test.py deleted file mode 100644 index 9df52b14..00000000 --- a/backend/python/sentencetransformers/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.kill() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_embedding(self): - """ - This method tests if the embeddings are generated successfully - """ - 
try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens")) - self.assertTrue(response.success) - embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") - embedding_response = stub.Embedding(embedding_request) - self.assertIsNotNone(embedding_response.embeddings) - except Exception as err: - print(err) - self.fail("Embedding service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/sentencetransformers/test.sh b/backend/python/sentencetransformers/test.sh deleted file mode 100755 index 6940b066..00000000 --- a/backend/python/sentencetransformers/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/transformers-musicgen/Makefile b/backend/python/transformers-musicgen/Makefile deleted file mode 100644 index 06badf6d..00000000 --- a/backend/python/transformers-musicgen/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY: transformers-musicgen -transformers-musicgen: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running transformers..." - bash run.sh - @echo "transformers run." - -.PHONY: test -test: protogen - @echo "Testing transformers..." - bash test.sh - @echo "transformers tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/transformers-musicgen/README.md b/backend/python/transformers-musicgen/README.md deleted file mode 100644 index bf7fef84..00000000 --- a/backend/python/transformers-musicgen/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the transformers project - -``` -make transformers-musicgen -``` \ No newline at end of file diff --git a/backend/python/transformers-musicgen/backend.py b/backend/python/transformers-musicgen/backend.py deleted file mode 100644 index b9f1facf..00000000 --- a/backend/python/transformers-musicgen/backend.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -""" -Extra gRPC server for MusicgenForConditionalGeneration models. -""" -from concurrent import futures - -import argparse -import signal -import sys -import os - -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from scipy.io import wavfile -from transformers import AutoProcessor, MusicgenForConditionalGeneration - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer for the backend service. - - This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. - """ - def Health(self, request, context): - """ - A gRPC method that returns the health status of the backend service. - - Args: - request: A HealthRequest object that contains the request parameters. 
- context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Reply object that contains the health status of the backend service. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - A gRPC method that loads a model into memory. - - Args: - request: A LoadModelRequest object that contains the request parameters. - context: A grpc.ServicerContext object that provides information about the RPC. - - Returns: - A Result object that contains the result of the LoadModel operation. - """ - model_name = request.Model - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def SoundGeneration(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - inputs = None - if request.text == "": - inputs = self.model.get_unconditional_inputs(num_samples=1) - elif request.HasField('src'): - # TODO SECURITY CODE GOES HERE LOL - # WHO KNOWS IF THIS WORKS??? - sample_rate, wsamples = wavfile.read('path_to_your_file.wav') - - if request.HasField('src_divisor'): - wsamples = wsamples[: len(wsamples) // request.src_divisor] - - inputs = self.processor( - audio=wsamples, - sampling_rate=sample_rate, - text=[request.text], - padding=True, - return_tensors="pt", - ) - else: - inputs = self.processor( - text=[request.text], - padding=True, - return_tensors="pt", - ) - - tokens = 256 - if request.HasField('duration'): - tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second - guidance = 3.0 - if request.HasField('temperature'): - guidance = request.temperature - dosample = True - if request.HasField('sample'): - dosample = request.sample - audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens) - print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr) - sampling_rate = self.model.config.audio_encoder.sampling_rate - wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) - print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr) - print("[transformers-musicgen] SoundGeneration for", file=sys.stderr) - print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - - -# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons - def TTS(self, request, context): - model_name = request.model - if model_name == "": - return backend_pb2.Result(success=False, message="request.model is required") - try: - self.processor = AutoProcessor.from_pretrained(model_name) - self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) - inputs = self.processor( - text=[request.text], - padding=True, - return_tensors="pt", - ) - tokens = 512 # No good place to set the "length" in 
TTS, so use 10s as a sane default - audio_values = self.model.generate(**inputs, max_new_tokens=tokens) - print("[transformers-musicgen] TTS generated!", file=sys.stderr) - sampling_rate = self.model.config.audio_encoder.sampling_rate - write_wav(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) - print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr) - print("[transformers-musicgen] TTS for", file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("[transformers-musicgen] Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("[transformers-musicgen] Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - print(f"[transformers-musicgen] startup: {args}", file=sys.stderr) - serve(args.addr) diff --git a/backend/python/transformers-musicgen/requirements-cpu.txt b/backend/python/transformers-musicgen/requirements-cpu.txt deleted file mode 100644 index bbcdc8cd..00000000 --- a/backend/python/transformers-musicgen/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt deleted file mode 100644 index 191a6eef..00000000 --- a/backend/python/transformers-musicgen/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt deleted file mode 100644 index bbcdc8cd..00000000 --- a/backend/python/transformers-musicgen/requirements-cublas12.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt deleted file mode 100644 index 608d6939..00000000 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -transformers -accelerate -torch -optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt deleted file mode 100644 index fb1119a9..00000000 --- 
a/backend/python/transformers-musicgen/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -grpcio==1.66.1 -protobuf -scipy==1.14.0 -certifi \ No newline at end of file diff --git a/backend/python/transformers-musicgen/run.sh b/backend/python/transformers-musicgen/run.sh deleted file mode 100755 index 375c07e5..00000000 --- a/backend/python/transformers-musicgen/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/transformers-musicgen/test.py b/backend/python/transformers-musicgen/test.py deleted file mode 100644 index 295de65e..00000000 --- a/backend/python/transformers-musicgen/test.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if TTS is generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() - - def test_sound_generation(self): - """ - This method tests if SoundGeneration is generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small")) - self.assertTrue(response.success) - sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story") - sg_response = stub.SoundGeneration(sg_request) - self.assertIsNotNone(sg_response) - 
except Exception as err: - print(err) - self.fail("SoundGeneration service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/transformers-musicgen/test.sh b/backend/python/transformers-musicgen/test.sh deleted file mode 100755 index 6940b066..00000000 --- a/backend/python/transformers-musicgen/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 6e809f28..b0d5875b 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -21,7 +21,11 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" -from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria +from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM +from transformers import AutoProcessor, MusicgenForConditionalGeneration +from scipy.io import wavfile +import outetts +from sentence_transformers import SentenceTransformer _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -72,7 +76,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): Returns: A Result object that contains the result of the LoadModel operation. """ + model_name = request.Model + + # Check to see if the Model exists in the filesystem already. + if os.path.exists(request.ModelFile): + model_name = request.ModelFile compute = torch.float16 if request.F16Memory == True: @@ -80,10 +89,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): self.CUDA = torch.cuda.is_available() self.OV=False + self.OuteTTS=False + self.SentenceTransformer = False device_map="cpu" quantization = None + autoTokenizer = True if self.CUDA: from transformers import BitsAndBytesConfig, AutoModelForCausalLM @@ -186,6 +198,57 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): export=True, device=device_map) self.OV = True + elif request.Type == "MusicgenForConditionalGeneration": + autoTokenizer = False + self.processor = AutoProcessor.from_pretrained(model_name) + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + elif request.Type == "OuteTTS": + autoTokenizer = False + options = request.Options + MODELNAME = "OuteAI/OuteTTS-0.3-1B" + TOKENIZER = "OuteAI/OuteTTS-0.3-1B" + VERSION = "0.3" + SPEAKER = "en_male_1" + for opt in options: + if opt.startswith("tokenizer:"): + TOKENIZER = opt.split(":")[1] + break + if opt.startswith("version:"): + VERSION = opt.split(":")[1] + break + if opt.startswith("speaker:"): + SPEAKER = opt.split(":")[1] + break + + if model_name != "": + MODELNAME = model_name + + # Configure the model + model_config = outetts.HFModelConfig_v2( + model_path=MODELNAME, + tokenizer_path=TOKENIZER + ) + # Initialize the interface + self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config) + self.OuteTTS = True + + self.interface.print_default_speakers() + if request.AudioPath: + if os.path.isabs(request.AudioPath): + self.AudioPath = request.AudioPath + else: + self.AudioPath = os.path.join(request.ModelPath, request.AudioPath) + self.speaker = self.interface.create_speaker(audio_path=self.AudioPath) + else: + self.speaker = self.interface.load_default_speaker(name=SPEAKER) + elif request.Type == "SentenceTransformer": + autoTokenizer = False + self.model = SentenceTransformer(model_name, 
trust_remote_code=request.TrustRemoteCode) + self.SentenceTransformer = True + elif request.Type == "Mamba": + autoTokenizer = False + self.tokenizer = AutoTokenizer.from_pretrained(model_name) + self.model = MambaForCausalLM.from_pretrained(model_name) else: print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, @@ -196,19 +259,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): torch_dtype=compute) if request.ContextSize > 0: self.max_tokens = request.ContextSize - else: + elif hasattr(self.model, 'config') and hasattr(self.model.config, 'max_position_embeddings'): self.max_tokens = self.model.config.max_position_embeddings + else: + self.max_tokens = 512 - self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) - self.XPU = False + if autoTokenizer: + self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True) + self.XPU = False - if XPU and self.OV == False: - self.XPU = True - try: - print("Optimizing model", model_name, "to XPU.", file=sys.stderr) - self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") - except Exception as err: - print("Not using XPU:", err, file=sys.stderr) + if XPU and self.OV == False: + self.XPU = True + try: + print("Optimizing model", model_name, "to XPU.", file=sys.stderr) + self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") + except Exception as err: + print("Not using XPU:", err, file=sys.stderr) except Exception as err: print("Error:", err, file=sys.stderr) @@ -234,18 +300,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): max_length = 512 if request.Tokens != 0: max_length = request.Tokens - encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Create word embeddings - if self.CUDA: - encoded_input = encoded_input.to("cuda") + embeds = None - with torch.no_grad(): - model_output = self.model(**encoded_input) + if self.SentenceTransformer: + print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) + embeds = self.model.encode(request.Embeddings) + else: + encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt") - # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence - sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) - return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) + # Create word embeddings + if self.CUDA: + encoded_input = encoded_input.to("cuda") + + with torch.no_grad(): + model_output = self.model(**encoded_input) + + # Pool to get sentence embeddings; i.e. 
generate a single 1024-dimensional vector for the entire sentence + sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) + embeds = sentence_embeddings[0] + return backend_pb2.EmbeddingResult(embeddings=embeds) async def _predict(self, request, context, streaming=False): set_seed(request.Seed) @@ -375,6 +449,114 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): finally: await iterations.aclose() + def SoundGeneration(self, request, context): + model_name = request.model + try: + if self.processor is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.processor = AutoProcessor.from_pretrained(model_name) + if self.model is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + inputs = None + if request.text == "": + inputs = self.model.get_unconditional_inputs(num_samples=1) + elif request.HasField('src'): + # TODO: read the source audio from request.src and validate/sanitize it before loading; + # the hardcoded path below is still a placeholder + sample_rate, wsamples = wavfile.read('path_to_your_file.wav') + + if request.HasField('src_divisor'): + wsamples = wsamples[: len(wsamples) // request.src_divisor] + + inputs = self.processor( + audio=wsamples, + sampling_rate=sample_rate, + text=[request.text], + padding=True, + return_tensors="pt", + ) + else: + inputs = self.processor( + text=[request.text], + padding=True, + return_tensors="pt", + ) + + tokens = 256 + if request.HasField('duration'): + tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, i.e. roughly 51.2 tokens per second + guidance = 3.0 + if request.HasField('temperature'): + guidance = request.temperature + dosample = True + if request.HasField('sample'): + dosample = request.sample + audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens) + print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr) + sampling_rate = self.model.config.audio_encoder.sampling_rate + wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) + print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr) + print("[transformers-musicgen] SoundGeneration for", file=sys.stderr) + print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + + def OuteTTSGenerate(self, request, context): + try: + print("[OuteTTS] generating TTS", file=sys.stderr) + gen_cfg = outetts.GenerationConfig( + text=request.text, + temperature=0.1, + repetition_penalty=1.1, + max_length=self.max_tokens, + speaker=self.speaker, + # voice_characteristics="upbeat enthusiasm, friendliness, clarity, professionalism, and trustworthiness" + ) + output = self.interface.generate(config=gen_cfg) + print("[OuteTTS] Generated TTS", file=sys.stderr) + output.save(request.dst) + print("[OuteTTS] TTS done", file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + +# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons + def TTS(self, request, context): + if self.OuteTTS: + return
self.OuteTTSGenerate(request, context) + + model_name = request.model + try: + if self.processor is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.processor = AutoProcessor.from_pretrained(model_name) + if self.model is None: + if model_name == "": + return backend_pb2.Result(success=False, message="request.model is required") + self.model = MusicgenForConditionalGeneration.from_pretrained(model_name) + inputs = self.processor( + text=[request.text], + padding=True, + return_tensors="pt", + ) + tokens = self.max_tokens # The TTS request has no explicit length field, so fall back to the configured max_tokens + audio_values = self.model.generate(**inputs, max_new_tokens=tokens) + print("[transformers-musicgen] TTS generated!", file=sys.stderr) + sampling_rate = self.model.config.audio_encoder.sampling_rate + wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy()) + print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr) + print("[transformers-musicgen] TTS for", file=sys.stderr) + print(request, file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(success=True) + async def serve(address): # Start asyncio gRPC server server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index f1e6281b..79863c2b 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -1,4 +1,8 @@ -torch +torch==2.4.1 +llvmlite==0.43.0 +numba==0.60.0 accelerate transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index 0abd72d9..fa9f8953 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -1,5 +1,9 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +torch==2.4.1+cu118 +llvmlite==0.43.0 +numba==0.60.0 accelerate transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index f1e6281b..127bfb21 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -1,4 +1,8 @@ -torch +torch==2.4.1 accelerate +llvmlite==0.43.0 +numba==0.60.0 transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index f6900af1..c0ca93ee 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,5 +1,9 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch +torch==2.4.1+rocm6.0 accelerate transformers -bitsandbytes \ No newline at end of file +llvmlite==0.43.0 +numba==0.60.0 +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index
5d9efb71..1418a3c3 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -1,6 +1,11 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch +intel-extension-for-pytorch==2.3.110+xpu +torch==2.3.1+cxx11.abi +oneccl_bind_pt==2.3.100+xpu optimum[openvino] +llvmlite==0.43.0 +numba==0.60.0 intel-extension-for-transformers -bitsandbytes \ No newline at end of file +bitsandbytes +outetts +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index b19c59c0..c0fa0c0b 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,4 +1,6 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools +scipy==1.15.1 +numpy>=2.0.0 \ No newline at end of file diff --git a/backend/python/transformers/test.py b/backend/python/transformers/test.py index aab3c05e..14efa6a7 100644 --- a/backend/python/transformers/test.py +++ b/backend/python/transformers/test.py @@ -19,6 +19,7 @@ class TestBackendServicer(unittest.TestCase): This method sets up the gRPC service by starting the server """ self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) + time.sleep(10) def tearDown(self) -> None: """ @@ -31,7 +32,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the server starts up successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -48,7 +48,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the model is loaded successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -66,7 +65,6 @@ class TestBackendServicer(unittest.TestCase): """ This method tests if the embeddings are generated successfully """ - time.sleep(10) try: self.setUp() with grpc.insecure_channel("localhost:50051") as channel: @@ -80,5 +78,96 @@ class TestBackendServicer(unittest.TestCase): except Exception as err: print(err) self.fail("Embedding service failed") + finally: + self.tearDown() + + def test_audio_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_tts(self): + """ + This method tests if TTS is generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") + tts_response = stub.TTS(tts_request) + self.assertIsNotNone(tts_response) + except Exception as err: + print(err) + self.fail("TTS service failed") + finally: + 
self.tearDown() + + def test_sound_generation(self): + """ + This method tests if SoundGeneration is generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration")) + self.assertTrue(response.success) + sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story") + sg_response = stub.SoundGeneration(sg_request) + self.assertIsNotNone(sg_response) + except Exception as err: + print(err) + self.fail("SoundGeneration service failed") + finally: + self.tearDown() + + def test_embed_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_sentencetransformers_embedding(self): + """ + This method tests if the embeddings are generated successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer")) + self.assertTrue(response.success) + embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.") + embedding_response = stub.Embedding(embedding_request) + self.assertIsNotNone(embedding_response.embeddings) + except Exception as err: + print(err) + self.fail("Embedding service failed") finally: self.tearDown() \ No newline at end of file diff --git a/backend/python/vall-e-x/.gitignore b/backend/python/vall-e-x/.gitignore deleted file mode 100644 index 1d3a0654..00000000 --- a/backend/python/vall-e-x/.gitignore +++ /dev/null @@ -1 +0,0 @@ -source \ No newline at end of file diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile deleted file mode 100644 index a3ca32a3..00000000 --- a/backend/python/vall-e-x/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export SKIP_CONDA=1 -endif - -.PHONY: ttsvalle -ttsvalle: protogen - bash install.sh - -.PHONY: run -run: protogen - @echo "Running ttsvalle..." - bash run.sh - @echo "ttsvalle run." - -.PHONY: test -test: protogen - @echo "Testing valle..." - bash test.sh - @echo "valle tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. 
backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf source venv __pycache__ \ No newline at end of file diff --git a/backend/python/vall-e-x/README.md b/backend/python/vall-e-x/README.md deleted file mode 100644 index a3a93361..00000000 --- a/backend/python/vall-e-x/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Creating a separate environment for the ttsvalle project - -``` -make ttsvalle -``` \ No newline at end of file diff --git a/backend/python/vall-e-x/backend.py b/backend/python/vall-e-x/backend.py deleted file mode 100644 index fc9d93bd..00000000 --- a/backend/python/vall-e-x/backend.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 - -from concurrent import futures -import argparse -import signal -import sys -import os -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -from utils.generation import SAMPLE_RATE, generate_audio, preload_models -from scipy.io.wavfile import write as write_wav -from utils.prompt_making import make_prompt - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - gRPC servicer for backend services. - """ - def Health(self, request, context): - """ - Health check service. - - Args: - request: A backend_pb2.HealthRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Reply instance with message "OK". - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Load model service. - - Args: - request: A backend_pb2.LoadModelRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Result instance with message "Model loaded successfully" and success=True if successful. - A backend_pb2.Result instance with success=False and error message if unsuccessful. - """ - model_name = request.Model - try: - print("Preparing models, please wait", file=sys.stderr) - # download and load all models - preload_models() - self.clonedVoice = False - # Assume directory from request.ModelFile. - # Only if request.LoraAdapter it's not an absolute path - if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): - # get base path of modelFile - modelFileBase = os.path.dirname(request.ModelFile) - # modify LoraAdapter to be relative to modelFileBase - request.AudioPath = os.path.join(modelFileBase, request.AudioPath) - if request.AudioPath != "": - print("Generating model", file=sys.stderr) - make_prompt(name=model_name, audio_prompt_path=request.AudioPath) - self.clonedVoice = True - ### Use given transcript - ##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav", - ## transcript="Just, what was that? Paimon thought we were gonna get eaten.") - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - # Implement your logic here for the LoadModel service - # Replace this with your desired response - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def TTS(self, request, context): - """ - Text-to-speech service. - - Args: - request: A backend_pb2.TTSRequest instance. - context: A grpc.ServicerContext instance. - - Returns: - A backend_pb2.Result instance with success=True if successful. 
- A backend_pb2.Result instance with success=False and error message if unsuccessful. - """ - model = request.model - print(request, file=sys.stderr) - try: - audio_array = None - if model != "": - if self.clonedVoice: - model = os.path.basename(request.model) - audio_array = generate_audio(request.text, prompt=model) - else: - audio_array = generate_audio(request.text) - print("saving to", request.dst, file=sys.stderr) - # save audio to disk - write_wav(request.dst, SAMPLE_RATE, audio_array) - print("saved to", request.dst, file=sys.stderr) - print("tts for", file=sys.stderr) - print(request, file=sys.stderr) - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(success=True) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh deleted file mode 100755 index c0cce96a..00000000 --- a/backend/python/vall-e-x/install.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -set -e - -VALL_E_X_VERSION=3faaf8ccadb154d63b38070caf518ce9309ea0f4 - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. 
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements - -git clone https://github.com/Plachtaa/VALL-E-X.git ${MY_DIR}/source -pushd ${MY_DIR}/source && git checkout -b build ${VALL_E_X_VERSION} && popd -uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt - -cp -v ./*py $MY_DIR/source/ diff --git a/backend/python/vall-e-x/requirements-cpu.txt b/backend/python/vall-e-x/requirements-cpu.txt deleted file mode 100644 index 3a3304c0..00000000 --- a/backend/python/vall-e-x/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -accelerate -torch -torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt deleted file mode 100644 index 4e0a151a..00000000 --- a/backend/python/vall-e-x/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -accelerate -torch -torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt deleted file mode 100644 index 3a3304c0..00000000 --- a/backend/python/vall-e-x/requirements-cublas12.txt +++ /dev/null @@ -1,3 +0,0 @@ -accelerate -torch -torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt deleted file mode 100644 index fc43790a..00000000 --- a/backend/python/vall-e-x/requirements-hipblas.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -accelerate -torch==2.3.0+rocm6.0 -torchaudio==2.3.0+rocm6.0 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt deleted file mode 100644 index adbabeac..00000000 --- a/backend/python/vall-e-x/requirements-intel.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -accelerate -torch -torchaudio -optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt deleted file mode 100644 index 8e1b0195..00000000 --- a/backend/python/vall-e-x/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -grpcio==1.66.1 -protobuf -certifi \ No newline at end of file diff --git a/backend/python/vall-e-x/run.sh b/backend/python/vall-e-x/run.sh deleted file mode 100755 index 4b0682ad..00000000 --- a/backend/python/vall-e-x/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -BACKEND_FILE="${MY_DIR}/source/backend.py" - -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/vall-e-x/test.py b/backend/python/vall-e-x/test.py deleted file mode 100644 index f31a148c..00000000 --- a/backend/python/vall-e-x/test.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -A test script to test the gRPC service -""" -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - - -class TestBackendServicer(unittest.TestCase): - """ - 
TestBackendServicer is the class that tests the gRPC service - """ - def setUp(self): - """ - This method sets up the gRPC service by starting the server - """ - self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - """ - This method tears down the gRPC service by terminating the server - """ - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - """ - This method tests if the server starts up successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() - - def test_tts(self): - """ - This method tests if the embeddings are generated successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen")) - self.assertTrue(response.success) - tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story") - tts_response = stub.TTS(tts_request) - self.assertIsNotNone(tts_response) - except Exception as err: - print(err) - self.fail("TTS service failed") - finally: - self.tearDown() \ No newline at end of file diff --git a/backend/python/vall-e-x/test.sh b/backend/python/vall-e-x/test.sh deleted file mode 100755 index 57336b39..00000000 --- a/backend/python/vall-e-x/test.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e -TEST_FILE="./source/test.py" - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py index 2cf15c1c..98ac5081 100644 --- a/backend/python/vllm/backend.py +++ b/backend/python/vllm/backend.py @@ -5,6 +5,8 @@ import argparse import signal import sys import os +from typing import List +from PIL import Image import backend_pb2 import backend_pb2_grpc @@ -15,6 +17,10 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid from vllm.transformers_utils.tokenizer import get_tokenizer +from vllm.multimodal.utils import fetch_image +from vllm.assets.video import VideoAsset +import base64 +import io _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -89,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Quantization != "": engine_args.quantization = request.Quantization + if request.LoadFormat != "": + engine_args.load_format = request.LoadFormat if request.GPUMemoryUtilization != 0: engine_args.gpu_memory_utilization = request.GPUMemoryUtilization if request.TrustRemoteCode: @@ -105,6 +113,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): try: self.llm = AsyncLLMEngine.from_engine_args(engine_args) 
except Exception as err: + print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr) return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") try: @@ -117,7 +126,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): ) except Exception as err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - + print("Model loaded successfully", file=sys.stderr) return backend_pb2.Result(message="Model loaded successfully", success=True) async def Predict(self, request, context): @@ -196,15 +205,35 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if request.Seed != 0: sampling_params.seed = request.Seed + # Extract image paths and process images prompt = request.Prompt - - # If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template + + image_paths = request.Images + image_data = [self.load_image(img_path) for img_path in image_paths] + + videos_path = request.Videos + video_data = [self.load_video(video_path) for video_path in videos_path] + + # If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template if not request.Prompt and request.UseTokenizerTemplate and request.Messages: prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) - # Generate text + # Generate text using the LLM engine request_id = random_uuid() - outputs = self.llm.generate(prompt, sampling_params, request_id) + print(f"Generating text with request_id: {request_id}", file=sys.stderr) + multi_modal_data = {} + if image_data: + multi_modal_data["image"] = image_data + if video_data: + multi_modal_data["video"] = video_data + outputs = self.llm.generate( + { + "prompt": prompt, + "multi_modal_data": multi_modal_data if multi_modal_data else None, + }, + sampling_params=sampling_params, + request_id=request_id, + ) # Stream the results generated_text = "" @@ -227,9 +256,57 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): if streaming: return + # Remove the image files from /tmp folder + for img_path in image_paths: + try: + os.remove(img_path) + except Exception as e: + print(f"Error removing image file: {img_path}, {e}", file=sys.stderr) + # Sending the final generated text yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8')) + def load_image(self, image_path: str): + """ + Load an image from the given file path or base64 encoded data. + + Args: + image_path (str): The path to the image file or base64 encoded data. + + Returns: + Image: The loaded image. + """ + try: + + image_data = base64.b64decode(image_path) + image = Image.open(io.BytesIO(image_data)) + return image + except Exception as e: + print(f"Error loading image {image_path}: {e}", file=sys.stderr) + return None + + def load_video(self, video_path: str): + """ + Load a video from the given file path. + + Args: + video_path (str): The path to the image file. + + Returns: + Video: The loaded video. 
+ """ + try: + timestamp = str(int(time.time() * 1000)) # Generate timestamp + p = f"/tmp/vl-{timestamp}.data" # Use timestamp in filename + with open(p, "wb") as f: + f.write(base64.b64decode(video_path)) + video = VideoAsset(name=p).np_ndarrays + os.remove(p) + return video + except Exception as e: + print(f"Error loading video {video_path}: {e}", file=sys.stderr) + return None + async def serve(address): # Start asyncio gRPC server server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh index 78a3d5ba..0183a928 100755 --- a/backend/python/vllm/install.sh +++ b/backend/python/vllm/install.sh @@ -13,4 +13,20 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" fi -installRequirements +# We don't embed this into the images as it is a large dependency and not always needed. +# Besides, the inference speed is not actually usable for production use-cases in its current state. +if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then + ensureVenv + # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html + if [ ! -d vllm ]; then + git clone https://github.com/vllm-project/vllm + fi + pushd vllm + uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.1 protobuf bitsandbytes + uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu + VLLM_TARGET_DEVICE=cpu python setup.py install + popd + rm -rf vllm + else + installRequirements +fi diff --git a/backend/python/vllm/requirements-cpu.txt b/backend/python/vllm/requirements-cpu.txt index 765a1ef5..84058901 100644 --- a/backend/python/vllm/requirements-cpu.txt +++ b/backend/python/vllm/requirements-cpu.txt @@ -1,3 +1,3 @@ accelerate -torch +torch==2.4.1 transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt index 43817727..a6e49c1f 100644 --- a/backend/python/vllm/requirements-cublas11.txt +++ b/backend/python/vllm/requirements-cublas11.txt @@ -1,4 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 accelerate -torch -transformers \ No newline at end of file +torch==2.4.1+cu118 +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas12.txt b/backend/python/vllm/requirements-cublas12.txt index 765a1ef5..2dfc28f9 100644 --- a/backend/python/vllm/requirements-cublas12.txt +++ b/backend/python/vllm/requirements-cublas12.txt @@ -1,3 +1,4 @@ accelerate -torch -transformers \ No newline at end of file +torch==2.4.1 +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt index c73d8141..f580314a 100644 --- a/backend/python/vllm/requirements-hipblas.txt +++ b/backend/python/vllm/requirements-hipblas.txt @@ -1,4 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 accelerate -torch -transformers \ No newline at end of file +torch==2.4.1+rocm6.0 +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 1f82c46e..8955165a 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -1,7 +1,9 @@ --extra-index-url
https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch +intel-extension-for-pytorch==2.3.110+xpu accelerate -torch +torch==2.3.1+cxx11.abi transformers optimum[openvino] -setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools +bitsandbytes +oneccl_bind_pt==2.3.100+xpu \ No newline at end of file diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index b9c192d5..1f92add8 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.66.1 +grpcio==1.70.0 protobuf certifi setuptools \ No newline at end of file diff --git a/core/application.go b/core/application.go deleted file mode 100644 index e4efbdd0..00000000 --- a/core/application.go +++ /dev/null @@ -1,38 +0,0 @@ -package core - -import ( - "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/model" -) - -// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy -// Perhaps a proper DI system is worth it in the future, but for now keep things simple. -type Application struct { - - // Application-Level Config - ApplicationConfig *config.ApplicationConfig - // ApplicationState *ApplicationState - - // Core Low-Level Services - BackendConfigLoader *config.BackendConfigLoader - ModelLoader *model.ModelLoader - - // Backend Services - // EmbeddingsBackendService *backend.EmbeddingsBackendService - // ImageGenerationBackendService *backend.ImageGenerationBackendService - // LLMBackendService *backend.LLMBackendService - // TranscriptionBackendService *backend.TranscriptionBackendService - // TextToSpeechBackendService *backend.TextToSpeechBackendService - - // LocalAI System Services - BackendMonitorService *services.BackendMonitorService - GalleryService *services.GalleryService - LocalAIMetricsService *services.LocalAIMetricsService - // OpenAIService *services.OpenAIService -} - -// TODO [NEXT PR?]: Break up ApplicationConfig. 
-// Migrate over stuff that is not set via config at all - especially runtime stuff -type ApplicationState struct { -} diff --git a/core/application/application.go b/core/application/application.go new file mode 100644 index 00000000..6e8d6204 --- /dev/null +++ b/core/application/application.go @@ -0,0 +1,39 @@ +package application + +import ( + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" +) + +type Application struct { + backendLoader *config.BackendConfigLoader + modelLoader *model.ModelLoader + applicationConfig *config.ApplicationConfig + templatesEvaluator *templates.Evaluator +} + +func newApplication(appConfig *config.ApplicationConfig) *Application { + return &Application{ + backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath), + modelLoader: model.NewModelLoader(appConfig.ModelPath), + applicationConfig: appConfig, + templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath), + } +} + +func (a *Application) BackendLoader() *config.BackendConfigLoader { + return a.backendLoader +} + +func (a *Application) ModelLoader() *model.ModelLoader { + return a.modelLoader +} + +func (a *Application) ApplicationConfig() *config.ApplicationConfig { + return a.applicationConfig +} + +func (a *Application) TemplatesEvaluator() *templates.Evaluator { + return a.templatesEvaluator +} diff --git a/core/startup/config_file_watcher.go b/core/application/config_file_watcher.go similarity index 96% rename from core/startup/config_file_watcher.go rename to core/application/config_file_watcher.go index df72483f..46f29b10 100644 --- a/core/startup/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -1,4 +1,4 @@ -package startup +package application import ( "encoding/json" @@ -8,8 +8,8 @@ import ( "path/filepath" "time" - "github.com/fsnotify/fsnotify" "dario.cat/mergo" + "github.com/fsnotify/fsnotify" "github.com/mudler/LocalAI/core/config" "github.com/rs/zerolog/log" ) diff --git a/core/startup/startup.go b/core/application/startup.go similarity index 58% rename from core/startup/startup.go rename to core/application/startup.go index 3565d196..fffcd8bb 100644 --- a/core/startup/startup.go +++ b/core/application/startup.go @@ -1,206 +1,201 @@ -package startup - -import ( - "fmt" - "os" - - "github.com/mudler/LocalAI/core" - "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/internal" - "github.com/mudler/LocalAI/pkg/assets" - "github.com/mudler/LocalAI/pkg/library" - "github.com/mudler/LocalAI/pkg/model" - pkgStartup "github.com/mudler/LocalAI/pkg/startup" - "github.com/mudler/LocalAI/pkg/xsysinfo" - "github.com/rs/zerolog/log" -) - -func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { - options := config.NewApplicationConfig(opts...) 
- - log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) - log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) - caps, err := xsysinfo.CPUCapabilities() - if err == nil { - log.Debug().Msgf("CPU capabilities: %v", caps) - } - gpus, err := xsysinfo.GPUs() - if err == nil { - log.Debug().Msgf("GPU count: %d", len(gpus)) - for _, gpu := range gpus { - log.Debug().Msgf("GPU: %s", gpu.String()) - } - } - - // Make sure directories exists - if options.ModelPath == "" { - return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") - } - err = os.MkdirAll(options.ModelPath, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) - } - if options.ImageDir != "" { - err := os.MkdirAll(options.ImageDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) - } - } - if options.AudioDir != "" { - err := os.MkdirAll(options.AudioDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) - } - } - if options.UploadDir != "" { - err := os.MkdirAll(options.UploadDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) - } - } - - if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { - log.Error().Err(err).Msg("error installing models") - } - - cl := config.NewBackendConfigLoader(options.ModelPath) - ml := model.NewModelLoader(options.ModelPath) - - configLoaderOpts := options.ToConfigLoaderOptions() - - if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { - log.Error().Err(err).Msg("error loading config files") - } - - if options.ConfigFile != "" { - if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { - log.Error().Err(err).Msg("error loading config file") - } - } - - if err := cl.Preload(options.ModelPath); err != nil { - log.Error().Err(err).Msg("error downloading models") - } - - if options.PreloadJSONModels != "" { - if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err - } - } - - if options.PreloadModelsFromPath != "" { - if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err - } - } - - if options.Debug { - for _, v := range cl.GetAllBackendConfigs() { - log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) - } - } - - if options.AssetsDestination != "" { - // Extract files from the embedded FS - err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) - log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) - if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) - } - } - - if options.LibPath != "" { - // If there is a lib directory, set LD_LIBRARY_PATH to include it - err := library.LoadExternal(options.LibPath) - if err != nil { - log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries") - } - } - - // turn off any process that was started by GRPC if the context is canceled - go func() { - 
<-options.Context.Done() - log.Debug().Msgf("Context canceled, shutting down") - err := ml.StopAllGRPC() - if err != nil { - log.Error().Err(err).Msg("error while stopping all grpc backends") - } - }() - - if options.WatchDog { - wd := model.NewWatchDog( - ml, - options.WatchDogBusyTimeout, - options.WatchDogIdleTimeout, - options.WatchDogBusy, - options.WatchDogIdle) - ml.SetWatchDog(wd) - go wd.Run() - go func() { - <-options.Context.Done() - log.Debug().Msgf("Context canceled, shutting down") - wd.Shutdown() - }() - } - - // Watch the configuration directory - startWatcher(options) - - log.Info().Msg("core/startup process completed!") - return cl, ml, options, nil -} - -func startWatcher(options *config.ApplicationConfig) { - if options.DynamicConfigsDir == "" { - // No need to start the watcher if the directory is not set - return - } - - if _, err := os.Stat(options.DynamicConfigsDir); err != nil { - if os.IsNotExist(err) { - // We try to create the directory if it does not exist and was specified - if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil { - log.Error().Err(err).Msg("failed creating DynamicConfigsDir") - } - } else { - // something else happened, we log the error and don't start the watcher - log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started") - return - } - } - - configHandler := newConfigFileHandler(options) - if err := configHandler.Watch(); err != nil { - log.Error().Err(err).Msg("failed creating watcher") - } -} - -// In Lieu of a proper DI framework, this function wires up the Application manually. -// This is in core/startup rather than core/state.go to keep package references clean! -func createApplication(appConfig *config.ApplicationConfig) *core.Application { - app := &core.Application{ - ApplicationConfig: appConfig, - BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath), - ModelLoader: model.NewModelLoader(appConfig.ModelPath), - } - - var err error - - // app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - - app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.GalleryService = services.NewGalleryService(app.ApplicationConfig) - // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) - - app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() - if err != nil { - log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.") - } - - return app -} +package application + +import ( + "fmt" + "os" + + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/internal" + "github.com/mudler/LocalAI/pkg/assets" + + 
"github.com/mudler/LocalAI/pkg/library" + "github.com/mudler/LocalAI/pkg/model" + pkgStartup "github.com/mudler/LocalAI/pkg/startup" + "github.com/mudler/LocalAI/pkg/xsysinfo" + "github.com/rs/zerolog/log" +) + +func New(opts ...config.AppOption) (*Application, error) { + options := config.NewApplicationConfig(opts...) + application := newApplication(options) + + log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) + log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) + caps, err := xsysinfo.CPUCapabilities() + if err == nil { + log.Debug().Msgf("CPU capabilities: %v", caps) + } + gpus, err := xsysinfo.GPUs() + if err == nil { + log.Debug().Msgf("GPU count: %d", len(gpus)) + for _, gpu := range gpus { + log.Debug().Msgf("GPU: %s", gpu.String()) + } + } + + // Make sure directories exists + if options.ModelPath == "" { + return nil, fmt.Errorf("options.ModelPath cannot be empty") + } + err = os.MkdirAll(options.ModelPath, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create ModelPath: %q", err) + } + if options.ImageDir != "" { + err := os.MkdirAll(options.ImageDir, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create ImageDir: %q", err) + } + } + if options.AudioDir != "" { + err := os.MkdirAll(options.AudioDir, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create AudioDir: %q", err) + } + } + if options.UploadDir != "" { + err := os.MkdirAll(options.UploadDir, 0750) + if err != nil { + return nil, fmt.Errorf("unable to create UploadDir: %q", err) + } + } + + if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { + log.Error().Err(err).Msg("error installing models") + } + + configLoaderOpts := options.ToConfigLoaderOptions() + + if err := application.BackendLoader().LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { + log.Error().Err(err).Msg("error loading config files") + } + + if options.ConfigFile != "" { + if err := application.BackendLoader().LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { + log.Error().Err(err).Msg("error loading config file") + } + } + + if err := application.BackendLoader().Preload(options.ModelPath); err != nil { + log.Error().Err(err).Msg("error downloading models") + } + + if options.PreloadJSONModels != "" { + if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { + return nil, err + } + } + + if options.PreloadModelsFromPath != "" { + if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { + return nil, err + } + } + + if options.Debug { + for _, v := range application.BackendLoader().GetAllBackendConfigs() { + log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) + } + } + + if options.AssetsDestination != "" { + // Extract files from the embedded FS + err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) + log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) + if err != nil { + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) + } + } + + if options.LibPath != "" { + // If there is a lib directory, set LD_LIBRARY_PATH to include it + err := 
library.LoadExternal(options.LibPath)
+        if err != nil {
+            log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
+        }
+    }
+
+    // turn off any process that was started by GRPC if the context is canceled
+    go func() {
+        <-options.Context.Done()
+        log.Debug().Msgf("Context canceled, shutting down")
+        err := application.ModelLoader().StopAllGRPC()
+        if err != nil {
+            log.Error().Err(err).Msg("error while stopping all grpc backends")
+        }
+    }()
+
+    if options.WatchDog {
+        wd := model.NewWatchDog(
+            application.ModelLoader(),
+            options.WatchDogBusyTimeout,
+            options.WatchDogIdleTimeout,
+            options.WatchDogBusy,
+            options.WatchDogIdle)
+        application.ModelLoader().SetWatchDog(wd)
+        go wd.Run()
+        go func() {
+            <-options.Context.Done()
+            log.Debug().Msgf("Context canceled, shutting down")
+            wd.Shutdown()
+        }()
+    }
+
+    if options.LoadToMemory != nil {
+        for _, m := range options.LoadToMemory {
+            cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
+                config.LoadOptionDebug(options.Debug),
+                config.LoadOptionThreads(options.Threads),
+                config.LoadOptionContextSize(options.ContextSize),
+                config.LoadOptionF16(options.F16),
+                config.ModelPath(options.ModelPath),
+            )
+            if err != nil {
+                return nil, err
+            }
+
+            log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
+
+            o := backend.ModelOptions(*cfg, options)
+
+            var backendErr error
+            _, backendErr = application.ModelLoader().Load(o...)
+            if backendErr != nil {
+                return nil, backendErr
+            }
+        }
+    }
+
+    // Watch the configuration directory
+    startWatcher(options)
+
+    log.Info().Msg("core/startup process completed!")
+    return application, nil
+}
+
+func startWatcher(options *config.ApplicationConfig) {
+    if options.DynamicConfigsDir == "" {
+        // No need to start the watcher if the directory is not set
+        return
+    }
+
+    if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
+        if os.IsNotExist(err) {
+            // We try to create the directory if it does not exist and was specified
+            if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
+                log.Error().Err(err).Msg("failed creating DynamicConfigsDir")
+            }
+        } else {
+            // something else happened, we log the error and don't start the watcher
+            log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started")
+            return
+        }
+    }
+
+    configHandler := newConfigFileHandler(options)
+    if err := configHandler.Watch(); err != nil {
+        log.Error().Err(err).Msg("failed creating watcher")
+    }
+}
diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go
index 31b10a19..a96e9829 100644
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -10,27 +10,10 @@ import (
 )
 
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
-    modelFile := backendConfig.Model
-    grpcOpts := gRPCModelOpts(backendConfig)
+    opts := ModelOptions(backendConfig, appConfig)
 
-    var inferenceModel interface{}
-    var err error
-
-    opts := modelOpts(backendConfig, appConfig, []model.Option{
-        model.WithLoadGRPCLoadModelOpts(grpcOpts),
-        model.WithThreads(uint32(*backendConfig.Threads)),
-        model.WithAssetDir(appConfig.AssetsDestination),
-        model.WithModel(modelFile),
-        model.WithContext(appConfig.Context),
-    })
-
-    if backendConfig.Backend == "" {
-        inferenceModel, err = loader.GreedyLoader(opts...)
- } else { - opts = append(opts, model.WithBackendString(backendConfig.Backend)) - inferenceModel, err = loader.BackendLoader(opts...) - } + inferenceModel, err := loader.Load(opts...) if err != nil { return nil, err } diff --git a/core/backend/image.go b/core/backend/image.go index 8c3f56b3..38ca4357 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -8,21 +8,9 @@ import ( ) func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { - threads := backendConfig.Threads - if *threads == 0 && appConfig.Threads != 0 { - threads = &appConfig.Threads - } - gRPCOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(backendConfig, appConfig, []model.Option{ - model.WithBackendString(backendConfig.Backend), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithThreads(uint32(*threads)), - model.WithContext(appConfig.Context), - model.WithModel(backendConfig.Model), - model.WithLoadGRPCLoadModelOpts(gRPCOpts), - }) - inferenceModel, err := loader.BackendLoader( + opts := ModelOptions(backendConfig, appConfig) + inferenceModel, err := loader.Load( opts..., ) if err != nil { diff --git a/core/backend/llm.go b/core/backend/llm.go index 2b4564a8..d91ded51 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -2,6 +2,7 @@ package backend import ( "context" + "encoding/json" "fmt" "os" "regexp" @@ -15,7 +16,6 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/pkg/grpc" "github.com/mudler/LocalAI/pkg/grpc/proto" model "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/utils" @@ -27,32 +27,14 @@ type LLMResponse struct { } type TokenUsage struct { - Prompt int - Completion int + Prompt int + Completion int + TimingPromptProcessing float64 + TimingTokenGeneration float64 } -func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { +func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { modelFile := c.Model - threads := c.Threads - if *threads == 0 && o.Threads != 0 { - threads = &o.Threads - } - grpcOpts := gRPCModelOpts(c) - - var inferenceModel grpc.Backend - var err error - - opts := modelOpts(c, o, []model.Option{ - model.WithLoadGRPCLoadModelOpts(grpcOpts), - model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup - model.WithAssetDir(o.AssetsDestination), - model.WithModel(modelFile), - model.WithContext(o.Context), - }) - - if c.Backend != "" { - opts = append(opts, model.WithBackendString(c.Backend)) - } // Check if the modelFile exists, if it doesn't try to load it from the gallery if o.AutoloadGalleries { // experimental @@ -66,12 +48,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im } } - if c.Backend == "" { - inferenceModel, err = loader.GreedyLoader(opts...) - } else { - inferenceModel, err = loader.BackendLoader(opts...) - } - + opts := ModelOptions(c, o) + inferenceModel, err := loader.Load(opts...) 
if err != nil { return nil, err } @@ -88,6 +66,16 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im switch ct := message.Content.(type) { case string: protoMessages[i].Content = ct + case []interface{}: + // If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here + data, _ := json.Marshal(ct) + resultData := []struct { + Text string `json:"text"` + }{} + json.Unmarshal(data, &resultData) + for _, r := range resultData { + protoMessages[i].Content += r.Text + } default: return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct) } @@ -101,6 +89,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im opts.Messages = protoMessages opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate opts.Images = images + opts.Videos = videos + opts.Audios = audios tokenUsage := TokenUsage{} @@ -129,8 +119,14 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im ss := "" var partialRune []byte - err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) { - partialRune = append(partialRune, chars...) + err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) { + msg := reply.Message + partialRune = append(partialRune, msg...) + + tokenUsage.Prompt = int(reply.PromptTokens) + tokenUsage.Completion = int(reply.Tokens) + tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration + tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing for len(partialRune) > 0 { r, size := utf8.DecodeRune(partialRune) @@ -144,6 +140,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im partialRune = partialRune[size:] } + + if len(msg) == 0 { + tokenCallback("", tokenUsage) + } }) return LLMResponse{ Response: ss, @@ -161,6 +161,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im if tokenUsage.Completion == 0 { tokenUsage.Completion = int(reply.Tokens) } + + tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration + tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing + return LLMResponse{ Response: string(reply.Message), Usage: tokenUsage, diff --git a/core/backend/options.go b/core/backend/options.go index d986b8e6..3201142d 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -11,32 +11,65 @@ import ( "github.com/rs/zerolog/log" ) -func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { +func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...model.Option) []model.Option { + name := c.Name + if name == "" { + name = c.Model + } + + defOpts := []model.Option{ + model.WithBackendString(c.Backend), + model.WithModel(c.Model), + model.WithAssetDir(so.AssetsDestination), + model.WithContext(so.Context), + model.WithModelID(name), + } + + threads := 1 + + if c.Threads != nil { + threads = *c.Threads + } + + if so.Threads != 0 { + threads = so.Threads + } + + c.Threads = &threads + + grpcOpts := grpcModelOpts(c) + defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts)) + if so.SingleBackend { - opts = append(opts, model.WithSingleActiveBackend()) + defOpts = append(defOpts, model.WithSingleActiveBackend()) } if so.ParallelBackendRequests { - opts = append(opts, model.EnableParallelRequests) + defOpts = append(defOpts, model.EnableParallelRequests) } if c.GRPC.Attempts != 0 { - opts = append(opts, 
model.WithGRPCAttempts(c.GRPC.Attempts)) + defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts)) } if c.GRPC.AttemptsSleepTime != 0 { - opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) + defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) } for k, v := range so.ExternalGRPCBackends { - opts = append(opts, model.WithExternalBackend(k, v)) + defOpts = append(defOpts, model.WithExternalBackend(k, v)) } - return opts + return append(defOpts, opts...) } func getSeed(c config.BackendConfig) int32 { - seed := int32(*c.Seed) + var seed int32 = config.RAND_SEED + + if c.Seed != nil { + seed = int32(*c.Seed) + } + if seed == config.RAND_SEED { seed = rand.Int31() } @@ -44,32 +77,82 @@ func getSeed(c config.BackendConfig) int32 { return seed } -func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { +func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { b := 512 if c.Batch != 0 { b = c.Batch } + + f16 := false + if c.F16 != nil { + f16 = *c.F16 + } + + embeddings := false + if c.Embeddings != nil { + embeddings = *c.Embeddings + } + + lowVRAM := false + if c.LowVRAM != nil { + lowVRAM = *c.LowVRAM + } + + mmap := false + if c.MMap != nil { + mmap = *c.MMap + } + + ctxSize := 1024 + if c.ContextSize != nil { + ctxSize = *c.ContextSize + } + + mmlock := false + if c.MMlock != nil { + mmlock = *c.MMlock + } + + nGPULayers := 9999999 + if c.NGPULayers != nil { + nGPULayers = *c.NGPULayers + } + + triggers := make([]*pb.GrammarTrigger, 0) + for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers { + triggers = append(triggers, &pb.GrammarTrigger{ + Word: t.Word, + AtStart: t.AtStart, + }) + + } + return &pb.ModelOptions{ CUDA: c.CUDA || c.Diffusers.CUDA, SchedulerType: c.Diffusers.SchedulerType, + GrammarTriggers: triggers, PipelineType: c.Diffusers.PipelineType, - CFGScale: c.Diffusers.CFGScale, + CFGScale: c.CFGScale, LoraAdapter: c.LoraAdapter, LoraScale: c.LoraScale, - F16Memory: *c.F16, + LoraAdapters: c.LoraAdapters, + LoraScales: c.LoraScales, + F16Memory: f16, LoraBase: c.LoraBase, IMG2IMG: c.Diffusers.IMG2IMG, CLIPModel: c.Diffusers.ClipModel, CLIPSubfolder: c.Diffusers.ClipSubFolder, + Options: c.Options, CLIPSkip: int32(c.Diffusers.ClipSkip), ControlNet: c.Diffusers.ControlNet, - ContextSize: int32(*c.ContextSize), + ContextSize: int32(ctxSize), Seed: getSeed(c), NBatch: int32(b), NoMulMatQ: c.NoMulMatQ, DraftModel: c.DraftModel, - AudioPath: c.VallE.AudioPath, + AudioPath: c.AudioPath, Quantization: c.Quantization, + LoadFormat: c.LoadFormat, GPUMemoryUtilization: c.GPUMemoryUtilization, TrustRemoteCode: c.TrustRemoteCode, EnforceEager: c.EnforceEager, @@ -78,6 +161,8 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { TensorParallelSize: int32(c.TensorParallelSize), MMProj: c.MMProj, FlashAttention: c.FlashAttention, + CacheTypeKey: c.CacheTypeK, + CacheTypeValue: c.CacheTypeV, NoKVOffload: c.NoKVOffloading, YarnExtFactor: c.YarnExtFactor, YarnAttnFactor: c.YarnAttnFactor, @@ -85,16 +170,16 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { YarnBetaSlow: c.YarnBetaSlow, NGQA: c.NGQA, RMSNormEps: c.RMSNormEps, - MLock: *c.MMlock, + MLock: mmlock, RopeFreqBase: c.RopeFreqBase, RopeScaling: c.RopeScaling, Type: c.ModelType, RopeFreqScale: c.RopeFreqScale, NUMA: c.NUMA, - Embeddings: *c.Embeddings, - LowVRAM: *c.LowVRAM, - NGPULayers: int32(*c.NGPULayers), - MMap: *c.MMap, + Embeddings: embeddings, + LowVRAM: lowVRAM, + NGPULayers: int32(nGPULayers), + MMap: mmap, MainGPU: 
c.MainGPU, Threads: int32(*c.Threads), TensorSplit: c.TensorSplit, diff --git a/core/backend/rerank.go b/core/backend/rerank.go index 1b718be2..8152ef7f 100644 --- a/core/backend/rerank.go +++ b/core/backend/rerank.go @@ -9,22 +9,10 @@ import ( model "github.com/mudler/LocalAI/pkg/model" ) -func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) { - bb := backend - if bb == "" { - return nil, fmt.Errorf("backend is required") - } +func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) { - grpcOpts := gRPCModelOpts(backendConfig) - - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ - model.WithBackendString(bb), - model.WithModel(modelFile), - model.WithContext(appConfig.Context), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithLoadGRPCLoadModelOpts(grpcOpts), - }) - rerankModel, err := loader.BackendLoader(opts...) + opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile)) + rerankModel, err := loader.Load(opts...) if err != nil { return nil, err } diff --git a/core/backend/soundgeneration.go b/core/backend/soundgeneration.go index abd5221b..a8d46478 100644 --- a/core/backend/soundgeneration.go +++ b/core/backend/soundgeneration.go @@ -13,7 +13,6 @@ import ( ) func SoundGeneration( - backend string, modelFile string, text string, duration *float32, @@ -25,20 +24,9 @@ func SoundGeneration( appConfig *config.ApplicationConfig, backendConfig config.BackendConfig, ) (string, *proto.Result, error) { - if backend == "" { - return "", nil, fmt.Errorf("backend is a required parameter") - } - grpcOpts := gRPCModelOpts(backendConfig) - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ - model.WithBackendString(backend), - model.WithModel(modelFile), - model.WithContext(appConfig.Context), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithLoadGRPCLoadModelOpts(grpcOpts), - }) - - soundGenModel, err := loader.BackendLoader(opts...) + opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile)) + soundGenModel, err := loader.Load(opts...) if err != nil { return "", nil, err } diff --git a/core/backend/stores.go b/core/backend/stores.go index 1b514584..f5ee9166 100644 --- a/core/backend/stores.go +++ b/core/backend/stores.go @@ -8,16 +8,15 @@ import ( ) func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) { - if storeName == "" { - storeName = "default" - } + if storeName == "" { + storeName = "default" + } - sc := []model.Option{ - model.WithBackendString(model.LocalStoreBackend), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithModel(storeName), - } + sc := []model.Option{ + model.WithBackendString(model.LocalStoreBackend), + model.WithAssetDir(appConfig.AssetsDestination), + model.WithModel(storeName), + } - return sl.BackendLoader(sc...) + return sl.Load(sc...) 
 }
-
diff --git a/core/backend/token_metrics.go b/core/backend/token_metrics.go
new file mode 100644
index 00000000..cc71c868
--- /dev/null
+++ b/core/backend/token_metrics.go
@@ -0,0 +1,31 @@
+package backend
+
+import (
+    "context"
+    "fmt"
+
+    "github.com/mudler/LocalAI/core/config"
+    "github.com/mudler/LocalAI/pkg/grpc/proto"
+    model "github.com/mudler/LocalAI/pkg/model"
+)
+
+func TokenMetrics(
+    modelFile string,
+    loader *model.ModelLoader,
+    appConfig *config.ApplicationConfig,
+    backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
+
+    opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
+    model, err := loader.Load(opts...)
+    if err != nil {
+        return nil, err
+    }
+
+    if model == nil {
+        return nil, fmt.Errorf("could not load model")
+    }
+
+    res, err := model.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
+
+    return res, err
+}
diff --git a/core/backend/tokenize.go b/core/backend/tokenize.go
new file mode 100644
index 00000000..1783083b
--- /dev/null
+++ b/core/backend/tokenize.go
@@ -0,0 +1,41 @@
+package backend
+
+import (
+    "github.com/mudler/LocalAI/core/config"
+    "github.com/mudler/LocalAI/core/schema"
+    "github.com/mudler/LocalAI/pkg/grpc"
+    model "github.com/mudler/LocalAI/pkg/model"
+)
+
+func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
+
+    modelFile := backendConfig.Model
+
+    var inferenceModel grpc.Backend
+    var err error
+
+    opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
+
+    inferenceModel, err = loader.Load(opts...)
+    if err != nil {
+        return schema.TokenizeResponse{}, err
+    }
+
+    predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
+    predictOptions.Prompt = s
+
+    // tokenize the string
+    resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
+    if err != nil {
+        return schema.TokenizeResponse{}, err
+    }
+
+    if resp.Tokens == nil {
+        resp.Tokens = make([]int32, 0)
+    }
+
+    return schema.TokenizeResponse{
+        Tokens: resp.Tokens,
+    }, nil
+
+}
diff --git a/core/backend/transcript.go b/core/backend/transcript.go
index 6ebc7c10..372f6984 100644
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -14,15 +14,13 @@ import (
 
 func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
-    opts := modelOpts(backendConfig, appConfig, []model.Option{
-        model.WithBackendString(model.WhisperBackend),
-        model.WithModel(backendConfig.Model),
-        model.WithContext(appConfig.Context),
-        model.WithThreads(uint32(*backendConfig.Threads)),
-        model.WithAssetDir(appConfig.AssetsDestination),
-    })
+    if backendConfig.Backend == "" {
+        backendConfig.Backend = model.WhisperBackend
+    }
 
-    transcriptionModel, err := ml.BackendLoader(opts...)
+    opts := ModelOptions(backendConfig, appConfig)
+
+    transcriptionModel, err := ml.Load(opts...)
if err != nil { return nil, err } diff --git a/core/backend/tts.go b/core/backend/tts.go index 258882ae..f9be6955 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -28,16 +28,8 @@ func ModelTTS( bb = model.PiperBackend } - grpcOpts := gRPCModelOpts(backendConfig) - - opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ - model.WithBackendString(bb), - model.WithModel(modelFile), - model.WithContext(appConfig.Context), - model.WithAssetDir(appConfig.AssetsDestination), - model.WithLoadGRPCLoadModelOpts(grpcOpts), - }) - ttsModel, err := loader.BackendLoader(opts...) + opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile)) + ttsModel, err := loader.Load(opts...) if err != nil { return "", nil, err } diff --git a/core/cli/models.go b/core/cli/models.go index 56d13fc7..28b2944f 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model") } - err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) + err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) if err != nil { return err } diff --git a/core/cli/run.go b/core/cli/run.go index afb7204c..3162ef14 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -6,12 +6,12 @@ import ( "strings" "time" + "github.com/mudler/LocalAI/core/application" cli_api "github.com/mudler/LocalAI/core/cli/api" cliContext "github.com/mudler/LocalAI/core/cli/context" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/http" "github.com/mudler/LocalAI/core/p2p" - "github.com/mudler/LocalAI/core/startup" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -32,7 +32,6 @@ type RunCMD struct { Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` - RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"` PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"` @@ -53,6 +52,7 @@ type RunCMD struct { OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"` DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. 
This should only be enabled in secure testing environments" group:"hardening"` + DisableMetricsEndpoint bool `env:"LOCALAI_DISABLE_METRICS_ENDPOINT,DISABLE_METRICS_ENDPOINT" default:"false" help:"Disable the /metrics endpoint" group:"api"` HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"` Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"` @@ -69,6 +69,8 @@ type RunCMD struct { WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` + MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` + LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` } func (r *RunCMD) Run(ctx *cliContext.Context) error { @@ -87,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), - config.WithModelLibraryURL(r.RemoteLibrary), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithCsrf(r.CSRF), @@ -104,6 +105,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet), config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints), config.WithP2PNetworkID(r.Peer2PeerNetworkID), + config.WithLoadToMemory(r.LoadToMemory), + config.WithMachineTag(r.MachineTag), + } + + if r.DisableMetricsEndpoint { + opts = append(opts, config.DisableMetricsEndpoint) } token := "" @@ -179,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } if r.PreloadBackendOnly { - _, _, _, err := startup.Startup(opts...) + _, err := application.New(opts...) return err } - cl, ml, options, err := startup.Startup(opts...) + app, err := application.New(opts...) 
if err != nil { return fmt.Errorf("failed basic startup tasks with error %s", err.Error()) } - appHTTP, err := http.App(cl, ml, options) + appHTTP, err := http.API(app) if err != nil { log.Error().Err(err).Msg("error during HTTP App construction") return err diff --git a/core/cli/soundgeneration.go b/core/cli/soundgeneration.go index 5711b199..82bc0346 100644 --- a/core/cli/soundgeneration.go +++ b/core/cli/soundgeneration.go @@ -85,13 +85,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error { options := config.BackendConfig{} options.SetDefaults() + options.Backend = t.Backend var inputFile *string if t.InputFile != "" { inputFile = &t.InputFile } - filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text, + filePath, _, err := backend.SoundGeneration(t.Model, text, parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample, inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options) diff --git a/core/cli/util.go b/core/cli/util.go index b3e545d8..57b8ad9e 100644 --- a/core/cli/util.go +++ b/core/cli/util.go @@ -15,8 +15,9 @@ import ( ) type UtilCMD struct { - GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"` - HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"` + GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"` + HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"` + UsecaseHeuristic UsecaseHeuristicCMD `cmd:"" name:"usecase-heuristic" help:"Checks a specific model config and prints what usecase LocalAI will offer for it."` } type GGUFInfoCMD struct { @@ -30,6 +31,11 @@ type HFScanCMD struct { ToScan []string `arg:""` } +type UsecaseHeuristicCMD struct { + ConfigName string `name:"The config file to check"` + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` +} + func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error { if u.Args == nil || len(u.Args) == 0 { return fmt.Errorf("no GGUF file provided") @@ -99,3 +105,31 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error { return nil } } + +func (uhcmd *UsecaseHeuristicCMD) Run(ctx *cliContext.Context) error { + if len(uhcmd.ConfigName) == 0 { + log.Error().Msg("ConfigName is a required parameter") + return fmt.Errorf("config name is a required parameter") + } + if len(uhcmd.ModelsPath) == 0 { + log.Error().Msg("ModelsPath is a required parameter") + return fmt.Errorf("model path is a required parameter") + } + bcl := config.NewBackendConfigLoader(uhcmd.ModelsPath) + err := bcl.LoadBackendConfig(uhcmd.ConfigName) + if err != nil { + log.Error().Err(err).Str("ConfigName", uhcmd.ConfigName).Msg("error while loading backend") + return err + } + bc, exists := bcl.GetBackendConfig(uhcmd.ConfigName) + if !exists { + log.Error().Str("ConfigName", uhcmd.ConfigName).Msg("ConfigName not found") + } + for name, uc := range config.GetAllBackendConfigUsecases() { + if bc.HasUsecases(uc) { + log.Info().Str("Usecase", name) + } + } + log.Info().Msg("---") + return nil +} diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 6275481b..aa7a8f1a 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -76,8 +76,14 @@ 
func (r *P2P) Run(ctx *cliContext.Context) error { "util", "llama-cpp-rpc-server", ) - extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ") + var extraArgs []string + + if r.ExtraLLamaCPPArgs != "" { + extraArgs = strings.Split(r.ExtraLLamaCPPArgs, " ") + } args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...) + log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args)) + args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess) cmd := exec.Command( diff --git a/core/config/application_config.go b/core/config/application_config.go index afbf325f..2cc9b01b 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -39,10 +39,10 @@ type ApplicationConfig struct { OpaqueErrors bool UseSubtleKeyComparison bool DisableApiKeyRequirementForHttpGet bool + DisableMetrics bool HttpGetExemptedEndpoints []*regexp.Regexp DisableGalleryEndpoint bool - - ModelLibraryURL string + LoadToMemory []string Galleries []Gallery @@ -63,6 +63,8 @@ type ApplicationConfig struct { ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + + MachineTag string } type AppOption func(*ApplicationConfig) @@ -92,6 +94,12 @@ func WithModelPath(path string) AppOption { } } +func WithMachineTag(tag string) AppOption { + return func(o *ApplicationConfig) { + o.MachineTag = tag + } +} + func WithCors(b bool) AppOption { return func(o *ApplicationConfig) { o.CORS = b @@ -116,12 +124,6 @@ func WithP2PToken(s string) AppOption { } } -func WithModelLibraryURL(url string) AppOption { - return func(o *ApplicationConfig) { - o.ModelLibraryURL = url - } -} - func WithLibPath(path string) AppOption { return func(o *ApplicationConfig) { o.LibPath = path @@ -331,6 +333,12 @@ func WithOpaqueErrors(opaque bool) AppOption { } } +func WithLoadToMemory(models []string) AppOption { + return func(o *ApplicationConfig) { + o.LoadToMemory = models + } +} + func WithSubtleKeyComparison(subtle bool) AppOption { return func(o *ApplicationConfig) { o.UseSubtleKeyComparison = subtle @@ -343,6 +351,10 @@ func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption { } } +var DisableMetricsEndpoint AppOption = func(o *ApplicationConfig) { + o.DisableMetrics = true +} + func WithHttpGetExemptedEndpoints(endpoints []string) AppOption { return func(o *ApplicationConfig) { o.HttpGetExemptedEndpoints = []*regexp.Regexp{} diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 5662f1ca..2b130ec8 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -3,11 +3,13 @@ package config import ( "os" "regexp" + "slices" "strings" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" + "gopkg.in/yaml.v3" ) const ( @@ -19,21 +21,22 @@ type TTSConfig struct { // Voice wav path or id Voice string `yaml:"voice"` - // Vall-e-x - VallE VallE `yaml:"vall-e"` + AudioPath string `yaml:"audio_path"` } type BackendConfig struct { schema.PredictionOptions `yaml:"parameters"` Name string `yaml:"name"` - F16 *bool `yaml:"f16"` - Threads *int `yaml:"threads"` - Debug *bool `yaml:"debug"` - Roles map[string]string `yaml:"roles"` - Embeddings *bool `yaml:"embeddings"` - Backend string `yaml:"backend"` - TemplateConfig TemplateConfig `yaml:"template"` + F16 *bool `yaml:"f16"` + Threads *int `yaml:"threads"` + Debug *bool `yaml:"debug"` + Roles map[string]string `yaml:"roles"` + Embeddings *bool 
`yaml:"embeddings"` + Backend string `yaml:"backend"` + TemplateConfig TemplateConfig `yaml:"template"` + KnownUsecaseStrings []string `yaml:"known_usecases"` + KnownUsecases *BackendConfigUsecases `yaml:"-"` PromptStrings, InputStrings []string `yaml:"-"` InputToken [][]int `yaml:"-"` @@ -68,6 +71,8 @@ type BackendConfig struct { Description string `yaml:"description"` Usage string `yaml:"usage"` + + Options []string `yaml:"options"` } type File struct { @@ -76,10 +81,6 @@ type File struct { URI downloader.URI `yaml:"uri" json:"uri"` } -type VallE struct { - AudioPath string `yaml:"audio_path"` -} - type FeatureFlag map[string]*bool func (ff FeatureFlag) Enabled(s string) bool { @@ -93,16 +94,15 @@ type GRPC struct { } type Diffusers struct { - CUDA bool `yaml:"cuda"` - PipelineType string `yaml:"pipeline_type"` - SchedulerType string `yaml:"scheduler_type"` - EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify - CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale - IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser - ClipSkip int `yaml:"clip_skip"` // Skip every N frames - ClipModel string `yaml:"clip_model"` // Clip model to use - ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model - ControlNet string `yaml:"control_net"` + CUDA bool `yaml:"cuda"` + PipelineType string `yaml:"pipeline_type"` + SchedulerType string `yaml:"scheduler_type"` + EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify + IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser + ClipSkip int `yaml:"clip_skip"` // Skip every N frames + ClipModel string `yaml:"clip_model"` // Clip model to use + ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model + ControlNet string `yaml:"control_net"` } // LLMConfig is a struct that holds the configuration that are @@ -130,25 +130,30 @@ type LLMConfig struct { TrimSpace []string `yaml:"trimspace"` TrimSuffix []string `yaml:"trimsuffix"` - ContextSize *int `yaml:"context_size"` - NUMA bool `yaml:"numa"` - LoraAdapter string `yaml:"lora_adapter"` - LoraBase string `yaml:"lora_base"` - LoraScale float32 `yaml:"lora_scale"` - NoMulMatQ bool `yaml:"no_mulmatq"` - DraftModel string `yaml:"draft_model"` - NDraft int32 `yaml:"n_draft"` - Quantization string `yaml:"quantization"` - GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM - TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM - EnforceEager bool `yaml:"enforce_eager"` // vLLM - SwapSpace int `yaml:"swap_space"` // vLLM - MaxModelLen int `yaml:"max_model_len"` // vLLM - TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM - MMProj string `yaml:"mmproj"` + ContextSize *int `yaml:"context_size"` + NUMA bool `yaml:"numa"` + LoraAdapter string `yaml:"lora_adapter"` + LoraBase string `yaml:"lora_base"` + LoraAdapters []string `yaml:"lora_adapters"` + LoraScales []float32 `yaml:"lora_scales"` + LoraScale float32 `yaml:"lora_scale"` + NoMulMatQ bool `yaml:"no_mulmatq"` + DraftModel string `yaml:"draft_model"` + NDraft int32 `yaml:"n_draft"` + Quantization string `yaml:"quantization"` + LoadFormat string `yaml:"load_format"` + GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM + TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM + EnforceEager bool `yaml:"enforce_eager"` // vLLM + SwapSpace int `yaml:"swap_space"` // vLLM + MaxModelLen int `yaml:"max_model_len"` // vLLM + TensorParallelSize int 
`yaml:"tensor_parallel_size"` // vLLM + MMProj string `yaml:"mmproj"` - FlashAttention bool `yaml:"flash_attention"` - NoKVOffloading bool `yaml:"no_kv_offloading"` + FlashAttention bool `yaml:"flash_attention"` + NoKVOffloading bool `yaml:"no_kv_offloading"` + CacheTypeK string `yaml:"cache_type_k"` + CacheTypeV string `yaml:"cache_type_v"` RopeScaling string `yaml:"rope_scaling"` ModelType string `yaml:"type"` @@ -157,6 +162,8 @@ type LLMConfig struct { YarnAttnFactor float32 `yaml:"yarn_attn_factor"` YarnBetaFast float32 `yaml:"yarn_beta_fast"` YarnBetaSlow float32 `yaml:"yarn_beta_slow"` + + CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale } // AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend @@ -192,6 +199,21 @@ type TemplateConfig struct { // JoinChatMessagesByCharacter is a string that will be used to join chat messages together. // It defaults to \n JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"` + + Multimodal string `yaml:"multimodal"` + + JinjaTemplate bool `yaml:"jinja_template"` +} + +func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error { + type BCAlias BackendConfig + var aux BCAlias + if err := value.Decode(&aux); err != nil { + return err + } + *c = BackendConfig(aux) + c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings) + return nil } func (c *BackendConfig) SetFunctionCallString(s string) { @@ -411,3 +433,121 @@ func (c *BackendConfig) Validate() bool { func (c *BackendConfig) HasTemplate() bool { return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != "" } + +type BackendConfigUsecases int + +const ( + FLAG_ANY BackendConfigUsecases = 0b000000000 + FLAG_CHAT BackendConfigUsecases = 0b000000001 + FLAG_COMPLETION BackendConfigUsecases = 0b000000010 + FLAG_EDIT BackendConfigUsecases = 0b000000100 + FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000 + FLAG_RERANK BackendConfigUsecases = 0b000010000 + FLAG_IMAGE BackendConfigUsecases = 0b000100000 + FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000 + FLAG_TTS BackendConfigUsecases = 0b010000000 + FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000 + + // Common Subsets + FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT +) + +func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases { + return map[string]BackendConfigUsecases{ + "FLAG_ANY": FLAG_ANY, + "FLAG_CHAT": FLAG_CHAT, + "FLAG_COMPLETION": FLAG_COMPLETION, + "FLAG_EDIT": FLAG_EDIT, + "FLAG_EMBEDDINGS": FLAG_EMBEDDINGS, + "FLAG_RERANK": FLAG_RERANK, + "FLAG_IMAGE": FLAG_IMAGE, + "FLAG_TRANSCRIPT": FLAG_TRANSCRIPT, + "FLAG_TTS": FLAG_TTS, + "FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION, + "FLAG_LLM": FLAG_LLM, + } +} + +func GetUsecasesFromYAML(input []string) *BackendConfigUsecases { + if len(input) == 0 { + return nil + } + result := FLAG_ANY + flags := GetAllBackendConfigUsecases() + for _, str := range input { + flag, exists := flags["FLAG_"+strings.ToUpper(str)] + if exists { + result |= flag + } + } + return &result +} + +// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success. 
+func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool { + if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) { + return true + } + return c.GuessUsecases(u) +} + +// GuessUsecases is a **heuristic based** function, as the backend in question may not be loaded yet, and the config may not record what it's useful at. +// In its current state, this function should ideally check for properties of the config like templates, rather than the direct backend name checks for the lower half. +// This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently. +func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool { + if (u & FLAG_CHAT) == FLAG_CHAT { + if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" { + return false + } + } + if (u & FLAG_COMPLETION) == FLAG_COMPLETION { + if c.TemplateConfig.Completion == "" { + return false + } + } + if (u & FLAG_EDIT) == FLAG_EDIT { + if c.TemplateConfig.Edit == "" { + return false + } + } + if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS { + if c.Embeddings == nil || !*c.Embeddings { + return false + } + } + if (u & FLAG_IMAGE) == FLAG_IMAGE { + imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"} + if !slices.Contains(imageBackends, c.Backend) { + return false + } + + if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" { + return false + } + + } + if (u & FLAG_RERANK) == FLAG_RERANK { + if c.Backend != "rerankers" { + return false + } + } + if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT { + if c.Backend != "whisper" { + return false + } + } + if (u & FLAG_TTS) == FLAG_TTS { + ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"} + if !slices.Contains(ttsBackends, c.Backend) { + return false + } + } + + if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION { + if c.Backend != "transformers-musicgen" { + return false + } + } + + return true +} diff --git a/core/config/backend_config_filter.go b/core/config/backend_config_filter.go new file mode 100644 index 00000000..f1eb2488 --- /dev/null +++ b/core/config/backend_config_filter.go @@ -0,0 +1,35 @@ +package config + +import "regexp" + +type BackendConfigFilterFn func(string, *BackendConfig) bool + +func NoFilterFn(_ string, _ *BackendConfig) bool { return true } + +func BuildNameFilterFn(filter string) (BackendConfigFilterFn, error) { + if filter == "" { + return NoFilterFn, nil + } + rxp, err := regexp.Compile(filter) + if err != nil { + return nil, err + } + return func(name string, config *BackendConfig) bool { + if config != nil { + return rxp.MatchString(config.Name) + } + return rxp.MatchString(name) + }, nil +} + +func BuildUsecaseFilterFn(usecases BackendConfigUsecases) BackendConfigFilterFn { + if usecases == FLAG_ANY { + return NoFilterFn + } + return func(name string, config *BackendConfig) bool { + if config == nil { + return false // TODO: Potentially make this a param, for now, no known usecase to include + } + return config.HasUsecases(usecases) + } +} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go index 45fe259e..7fe77d42 100644 --- a/core/config/backend_config_loader.go +++ b/core/config/backend_config_loader.go @@ -140,7 +140,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath } } - cfg.SetDefaults(opts...) + cfg.SetDefaults(append(opts, ModelPath(modelPath))...) 
return cfg, nil } @@ -201,6 +201,26 @@ func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig { return res } +func (bcl *BackendConfigLoader) GetBackendConfigsByFilter(filter BackendConfigFilterFn) []BackendConfig { + bcl.Lock() + defer bcl.Unlock() + var res []BackendConfig + + if filter == nil { + filter = NoFilterFn + } + + for n, v := range bcl.configs { + if filter(n, &v) { + res = append(res, v) + } + } + + // TODO: I don't think this one needs to Sort on name... but we'll see what breaks. + + return res +} + func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) { bcl.Lock() defer bcl.Unlock() diff --git a/core/config/backend_config_test.go b/core/config/backend_config_test.go index da245933..e6a54b89 100644 --- a/core/config/backend_config_test.go +++ b/core/config/backend_config_test.go @@ -19,12 +19,17 @@ var _ = Describe("Test cases for config related functions", func() { `backend: "../foo-bar" name: "foo" parameters: - model: "foo-bar"`) + model: "foo-bar" +known_usecases: +- chat +- COMPLETION +`) Expect(err).ToNot(HaveOccurred()) config, err := readBackendConfigFromFile(tmp.Name()) Expect(err).To(BeNil()) Expect(config).ToNot(BeNil()) Expect(config.Validate()).To(BeFalse()) + Expect(config.KnownUsecases).ToNot(BeNil()) }) It("Test Validate", func() { tmp, err := os.CreateTemp("", "config.yaml") @@ -43,9 +48,9 @@ parameters: Expect(config.Name).To(Equal("bar-baz")) Expect(config.Validate()).To(BeTrue()) - // download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml + // download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml httpClient := http.Client{} - resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml") + resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml") Expect(err).To(BeNil()) defer resp.Body.Close() tmp, err = os.CreateTemp("", "config.yaml") @@ -61,4 +66,99 @@ parameters: Expect(config.Validate()).To(BeTrue()) }) }) + It("Properly handles backend usecase matching", func() { + + a := BackendConfig{ + Name: "a", + } + Expect(a.HasUsecases(FLAG_ANY)).To(BeTrue()) // FLAG_ANY just means the config _exists_ essentially. 
+ + b := BackendConfig{ + Name: "b", + Backend: "stablediffusion", + } + Expect(b.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(b.HasUsecases(FLAG_IMAGE)).To(BeTrue()) + Expect(b.HasUsecases(FLAG_CHAT)).To(BeFalse()) + + c := BackendConfig{ + Name: "c", + Backend: "llama-cpp", + TemplateConfig: TemplateConfig{ + Chat: "chat", + }, + } + Expect(c.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(c.HasUsecases(FLAG_IMAGE)).To(BeFalse()) + Expect(c.HasUsecases(FLAG_COMPLETION)).To(BeFalse()) + Expect(c.HasUsecases(FLAG_CHAT)).To(BeTrue()) + + d := BackendConfig{ + Name: "d", + Backend: "llama-cpp", + TemplateConfig: TemplateConfig{ + Chat: "chat", + Completion: "completion", + }, + } + Expect(d.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(d.HasUsecases(FLAG_IMAGE)).To(BeFalse()) + Expect(d.HasUsecases(FLAG_COMPLETION)).To(BeTrue()) + Expect(d.HasUsecases(FLAG_CHAT)).To(BeTrue()) + + trueValue := true + e := BackendConfig{ + Name: "e", + Backend: "llama-cpp", + TemplateConfig: TemplateConfig{ + Completion: "completion", + }, + Embeddings: &trueValue, + } + + Expect(e.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(e.HasUsecases(FLAG_IMAGE)).To(BeFalse()) + Expect(e.HasUsecases(FLAG_COMPLETION)).To(BeTrue()) + Expect(e.HasUsecases(FLAG_CHAT)).To(BeFalse()) + Expect(e.HasUsecases(FLAG_EMBEDDINGS)).To(BeTrue()) + + f := BackendConfig{ + Name: "f", + Backend: "piper", + } + Expect(f.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(f.HasUsecases(FLAG_TTS)).To(BeTrue()) + Expect(f.HasUsecases(FLAG_CHAT)).To(BeFalse()) + + g := BackendConfig{ + Name: "g", + Backend: "whisper", + } + Expect(g.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(g.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue()) + Expect(g.HasUsecases(FLAG_TTS)).To(BeFalse()) + + h := BackendConfig{ + Name: "h", + Backend: "transformers-musicgen", + } + Expect(h.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(h.HasUsecases(FLAG_TRANSCRIPT)).To(BeFalse()) + Expect(h.HasUsecases(FLAG_TTS)).To(BeTrue()) + Expect(h.HasUsecases(FLAG_SOUND_GENERATION)).To(BeTrue()) + + knownUsecases := FLAG_CHAT | FLAG_COMPLETION + i := BackendConfig{ + Name: "i", + Backend: "whisper", + // Earlier test checks parsing, this just needs to set final values + KnownUsecases: &knownUsecases, + } + Expect(i.HasUsecases(FLAG_ANY)).To(BeTrue()) + Expect(i.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue()) + Expect(i.HasUsecases(FLAG_TTS)).To(BeFalse()) + Expect(i.HasUsecases(FLAG_COMPLETION)).To(BeTrue()) + Expect(i.HasUsecases(FLAG_CHAT)).To(BeTrue()) + + }) }) diff --git a/core/config/config_test.go b/core/config/config_test.go index 5122c907..85f18eae 100644 --- a/core/config/config_test.go +++ b/core/config/config_test.go @@ -48,5 +48,66 @@ var _ = Describe("Test cases for config related functions", func() { // config should includes whisper-1 models's api.config Expect(loadedModelNames).To(ContainElements("whisper-1")) }) + + It("Test new loadconfig", func() { + + bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH")) + err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH")) + Expect(err).To(BeNil()) + configs := bcl.GetAllBackendConfigs() + loadedModelNames := []string{} + for _, v := range configs { + loadedModelNames = append(loadedModelNames, v.Name) + } + Expect(configs).ToNot(BeNil()) + totalModels := len(loadedModelNames) + + Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001")) + + // config should includes text-embedding-ada-002 models's api.config + Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002")) + + // config should includes rwkv_test 
models's api.config + Expect(loadedModelNames).To(ContainElements("rwkv_test")) + + // config should includes whisper-1 models's api.config + Expect(loadedModelNames).To(ContainElements("whisper-1")) + + // create a temp directory and store a temporary model + tmpdir, err := os.MkdirTemp("", "test") + Expect(err).ToNot(HaveOccurred()) + defer os.RemoveAll(tmpdir) + + // create a temporary model + model := `name: "test-model" +description: "test model" +options: +- foo +- bar +- baz +` + modelFile := tmpdir + "/test-model.yaml" + err = os.WriteFile(modelFile, []byte(model), 0644) + Expect(err).ToNot(HaveOccurred()) + + err = bcl.LoadBackendConfigsFromPath(tmpdir) + Expect(err).ToNot(HaveOccurred()) + + configs = bcl.GetAllBackendConfigs() + Expect(len(configs)).ToNot(Equal(totalModels)) + + loadedModelNames = []string{} + var testModel BackendConfig + for _, v := range configs { + loadedModelNames = append(loadedModelNames, v.Name) + if v.Name == "test-model" { + testModel = v + } + } + Expect(loadedModelNames).To(ContainElements("test-model")) + Expect(testModel.Description).To(Equal("test model")) + Expect(testModel.Options).To(ContainElements("foo", "bar", "baz")) + + }) }) }) diff --git a/core/config/guesser.go b/core/config/guesser.go index b63dd051..f5627461 100644 --- a/core/config/guesser.go +++ b/core/config/guesser.go @@ -26,14 +26,14 @@ const ( type settingsConfig struct { StopWords []string TemplateConfig TemplateConfig - RepeatPenalty float64 + RepeatPenalty float64 } // default settings to adopt with a given model family var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{ Gemma: { RepeatPenalty: 1.0, - StopWords: []string{"<|im_end|>", "", ""}, + StopWords: []string{"<|im_end|>", "", ""}, TemplateConfig: TemplateConfig{ Chat: "{{.Input }}\nmodel\n", ChatMessage: "{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}", @@ -200,6 +200,18 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) { } else { log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family") } + + if cfg.HasTemplate() { + return + } + + // identify from well known templates first, otherwise use the raw jinja template + chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template") + if found { + // try to use the jinja template + cfg.TemplateConfig.JinjaTemplate = true + cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString() + } } func identifyFamily(f *gguf.GGUFFile) familyType { diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go index 6ced6244..3a60e618 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -132,7 +132,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { var refFile string uri := downloader.URI(url) - err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { + err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -156,7 +156,7 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, } uri := downloader.URI(gallery.URL) - err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { + err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err 
!= nil { diff --git a/core/gallery/models.go b/core/gallery/models.go index dec6312e..58f1963a 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -69,7 +69,7 @@ type PromptTemplate struct { func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { var config Config uri := downloader.URI(url) - err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { + err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { diff --git a/core/gallery/models_test.go b/core/gallery/models_test.go index 5217253f..ef4faed8 100644 --- a/core/gallery/models_test.go +++ b/core/gallery/models_test.go @@ -12,6 +12,8 @@ import ( "gopkg.in/yaml.v3" ) +const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` + var _ = Describe("Model test", func() { Context("Downloading", func() { @@ -46,8 +48,10 @@ var _ = Describe("Model test", func() { defer os.RemoveAll(tempdir) gallery := []GalleryModel{{ - Name: "bert", - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + Metadata: Metadata{ + Name: "bert", + URL: bertEmbeddingsURL, + }, }} out, err := yaml.Marshal(gallery) Expect(err).ToNot(HaveOccurred()) @@ -66,7 +70,7 @@ var _ = Describe("Model test", func() { Expect(err).ToNot(HaveOccurred()) Expect(len(models)).To(Equal(1)) Expect(models[0].Name).To(Equal("bert")) - Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml")) + Expect(models[0].URL).To(Equal(bertEmbeddingsURL)) Expect(models[0].Installed).To(BeFalse()) err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true) @@ -78,7 +82,7 @@ var _ = Describe("Model test", func() { content := map[string]interface{}{} err = yaml.Unmarshal(dat, &content) Expect(err).ToNot(HaveOccurred()) - Expect(content["backend"]).To(Equal("bert-embeddings")) + Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) models, err = AvailableGalleryModels(galleries, tempdir) Expect(err).ToNot(HaveOccurred()) diff --git a/core/gallery/request.go b/core/gallery/request.go index eec764c1..72d078a1 100644 --- a/core/gallery/request.go +++ b/core/gallery/request.go @@ -11,6 +11,14 @@ import ( // It is used to install the model by resolving the URL and downloading the files. // The other fields are used to override the configuration of the model. type GalleryModel struct { + Metadata `json:",inline" yaml:",inline"` + // config_file is read in the situation where URL is blank - and therefore this is a base config. 
+ ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"` + // Overrides are used to override the configuration of the model located at URL + Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"` +} + +type Metadata struct { URL string `json:"url,omitempty" yaml:"url,omitempty"` Name string `json:"name,omitempty" yaml:"name,omitempty"` Description string `json:"description,omitempty" yaml:"description,omitempty"` @@ -18,10 +26,6 @@ type GalleryModel struct { URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"` Icon string `json:"icon,omitempty" yaml:"icon,omitempty"` Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"` - // config_file is read in the situation where URL is blank - and therefore this is a base config. - ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"` - // Overrides are used to override the configuration of the model located at URL - Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"` // AdditionalFiles are used to add additional files to the model AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"` // Gallery is a reference to the gallery which contains the model diff --git a/core/gallery/request_test.go b/core/gallery/request_test.go index 23281cc6..ed07f474 100644 --- a/core/gallery/request_test.go +++ b/core/gallery/request_test.go @@ -9,7 +9,11 @@ import ( var _ = Describe("Gallery API tests", func() { Context("requests", func() { It("parses github with a branch", func() { - req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"} + req := GalleryModel{ + Metadata: Metadata{ + URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main", + }, + } e, err := GetGalleryConfigFromURL(req.URL, "") Expect(err).ToNot(HaveOccurred()) Expect(e.Name).To(Equal("gpt4all-j")) diff --git a/core/http/app.go b/core/http/app.go index fa9cd866..d1e80f8d 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -14,10 +14,9 @@ import ( "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/http/routes" - "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/model" "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" @@ -31,24 +30,6 @@ import ( "github.com/rs/zerolog/log" ) -func readAuthHeader(c *fiber.Ctx) string { - authHeader := c.Get("Authorization") - - // elevenlabs - xApiKey := c.Get("xi-api-key") - if xApiKey != "" { - authHeader = "Bearer " + xApiKey - } - - // anthropic - xApiKey = c.Get("x-api-key") - if xApiKey != "" { - authHeader = "Bearer " + xApiKey - } - - return authHeader -} - // Embed a directory // //go:embed static/* @@ -67,18 +48,18 @@ var embedDirStatic embed.FS // @in header // @name Authorization -func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) { +func API(application *application.Application) (*fiber.App, error) { fiberCfg := fiber.Config{ Views: renderEngine(), - BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB + BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB // We disable the Fiber startup message as it does not conform to structured logging. 
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though DisableStartupMessage: true, // Override default error handler } - if !appConfig.OpaqueErrors { + if !application.ApplicationConfig().OpaqueErrors { // Normally, return errors as JSON responses fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error { // Status code defaults to 500 @@ -104,9 +85,19 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi } } - app := fiber.New(fiberCfg) + router := fiber.New(fiberCfg) - app.Hooks().OnListen(func(listenData fiber.ListenData) error { + router.Use(middleware.StripPathPrefix()) + + if application.ApplicationConfig().MachineTag != "" { + router.Use(func(c *fiber.Ctx) error { + c.Response().Header.Set("Machine-Tag", application.ApplicationConfig().MachineTag) + + return c.Next() + }) + } + + router.Hooks().OnListen(func(listenData fiber.ListenData) error { scheme := "http" if listenData.TLS { scheme = "https" @@ -117,77 +108,82 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Have Fiber use zerolog like the rest of the application rather than it's built-in logger logger := log.Logger - app.Use(fiberzerolog.New(fiberzerolog.Config{ + router.Use(fiberzerolog.New(fiberzerolog.Config{ Logger: &logger, })) // Default middleware config - if !appConfig.Debug { - app.Use(recover.New()) + if !application.ApplicationConfig().Debug { + router.Use(recover.New()) } - metricsService, err := services.NewLocalAIMetricsService() - if err != nil { - return nil, err - } + if !application.ApplicationConfig().DisableMetrics { + metricsService, err := services.NewLocalAIMetricsService() + if err != nil { + return nil, err + } - if metricsService != nil { - app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService)) - app.Hooks().OnShutdown(func() error { - return metricsService.Shutdown() - }) - } + if metricsService != nil { + router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService)) + router.Hooks().OnShutdown(func() error { + return metricsService.Shutdown() + }) + } - kaConfig, err := middleware.GetKeyAuthConfig(appConfig) + } + // Health Checks should always be exempt from auth, so register these first + routes.HealthRoutes(router) + + kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig()) if err != nil || kaConfig == nil { return nil, fmt.Errorf("failed to create key auth config: %w", err) } // Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration - app.Use(v2keyauth.New(*kaConfig)) + router.Use(v2keyauth.New(*kaConfig)) - if appConfig.CORS { + if application.ApplicationConfig().CORS { var c func(ctx *fiber.Ctx) error - if appConfig.CORSAllowOrigins == "" { + if application.ApplicationConfig().CORSAllowOrigins == "" { c = cors.New() } else { - c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins}) + c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins}) } - app.Use(c) + router.Use(c) } - if appConfig.CSRF { + if application.ApplicationConfig().CSRF { log.Debug().Msg("Enabling CSRF middleware. 
Tokens are now required for state-modifying requests") - app.Use(csrf.New()) + router.Use(csrf.New()) } // Load config jsons - utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) - utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) + utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) + utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) + utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles) - galleryService := services.NewGalleryService(appConfig) - galleryService.Start(appConfig.Context, cl) + galleryService := services.NewGalleryService(application.ApplicationConfig()) + galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader()) - routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig) - routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService) - routes.RegisterOpenAIRoutes(app, cl, ml, appConfig) - if !appConfig.DisableWebUI { - routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService) + routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) + routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService) + routes.RegisterOpenAIRoutes(router, application) + if !application.ApplicationConfig().DisableWebUI { + routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService) } - routes.RegisterJINARoutes(app, cl, ml, appConfig) + routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()) httpFS := http.FS(embedDirStatic) - app.Use(favicon.New(favicon.Config{ + router.Use(favicon.New(favicon.Config{ URL: "/favicon.ico", FileSystem: httpFS, File: "static/favicon.ico", })) - app.Use("/static", filesystem.New(filesystem.Config{ + router.Use("/static", filesystem.New(filesystem.Config{ Root: httpFS, PathPrefix: "static", Browse: true, @@ -195,7 +191,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi // Define a custom 404 handler // Note: keep this at the bottom! - app.Use(notFoundHandler) + router.Use(notFoundHandler) - return app, nil + return router, nil } diff --git a/core/http/app_test.go b/core/http/app_test.go index 86fe7fdd..ca7a2eaa 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -5,7 +5,6 @@ import ( "context" "embed" "encoding/json" - "errors" "fmt" "io" "net/http" @@ -13,15 +12,14 @@ import ( "path/filepath" "runtime" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/config" . "github.com/mudler/LocalAI/core/http" "github.com/mudler/LocalAI/core/schema" - "github.com/mudler/LocalAI/core/startup" "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/pkg/downloader" - "github.com/mudler/LocalAI/pkg/model" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "gopkg.in/yaml.v3" @@ -31,6 +29,9 @@ import ( "github.com/sashabaranov/go-openai/jsonschema" ) +const apiKey = "joshua" +const bearerKey = "Bearer " + apiKey + const testPrompt = `### System: You are an AI assistant that follows instruction extremely well. Help as much as you can. @@ -50,11 +51,19 @@ type modelApplyRequest struct { func getModelStatus(url string) (response map[string]interface{}) { // Create the HTTP request - resp, err := http.Get(url) + req, err := http.NewRequest("GET", url, nil) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) if err != nil { fmt.Println("Error creating request:", err) return } + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + fmt.Println("Error sending request:", err) + return + } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) @@ -72,14 +81,15 @@ func getModelStatus(url string) (response map[string]interface{}) { return } -func getModels(url string) (response []gallery.GalleryModel) { +func getModels(url string) ([]gallery.GalleryModel, error) { + response := []gallery.GalleryModel{} uri := downloader.URI(url) // TODO: No tests currently seem to exercise file:// urls. Fix? - uri.DownloadAndUnmarshal("", func(url string, i []byte) error { + err := uri.DownloadWithAuthorizationAndCallback("", bearerKey, func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) - return + return response, err } func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) { @@ -101,6 +111,7 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[ return } req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) // Make the request client := &http.Client{} @@ -140,6 +151,7 @@ func postRequestJSON[B any](url string, bodyJson *B) error { } req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) client := &http.Client{} resp, err := client.Do(req) @@ -175,6 +187,7 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson * } req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", bearerKey) client := &http.Client{} resp, err := client.Do(req) @@ -195,6 +208,62 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson * return json.Unmarshal(body, respJson) } +func postInvalidRequest(url string) (error, int) { + + req, err := http.NewRequest("POST", url, bytes.NewBufferString("invalid request")) + if err != nil { + return err, -1 + } + + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err, -1 + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err, -1 + } + + if resp.StatusCode < 200 || resp.StatusCode >= 400 { + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)), resp.StatusCode + } + + return nil, resp.StatusCode +} + +func getRequest(url string, header http.Header) (error, int, []byte) { + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return err, -1, nil + } + + req.Header = header + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err, -1, nil + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err, -1, nil + } + + return 
nil, resp.StatusCode, body +} + +const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` + //go:embed backend-assets/* var backendAssets embed.FS @@ -207,9 +276,6 @@ var _ = Describe("API test", func() { var cancel context.CancelFunc var tmpdir string var modelDir string - var bcl *config.BackendConfigLoader - var ml *model.ModelLoader - var applicationConfig *config.ApplicationConfig commonOpts := []config.AppOption{ config.WithDebug(true), @@ -233,14 +299,18 @@ var _ = Describe("API test", func() { g := []gallery.GalleryModel{ { - Name: "bert", - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + Metadata: gallery.Metadata{ + Name: "bert", + URL: bertEmbeddingsURL, + }, }, { - Name: "bert2", - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", - Overrides: map[string]interface{}{"foo": "bar"}, - AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}}, + Metadata: gallery.Metadata{ + Name: "bert2", + URL: bertEmbeddingsURL, + AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}}, + }, + Overrides: map[string]interface{}{"foo": "bar"}, }, } out, err := yaml.Marshal(g) @@ -255,21 +325,22 @@ var _ = Describe("API test", func() { }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithGalleries(galleries), config.WithModelPath(modelDir), + config.WithApiKeys([]string{apiKey}), config.WithBackendAssets(backendAssets), config.WithBackendAssetsOutput(backendAssetsDir))...) 
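A note on the bootstrap path used by the updated test setup above: the (BackendConfigLoader, ModelLoader, ApplicationConfig) triplet returned by startup.Startup is replaced by a single *application.Application that is handed to http.API. A minimal sketch of that wiring, using only option names that appear in this diff (the model path, listen address and API key are illustrative):

```go
package main

import (
	"context"
	"log"

	"github.com/mudler/LocalAI/core/application"
	"github.com/mudler/LocalAI/core/config"
	localaihttp "github.com/mudler/LocalAI/core/http"
)

func main() {
	ctx := context.Background()

	// Build the application container once; it owns the backend/model loaders
	// that the HTTP layer previously received as separate arguments.
	app, err := application.New(
		config.WithContext(ctx),
		config.WithModelPath("./models"),              // illustrative path
		config.WithApiKeys([]string{"my-secret-key"}), // enables the bearer auth the tests now exercise
	)
	if err != nil {
		log.Fatal(err)
	}

	// API wires auth, metrics, CORS/CSRF and the new path-prefix middleware
	// around a Fiber router.
	router, err := localaihttp.API(app)
	if err != nil {
		log.Fatal(err)
	}

	log.Fatal(router.Listen("127.0.0.1:9090"))
}
```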
Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") - defaultConfig := openai.DefaultConfig("") + defaultConfig := openai.DefaultConfig(apiKey) defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" client2 = openaigo.NewClient("") @@ -295,10 +366,46 @@ var _ = Describe("API test", func() { Expect(err).To(HaveOccurred()) }) + Context("Auth Tests", func() { + It("Should fail if the api key is missing", func() { + err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available") + Expect(err).ToNot(BeNil()) + Expect(sc).To(Equal(401)) + }) + }) + + Context("URL routing Tests", func() { + It("Should support reverse-proxy when unauthenticated", func() { + + err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{ + "X-Forwarded-Proto": {"https"}, + "X-Forwarded-Host": {"example.org"}, + "X-Forwarded-Prefix": {"/myprefix/"}, + }) + Expect(err).To(BeNil(), "error") + Expect(sc).To(Equal(401), "status code") + Expect(string(body)).To(ContainSubstring(``), "body") + }) + + It("Should support reverse-proxy when authenticated", func() { + + err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{ + "Authorization": {bearerKey}, + "X-Forwarded-Proto": {"https"}, + "X-Forwarded-Host": {"example.org"}, + "X-Forwarded-Prefix": {"/myprefix/"}, + }) + Expect(err).To(BeNil(), "error") + Expect(sc).To(Equal(200), "status code") + Expect(string(body)).To(ContainSubstring(``), "body") + }) + }) + Context("Applying models", func() { It("applies models from a gallery", func() { - models := getModels("http://127.0.0.1:9090/models/available") + models, err := getModels("http://127.0.0.1:9090/models/available") + Expect(err).To(BeNil()) Expect(len(models)).To(Equal(2), fmt.Sprint(models)) Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models)) Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models)) @@ -328,10 +435,11 @@ var _ = Describe("API test", func() { content := map[string]interface{}{} err = yaml.Unmarshal(dat, &content) Expect(err).ToNot(HaveOccurred()) - Expect(content["backend"]).To(Equal("bert-embeddings")) + Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) Expect(content["foo"]).To(Equal("bar")) - models = getModels("http://127.0.0.1:9090/models/available") + models, err = getModels("http://127.0.0.1:9090/models/available") + Expect(err).To(BeNil()) Expect(len(models)).To(Equal(2), fmt.Sprint(models)) Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2"))) Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2"))) @@ -346,7 +454,7 @@ var _ = Describe("API test", func() { It("overrides models", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + URL: bertEmbeddingsURL, Name: "bert", Overrides: map[string]interface{}{ "backend": "llama", @@ -372,7 +480,7 @@ var _ = Describe("API test", func() { }) It("apply models from config", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -382,7 +490,7 @@ var _ 
= Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "900s", "10s").Should(Equal(true)) Eventually(func() []string { models, _ := client.ListModels(context.TODO()) @@ -395,7 +503,7 @@ var _ = Describe("API test", func() { }) It("apply models without overrides", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml", + URL: bertEmbeddingsURL, Name: "bert", Overrides: map[string]interface{}{}, }) @@ -415,7 +523,7 @@ var _ = Describe("API test", func() { content := map[string]interface{}{} err = yaml.Unmarshal(dat, &content) Expect(err).ToNot(HaveOccurred()) - Expect(content["backend"]).To(Equal("bert-embeddings")) + Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) }) It("runs openllama(llama-ggml backend)", Label("llama"), func() { @@ -483,7 +591,7 @@ var _ = Describe("API test", func() { var res map[string]string err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res)) + Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) @@ -496,7 +604,7 @@ var _ = Describe("API test", func() { modelName := "hermes-2-pro-mistral" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -506,7 +614,7 @@ var _ = Describe("API test", func() { Eventually(func() bool { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "900s", "10s").Should(Equal(true)) By("testing chat") resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ @@ -583,9 +691,13 @@ var _ = Describe("API test", func() { Name: "model-gallery", URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml", }, + { + Name: "localai", + URL: "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml", + }, } - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithAudioDir(tmpdir), @@ -596,7 +708,7 @@ var _ = Describe("API test", func() { config.WithBackendAssetsOutput(tmpdir))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -652,7 +764,7 @@ var _ = Describe("API test", func() { Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat))) - Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav")) + 
Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave"))) }) It("installs and is capable to generate images", Label("stablediffusion"), func() { if runtime.GOOS != "linux" { @@ -660,10 +772,8 @@ var _ = Describe("API test", func() { } response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ID: "model-gallery@stablediffusion", - Overrides: map[string]interface{}{ - "parameters": map[string]interface{}{"model": "stablediffusion_assets"}, - }, + ID: "localai@sd-1.5-ggml", + Name: "stablediffusion", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -674,14 +784,14 @@ var _ = Describe("API test", func() { response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) fmt.Println(response) return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) + }, "1200s", "10s").Should(Equal(true)) resp, err := http.Post( "http://127.0.0.1:9090/v1/images/generations", "application/json", bytes.NewBuffer([]byte(`{ - "prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text", - "mode": 2, "seed":9000, + "prompt": "a lovely cat", + "step": 1, "seed":9000, "size": "256x256", "n":2}`))) // The response should contain an URL Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) @@ -690,6 +800,7 @@ var _ = Describe("API test", func() { imgUrlResp := &schema.OpenAIResponse{} err = json.Unmarshal(dat, imgUrlResp) + Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat)) Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero())) imgUrl := imgUrlResp.Data[0].URL Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl) @@ -716,14 +827,14 @@ var _ = Describe("API test", func() { var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, - config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")), + config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")), config.WithContext(c), config.WithModelPath(modelPath), )...) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") @@ -750,7 +861,7 @@ var _ = Describe("API test", func() { It("returns the models list", func() { models, err := client.ListModels(context.TODO()) Expect(err).ToNot(HaveOccurred()) - Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? + Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8? 
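For reference, the slimmed-down image request used in the test above can be reproduced against a running instance. A rough sketch that posts the same JSON body shown in the diff (the address and API key are placeholders):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"

	"github.com/mudler/LocalAI/core/schema"
)

func main() {
	body := []byte(`{
	  "prompt": "a lovely cat",
	  "step": 1, "seed": 9000,
	  "size": "256x256", "n": 2}`)

	req, err := http.NewRequest("POST", "http://127.0.0.1:9090/v1/images/generations", bytes.NewBuffer(body))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer my-secret-key") // same bearer scheme the test helpers now set

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// The response follows the OpenAI schema; the test asserts that Data[0].URL
	// points back at the LocalAI instance.
	var out schema.OpenAIResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	if len(out.Data) > 0 {
		fmt.Println("image URL:", out.Data[0].URL)
	}
}
```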
}) It("can generate completions via ggml", func() { resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) @@ -810,8 +921,8 @@ var _ = Describe("API test", func() { }, ) Expect(err).ToNot(HaveOccurred(), err) - Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384)) - Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384)) + Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048)) + Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048)) sunEmbedding := resp.Data[0].Embedding resp2, err := client.CreateEmbeddings( @@ -855,71 +966,6 @@ var _ = Describe("API test", func() { }) }) - Context("backends", func() { - It("runs rwkv completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices) > 0).To(BeTrue()) - Expect(resp.Choices[0].Text).To(ContainSubstring("five")) - - stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{ - Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true, - }) - Expect(err).ToNot(HaveOccurred()) - defer stream.Close() - - tokens := 0 - text := "" - for { - response, err := stream.Recv() - if errors.Is(err, io.EOF) { - break - } - - Expect(err).ToNot(HaveOccurred()) - text += response.Choices[0].Text - tokens++ - } - Expect(text).ToNot(BeEmpty()) - Expect(text).To(ContainSubstring("five")) - Expect(tokens).ToNot(Or(Equal(1), Equal(0))) - }) - It("runs rwkv chat completion", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - resp, err := client.CreateChatCompletion(context.TODO(), - openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices) > 0).To(BeTrue()) - Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five"))) - - stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}}) - Expect(err).ToNot(HaveOccurred()) - defer stream.Close() - - tokens := 0 - text := "" - for { - response, err := stream.Recv() - if errors.Is(err, io.EOF) { - break - } - - Expect(err).ToNot(HaveOccurred()) - text += response.Choices[0].Delta.Content - tokens++ - } - Expect(text).ToNot(BeEmpty()) - Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five"))) - - Expect(tokens).ToNot(Or(Equal(1), Equal(0))) - }) - }) - // See tests/integration/stores_test Context("Stores", Label("stores"), func() { @@ -999,14 +1045,14 @@ var _ = Describe("API test", func() { c, cancel = context.WithCancel(context.Background()) var err error - bcl, ml, applicationConfig, err = startup.Startup( + application, err := application.New( append(commonOpts, config.WithContext(c), config.WithModelPath(modelPath), config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., ) Expect(err).ToNot(HaveOccurred()) - app, err = App(bcl, ml, applicationConfig) + app, err = API(application) Expect(err).ToNot(HaveOccurred()) go app.Listen("127.0.0.1:9090") diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index 94059847..254f0704 100644 
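The core/http/ctx/fiber.go hunk that follows teaches ModelFromContext to also honour a ?model= query parameter and to list models through the new config.NoFilterFn / services.SKIP_IF_CONFIGURED arguments. A simplified, dependency-free sketch of the resulting precedence; the helper below is purely illustrative and not the real signature, and the model names are taken from the test fixtures:

```go
package main

import "fmt"

// resolveModel mirrors the order ModelFromContext now applies: the path parameter
// is taken first, a ?model= query parameter overrides it, a bearer token that names
// a model on disk is used when nothing else was given, and otherwise the first
// configured model is picked. The real code works on *fiber.Ctx and the loaders.
func resolveModel(pathParam, queryParam, bearer string, bearerIsModel bool, configured []string) string {
	model := pathParam
	if queryParam != "" {
		model = queryParam
	}
	if model == "" && bearerIsModel {
		return bearer
	}
	if model == "" && len(configured) > 0 {
		return configured[0]
	}
	return model
}

func main() {
	fmt.Println(resolveModel("", "bert", "", false, []string{"testmodel.ggml"})) // query parameter wins: bert
	fmt.Println(resolveModel("", "", "", false, []string{"testmodel.ggml"}))     // falls back to the first configured model
}
```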
--- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -19,14 +19,16 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo if ctx.Params("model") != "" { modelInput = ctx.Params("model") } - + if ctx.Query("model") != "" { + modelInput = ctx.Query("model") + } // Set model from bearer token, if available - bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ") + bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) // If no model was specified, take the first available if modelInput == "" && !bearerExists && firstModel { - models, _ := services.ListModels(cl, loader, "", true) + models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if len(models) > 0 { modelInput = models[0] log.Debug().Msgf("No model specified, using: %s", modelInput) diff --git a/core/http/elements/buttons.go b/core/http/elements/buttons.go new file mode 100644 index 00000000..2364a0b3 --- /dev/null +++ b/core/http/elements/buttons.go @@ -0,0 +1,97 @@ +package elements + +import ( + "strings" + + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/mudler/LocalAI/core/gallery" +) + +func installButton(galleryName string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "browse/install/model/" + galleryName, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-download pr-2", + }, + ), + elem.Text("Install"), + ) +} + +func reInstallButton(galleryName string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryName), + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "browse/install/model/" + galleryName, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-arrow-rotate-right pr-2", + }, + ), + elem.Text("Reinstall"), + ) +} + +func infoButton(m *gallery.GalleryModel) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "class": "float-left inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 
focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "data-modal-target": modalName(m), + "data-modal-toggle": modalName(m), + }, + elem.P( + attrs.Props{ + "class": "flex items-center", + }, + elem.I( + attrs.Props{ + "class": "fas fa-info-circle pr-2", + }, + ), + elem.Text("Info"), + ), + ) +} + +func deleteButton(galleryID string) elem.Node { + return elem.Button( + attrs.Props{ + "data-twe-ripple-init": "", + "data-twe-ripple-color": "light", + "hx-confirm": "Are you sure you wish to delete the model?", + "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", + "hx-target": "#action-div-" + dropBadChars(galleryID), + "hx-swap": "outerHTML", + // post the Model ID as param + "hx-post": "browse/delete/model/" + galleryID, + }, + elem.I( + attrs.Props{ + "class": "fa-solid fa-cancel pr-2", + }, + ), + elem.Text("Delete"), + ) +} + +// Javascript/HTMX doesn't like weird IDs +func dropBadChars(s string) string { + return strings.ReplaceAll(s, "@", "__") +} diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 91a12310..5ab68508 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -2,12 +2,11 @@ package elements import ( "fmt" - "strings" "github.com/chasefleming/elem-go" "github.com/chasefleming/elem-go/attrs" + "github.com/microcosm-cc/bluemonday" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" ) @@ -15,231 +14,6 @@ const ( noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" ) -func renderElements(n []elem.Node) string { - render := "" - for _, r := range n { - render += r.Render() - } - return render -} - -func DoneProgress(galleryID, text string, showDelete bool) string { - var modelName = galleryID - // Split by @ and grab the name - if strings.Contains(galleryID, "@") { - modelName = strings.Split(galleryID, "@")[1] - } - - return elem.Div( - attrs.Props{ - "id": "action-div-" + dropBadChars(galleryID), - }, - elem.H3( - attrs.Props{ - "role": "status", - "id": "pblabel", - "tabindex": "-1", - "autofocus": "", - }, - elem.Text(text), - ), - elem.If(showDelete, deleteButton(galleryID, modelName), reInstallButton(galleryID)), - ).Render() -} - -func ErrorProgress(err, galleryName string) string { - return elem.Div( - attrs.Props{}, - elem.H3( - attrs.Props{ - "role": "status", - "id": "pblabel", - "tabindex": "-1", - "autofocus": "", - }, - elem.Text("Error "+err), - ), - installButton(galleryName), - ).Render() -} - -func ProgressBar(progress string) string { - return elem.Div(attrs.Props{ - "class": "progress", - "role": "progressbar", - "aria-valuemin": "0", - "aria-valuemax": "100", - "aria-valuenow": "0", - "aria-labelledby": "pblabel", - }, - elem.Div(attrs.Props{ - "id": "pb", - "class": "progress-bar", - "style": "width:" + progress + "%", - }), - ).Render() -} - -func P2PNodeStats(nodes []p2p.NodeData) string { - /* -
-

Total Workers Detected: {{ len .Nodes }}

- {{ $online := 0 }} - {{ range .Nodes }} - {{ if .IsOnline }} - {{ $online = add $online 1 }} - {{ end }} - {{ end }} -

Total Online Workers: {{$online}}

-
- */ - - online := 0 - for _, n := range nodes { - if n.IsOnline() { - online++ - } - } - - class := "text-green-500" - if online == 0 { - class = "text-red-500" - } - /* - - */ - circle := elem.I(attrs.Props{ - "class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1", - }) - nodesElements := []elem.Node{ - elem.Span( - attrs.Props{ - "class": class, - }, - circle, - elem.Text(fmt.Sprintf("%d", online)), - ), - elem.Span( - attrs.Props{ - "class": "text-gray-200", - }, - elem.Text(fmt.Sprintf("/%d", len(nodes))), - ), - } - - return renderElements(nodesElements) -} - -func P2PNodeBoxes(nodes []p2p.NodeData) string { - /* -
-
- - {{.ID}} -
-

- Status: - - - {{ if .IsOnline }}Online{{ else }}Offline{{ end }} - -

-
- */ - - nodesElements := []elem.Node{} - - for _, n := range nodes { - - nodesElements = append(nodesElements, - elem.Div( - attrs.Props{ - "class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left", - }, - elem.P( - attrs.Props{ - "class": "text-sm text-gray-400 mt-2 flex", - }, - elem.I( - attrs.Props{ - "class": "fas fa-desktop text-gray-400 mr-2", - }, - ), - elem.Text("Name: "), - elem.Span( - attrs.Props{ - "class": "text-gray-200 font-semibold ml-2 mr-1", - }, - elem.Text(n.ID), - ), - elem.Text("Status: "), - elem.If( - n.IsOnline(), - elem.I( - attrs.Props{ - "class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1", - }, - ), - elem.I( - attrs.Props{ - "class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1", - }, - ), - ), - elem.If( - n.IsOnline(), - elem.Span( - attrs.Props{ - "class": "text-green-400", - }, - - elem.Text("Online"), - ), - elem.Span( - attrs.Props{ - "class": "text-red-400", - }, - elem.Text("Offline"), - ), - ), - ), - )) - } - - return renderElements(nodesElements) -} - -func StartProgressBar(uid, progress, text string) string { - if progress == "" { - progress = "0" - } - return elem.Div( - attrs.Props{ - "hx-trigger": "done", - "hx-get": "/browse/job/" + uid, - "hx-swap": "outerHTML", - "hx-target": "this", - }, - elem.H3( - attrs.Props{ - "role": "status", - "id": "pblabel", - "tabindex": "-1", - "autofocus": "", - }, - elem.Text(text), - elem.Div(attrs.Props{ - "hx-get": "/browse/job/progress/" + uid, - "hx-trigger": "every 600ms", - "hx-target": "this", - "hx-swap": "innerHTML", - }, - elem.Raw(ProgressBar(progress)), - ), - ), - ).Render() -} - func cardSpan(text, icon string) elem.Node { return elem.Span( attrs.Props{ @@ -249,9 +23,7 @@ func cardSpan(text, icon string) elem.Node { "class": icon + " pr-2", }), - elem.Text(text), - - //elem.Text(text), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ) } @@ -269,14 +41,13 @@ func searchableElement(text, icon string) elem.Node { attrs.Props{ "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2", }, - elem.A( attrs.Props{ // "name": "search", // "value": text, //"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", "href": "#!", - "hx-post": "/browse/search/models", + "hx-post": "browse/search/models", "hx-target": "#search-results", // TODO: this doesn't work // "hx-vals": `{ \"search\": \"` + text + `\" }`, @@ -285,15 +56,14 @@ func searchableElement(text, icon string) elem.Node { elem.I(attrs.Props{ "class": icon + " pr-2", }), - elem.Text(text), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ), ), - - //elem.Text(text), ) } -func link(text, url string) elem.Node { +/* +func buttonLink(text, url string) elem.Node { return elem.A( attrs.Props{ "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2", @@ -303,166 +73,258 @@ func link(text, url string) elem.Node { elem.I(attrs.Props{ "class": "fas fa-link pr-2", }), - elem.Text(text), - ) -} -func installButton(galleryName string) elem.Node { - return elem.Button( - attrs.Props{ - "data-twe-ripple-init": "", - "data-twe-ripple-color": "light", - "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 
focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", - "hx-swap": "outerHTML", - // post the Model ID as param - "hx-post": "/browse/install/model/" + galleryName, - }, - elem.I( - attrs.Props{ - "class": "fa-solid fa-download pr-2", - }, - ), - elem.Text("Install"), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ) } +*/ -func reInstallButton(galleryName string) elem.Node { - return elem.Button( +func link(text, url string) elem.Node { + return elem.A( attrs.Props{ - "data-twe-ripple-init": "", - "data-twe-ripple-color": "light", - "class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", - "hx-target": "#action-div-" + dropBadChars(galleryName), - "hx-swap": "outerHTML", - // post the Model ID as param - "hx-post": "/browse/install/model/" + galleryName, + "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", + "href": url, + "target": "_blank", }, - elem.I( - attrs.Props{ - "class": "fa-solid fa-arrow-rotate-right pr-2", - }, - ), - elem.Text("Reinstall"), + elem.I(attrs.Props{ + "class": "fas fa-link pr-2", + }), + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), ) } -func deleteButton(galleryID, modelName string) elem.Node { - return elem.Button( - attrs.Props{ - "data-twe-ripple-init": "", - "data-twe-ripple-color": "light", - "hx-confirm": "Are you sure you wish to delete the model?", - "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", - "hx-target": "#action-div-" + dropBadChars(galleryID), - "hx-swap": "outerHTML", - // post the Model ID as param - "hx-post": "/browse/delete/model/" + galleryID, - }, - elem.I( - attrs.Props{ - "class": "fa-solid fa-cancel pr-2", - }, - ), - elem.Text("Delete"), - ) -} - -// Javascript/HTMX doesn't like weird IDs -func dropBadChars(s string) string { - return strings.ReplaceAll(s, "@", "__") -} - type ProcessTracker interface { Exists(string) bool Get(string) string } -func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string { - modelsElements := []elem.Node{} - descriptionDiv := func(m *gallery.GalleryModel) elem.Node { - return elem.Div( - attrs.Props{ - "class": "p-6 text-surface dark:text-white", - }, - elem.H5( - attrs.Props{ - "class": "mb-2 text-xl font-bold leading-tight", - }, - elem.Text(m.Name), - ), - elem.P( - attrs.Props{ - "class": "mb-4 text-sm [&:not(:hover)]:truncate text-base", - }, - elem.Text(m.Description), - ), +func modalName(m *gallery.GalleryModel) string { + return m.Name + "-modal" +} + +func modelDescription(m *gallery.GalleryModel) 
elem.Node { + urls := []elem.Node{} + for _, url := range m.URLs { + urls = append(urls, + elem.Li(attrs.Props{}, link(url, url)), ) } - actionDiv := func(m *gallery.GalleryModel) elem.Node { - galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) - currentlyProcessing := processTracker.Exists(galleryID) - jobID := "" - isDeletionOp := false - if currentlyProcessing { - status := galleryService.GetStatus(galleryID) - if status != nil && status.Deletion { - isDeletionOp = true - } - jobID = processTracker.Get(galleryID) - // TODO: - // case not handled, if status == nil : "Waiting" - } - - nodes := []elem.Node{ - cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), - } - - if m.License != "" { - nodes = append(nodes, - cardSpan("License: "+m.License, "fas fa-book"), - ) - } - - tagsNodes := []elem.Node{} - for _, tag := range m.Tags { - tagsNodes = append(tagsNodes, - searchableElement(tag, "fas fa-tag"), - ) - } - - nodes = append(nodes, - elem.Div( - attrs.Props{ - "class": "flex flex-row flex-wrap content-center", - }, - tagsNodes..., - ), + tagsNodes := []elem.Node{} + for _, tag := range m.Tags { + tagsNodes = append(tagsNodes, + searchableElement(tag, "fas fa-tag"), ) + } - for i, url := range m.URLs { - nodes = append(nodes, - link("Link #"+fmt.Sprintf("%d", i+1), url), - ) - } - - progressMessage := "Installation" - if isDeletionOp { - progressMessage = "Deletion" - } - - return elem.Div( + return elem.Div( + attrs.Props{ + "class": "p-6 text-surface dark:text-white", + }, + elem.H5( attrs.Props{ - "class": "px-6 pt-4 pb-2", + "class": "mb-2 text-xl font-bold leading-tight", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)), + ), + elem.Div( // small description + attrs.Props{ + "class": "mb-4 text-sm truncate text-base", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)), + ), + + elem.Div( + attrs.Props{ + "id": modalName(m), + "tabindex": "-1", + "aria-hidden": "true", + "class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full", }, - elem.P( - attrs.Props{ - "class": "mb-4 text-base", - }, - nodes..., - ), elem.Div( attrs.Props{ - "id": "action-div-" + dropBadChars(galleryID), + "class": "relative p-4 w-full max-w-2xl max-h-full", + }, + elem.Div( + attrs.Props{ + "class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700", + }, + // header + elem.Div( + attrs.Props{ + "class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600", + }, + elem.H3( + attrs.Props{ + "class": "text-xl font-semibold text-gray-900 dark:text-white", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)), + ), + elem.Button( // close button + attrs.Props{ + "class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white", + "data-modal-hide": modalName(m), + }, + elem.Raw( + ``, + ), + elem.Span( + attrs.Props{ + "class": "sr-only", + }, + elem.Text("Close modal"), + ), + ), + ), + // body + elem.Div( + attrs.Props{ + "class": "p-4 md:p-5 space-y-4", + }, + elem.Div( + attrs.Props{ + "class": "flex justify-center items-center", + }, + elem.Img(attrs.Props{ + // "class": "rounded-t-lg object-fit object-center h-96", + "class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded", + "src": m.Icon, + "loading": "lazy", + }), + ), + elem.P( 
+ attrs.Props{ + "class": "text-base leading-relaxed text-gray-500 dark:text-gray-400", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)), + ), + elem.Hr( + attrs.Props{}, + ), + elem.P( + attrs.Props{ + "class": "text-sm font-semibold text-gray-900 dark:text-white", + }, + elem.Text("Links"), + ), + elem.Ul( + attrs.Props{}, + urls..., + ), + elem.If( + len(m.Tags) > 0, + elem.Div( + attrs.Props{}, + elem.P( + attrs.Props{ + "class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white", + }, + elem.Text("Tags"), + ), + elem.Div( + attrs.Props{ + "class": "flex flex-row flex-wrap content-center", + }, + tagsNodes..., + ), + ), + elem.Div(attrs.Props{}), + ), + ), + // Footer + elem.Div( + attrs.Props{ + "class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600", + }, + elem.Button( + attrs.Props{ + "data-modal-hide": modalName(m), + "class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700", + }, + elem.Text("Close"), + ), + ), + ), + ), + ), + ) +} + +func modelActionItems(m *gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) elem.Node { + galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) + currentlyProcessing := processTracker.Exists(galleryID) + jobID := "" + isDeletionOp := false + if currentlyProcessing { + status := galleryService.GetStatus(galleryID) + if status != nil && status.Deletion { + isDeletionOp = true + } + jobID = processTracker.Get(galleryID) + // TODO: + // case not handled, if status == nil : "Waiting" + } + + nodes := []elem.Node{ + cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"), + } + + if m.License != "" { + nodes = append(nodes, + cardSpan("License: "+m.License, "fas fa-book"), + ) + } + /* + tagsNodes := []elem.Node{} + + for _, tag := range m.Tags { + tagsNodes = append(tagsNodes, + searchableElement(tag, "fas fa-tag"), + ) + } + + + nodes = append(nodes, + elem.Div( + attrs.Props{ + "class": "flex flex-row flex-wrap content-center", + }, + tagsNodes..., + ), + ) + + for i, url := range m.URLs { + nodes = append(nodes, + buttonLink("Link #"+fmt.Sprintf("%d", i+1), url), + ) + } + */ + + progressMessage := "Installation" + if isDeletionOp { + progressMessage = "Deletion" + } + + return elem.Div( + attrs.Props{ + "class": "px-6 pt-4 pb-2", + }, + elem.P( + attrs.Props{ + "class": "mb-4 text-base", + }, + nodes..., + ), + elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + "class": "flow-root", // To order buttons left and right + }, + infoButton(m), + elem.Div( + attrs.Props{ + "class": "float-right", }, elem.If( currentlyProcessing, @@ -473,14 +335,18 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g elem.Node(elem.Div( attrs.Props{}, reInstallButton(m.ID()), - deleteButton(m.ID(), m.Name), + deleteButton(m.ID()), )), installButton(m.ID()), ), ), ), - ) - } + ), + ) +} + +func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string { + modelsElements := []elem.Node{} for _, m := range models { elems := []elem.Node{} @@ -524,7 +390,10 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g )) } - elems = append(elems, 
descriptionDiv(m), actionDiv(m)) + elems = append(elems, + modelDescription(m), + modelActionItems(m, processTracker, galleryService), + ) modelsElements = append(modelsElements, elem.Div( attrs.Props{ diff --git a/core/http/elements/p2p.go b/core/http/elements/p2p.go new file mode 100644 index 00000000..7eb10df5 --- /dev/null +++ b/core/http/elements/p2p.go @@ -0,0 +1,147 @@ +package elements + +import ( + "fmt" + + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/microcosm-cc/bluemonday" + "github.com/mudler/LocalAI/core/p2p" +) + +func renderElements(n []elem.Node) string { + render := "" + for _, r := range n { + render += r.Render() + } + return render +} + +func P2PNodeStats(nodes []p2p.NodeData) string { + /* +
+

Total Workers Detected: {{ len .Nodes }}

+ {{ $online := 0 }} + {{ range .Nodes }} + {{ if .IsOnline }} + {{ $online = add $online 1 }} + {{ end }} + {{ end }} +

Total Online Workers: {{$online}}

+
+ */ + + online := 0 + for _, n := range nodes { + if n.IsOnline() { + online++ + } + } + + class := "text-green-500" + if online == 0 { + class = "text-red-500" + } + /* + + */ + circle := elem.I(attrs.Props{ + "class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1", + }) + nodesElements := []elem.Node{ + elem.Span( + attrs.Props{ + "class": class, + }, + circle, + elem.Text(fmt.Sprintf("%d", online)), + ), + elem.Span( + attrs.Props{ + "class": "text-gray-200", + }, + elem.Text(fmt.Sprintf("/%d", len(nodes))), + ), + } + + return renderElements(nodesElements) +} + +func P2PNodeBoxes(nodes []p2p.NodeData) string { + /* +
+
+ + {{.ID}} +
+

+ Status: + + + {{ if .IsOnline }}Online{{ else }}Offline{{ end }} + +

+
+ */ + + nodesElements := []elem.Node{} + + for _, n := range nodes { + + nodesElements = append(nodesElements, + elem.Div( + attrs.Props{ + "class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left", + }, + elem.P( + attrs.Props{ + "class": "text-sm text-gray-400 mt-2 flex", + }, + elem.I( + attrs.Props{ + "class": "fas fa-desktop text-gray-400 mr-2", + }, + ), + elem.Text("Name: "), + elem.Span( + attrs.Props{ + "class": "text-gray-200 font-semibold ml-2 mr-1", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(n.ID)), + ), + elem.Text("Status: "), + elem.If( + n.IsOnline(), + elem.I( + attrs.Props{ + "class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1", + }, + ), + elem.I( + attrs.Props{ + "class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1", + }, + ), + ), + elem.If( + n.IsOnline(), + elem.Span( + attrs.Props{ + "class": "text-green-400", + }, + + elem.Text("Online"), + ), + elem.Span( + attrs.Props{ + "class": "text-red-400", + }, + elem.Text("Offline"), + ), + ), + ), + )) + } + + return renderElements(nodesElements) +} diff --git a/core/http/elements/progressbar.go b/core/http/elements/progressbar.go new file mode 100644 index 00000000..7dc340b2 --- /dev/null +++ b/core/http/elements/progressbar.go @@ -0,0 +1,89 @@ +package elements + +import ( + "github.com/chasefleming/elem-go" + "github.com/chasefleming/elem-go/attrs" + "github.com/microcosm-cc/bluemonday" +) + +func DoneProgress(galleryID, text string, showDelete bool) string { + return elem.Div( + attrs.Props{ + "id": "action-div-" + dropBadChars(galleryID), + }, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), + ), + elem.If(showDelete, deleteButton(galleryID), reInstallButton(galleryID)), + ).Render() +} + +func ErrorProgress(err, galleryName string) string { + return elem.Div( + attrs.Props{}, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text("Error "+bluemonday.StrictPolicy().Sanitize(err)), + ), + installButton(galleryName), + ).Render() +} + +func ProgressBar(progress string) string { + return elem.Div(attrs.Props{ + "class": "progress", + "role": "progressbar", + "aria-valuemin": "0", + "aria-valuemax": "100", + "aria-valuenow": "0", + "aria-labelledby": "pblabel", + }, + elem.Div(attrs.Props{ + "id": "pb", + "class": "progress-bar", + "style": "width:" + progress + "%", + }), + ).Render() +} + +func StartProgressBar(uid, progress, text string) string { + if progress == "" { + progress = "0" + } + return elem.Div( + attrs.Props{ + "hx-trigger": "done", + "hx-get": "browse/job/" + uid, + "hx-swap": "outerHTML", + "hx-target": "this", + }, + elem.H3( + attrs.Props{ + "role": "status", + "id": "pblabel", + "tabindex": "-1", + "autofocus": "", + }, + elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive + elem.Div(attrs.Props{ + "hx-get": "browse/job/progress/" + uid, + "hx-trigger": "every 600ms", + "hx-target": "this", + "hx-swap": "innerHTML", + }, + elem.Raw(ProgressBar(progress)), + ), + ), + ).Render() +} diff --git a/core/http/endpoints/elevenlabs/soundgeneration.go b/core/http/endpoints/elevenlabs/soundgeneration.go index 619544d8..345df35b 100644 --- a/core/http/endpoints/elevenlabs/soundgeneration.go +++ b/core/http/endpoints/elevenlabs/soundgeneration.go @@ -55,7 +55,7 @@ func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad } // TODO: Support uploading files? 
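The element helpers above now pass every gallery-supplied string (model names, descriptions, tags, progress labels, error text) through bluemonday's StrictPolicy before it reaches elem.Text. A small standalone illustration of what that policy does to untrusted metadata (the sample string is invented):

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	// StrictPolicy strips all HTML, so remotely-sourced gallery metadata cannot
	// inject markup or script into the HTMX views.
	policy := bluemonday.StrictPolicy()

	untrusted := `My <b>shiny</b> model <img src=x onerror=alert(1)>`
	fmt.Println(policy.Sanitize(untrusted))
}
```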
- filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg) + filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/explorer/dashboard.go b/core/http/endpoints/explorer/dashboard.go index 9c731d9a..3c896681 100644 --- a/core/http/endpoints/explorer/dashboard.go +++ b/core/http/endpoints/explorer/dashboard.go @@ -6,6 +6,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/internal" ) @@ -14,6 +15,7 @@ func Dashboard() func(*fiber.Ctx) error { summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), + "BaseURL": utils.BaseURL(c), } if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go index 04fdf031..58c3972d 100644 --- a/core/http/endpoints/jina/rerank.go +++ b/core/http/endpoints/jina/rerank.go @@ -45,13 +45,13 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a config.LoadOptionContextSize(appConfig.ContextSize), config.LoadOptionF16(appConfig.F16), ) - if err != nil { modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } else { modelFile = cfg.Model } + log.Debug().Msgf("Request for model: %s", modelFile) if input.Backend != "" { @@ -64,7 +64,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a Documents: req.Documents, } - results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg) + results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/localai/backend_monitor.go b/core/http/endpoints/localai/backend_monitor.go index fa11b5c3..a1b93ac3 100644 --- a/core/http/endpoints/localai/backend_monitor.go +++ b/core/http/endpoints/localai/backend_monitor.go @@ -28,7 +28,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct } } -// BackendMonitorEndpoint shuts down the specified backend +// BackendShutdownEndpoint shuts down the specified backend // @Summary Backend monitor endpoint // @Param request body schema.BackendMonitorRequest true "Backend statistics request" // @Router /backend/shutdown [post] diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 23c5d4b8..9dc99f5d 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -9,6 +9,7 @@ import ( "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" "github.com/rs/zerolog/log" @@ -82,7 +83,8 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe Galleries: mgs.galleries, ConfigURL: input.ConfigURL, } - return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + + return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) } } @@ -105,7 
+107,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib return err } - return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()}) + return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) } } @@ -115,19 +117,25 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib // @Router /models/available [get] func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries) models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath) if err != nil { return err } - log.Debug().Msgf("Models found from galleries: %+v", models) - for _, m := range models { - log.Debug().Msgf("Model found from galleries: %+v", m) + + log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries)) + + m := []gallery.Metadata{} + + for _, mm := range models { + m = append(m, mm.Metadata) } - dat, err := json.Marshal(models) + + log.Debug().Msgf("Models %#v", m) + + dat, err := json.Marshal(m) if err != nil { - return err + return fmt.Errorf("could not marshal models: %w", err) } return c.Send(dat) } diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go new file mode 100644 index 00000000..e0e6943f --- /dev/null +++ b/core/http/endpoints/localai/get_token_metrics.go @@ -0,0 +1,60 @@ +package localai + +import ( + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + fiberContext "github.com/mudler/LocalAI/core/http/ctx" + "github.com/mudler/LocalAI/core/schema" + "github.com/rs/zerolog/log" + + "github.com/mudler/LocalAI/pkg/model" +) + +// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID +// +// @Summary Get TokenMetrics for Active Slot. 
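+// The metrics are read from the backend that serves the requested model and returned as a JSON object.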
+// @Accept json
+// @Produce json
+// @Success 200 {object} map[string]interface{} "Response"
+// @Router /v1/tokenMetrics [get]
+// @Router /tokenMetrics [get]
+func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+
+		input := new(schema.TokenMetricsRequest)
+
+		// Get input data from the request body
+		if err := c.BodyParser(input); err != nil {
+			return err
+		}
+
+		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		}
+
+		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+			config.LoadOptionDebug(appConfig.Debug),
+			config.LoadOptionThreads(appConfig.Threads),
+			config.LoadOptionContextSize(appConfig.ContextSize),
+			config.LoadOptionF16(appConfig.F16),
+		)
+
+		if err != nil {
+			log.Err(err)
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		} else {
+			modelFile = cfg.Model
+		}
+		log.Debug().Msgf("Token Metrics for model: %s", modelFile)
+
+		response, err := backend.TokenMetrics(modelFile, ml, appConfig, *cfg)
+		if err != nil {
+			return err
+		}
+		return c.JSON(response)
+	}
+}
diff --git a/core/http/endpoints/localai/system.go b/core/http/endpoints/localai/system.go
index 11704933..92d80a3a 100644
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@@ -17,12 +17,19 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
 	if err != nil {
 		return err
 	}
+	loadedModels := ml.ListModels()
 	for b := range appConfig.ExternalGRPCBackends {
 		availableBackends = append(availableBackends, b)
 	}
+
+	sysmodels := []schema.SysInfoModel{}
+	for _, m := range loadedModels {
+		sysmodels = append(sysmodels, schema.SysInfoModel{ID: m.ID})
+	}
 	return c.JSON(
 		schema.SystemInformationResponse{
 			Backends: availableBackends,
+			Models: sysmodels,
 		},
 	)
 }
diff --git a/core/http/endpoints/localai/tokenize.go b/core/http/endpoints/localai/tokenize.go
new file mode 100644
index 00000000..faa8a0a4
--- /dev/null
+++ b/core/http/endpoints/localai/tokenize.go
@@ -0,0 +1,57 @@
+package localai
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
+)
+
+// TokenizeEndpoint exposes a REST API to tokenize the content
+// @Summary Tokenize the input.
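+// A minimal request is a JSON body with "model" and "content" fields, e.g. (illustrative values,
+// assuming the default port and an installed model named "my-model"):
+//
+//	curl http://localhost:8080/v1/tokenize -H "Content-Type: application/json" \
+//	  -d '{"model": "my-model", "content": "Hello, world"}'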
+// @Param request body schema.TokenizeRequest true "Request" +// @Success 200 {object} schema.TokenizeResponse "Response" +// @Router /v1/tokenize [post] +func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + + input := new(schema.TokenizeRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + log.Err(err) + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model + } + log.Debug().Msgf("Request for model: %s", modelFile) + + tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig) + if err != nil { + return err + } + + return c.JSON(tokenResponse) + } +} diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index ca3f58bd..9116f9fa 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -9,19 +9,21 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/schema" "github.com/rs/zerolog/log" + + "github.com/mudler/LocalAI/pkg/utils" ) // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech -// @Summary Generates audio from the input text. -// @Accept json -// @Produce audio/x-wav -// @Param request body schema.TTSRequest true "query params" -// @Success 200 {string} binary "generated audio/wav file" -// @Router /v1/audio/speech [post] -// @Router /tts [post] +// +// @Summary Generates audio from the input text. 
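+// The generated file is passed through utils.AudioConvert to honour the requested output format
+// (input.Format) before it is returned to the caller (see the handler body below).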
+// @Accept json +// @Produce audio/x-wav +// @Param request body schema.TTSRequest true "query params" +// @Success 200 {string} binary "generated audio/wav file" +// @Router /v1/audio/speech [post] +// @Router /tts [post] func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - input := new(schema.TTSRequest) // Get input data from the request body @@ -67,6 +69,13 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi if err != nil { return err } + + // Convert generated file to target format + filePath, err = utils.AudioConvert(filePath, input.Format) + if err != nil { + return err + } + return c.Download(filePath) } } diff --git a/core/http/endpoints/localai/vad.go b/core/http/endpoints/localai/vad.go new file mode 100644 index 00000000..2ed6125c --- /dev/null +++ b/core/http/endpoints/localai/vad.go @@ -0,0 +1,67 @@ +package localai + +import ( + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + fiberContext "github.com/mudler/LocalAI/core/http/ctx" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/pkg/grpc/proto" + "github.com/mudler/LocalAI/pkg/model" + "github.com/rs/zerolog/log" +) + +// VADEndpoint is Voice-Activation-Detection endpoint +// @Summary Detect voice fragments in an audio stream +// @Accept json +// @Param request body schema.VADRequest true "query params" +// @Success 200 {object} proto.VADResponse "Response" +// @Router /vad [post] +func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + input := new(schema.VADRequest) + + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return err + } + + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false) + if err != nil { + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } + + cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath, + config.LoadOptionDebug(appConfig.Debug), + config.LoadOptionThreads(appConfig.Threads), + config.LoadOptionContextSize(appConfig.ContextSize), + config.LoadOptionF16(appConfig.F16), + ) + + if err != nil { + log.Err(err) + modelFile = input.Model + log.Warn().Msgf("Model not found in context: %s", input.Model) + } else { + modelFile = cfg.Model + } + log.Debug().Msgf("Request for model: %s", modelFile) + + opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile)) + + vadModel, err := ml.Load(opts...) 
+ if err != nil { + return err + } + req := proto.VADRequest{ + Audio: input.Audio, + } + resp, err := vadModel.VAD(c.Context(), &req) + if err != nil { + return err + } + + return c.JSON(resp) + } +} diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 396c4084..57cf8809 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -4,6 +4,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" @@ -13,15 +14,10 @@ import ( func WelcomeEndpoint(appConfig *config.ApplicationConfig, cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, _ := services.ListModels(cl, ml, "", true) backendConfigs := cl.GetAllBackendConfigs() - galleryConfigs := map[string]*gallery.Config{} - modelsWithBackendConfig := map[string]interface{}{} for _, m := range backendConfigs { - modelsWithBackendConfig[m.Name] = nil - cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) if err != nil { continue @@ -29,20 +25,15 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, galleryConfigs[m.Name] = cfg } + modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) + // Get model statuses to display in the UI the operation in progress processingModels, taskTypes := modelStatus() - modelsWithoutConfig := []string{} - - for _, m := range models { - if _, ok := modelsWithBackendConfig[m]; !ok { - modelsWithoutConfig = append(modelsWithoutConfig, m) - } - } - summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), + "BaseURL": utils.BaseURL(c), "Models": modelsWithoutConfig, "ModelsConfig": backendConfigs, "GalleryConfig": galleryConfigs, diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index ff218730..1d83066a 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -10,6 +10,7 @@ import ( "time" "github.com/gofiber/fiber/v2" + "github.com/microcosm-cc/bluemonday" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" @@ -83,7 +84,7 @@ func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad if !modelExists(cl, ml, request.Model) { log.Warn().Msgf("Model: %s was not found in list of models.", request.Model) - return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found") + return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Model %q not found", request.Model))) } if request.Tools == nil { @@ -147,7 +148,7 @@ func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoade // Convert string limit to integer limit, err := strconv.Atoi(limitQuery) if err != nil { - return c.Status(http.StatusBadRequest).SendString(fmt.Sprintf("Invalid limit query value: %s", limitQuery)) + return c.Status(http.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Invalid limit query value: %s", limitQuery))) } // Sort assistants @@ -225,7 +226,7 @@ func filterAssistantsAfterID(assistants []Assistant, 
id string) []Assistant { func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) { found = false - models, err := services.ListModels(cl, ml, "", true) + models, err := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if err != nil { return } @@ -288,7 +289,7 @@ func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))) } } @@ -337,11 +338,11 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find file_id: %s", request.FileID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find file_id: %s", request.FileID))) } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find %q", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find %q", assistantID))) } } @@ -442,7 +443,7 @@ func ModifyAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad return c.Status(fiber.StatusOK).JSON(newAssistant) } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))) } } @@ -513,9 +514,9 @@ func GetAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoa if assistantFile.ID == fileId { return c.Status(fiber.StatusOK).JSON(assistantFile) } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId))) } } - return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID)) + return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID))) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 8144bdcd..3b8d3056 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -14,6 +14,8 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/templates" + model "github.com/mudler/LocalAI/pkg/model" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" @@ -24,11 +26,11 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] -func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { +func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { var id, textContentToReturn string var created 
int - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { initialMessage := schema.OpenAIResponse{ ID: id, Created: created, @@ -38,18 +40,24 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } responses <- initialMessage - ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } + resp := schema.OpenAIResponse{ ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}}, Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, + Usage: usage, } responses <- resp @@ -57,7 +65,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup }) close(responses) } - processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { + processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { result := "" _, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { result += s @@ -88,6 +96,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup log.Error().Err(err).Msg("error handling question") return } + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := schema.OpenAIResponse{ ID: id, @@ -95,11 +112,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}}, Object: "chat.completion.chunk", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, + Usage: usage, } responses <- resp @@ -161,6 +174,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup textContentToReturn = "" id = uuid.New().String() created = int(time.Now().Unix()) + // Set CorrelationID + correlationID := c.Get("X-Correlation-ID") + if len(strings.TrimSpace(correlationID)) == 0 { + correlationID = id + } + c.Set("X-Correlation-ID", correlationID) + + // Opt-in extra usage flag + extraUsage := c.Get("Extra-Usage", "") != "" modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { @@ -288,148 +310,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // If we are using the tokenizer template, we don't need to process the messages // unless we are processing functions if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn { - suppressConfigSystemPrompt := false - mess := []string{} - for messageIndex, i := range input.Messages { - var content string - role := i.Role - - // if function call, we might want to customize the role so we can display better that the "assistant called a json action" - // if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request - if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" { - roleFn := "assistant_function_call" - r := config.Roles[roleFn] - if r != "" { - role = roleFn - } - } - r := config.Roles[role] - contentExists := i.Content != nil && i.StringContent != "" - - fcall := i.FunctionCall - if len(i.ToolCalls) > 0 { - fcall = i.ToolCalls - } - - // First attempt to populate content via a chat message specific template - if config.TemplateConfig.ChatMessage != "" { - chatMessageData := model.ChatMessageTemplateData{ - SystemPrompt: config.SystemPrompt, - Role: r, - RoleName: role, - Content: i.StringContent, - FunctionCall: fcall, - FunctionName: i.Name, - LastMessage: messageIndex == (len(input.Messages) - 1), - Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)), - MessageIndex: messageIndex, - } - templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData) - if err != nil { - log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping") - } else { - if templatedChatMessage == "" { - log.Warn().Msgf("template \"%s\" produced blank output for %+v. 
Skipping!", config.TemplateConfig.ChatMessage, chatMessageData) - continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf - } - log.Debug().Msgf("templated message for chat: %s", templatedChatMessage) - content = templatedChatMessage - } - } - - marshalAnyRole := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + fmt.Sprint(r, " ", string(j)) - } else { - content = fmt.Sprint(r, " ", string(j)) - } - } - } - marshalAny := func(f any) { - j, err := json.Marshal(f) - if err == nil { - if contentExists { - content += "\n" + string(j) - } else { - content = string(j) - } - } - } - // If this model doesn't have such a template, or if that template fails to return a value, template at the message level. - if content == "" { - if r != "" { - if contentExists { - content = fmt.Sprint(r, i.StringContent) - } - - if i.FunctionCall != nil { - marshalAnyRole(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAnyRole(i.ToolCalls) - } - } else { - if contentExists { - content = fmt.Sprint(i.StringContent) - } - if i.FunctionCall != nil { - marshalAny(i.FunctionCall) - } - if i.ToolCalls != nil { - marshalAny(i.ToolCalls) - } - } - // Special Handling: System. We care if it was printed at all, not the r branch, so check seperately - if contentExists && role == "system" { - suppressConfigSystemPrompt = true - } - } - - mess = append(mess, content) - } - - joinCharacter := "\n" - if config.TemplateConfig.JoinChatMessagesByCharacter != nil { - joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter - } - - predInput = strings.Join(mess, joinCharacter) - log.Debug().Msgf("Prompt (before templating): %s", predInput) - - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Chat != "" && !shouldUseFn { - templateFile = config.TemplateConfig.Chat - } - - if config.TemplateConfig.Functions != "" && shouldUseFn { - templateFile = config.TemplateConfig.Functions - } - - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - SuppressSystemPrompt: suppressConfigSystemPrompt, - Input: predInput, - Functions: funcs, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } else { - log.Debug().Msgf("Template failed loading: %s", err.Error()) - } - } + predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn) log.Debug().Msgf("Prompt (after templating): %s", predInput) - if shouldUseFn && config.Grammar != "" { + if config.Grammar != "" { log.Debug().Msgf("Grammar: %+v", config.Grammar) } } @@ -444,13 +328,14 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + c.Set("X-Correlation-ID", id) responses := make(chan schema.OpenAIResponse) if !shouldUseFn { - go process(predInput, input, config, ml, responses) + go process(predInput, input, config, ml, responses, extraUsage) } else { - go processTools(noActionName, predInput, input, config, ml, responses) + go processTools(noActionName, predInput, input, config, ml, responses, extraUsage) } 
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -578,6 +463,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if err != nil { return err } + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := &schema.OpenAIResponse{ ID: id, @@ -585,11 +479,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "chat.completion", - Usage: schema.OpenAIUsage{ - PromptTokens: tokenUsage.Prompt, - CompletionTokens: tokenUsage.Completion, - TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, - }, + Usage: usage, } respData, _ := json.Marshal(resp) log.Debug().Msgf("Response: %s", respData) @@ -640,8 +530,16 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m for _, m := range input.Messages { images = append(images, m.StringImages...) } + videos := []string{} + for _, m := range input.Messages { + videos = append(videos, m.StringVideos...) + } + audios := []string{} + for _, m := range input.Messages { + audios = append(audios, m.StringAudios...) + } - predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil) + predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil) if err != nil { log.Error().Err(err).Msg("model inference failed") return "", err diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index b087cc5f..a353a0a1 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -16,6 +16,7 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" model "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -25,12 +26,21 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/completions [post] -func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { id := uuid.New().String() created := int(time.Now().Unix()) - process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) { - ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { + process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) { + ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { + usage := schema.OpenAIUsage{ + PromptTokens: tokenUsage.Prompt, + CompletionTokens: tokenUsage.Completion, + 
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing + } resp := schema.OpenAIResponse{ ID: id, Created: created, @@ -42,11 +52,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a }, }, Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: usage.Prompt, - CompletionTokens: usage.Completion, - TotalTokens: usage.Prompt + usage.Completion, - }, + Usage: usage, } log.Debug().Msgf("Sending goroutine: %s", s) @@ -57,6 +63,12 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a } return func(c *fiber.Ctx) error { + // Add Correlation + c.Set("X-Correlation-ID", id) + + // Opt-in extra usage flag + extraUsage := c.Get("Extra-Usage", "") != "" + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -92,17 +104,6 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a c.Set("Transfer-Encoding", "chunked") } - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Completion != "" { - templateFile = config.TemplateConfig.Completion - } - if input.Stream { if len(config.PromptStrings) > 1 { return errors.New("cannot handle more than 1 `PromptStrings` when Streaming") @@ -110,20 +111,18 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a predInput := config.PromptStrings[0] - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - Input: predInput, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{ + Input: predInput, + SystemPrompt: config.SystemPrompt, + }) + if err == nil { + predInput = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", predInput) } responses := make(chan schema.OpenAIResponse) - go process(predInput, input, config, ml, responses) + go process(predInput, input, config, ml, responses, extraUsage) c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { @@ -163,16 +162,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a totalTokenUsage := backend.TokenUsage{} for k, i := range config.PromptStrings { - if templateFile != "" { - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - SystemPrompt: config.SystemPrompt, - Input: i, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{ + SystemPrompt: config.SystemPrompt, + Input: i, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) } r, tokenUsage, err 
:= ComputeChoices( @@ -183,11 +179,20 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a return err } - totalTokenUsage.Prompt += tokenUsage.Prompt - totalTokenUsage.Completion += tokenUsage.Completion + totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration + totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing result = append(result, r...) } + usage := schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing + } resp := &schema.OpenAIResponse{ ID: id, @@ -195,11 +200,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "text_completion", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, + Usage: usage, } jsonResult, _ := json.Marshal(resp) diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index 12fb4035..28a3597c 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" "github.com/mudler/LocalAI/core/schema" model "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/rs/zerolog/log" ) @@ -21,8 +22,12 @@ import ( // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/edits [post] -func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { +func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + // Opt-in extra usage flag + extraUsage := c.Get("Extra-Usage", "") != "" + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) @@ -35,31 +40,18 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf log.Debug().Msgf("Parameter Config: %+v", config) - templateFile := "" - - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) { - templateFile = config.Model - } - - if config.TemplateConfig.Edit != "" { - templateFile = config.TemplateConfig.Edit - } - var result []schema.Choice totalTokenUsage := backend.TokenUsage{} for _, i := range config.InputStrings { - if templateFile != "" { - templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{ - Input: i, - Instruction: input.Instruction, - SystemPrompt: config.SystemPrompt, - }) - if err == nil { - i = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", i) - } + templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{ + Input: i, + Instruction: input.Instruction, + SystemPrompt: 
config.SystemPrompt, + }) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) } r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) { @@ -72,8 +64,20 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf totalTokenUsage.Prompt += tokenUsage.Prompt totalTokenUsage.Completion += tokenUsage.Completion + totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration + totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing + result = append(result, r...) } + usage := schema.OpenAIUsage{ + PromptTokens: totalTokenUsage.Prompt, + CompletionTokens: totalTokenUsage.Completion, + TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, + } + if extraUsage { + usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration + usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing + } id := uuid.New().String() created := int(time.Now().Unix()) @@ -83,11 +87,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: result, Object: "edit", - Usage: schema.OpenAIUsage{ - PromptTokens: totalTokenUsage.Prompt, - CompletionTokens: totalTokenUsage.Completion, - TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion, - }, + Usage: usage, } jsonResult, _ := json.Marshal(resp) diff --git a/core/http/endpoints/openai/files.go b/core/http/endpoints/openai/files.go index 903484b4..bc392e73 100644 --- a/core/http/endpoints/openai/files.go +++ b/core/http/endpoints/openai/files.go @@ -8,6 +8,7 @@ import ( "sync/atomic" "time" + "github.com/microcosm-cc/bluemonday" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" @@ -49,7 +50,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli err = c.SaveFile(file, savePath) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + bluemonday.StrictPolicy().Sanitize(err.Error())) } f := schema.File{ @@ -121,7 +122,7 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } return c.JSON(file) @@ -143,14 +144,14 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } err = os.Remove(filepath.Join(appConfig.UploadDir, file.Filename)) if err != nil { // If the file doesn't exist then we should just continue to remove it if !errors.Is(err, os.ErrNotExist) { - return c.Status(fiber.StatusInternalServerError).SendString(fmt.Sprintf("Unable to delete file: %s, %v", file.Filename, err)) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to delete file: %s, %v", file.Filename, err))) } } @@ -180,12 +181,12 @@ 
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config. return func(c *fiber.Ctx) error { file, err := getFileFromRequest(c) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } fileContents, err := os.ReadFile(filepath.Join(appConfig.UploadDir, file.Filename)) if err != nil { - return c.Status(fiber.StatusInternalServerError).SendString(err.Error()) + return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } return c.Send(fileContents) diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 6c76ba84..bd3f0987 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon } if m == "" { - m = model.StableDiffusionBackend + m = "stablediffusion" } log.Debug().Msgf("Loading model: %+v", m) @@ -129,11 +129,14 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon switch config.Backend { case "stablediffusion": - config.Backend = model.StableDiffusionBackend - case "tinydream": - config.Backend = model.TinyDreamBackend + config.Backend = model.StableDiffusionGGMLBackend case "": - config.Backend = model.StableDiffusionBackend + config.Backend = model.StableDiffusionGGMLBackend + } + + if !strings.Contains(input.Size, "x") { + input.Size = "512x512" + log.Warn().Msgf("Invalid size, using default 512x512") } sizeParts := strings.Split(input.Size, "x") diff --git a/core/http/endpoints/openai/inference.go b/core/http/endpoints/openai/inference.go index 4950ce20..f59e3b60 100644 --- a/core/http/endpoints/openai/inference.go +++ b/core/http/endpoints/openai/inference.go @@ -27,9 +27,17 @@ func ComputeChoices( for _, m := range req.Messages { images = append(images, m.StringImages...) } + videos := []string{} + for _, m := range req.Messages { + videos = append(videos, m.StringVideos...) + } + audios := []string{} + for _, m := range req.Messages { + audios = append(audios, m.StringAudios...) + } // get the model function to call for the result - predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback) + predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback) if err != nil { return result, backend.TokenUsage{}, err } @@ -44,6 +52,8 @@ func ComputeChoices( tokenUsage.Prompt += prediction.Usage.Prompt tokenUsage.Completion += prediction.Usage.Completion + tokenUsage.TimingPromptProcessing += prediction.Usage.TimingPromptProcessing + tokenUsage.TimingTokenGeneration += prediction.Usage.TimingTokenGeneration finetunedResponse := backend.Finetune(*config, predInput, prediction.Response) cb(finetunedResponse, &result) diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index d446b100..9d21f8fe 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -12,38 +12,38 @@ import ( // @Summary List and describe the various models available in the API. 
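+// Example (illustrative): GET /v1/models?filter=llama&excludeConfigured=false also returns loose
+// model files that are not referenced by any configuration file.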
// @Success 200 {object} schema.ModelsDataResponse "Response" // @Router /v1/models [get] -func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { // If blank, no filter is applied. filter := c.Query("filter") // By default, exclude any loose files that are already referenced by a configuration file. - excludeConfigured := c.QueryBool("excludeConfigured", true) + var policy services.LooseFilePolicy + if c.QueryBool("excludeConfigured", true) { + policy = services.SKIP_IF_CONFIGURED + } else { + policy = services.ALWAYS_INCLUDE // This replicates current behavior. TODO: give more options to the user? + } - dataModels, err := modelList(bcl, ml, filter, excludeConfigured) + filterFn, err := config.BuildNameFilterFn(filter) if err != nil { return err } + + modelNames, err := services.ListModels(bcl, ml, filterFn, policy) + if err != nil { + return err + } + + // Map from a slice of names to a slice of OpenAIModel response objects + dataModels := []schema.OpenAIModel{} + for _, m := range modelNames { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } + return c.JSON(schema.ModelsDataResponse{ Object: "list", Data: dataModels, }) } } - -func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { - - models, err := services.ListModels(bcl, ml, filter, excludeConfigured) - if err != nil { - return nil, err - } - - dataModels := []schema.OpenAIModel{} - - // Then iterate through the loose files: - for _, m := range models { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) - } - - return dataModels, nil -} diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index a99ebea2..4eaeec24 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -4,17 +4,25 @@ import ( "context" "encoding/json" "fmt" + "strconv" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" fiberContext "github.com/mudler/LocalAI/core/http/ctx" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/templates" "github.com/mudler/LocalAI/pkg/utils" "github.com/rs/zerolog/log" ) +type correlationIDKeyType string + +// CorrelationIDKey to track request across process boundary +const CorrelationIDKey correlationIDKeyType = "correlationID" + func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { input := new(schema.OpenAIRequest) @@ -24,9 +32,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo } received, _ := json.Marshal(input) + // Extract or generate the correlation ID + correlationID := c.Get("X-Correlation-ID", uuid.New().String()) ctx, cancel := context.WithCancel(o.Context) - input.Context = ctx + // Add the correlation ID to the new context + ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID) + + input.Context = ctxWithCorrelationID input.Cancel = cancel log.Debug().Msgf("Request received: %s", string(received)) @@ -135,8 +148,12 @@ func updateRequestConfig(config 
*config.BackendConfig, input *schema.OpenAIReque } // Decode each request's message content - index := 0 + imgIndex, vidIndex, audioIndex := 0, 0, 0 for i, m := range input.Messages { + nrOfImgsInMessage := 0 + nrOfVideosInMessage := 0 + nrOfAudiosInMessage := 0 + switch content := m.Content.(type) { case string: input.Messages[i].StringContent = content @@ -144,22 +161,59 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque dat, _ := json.Marshal(content) c := []schema.Content{} json.Unmarshal(dat, &c) + + textContent := "" + // we will template this at the end + + CONTENT: for _, pp := range c { - if pp.Type == "text" { - input.Messages[i].StringContent = pp.Text - } else if pp.Type == "image_url" { - // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64: - base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL) - if err == nil { - input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff - // set a placeholder for each image - input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent - index++ - } else { - log.Error().Msgf("Failed encoding image: %s", err) + switch pp.Type { + case "text": + textContent += pp.Text + //input.Messages[i].StringContent = pp.Text + case "video", "video_url": + // Decode content as base64 either if it's an URL or base64 text + base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL) + if err != nil { + log.Error().Msgf("Failed encoding video: %s", err) + continue CONTENT } + input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff + vidIndex++ + nrOfVideosInMessage++ + case "audio_url", "audio": + // Decode content as base64 either if it's an URL or base64 text + base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL) + if err != nil { + log.Error().Msgf("Failed encoding image: %s", err) + continue CONTENT + } + input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff + audioIndex++ + nrOfAudiosInMessage++ + case "image_url", "image": + // Decode content as base64 either if it's an URL or base64 text + base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL) + if err != nil { + log.Error().Msgf("Failed encoding image: %s", err) + continue CONTENT + } + + input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff + + imgIndex++ + nrOfImgsInMessage++ } } + + input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{ + TotalImages: imgIndex, + TotalVideos: vidIndex, + TotalAudios: audioIndex, + ImagesInMessage: nrOfImgsInMessage, + VideosInMessage: nrOfVideosInMessage, + AudiosInMessage: nrOfAudiosInMessage, + }, textContent) } } @@ -243,6 +297,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque } } } + + // If a quality was defined as number, convert it to step + if input.Quality != "" { + q, err := strconv.Atoi(input.Quality) + if err == nil { + config.Step = q + } + } } func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) { @@ -251,7 +313,6 @@ func 
mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c config.LoadOptionThreads(threads), config.LoadOptionContextSize(ctx), config.LoadOptionF16(f16), - config.ModelPath(loader.ModelPath), ) // Set the parameters for the language model prediction diff --git a/core/http/explorer.go b/core/http/explorer.go index bdcb93b1..36609add 100644 --- a/core/http/explorer.go +++ b/core/http/explorer.go @@ -7,6 +7,7 @@ import ( "github.com/gofiber/fiber/v2/middleware/favicon" "github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/http/routes" ) @@ -22,6 +23,7 @@ func Explorer(db *explorer.Database) *fiber.App { app := fiber.New(fiberCfg) + app.Use(middleware.StripPathPrefix()) routes.RegisterExplorerRoutes(app, db) httpFS := http.FS(embedDirStatic) diff --git a/core/http/middleware/auth.go b/core/http/middleware/auth.go index bc8bcf80..23141d4c 100644 --- a/core/http/middleware/auth.go +++ b/core/http/middleware/auth.go @@ -1,93 +1,98 @@ -package middleware - -import ( - "crypto/subtle" - "errors" - - "github.com/dave-gray101/v2keyauth" - "github.com/gofiber/fiber/v2" - "github.com/gofiber/fiber/v2/middleware/keyauth" - "github.com/mudler/LocalAI/core/config" -) - -// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware -// Currently this requires an upstream patch - and feature patches are no longer accepted to v2 -// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate. - -func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) { - customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme) - if err != nil { - return nil, err - } - - return &v2keyauth.Config{ - CustomKeyLookup: customLookup, - Next: getApiKeyRequiredFilterFunction(applicationConfig), - Validator: getApiKeyValidationFunction(applicationConfig), - ErrorHandler: getApiKeyErrorHandler(applicationConfig), - AuthScheme: "Bearer", - }, nil -} - -func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler { - return func(ctx *fiber.Ctx, err error) error { - if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) { - if len(applicationConfig.ApiKeys) == 0 { - return ctx.Next() // if no keys are set up, any error we get here is not an error. 
- } - if applicationConfig.OpaqueErrors { - return ctx.SendStatus(403) - } - } - if applicationConfig.OpaqueErrors { - return ctx.SendStatus(500) - } - return err - } -} - -func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) { - - if applicationConfig.UseSubtleKeyComparison { - return func(ctx *fiber.Ctx, apiKey string) (bool, error) { - if len(applicationConfig.ApiKeys) == 0 { - return true, nil // If no keys are setup, accept everything - } - for _, validKey := range applicationConfig.ApiKeys { - if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 { - return true, nil - } - } - return false, v2keyauth.ErrMissingOrMalformedAPIKey - } - } - - return func(ctx *fiber.Ctx, apiKey string) (bool, error) { - if len(applicationConfig.ApiKeys) == 0 { - return true, nil // If no keys are setup, accept everything - } - for _, validKey := range applicationConfig.ApiKeys { - if apiKey == validKey { - return true, nil - } - } - return false, v2keyauth.ErrMissingOrMalformedAPIKey - } -} - -func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool { - if applicationConfig.DisableApiKeyRequirementForHttpGet { - return func(c *fiber.Ctx) bool { - if c.Method() != "GET" { - return false - } - for _, rx := range applicationConfig.HttpGetExemptedEndpoints { - if rx.MatchString(c.Path()) { - return true - } - } - return false - } - } - return func(c *fiber.Ctx) bool { return false } -} \ No newline at end of file +package middleware + +import ( + "crypto/subtle" + "errors" + + "github.com/dave-gray101/v2keyauth" + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/middleware/keyauth" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/http/utils" +) + +// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware +// Currently this requires an upstream patch - and feature patches are no longer accepted to v2 +// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate. + +func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) { + customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key", "cookie:token"}, keyauth.ConfigDefault.AuthScheme) + if err != nil { + return nil, err + } + + return &v2keyauth.Config{ + CustomKeyLookup: customLookup, + Next: getApiKeyRequiredFilterFunction(applicationConfig), + Validator: getApiKeyValidationFunction(applicationConfig), + ErrorHandler: getApiKeyErrorHandler(applicationConfig), + AuthScheme: "Bearer", + }, nil +} + +func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler { + return func(ctx *fiber.Ctx, err error) error { + if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) { + if len(applicationConfig.ApiKeys) == 0 { + return ctx.Next() // if no keys are set up, any error we get here is not an error. 
+ } + ctx.Set("WWW-Authenticate", "Bearer") + if applicationConfig.OpaqueErrors { + return ctx.SendStatus(401) + } + return ctx.Status(401).Render("views/login", fiber.Map{ + "BaseURL": utils.BaseURL(ctx), + }) + } + if applicationConfig.OpaqueErrors { + return ctx.SendStatus(500) + } + return err + } +} + +func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) { + + if applicationConfig.UseSubtleKeyComparison { + return func(ctx *fiber.Ctx, apiKey string) (bool, error) { + if len(applicationConfig.ApiKeys) == 0 { + return true, nil // If no keys are setup, accept everything + } + for _, validKey := range applicationConfig.ApiKeys { + if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 { + return true, nil + } + } + return false, v2keyauth.ErrMissingOrMalformedAPIKey + } + } + + return func(ctx *fiber.Ctx, apiKey string) (bool, error) { + if len(applicationConfig.ApiKeys) == 0 { + return true, nil // If no keys are setup, accept everything + } + for _, validKey := range applicationConfig.ApiKeys { + if apiKey == validKey { + return true, nil + } + } + return false, v2keyauth.ErrMissingOrMalformedAPIKey + } +} + +func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool { + if applicationConfig.DisableApiKeyRequirementForHttpGet { + return func(c *fiber.Ctx) bool { + if c.Method() != "GET" { + return false + } + for _, rx := range applicationConfig.HttpGetExemptedEndpoints { + if rx.MatchString(c.Path()) { + return true + } + } + return false + } + } + return func(c *fiber.Ctx) bool { return false } +} diff --git a/core/http/middleware/strippathprefix.go b/core/http/middleware/strippathprefix.go new file mode 100644 index 00000000..5c45d55d --- /dev/null +++ b/core/http/middleware/strippathprefix.go @@ -0,0 +1,36 @@ +package middleware + +import ( + "strings" + + "github.com/gofiber/fiber/v2" +) + +// StripPathPrefix returns a middleware that strips a path prefix from the request path. +// The path prefix is obtained from the X-Forwarded-Prefix HTTP request header. 
+func StripPathPrefix() fiber.Handler { + return func(c *fiber.Ctx) error { + for _, prefix := range c.GetReqHeaders()["X-Forwarded-Prefix"] { + if prefix != "" { + path := c.Path() + pos := len(prefix) + + if prefix[pos-1] == '/' { + pos-- + } else { + prefix += "/" + } + + if strings.HasPrefix(path, prefix) { + c.Path(path[pos:]) + break + } else if prefix[:pos] == path { + c.Redirect(prefix) + return nil + } + } + } + + return c.Next() + } +} diff --git a/core/http/middleware/strippathprefix_test.go b/core/http/middleware/strippathprefix_test.go new file mode 100644 index 00000000..529f815f --- /dev/null +++ b/core/http/middleware/strippathprefix_test.go @@ -0,0 +1,121 @@ +package middleware + +import ( + "net/http/httptest" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/require" +) + +func TestStripPathPrefix(t *testing.T) { + var actualPath string + + app := fiber.New() + + app.Use(StripPathPrefix()) + + app.Get("/hello/world", func(c *fiber.Ctx) error { + actualPath = c.Path() + return nil + }) + + app.Get("/", func(c *fiber.Ctx) error { + actualPath = c.Path() + return nil + }) + + for _, tc := range []struct { + name string + path string + prefixHeader []string + expectStatus int + expectPath string + }{ + { + name: "without prefix and header", + path: "/hello/world", + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "without prefix and headers on root path", + path: "/", + expectStatus: 200, + expectPath: "/", + }, + { + name: "without prefix but header", + path: "/hello/world", + prefixHeader: []string{"/otherprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix but non-matching header", + path: "/prefix/hello/world", + prefixHeader: []string{"/otherprefix/"}, + expectStatus: 404, + }, + { + name: "with prefix and matching header", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and 1st header matching", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix/", "/otherprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and 2nd header matching", + path: "/myprefix/hello/world", + prefixHeader: []string{"/otherprefix/", "/myprefix/"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and header not ending with slash", + path: "/myprefix/hello/world", + prefixHeader: []string{"/myprefix"}, + expectStatus: 200, + expectPath: "/hello/world", + }, + { + name: "with prefix and non-matching header not ending with slash", + path: "/myprefix-suffix/hello/world", + prefixHeader: []string{"/myprefix"}, + expectStatus: 404, + }, + { + name: "redirect when prefix does not end with a slash", + path: "/myprefix", + prefixHeader: []string{"/myprefix"}, + expectStatus: 302, + expectPath: "/myprefix/", + }, + } { + t.Run(tc.name, func(t *testing.T) { + actualPath = "" + req := httptest.NewRequest("GET", tc.path, nil) + if tc.prefixHeader != nil { + req.Header["X-Forwarded-Prefix"] = tc.prefixHeader + } + + resp, err := app.Test(req, -1) + + require.NoError(t, err) + require.Equal(t, tc.expectStatus, resp.StatusCode, "response status code") + + if tc.expectStatus == 200 { + require.Equal(t, tc.expectPath, actualPath, "rewritten path") + } else if tc.expectStatus == 302 { + require.Equal(t, tc.expectPath, resp.Header.Get("Location"), "redirect location") + } + }) + } +} diff --git a/core/http/render.go b/core/http/render.go 
index 205f7ca3..2f889f57 100644 --- a/core/http/render.go +++ b/core/http/render.go @@ -10,6 +10,7 @@ import ( "github.com/gofiber/fiber/v2" fiberhtml "github.com/gofiber/template/html/v2" "github.com/microcosm-cc/bluemonday" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/schema" "github.com/russross/blackfriday" ) @@ -26,7 +27,9 @@ func notFoundHandler(c *fiber.Ctx) error { }) } else { // The client expects an HTML response - return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{}) + return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{ + "BaseURL": utils.BaseURL(c), + }) } } diff --git a/core/http/routes/health.go b/core/http/routes/health.go new file mode 100644 index 00000000..f5a08e9b --- /dev/null +++ b/core/http/routes/health.go @@ -0,0 +1,13 @@ +package routes + +import "github.com/gofiber/fiber/v2" + +func HealthRoutes(app *fiber.App) { + // Service health checks + ok := func(c *fiber.Ctx) error { + return c.SendStatus(200) + } + + app.Get("/healthz", ok) + app.Get("/readyz", ok) +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 29fef378..2ea9896a 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -11,64 +11,62 @@ import ( "github.com/mudler/LocalAI/pkg/model" ) -func RegisterLocalAIRoutes(app *fiber.App, +func RegisterLocalAIRoutes(router *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) { - app.Get("/swagger/*", swagger.HandlerDefault) // default + router.Get("/swagger/*", swagger.HandlerDefault) // default // LocalAI API endpoints if !appConfig.DisableGalleryEndpoint { modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) - app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) - app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) + router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) - app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) - app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint()) - app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) - app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) - app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) - app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) + router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) + router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint()) + router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) + router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) + router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) + router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) } - app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig)) + router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig)) + router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig)) // Stores sl := model.NewModelLoader("") - app.Post("/stores/set", localai.StoresSetEndpoint(sl, 
appConfig)) - app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig)) - app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig)) - app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig)) + router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig)) + router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig)) + router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig)) + router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig)) - // Kubernetes health checks - ok := func(c *fiber.Ctx) error { - return c.SendStatus(200) + if !appConfig.DisableMetrics { + router.Get("/metrics", localai.LocalAIMetricsEndpoint()) } - app.Get("/healthz", ok) - app.Get("/readyz", ok) - - app.Get("/metrics", localai.LocalAIMetricsEndpoint()) - // Experimental Backend Statistics Module backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now - app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService)) - app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) + router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService)) + router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) // p2p if p2p.IsP2PEnabled() { - app.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) - app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) + router.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) + router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) } - app.Get("/version", func(c *fiber.Ctx) error { + router.Get("/version", func(c *fiber.Ctx) error { return c.JSON(struct { Version string `json:"version"` }{Version: internal.PrintableVersion()}) }) - app.Get("/system", auth, localai.SystemInformations(ml, appConfig)) + router.Get("/system", localai.SystemInformations(ml, appConfig)) + + // misc + router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig)) } diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index 081daf70..a48ced65 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -2,84 +2,134 @@ package routes import ( "github.com/gofiber/fiber/v2" - "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/openai" - "github.com/mudler/LocalAI/pkg/model" ) func RegisterOpenAIRoutes(app *fiber.App, - cl *config.BackendConfigLoader, - ml *model.ModelLoader, - appConfig *config.ApplicationConfig) { + application *application.Application) { // openAI compatible API endpoint // chat - app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) - app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) + app.Post("/v1/chat/completions", + openai.ChatEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/chat/completions", + openai.ChatEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // edit - app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig)) - app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig)) + app.Post("/v1/edits", + openai.EditEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + 
application.ApplicationConfig(), + ), + ) + + app.Post("/edits", + openai.EditEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // assistant - app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) - app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) - app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) + app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + 
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // files - app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig)) - app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig)) - app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/files", openai.ListFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) - app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) - app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) - app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) - app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) - app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) + app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig())) + app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig())) // completion - app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/completions", 
openai.CompletionEndpoint(cl, ml, appConfig)) + app.Post("/v1/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) + + app.Post("/v1/engines/:model/completions", + openai.CompletionEndpoint( + application.BackendLoader(), + application.ModelLoader(), + application.TemplatesEvaluator(), + application.ApplicationConfig(), + ), + ) // embeddings - app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) - app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) + app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // audio - app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig)) - app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig)) + app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) // images - app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig)) + app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) - if appConfig.ImageDir != "" { - app.Static("/generated-images", appConfig.ImageDir) + if application.ApplicationConfig().ImageDir != "" { + app.Static("/generated-images", application.ApplicationConfig().ImageDir) } - if appConfig.AudioDir != "" { - app.Static("/generated-audio", appConfig.AudioDir) + if application.ApplicationConfig().AudioDir != "" { + app.Static("/generated-audio", application.ApplicationConfig().AudioDir) } // List models - app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml)) - app.Get("/models", openai.ListModelsEndpoint(cl, ml)) + app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) + app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 7b2c6ae7..92d20544 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -10,15 +10,17 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/elements" "github.com/mudler/LocalAI/core/http/endpoints/localai" + "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/xsync" - "github.com/rs/zerolog/log" "github.com/gofiber/fiber/v2" "github.com/google/uuid" + 
"github.com/microcosm-cc/bluemonday" + "github.com/rs/zerolog/log" ) type modelOpCache struct { @@ -90,6 +92,7 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/p2p", func(c *fiber.Ctx) error { summary := fiber.Map{ "Title": "LocalAI - P2P dashboard", + "BaseURL": utils.BaseURL(c), "Version": internal.PrintableVersion(), //"Nodes": p2p.GetAvailableNodes(""), //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), @@ -148,6 +151,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Models", + "BaseURL": utils.BaseURL(c), "Version": internal.PrintableVersion(), "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), "Repositories": appConfig.Galleries, @@ -171,7 +175,7 @@ func RegisterUIRoutes(app *fiber.App, Search string `form:"search"` }{} if err := c.BodyParser(&form); err != nil { - return c.Status(fiber.StatusBadRequest).SendString(err.Error()) + return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(err.Error())) } models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) @@ -303,10 +307,11 @@ func RegisterUIRoutes(app *fiber.App, // Show the Chat page app.Get("/chat/:model", func(c *fiber.Ctx) error { - backendConfigs, _ := services.ListModels(cl, ml, "", true) + backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) summary := fiber.Map{ "Title": "LocalAI - Chat with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": c.Params("model"), "Version": internal.PrintableVersion(), @@ -318,15 +323,16 @@ func RegisterUIRoutes(app *fiber.App, }) app.Get("/talk/", func(c *fiber.Ctx) error { - backendConfigs, _ := services.ListModels(cl, ml, "", true) + backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Talk", + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": backendConfigs[0], "IsP2PEnabled": p2p.IsP2PEnabled(), @@ -339,15 +345,16 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/chat/", func(c *fiber.Ctx) error { - backendConfigs, _ := services.ListModels(cl, ml, "", true) + backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED) if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Chat with " + backendConfigs[0], + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": backendConfigs[0], "Version": internal.PrintableVersion(), @@ -363,6 +370,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Generate images with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": c.Params("model"), "Version": internal.PrintableVersion(), @@ -379,11 +387,12 @@ func RegisterUIRoutes(app *fiber.App, if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Generate images with " + backendConfigs[0].Name, + "BaseURL": utils.BaseURL(c), "ModelsConfig": 
backendConfigs, "Model": backendConfigs[0].Name, "Version": internal.PrintableVersion(), @@ -399,6 +408,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Generate images with " + c.Params("model"), + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": c.Params("model"), "Version": internal.PrintableVersion(), @@ -415,11 +425,12 @@ func RegisterUIRoutes(app *fiber.App, if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models - return c.Redirect("/") + return c.Redirect(utils.BaseURL(c)) } summary := fiber.Map{ "Title": "LocalAI - Generate audio with " + backendConfigs[0].Name, + "BaseURL": utils.BaseURL(c), "ModelsConfig": backendConfigs, "Model": backendConfigs[0].Name, "IsP2PEnabled": p2p.IsP2PEnabled(), diff --git a/core/http/static/assets/flowbite.min.js b/core/http/static/assets/flowbite.min.js new file mode 100644 index 00000000..e2c52c2c --- /dev/null +++ b/core/http/static/assets/flowbite.min.js @@ -0,0 +1,2 @@ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define("Flowbite",[],e):"object"==typeof exports?exports.Flowbite=e():t.Flowbite=e()}(self,(function(){return function(){"use strict";var t={647:function(t,e,i){i.r(e)},853:function(t,e,i){i.r(e),i.d(e,{afterMain:function(){return w},afterRead:function(){return y},afterWrite:function(){return O},applyStyles:function(){return P},arrow:function(){return Q},auto:function(){return a},basePlacements:function(){return c},beforeMain:function(){return b},beforeRead:function(){return _},beforeWrite:function(){return L},bottom:function(){return o},clippingParents:function(){return u},computeStyles:function(){return it},createPopper:function(){return Pt},createPopperBase:function(){return Ht},createPopperLite:function(){return St},detectOverflow:function(){return mt},end:function(){return l},eventListeners:function(){return ot},flip:function(){return yt},hide:function(){return wt},left:function(){return s},main:function(){return E},modifierPhases:function(){return k},offset:function(){return Lt},placements:function(){return g},popper:function(){return h},popperGenerator:function(){return Tt},popperOffsets:function(){return It},preventOverflow:function(){return Ot},read:function(){return m},reference:function(){return f},right:function(){return r},start:function(){return d},top:function(){return n},variationPlacements:function(){return v},viewport:function(){return p},write:function(){return I}});var n="top",o="bottom",r="right",s="left",a="auto",c=[n,o,r,s],d="start",l="end",u="clippingParents",p="viewport",h="popper",f="reference",v=c.reduce((function(t,e){return t.concat([e+"-"+d,e+"-"+l])}),[]),g=[].concat(c,[a]).reduce((function(t,e){return t.concat([e,e+"-"+d,e+"-"+l])}),[]),_="beforeRead",m="read",y="afterRead",b="beforeMain",E="main",w="afterMain",L="beforeWrite",I="write",O="afterWrite",k=[_,m,y,b,E,w,L,I,O];function x(t){return t?(t.nodeName||"").toLowerCase():null}function A(t){if(null==t)return window;if("[object Window]"!==t.toString()){var e=t.ownerDocument;return e&&e.defaultView||window}return t}function C(t){return t instanceof A(t).Element||t instanceof Element}function T(t){return t instanceof A(t).HTMLElement||t instanceof HTMLElement}function H(t){return"undefined"!=typeof ShadowRoot&&(t instanceof A(t).ShadowRoot||t instanceof ShadowRoot)}var P={name:"applyStyles",enabled:!0,phase:"write",fn:function(t){var 
[... remainder of the minified two-line Flowbite bundle omitted; the vendored asset is added as a new file at core/http/static/assets/flowbite.min.js ...]
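To make the effect of the new strippathprefix.go middleware concrete, here is a minimal, self-contained Go sketch (not part of the diff) that exercises it the same way the accompanying test file does; the /localai/ prefix and the /v1/models route are illustrative values, and the import path assumes the package location introduced above.

```go
package main

import (
	"fmt"
	"net/http/httptest"

	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/http/middleware"
)

func main() {
	app := fiber.New()
	app.Use(middleware.StripPathPrefix())

	// Routes are registered without any prefix; the handler only ever sees
	// the path with the forwarded prefix already removed.
	app.Get("/v1/models", func(c *fiber.Ctx) error {
		return c.SendString("handled as " + c.Path())
	})

	// Simulate a request that reached LocalAI through a reverse proxy
	// mounted at /localai/ which forwards the original prefix.
	req := httptest.NewRequest("GET", "/localai/v1/models", nil)
	req.Header.Set("X-Forwarded-Prefix", "/localai/")

	resp, err := app.Test(req)
	if err != nil {
		panic(err)
	}
	fmt.Println("status:", resp.StatusCode) // 200: the path was rewritten to /v1/models
}
```

Note the redirect branch as well: when the incoming path equals the prefix without its trailing slash, the middleware answers with a 302 to the slash-terminated prefix, which the last test case above exercises.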

[view template fragment; surrounding markup not recoverable] Welcome to your LocalAI instance!
diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 67d40bfd..b0f11281 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -28,7 +28,7 @@ SOFTWARE. {{template "views/partials/head" .}} [markup changes not recoverable]
diff --git a/core/http/views/partials/inprogress.html b/core/http/views/partials/inprogress.html index 51c3a70c..48da66d7 100644 --- a/core/http/views/partials/inprogress.html +++ b/core/http/views/partials/inprogress.html @@ -17,13 +17,13 @@ - {{$modelName}} {{if $repository}} (from the '{{$repository}}' repository) {{end}} {{$op}} [remaining markup not recoverable]
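Stepping back to the reworked auth.go earlier in this diff: the key lookup now consults several sources, including a token cookie alongside the existing headers. A small client-side sketch (not part of the change; the key value and the address are placeholders, with the default LocalAI listen port assumed) of where a credential may be supplied:

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	apiKey := "sk-example-key" // placeholder; use one of the configured API keys

	req, err := http.NewRequest("GET", "http://localhost:8080/v1/models", nil)
	if err != nil {
		panic(err)
	}

	// Any single one of these is sufficient; they mirror the sources passed
	// to v2keyauth.MultipleKeySourceLookup in the middleware above.
	req.Header.Set("Authorization", "Bearer "+apiKey)         // header:Authorization
	req.Header.Set("x-api-key", apiKey)                       // header:x-api-key
	req.Header.Set("xi-api-key", apiKey)                      // header:xi-api-key
	req.AddCookie(&http.Cookie{Name: "token", Value: apiKey}) // cookie:token (new in this change)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.StatusCode)
}
```

When the key is missing or malformed, the handler now sets WWW-Authenticate: Bearer and returns 401, rendering views/login unless opaque errors are enabled, instead of the earlier blanket 403.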
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index 9bf5b96a..3a057cd8 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -3,8 +3,8 @@
- LocalAI Logo - LocalAI + LocalAI Logo + LocalAI
@@ -14,33 +14,33 @@
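The Kubernetes-style probes that used to be registered inline in localai.go now live in the dedicated routes.HealthRoutes registration shown earlier. A quick way to poke them from Go (the port assumes LocalAI's default listen address):

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	for _, path := range []string{"/healthz", "/readyz"} {
		resp, err := http.Get("http://localhost:8080" + path) // default listen address assumed
		if err != nil {
			fmt.Println(path, "error:", err)
			continue
		}
		resp.Body.Close()
		fmt.Println(path, "->", resp.Status) // both respond 200 OK while the service is up
	}
}
```

As the same hunk shows, /metrics is now registered only when metrics are enabled in the application config; the probes do not depend on it.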
diff --git a/core/http/views/partials/navbar_explorer.html b/core/http/views/partials/navbar_explorer.html index ffc6c4d5..ef10c76d 100644 --- a/core/http/views/partials/navbar_explorer.html +++ b/core/http/views/partials/navbar_explorer.html @@ -3,8 +3,8 @@
- LocalAI Logo - LocalAI + LocalAI Logo + LocalAI
@@ -14,7 +14,7 @@
@@ -22,7 +22,7 @@
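Taken together, StripPathPrefix, the BaseURL values handed to the templates, and the navbar updates above appear aimed at serving LocalAI from a sub-path behind a reverse proxy. As a closing sketch (assumptions: the upstream address, the /localai/ prefix, and that links built from BaseURL keep that prefix), a tiny Go proxy that sets the header the middleware consumes:

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Assumed upstream: a LocalAI instance on its default listen address.
	upstream, err := url.Parse("http://127.0.0.1:8080")
	if err != nil {
		log.Fatal(err)
	}
	proxy := httputil.NewSingleHostReverseProxy(upstream)

	// Publish the instance under /localai/ and forward the prefix so that
	// StripPathPrefix can rewrite /localai/v1/... back to /v1/... upstream.
	http.HandleFunc("/localai/", func(w http.ResponseWriter, r *http.Request) {
		r.Header.Set("X-Forwarded-Prefix", "/localai/")
		proxy.ServeHTTP(w, r)
	})

	log.Fatal(http.ListenAndServe(":9000", nil))
}
```

With this in place, a call such as GET /localai/v1/models reaches the upstream handlers unchanged, and the rendered pages can presumably use BaseURL to emit links that stay under /localai/.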