Mirror of https://github.com/mudler/LocalAI.git (synced 2025-06-27 13:15:00 +00:00)

Merge branch 'master' into default_miro

Commit 3eb1c1c689: 197 changed files with 4171 additions and 1305 deletions
.devcontainer-scripts/postcreate.sh (new file, 17 lines)
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+cd /workspace
+
+# Get the files into the volume without a bind mount
+if [ ! -d ".git" ]; then
+    git clone https://github.com/mudler/LocalAI.git .
+else
+    git fetch
+fi
+
+echo "Standard Post-Create script completed."
+
+if [ -f "/devcontainer-customization/postcreate.sh" ]; then
+    echo "Launching customization postcreate.sh"
+    bash "/devcontainer-customization/postcreate.sh"
+fi
.devcontainer-scripts/poststart.sh (new file, 16 lines)
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+cd /workspace
+
+# Grab the pre-stashed backend assets to avoid build issues
+cp -r /build/backend-assets /workspace/backend-assets
+
+# Ensures generated source files are present upon load
+make prepare
+
+echo "Standard Post-Start script completed."
+
+if [ -f "/devcontainer-customization/poststart.sh" ]; then
+    echo "Launching customization poststart.sh"
+    bash "/devcontainer-customization/poststart.sh"
+fi
.devcontainer-scripts/utils.sh (new file, 49 lines)
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# This file contains some really simple functions that are useful when building up customization scripts.
+
+
+# Checks if the git config has a user registered - and sets it up if not.
+#
+# Param 1: name
+# Param 2: email
+#
+config_user() {
+    local gcn=$(git config --global user.name)
+    if [ -z "${gcn}" ]; then
+        echo "Setting up git user / remote"
+        git config --global user.name "$1"
+        git config --global user.email "$2"
+
+    fi
+}
+
+# Checks if the git remote is configured - and sets it up if not. Fetches either way.
+#
+# Param 1: remote name
+# Param 2: remote url
+#
+config_remote() {
+    local gr=$(git remote -v | grep $1)
+    if [ -z "${gr}" ]; then
+        git remote add $1 $2
+    fi
+    git fetch $1
+}
+
+# Setup special .ssh files
+#
+# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
+setup_ssh() {
+    local files=("$@")
+    for file in "${files[@]}"; do
+        local cfile="/devcontainer-customization/${file}"
+        local hfile="${HOME}/.ssh/${file}"
+        if [ ! -f "${hfile}" ]; then
+            echo "copying ${file}"
+            cp "${cfile}" "${hfile}"
+            chmod 600 "${hfile}"
+        fi
+    done
+    ls ~/.ssh
+}
.devcontainer/customization/README.md (new file, 25 lines)
@@ -0,0 +1,25 @@
+Place any additional resources your environment requires in this directory
+
+Script hooks are currently called for:
+`postcreate.sh` and `poststart.sh`
+
+If files with those names exist here, they will be called at the end of the normal script.
+
+This is a good place to set things like `git config --global user.name`, and to handle any other files that are mounted via this directory.
+
+To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
+
+```
+#!/bin/bash
+
+source "/.devcontainer-scripts/utils.sh"
+
+sshfiles=("config" "key.pub")
+
+setup_ssh "${sshfiles[@]}"
+
+config_user "YOUR NAME" "YOUR EMAIL"
+
+config_remote "REMOTE NAME" "REMOTE URL"
+
+```
.devcontainer/devcontainer.json (new file, 24 lines)
@@ -0,0 +1,24 @@
+{
+    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
+    "name": "LocalAI",
+    "workspaceFolder": "/workspace",
+    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
+    "service": "api",
+    "shutdownAction": "stopCompose",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "golang.go",
+                "ms-vscode.makefile-tools",
+                "ms-azuretools.vscode-docker",
+                "ms-python.python",
+                "ms-python.debugpy",
+                "wayou.vscode-todo-highlight",
+                "waderyan.gitblame"
+            ]
+        }
+    },
+    "forwardPorts": [8080, 3000],
+    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
+    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
+}
.devcontainer/docker-compose-devcontainer.yml (new file, 48 lines)
@@ -0,0 +1,48 @@
+services:
+  api:
+    build:
+      context: ..
+      dockerfile: Dockerfile
+      target: devcontainer
+      args:
+        - FFMPEG=true
+        - IMAGE_TYPE=extras
+        - GO_TAGS=stablediffusion p2p tts
+    env_file:
+      - ../.env
+    ports:
+      - 8080:8080
+    volumes:
+      - localai_workspace:/workspace
+      - ../models:/host-models
+      - ./customization:/devcontainer-customization
+    command: /bin/sh -c "while sleep 1000; do :; done"
+    cap_add:
+      - SYS_PTRACE
+    security_opt:
+      - seccomp:unconfined
+  prometheus:
+    image: prom/prometheus
+    container_name: prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+    ports:
+      - 9090:9090
+    restart: unless-stopped
+    volumes:
+      - ./prometheus:/etc/prometheus
+      - prom_data:/prometheus
+  grafana:
+    image: grafana/grafana
+    container_name: grafana
+    ports:
+      - 3000:3000
+    restart: unless-stopped
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      - GF_SECURITY_ADMIN_PASSWORD=grafana
+    volumes:
+      - ./grafana:/etc/grafana/provisioning/datasources
+volumes:
+  prom_data:
+  localai_workspace:
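The api service deliberately idles (`while sleep 1000; do :; done`) so the editor can attach and the developer drives builds interactively inside the container. A minimal, hedged sketch of driving the same stack by hand from the repository root, without VS Code (these exact commands are not part of this change):

```
# Sketch: bring the devcontainer stack up manually and open a shell in the api service.
docker compose -f .devcontainer/docker-compose-devcontainer.yml up -d --build
docker compose -f .devcontainer/docker-compose-devcontainer.yml exec api bash
```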
.devcontainer/grafana/datasource.yml (new file, 10 lines)
@@ -0,0 +1,10 @@
+
+apiVersion: 1
+
+datasources:
+- name: Prometheus
+  type: prometheus
+  url: http://prometheus:9090
+  isDefault: true
+  access: proxy
+  editable: true
.devcontainer/prometheus/prometheus.yml (new file, 21 lines)
@@ -0,0 +1,21 @@
+global:
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  evaluation_interval: 15s
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets: []
+      scheme: http
+      timeout: 10s
+      api_version: v1
+scrape_configs:
+  - job_name: prometheus
+    honor_timestamps: true
+    scrape_interval: 15s
+    scrape_timeout: 10s
+    metrics_path: /metrics
+    scheme: http
+    static_configs:
+      - targets:
+          - localhost:9090
@@ -1,6 +1,7 @@
 .idea
 .github
 .vscode
+.devcontainer
 models
 examples/chatbot-ui/models
 examples/rwkv/models
.env (3 changed lines)
@@ -79,6 +79,9 @@
 ### Enable to run parallel requests
 # LOCALAI_PARALLEL_REQUESTS=true
 
+# Enable to allow p2p mode
+# LOCALAI_P2P=true
+
 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time
.github/bump_deps.sh (vendored, 13 changed lines)
@@ -6,4 +6,17 @@ VAR=$3
 
 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
 
+# Read $VAR from Makefile (only first match)
+set +e
+CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
+set -e
+
 sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
+
+if [ -z "$CURRENT_COMMIT" ]; then
+    echo "Could not find $VAR in Makefile."
+    exit 0
+fi
+
+echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
+echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
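For reference, a hedged example of invoking the updated script by hand. The workflow below calls it with matrix values; the repository, branch, and Makefile variable here are illustrative choices matching pins this repository already carries:

```
# Sketch: refresh the pinned whisper.cpp commit the same way the workflow does.
# Side effects: rewrites WHISPER_CPP_VERSION?= in the Makefile and writes
# WHISPER_CPP_VERSION_message.txt and WHISPER_CPP_VERSION_commit.txt.
bash .github/bump_deps.sh ggerganov/whisper.cpp master WHISPER_CPP_VERSION
```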
.github/dependabot.yml (vendored, 4 changed lines)
@@ -67,10 +67,6 @@ updates:
     directory: "/backend/python/parler-tts"
     schedule:
       interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/petals"
-    schedule:
-      interval: "weekly"
   - package-ecosystem: "pip"
     directory: "/backend/python/rerankers"
     schedule:
.github/workflows/bump_deps.yaml (vendored, 17 changed lines)
@@ -40,17 +40,30 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Bump dependencies 🔧
+      id: bump
      run: |
         bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
+        {
+          echo 'message<<EOF'
+          cat "${{ matrix.variable }}_message.txt"
+          echo EOF
+        } >> "$GITHUB_OUTPUT"
+        {
+          echo 'commit<<EOF'
+          cat "${{ matrix.variable }}_commit.txt"
+          echo EOF
+        } >> "$GITHUB_OUTPUT"
+        rm -rfv ${{ matrix.variable }}_message.txt
+        rm -rfv ${{ matrix.variable }}_commit.txt
     - name: Create Pull Request
       uses: peter-evans/create-pull-request@v6
       with:
         token: ${{ secrets.UPDATE_BOT_TOKEN }}
         push-to-fork: ci-forks/LocalAI
         commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-        title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
+        title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
         branch: "update/${{ matrix.variable }}"
-        body: Bump of ${{ matrix.repository }} version
+        body: ${{ steps.bump.outputs.message }}
         signoff: true
.github/workflows/deploy-explorer.yaml (vendored, new file, 64 lines)
@@ -0,0 +1,64 @@
+name: Explorer deployment
+
+on:
+  push:
+    branches:
+      - master
+    tags:
+      - 'v*'
+
+concurrency:
+  group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
+
+jobs:
+  build-linux:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          make protogen-go
+      - name: Build api
+        run: |
+          CGO_ENABLED=0 make build-api
+      - name: rm
+        uses: appleboy/ssh-action@v1.0.3
+        with:
+          host: ${{ secrets.EXPLORER_SSH_HOST }}
+          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
+          key: ${{ secrets.EXPLORER_SSH_KEY }}
+          port: ${{ secrets.EXPLORER_SSH_PORT }}
+          script: |
+            sudo rm -rf local-ai/ || true
+      - name: copy file via ssh
+        uses: appleboy/scp-action@v0.1.7
+        with:
+          host: ${{ secrets.EXPLORER_SSH_HOST }}
+          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
+          key: ${{ secrets.EXPLORER_SSH_KEY }}
+          port: ${{ secrets.EXPLORER_SSH_PORT }}
+          source: "local-ai"
+          overwrite: true
+          rm: true
+          target: ./local-ai
+      - name: restarting
+        uses: appleboy/ssh-action@v1.0.3
+        with:
+          host: ${{ secrets.EXPLORER_SSH_HOST }}
+          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
+          key: ${{ secrets.EXPLORER_SSH_KEY }}
+          port: ${{ secrets.EXPLORER_SSH_PORT }}
+          script: |
+            sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
+            sudo systemctl restart local-ai
.github/workflows/test-extra.yml (vendored, 26 changed lines)
@@ -168,32 +168,6 @@ jobs:
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
 
-
-
-  # tests-petals:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v4
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-  #     - name: Test petals
-  #       run: |
-  #         make --jobs=5 --output-sync=target -C backend/python/petals
-  #         make --jobs=5 --output-sync=target -C backend/python/petals test
-
-
-
   # tests-bark:
   #   runs-on: ubuntu-latest
   #   steps:
.gitignore (vendored, 3 changed lines)
@@ -54,3 +54,6 @@ docs/static/gallery.html
 
 # backend virtual environments
 **/venv
+
+# per-developer customization files for the development container
+.devcontainer/customization/*
.vscode/launch.json (vendored, 21 changed lines)
@@ -3,12 +3,12 @@
     "configurations": [
         {
             "name": "Python: Current File",
-            "type": "python",
+            "type": "debugpy",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
             "justMyCode": false,
-            "cwd": "${workspaceFolder}/examples/langchain-chroma",
+            "cwd": "${fileDirname}",
             "env": {
                 "OPENAI_API_BASE": "http://localhost:8080/v1",
                 "OPENAI_API_KEY": "abc"
@@ -19,15 +19,16 @@
             "type": "go",
             "request": "launch",
             "mode": "debug",
-            "program": "${workspaceFolder}/main.go",
-            "args": [
-                "api"
-            ],
+            "program": "${workspaceRoot}",
+            "args": [],
             "env": {
-                "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
-                "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
-                "DEBUG": "true"
-            }
+                "LOCALAI_LOG_LEVEL": "debug",
+                "LOCALAI_P2P": "true",
+                "LOCALAI_FEDERATED": "true"
+            },
+            "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
+            "envFile": "${workspaceFolder}/.env",
+            "cwd": "${workspaceRoot}"
         }
     ]
 }
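The reworked Go launch configuration now builds the whole module with the `stablediffusion p2p tts` tags and sets the new LOCALAI_* variables. A rough, hedged command-line equivalent (without the debugger, and assuming sources and generated files are already in place, e.g. via `make prepare`):

```
# Sketch: approximate the updated Go debug configuration from a plain terminal.
LOCALAI_LOG_LEVEL=debug LOCALAI_P2P=true LOCALAI_FEDERATED=true \
  go run -tags "stablediffusion p2p tts" .
```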
Dockerfile (94 changed lines)
@@ -8,12 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core
 
 USER root
 
-ARG GO_VERSION=1.22.5
+ARG GO_VERSION=1.22.6
 ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 
 RUN apt-get update && \
@@ -30,7 +30,7 @@ RUN apt-get update && \
 
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
+ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
 
 # Install grpc compilers
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
@@ -39,15 +39,18 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
 
+RUN test -n "$TARGETARCH" \
+    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
+
 # Use the variables in subsequent instructions
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"
 
 # Cuda
-ENV PATH /usr/local/cuda/bin:${PATH}
+ENV PATH=/usr/local/cuda/bin:${PATH}
 
 # HipBLAS requirements
-ENV PATH /opt/rocm/bin:${PATH}
+ENV PATH=/opt/rocm/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
 RUN apt-get update && \
@@ -62,9 +65,6 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 
 WORKDIR /build
 
-RUN test -n "$TARGETARCH" \
-    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
-
 ###################################
 ###################################
@@ -81,7 +81,7 @@ RUN apt-get update && \
         espeak \
         python3-pip \
         python-is-python3 \
-        python3-dev \
+        python3-dev llvm \
         python3-venv && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/* && \
@@ -217,13 +217,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 ###################################
 ###################################
 
-# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
-# Adjustments to the build process should likely be made here.
-FROM requirements-drivers AS builder
+# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
+FROM requirements-drivers AS builder-base
 
 ARG GO_TAGS="stablediffusion tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
+ARG LD_FLAGS="-s -w"
 
 ENV GRPC_BACKENDS=${GRPC_BACKENDS}
 ENV GO_TAGS=${GO_TAGS}
@@ -231,14 +232,12 @@ ENV MAKEFLAGS=${MAKEFLAGS}
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
+ENV LD_FLAGS=${LD_FLAGS}
+
+RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
 
 WORKDIR /build
 
-COPY . .
-COPY .git .
-RUN echo "GO_TAGS: $GO_TAGS"
-
-RUN make prepare
-
 # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
 # but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
@@ -256,9 +255,30 @@ RUN <<EOT bash
     fi
 EOT
 
+###################################
+###################################
+
+# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
+# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
+FROM builder-base AS builder-sd
+
+COPY . .
+COPY .git .
+
+RUN make prepare
+
 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 
+###################################
+###################################
+
+# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
+# Adjustments to the build process should likely be made here.
+FROM builder-sd AS builder
+
 # Install the pre-built GRPC
 COPY --from=grpc /opt/grpc /usr/local
@@ -276,6 +296,41 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
 ###################################
 ###################################
 
+# The devcontainer target is not used on CI. It is a target for developers to use locally -
+# rather than copying files it mounts them locally and leaves building to the developer
+
+FROM builder-base AS devcontainer
+
+ARG FFMPEG
+
+COPY --from=grpc /opt/grpc /usr/local
+
+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
+
+COPY .devcontainer-scripts /.devcontainer-scripts
+
+# Add FFmpeg
+RUN if [ "${FFMPEG}" = "true" ]; then \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ffmpeg && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ssh less && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN go install github.com/go-delve/delve/cmd/dlv@latest
+
+RUN go install github.com/mikefarah/yq/v4@latest
+
+###################################
+###################################
+
 # This is the final target. The result of this target will be the image uploaded to the registry.
 # If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
 FROM requirements-drivers
@@ -326,7 +381,7 @@ COPY --from=builder /build/local-ai ./
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 
 # do not let stablediffusion rebuild (requires an older version of absl)
-COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
 
 # Change the shell to bash so we can use [[ tests below
 SHELL ["/bin/bash", "-c"]
@@ -356,9 +411,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
     if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/openvoice \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/petals \
-    ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/sentencetransformers \
     ; fi && \
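With this change the build is staged as builder-base, then builder-sd, then builder, plus a separate devcontainer target that the compose file above consumes. A hedged sketch of building only the devcontainer stage by hand (the image tag is illustrative; BuildKit is assumed so TARGETARCH is populated automatically):

```
# Sketch: build just the devcontainer stage manually; docker compose normally does this.
docker buildx build \
  --target devcontainer \
  --build-arg FFMPEG=true \
  --build-arg IMAGE_TYPE=extras \
  --build-arg GO_TAGS="stablediffusion p2p tts" \
  -t localai-devcontainer:latest .
```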
69
Makefile
69
Makefile
|
@ -8,11 +8,7 @@ DETECT_LIBS?=true
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=01245f5b1629075543bc4478418c7d72a0b4b3c7
|
CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
|
||||||
|
|
||||||
# gpt4all version
|
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
|
||||||
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
|
||||||
|
|
||||||
# go-rwkv version
|
# go-rwkv version
|
||||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
|
@ -20,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
|
WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
|
@ -190,7 +186,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||||
|
@ -253,18 +248,6 @@ sources/go-piper:
|
||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
## GPT4ALL
|
|
||||||
sources/gpt4all:
|
|
||||||
mkdir -p sources/gpt4all
|
|
||||||
cd sources/gpt4all && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(GPT4ALL_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(GPT4ALL_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
|
||||||
|
|
||||||
## RWKV
|
## RWKV
|
||||||
sources/go-rwkv.cpp:
|
sources/go-rwkv.cpp:
|
||||||
|
@ -318,7 +301,7 @@ sources/whisper.cpp:
|
||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||||
|
@ -328,7 +311,6 @@ replace:
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
|
@ -339,7 +321,6 @@ dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
|
@ -349,7 +330,6 @@ prepare-sources: get-sources replace
|
||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
|
||||||
$(MAKE) -C sources/go-rwkv.cpp clean
|
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
$(MAKE) -C sources/go-stable-diffusion clean
|
||||||
|
@ -396,7 +376,7 @@ build-minimal:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
build-api:
|
build-api:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
backend-assets/lib:
|
backend-assets/lib:
|
||||||
mkdir -p backend-assets/lib
|
mkdir -p backend-assets/lib
|
||||||
|
@ -407,7 +387,7 @@ ifeq ($(DETECT_LIBS),true)
|
||||||
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
||||||
endif
|
endif
|
||||||
ifeq ($(OS),Darwin)
|
ifeq ($(OS),Darwin)
|
||||||
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
|
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
|
||||||
else
|
else
|
||||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||||
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
@ -469,8 +449,7 @@ test: prepare test-models/testmodel.ggml grpcs
|
||||||
export GO_TAGS="tts stablediffusion debug"
|
export GO_TAGS="tts stablediffusion debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
$(MAKE) test-gpt4all
|
|
||||||
$(MAKE) test-llama
|
$(MAKE) test-llama
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
|
@ -500,10 +479,6 @@ teardown-e2e:
|
||||||
rm -rf $(TEST_DIR) || true
|
rm -rf $(TEST_DIR) || true
|
||||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
test-gpt4all: prepare-test
|
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
|
|
||||||
|
|
||||||
test-llama: prepare-test
|
test-llama: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||||
|
@ -559,10 +534,10 @@ protogen-go-clean:
|
||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
.PHONY: autogptq-protogen
|
||||||
autogptq-protogen:
|
autogptq-protogen:
|
||||||
|
@ -620,14 +595,6 @@ mamba-protogen:
|
||||||
mamba-protogen-clean:
|
mamba-protogen-clean:
|
||||||
$(MAKE) -C backend/python/mamba protogen-clean
|
$(MAKE) -C backend/python/mamba protogen-clean
|
||||||
|
|
||||||
.PHONY: petals-protogen
|
|
||||||
petals-protogen:
|
|
||||||
$(MAKE) -C backend/python/petals protogen
|
|
||||||
|
|
||||||
.PHONY: petals-protogen-clean
|
|
||||||
petals-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/petals protogen-clean
|
|
||||||
|
|
||||||
.PHONY: rerankers-protogen
|
.PHONY: rerankers-protogen
|
||||||
rerankers-protogen:
|
rerankers-protogen:
|
||||||
$(MAKE) -C backend/python/rerankers protogen
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
|
@ -709,7 +676,6 @@ prepare-extra-conda-environments: protogen-python
|
||||||
$(MAKE) -C backend/python/vall-e-x
|
$(MAKE) -C backend/python/vall-e-x
|
||||||
$(MAKE) -C backend/python/openvoice
|
$(MAKE) -C backend/python/openvoice
|
||||||
$(MAKE) -C backend/python/exllama
|
$(MAKE) -C backend/python/exllama
|
||||||
$(MAKE) -C backend/python/petals
|
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
|
@ -730,12 +696,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
||||||
mkdir -p backend-assets/espeak-ng-data
|
mkdir -p backend-assets/espeak-ng-data
|
||||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||||
|
|
||||||
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
|
||||||
mkdir -p backend-assets/gpt4all
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
|
||||||
|
|
||||||
backend-assets/grpc: protogen-go replace
|
backend-assets/grpc: protogen-go replace
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
|
@ -746,13 +706,6 @@ ifneq ($(UPX),)
|
||||||
$(UPX) backend-assets/grpc/bert-embeddings
|
$(UPX) backend-assets/grpc/bert-embeddings
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/gpt4all
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
|
@ -783,9 +736,6 @@ else
|
||||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||||
endif
|
endif
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend/cpp/${VARIANT}/grpc-server
|
|
||||||
endif
|
|
||||||
|
|
||||||
# This target is for manually building a variant with-auto detected flags
|
# This target is for manually building a variant with-auto detected flags
|
||||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
@ -858,9 +808,6 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
|
||||||
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||||
mkdir -p backend-assets/util/
|
mkdir -p backend-assets/util/
|
||||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/util/llama-cpp-rpc-server
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||||
|
|
|
@ -84,6 +84,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||||
|
|
||||||
Hot topics (looking for contributors):
|
Hot topics (looking for contributors):
|
||||||
|
|
||||||
|
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||||
|
@ -150,6 +151,7 @@ Other:
|
||||||
|
|
||||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||||
|
|
||||||
|
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
|
||||||
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
||||||
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
||||||
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
||||||
|
|
|
@ -458,7 +458,9 @@ struct llama_server_context
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
llama_init_result llama_init = llama_init_from_gpt_params(params);
|
||||||
|
model = llama_init.model;
|
||||||
|
ctx = llama_init.context;
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
LOG_ERROR("unable to load model", {{"model", params.model}});
|
||||||
|
@ -478,7 +480,7 @@ struct llama_server_context
|
||||||
|
|
||||||
n_ctx = llama_n_ctx(ctx);
|
n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
add_bos_token = llama_should_add_bos_token(model);
|
add_bos_token = llama_add_bos_token(model);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -2258,7 +2260,7 @@ static void params_parse(const backend::ModelOptions* request,
|
||||||
}
|
}
|
||||||
// get the directory of modelfile
|
// get the directory of modelfile
|
||||||
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||||
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
|
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
|
||||||
}
|
}
|
||||||
params.use_mlock = request->mlock();
|
params.use_mlock = request->mlock();
|
||||||
params.use_mmap = request->mmap();
|
params.use_mmap = request->mmap();
|
||||||
|
|
|
@ -1,62 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
|
||||||
)
|
|
||||||
|
|
||||||
type LLM struct {
|
|
||||||
base.SingleThread
|
|
||||||
|
|
||||||
gpt4all *gpt4all.Model
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|
||||||
model, err := gpt4all.New(opts.ModelFile,
|
|
||||||
gpt4all.SetThreads(int(opts.Threads)),
|
|
||||||
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
|
|
||||||
llm.gpt4all = model
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
|
|
||||||
predictOptions := []gpt4all.PredictOption{
|
|
||||||
gpt4all.SetTemperature(float64(opts.Temperature)),
|
|
||||||
gpt4all.SetTopP(float64(opts.TopP)),
|
|
||||||
gpt4all.SetTopK(int(opts.TopK)),
|
|
||||||
gpt4all.SetTokens(int(opts.Tokens)),
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
|
|
||||||
}
|
|
||||||
return predictOptions
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|
||||||
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
|
||||||
predictOptions := buildPredictOptions(opts)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
llm.gpt4all.SetTokenCallback(func(token string) bool {
|
|
||||||
results <- token
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("err: ", err)
|
|
||||||
}
|
|
||||||
llm.gpt4all.SetTokenCallback(nil)
|
|
||||||
close(results)
|
|
||||||
}()
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
|
@ -1,21 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
2
backend/python/autogptq/requirements-cublas11.txt
Normal file
2
backend/python/autogptq/requirements-cublas11.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
|
torch
|
1
backend/python/autogptq/requirements-cublas12.txt
Normal file
1
backend/python/autogptq/requirements-cublas12.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
torch
|
|
@ -2,4 +2,4 @@
|
||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
|
@ -1,7 +1,6 @@
|
||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.65.1
|
grpcio==1.65.4
|
||||||
protobuf
|
protobuf
|
||||||
torch
|
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
4  backend/python/bark/requirements-cpu.txt  Normal file
@ -0,0 +1,4 @@
transformers
accelerate
torch
torchaudio

5  backend/python/bark/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

4  backend/python/bark/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate

@ -1,3 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
 torchaudio
+transformers
+accelerate

@ -4,3 +4,5 @@ torch
 torchaudio
 optimum[openvino]
 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+transformers
+accelerate

@ -1,6 +1,4 @@
-accelerate
 bark==0.1.5
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
-transformers
@ -18,10 +18,23 @@
 # source $(dirname $0)/../common/libbackend.sh
 #
 function init() {
+    # Name of the backend (directory name)
     BACKEND_NAME=${PWD##*/}
+
+    # Path where all backends files are
     MY_DIR=$(realpath `dirname $0`)
+
+    # Build type
     BUILD_PROFILE=$(getBuildProfile)
 
+    # Environment directory
+    EDIR=${MY_DIR}
+
+    # Allow to specify a custom env dir for shared environments
+    if [ "x${ENV_DIR}" != "x" ]; then
+        EDIR=${ENV_DIR}
+    fi
+
     # If a backend has defined a list of valid build profiles...
     if [ ! -z "${LIMIT_TARGETS}" ]; then
         isValidTarget=$(checkTargets ${LIMIT_TARGETS})
@ -74,13 +87,14 @@ function getBuildProfile() {
 # This function is idempotent, so you can call it as many times as you want and it will
 # always result in an activated virtual environment
 function ensureVenv() {
-    if [ ! -d "${MY_DIR}/venv" ]; then
-        uv venv ${MY_DIR}/venv
+    if [ ! -d "${EDIR}/venv" ]; then
+        uv venv ${EDIR}/venv
         echo "virtualenv created"
     fi
 
-    if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
-        source ${MY_DIR}/venv/bin/activate
+    # Source if we are not already in a Virtual env
+    if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then
+        source ${EDIR}/venv/bin/activate
         echo "virtualenv activated"
     fi
 
@ -113,13 +127,24 @@ function installRequirements() {
 
     # These are the requirements files we will attempt to install, in order
     declare -a requirementFiles=(
-        "${MY_DIR}/requirements-install.txt"
-        "${MY_DIR}/requirements.txt"
-        "${MY_DIR}/requirements-${BUILD_TYPE}.txt"
+        "${EDIR}/requirements-install.txt"
+        "${EDIR}/requirements.txt"
+        "${EDIR}/requirements-${BUILD_TYPE}.txt"
     )
 
     if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
-        requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
+        requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
+    fi
+
+    # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
+    if [ "x${BUILD_TYPE}" == "x" ]; then
+        requirementFiles+=("${EDIR}/requirements-cpu.txt")
+    fi
+
+    requirementFiles+=("${EDIR}/requirements-after.txt")
+
+    if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
+        requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
     fi
 
     for reqFile in ${requirementFiles[@]}; do
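For readers who find the resolution order easier to follow as data, here is a small illustrative re-expression of the new logic in Python. This is not part of the repository (the real logic is the bash hunk above); the final existence filter is an assumption about how the install loop treats missing files.

import os

def requirement_files(edir: str, build_type: str, build_profile: str) -> list[str]:
    # Mirrors the order in which libbackend.sh now collects requirement files.
    # EDIR defaults to the backend directory and can be overridden via ENV_DIR for shared envs.
    files = [
        f"{edir}/requirements-install.txt",
        f"{edir}/requirements.txt",
        f"{edir}/requirements-{build_type}.txt",
    ]
    if build_type != build_profile:
        files.append(f"{edir}/requirements-{build_profile}.txt")
    if build_type == "":  # an empty BUILD_TYPE means a CPU build
        files.append(f"{edir}/requirements-cpu.txt")
    files.append(f"{edir}/requirements-after.txt")
    if build_type != build_profile:
        files.append(f"{edir}/requirements-{build_profile}-after.txt")
    # Assumption: the shell loop only installs files that actually exist.
    return [f for f in files if os.path.exists(f)]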
@ -1,2 +1,2 @@
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf

3  backend/python/coqui/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

5  backend/python/coqui/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

4  backend/python/coqui/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate

@ -1,3 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
 torchaudio
+transformers
+accelerate

@ -3,4 +3,6 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+transformers
+accelerate

@ -1,6 +1,4 @@
-accelerate
 TTS==0.22.0
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
-transformers
@ -18,13 +18,13 @@ import backend_pb2_grpc
 import grpc
 
 from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
-    EulerAncestralDiscreteScheduler
+    EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image, export_to_video
 from compel import Compel, ReturnedEmbeddingsType
-from transformers import CLIPTextModel
+from optimum.quanto import freeze, qfloat8, quantize
+from transformers import CLIPTextModel, T5EncoderModel
 from safetensors.torch import load_file
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@ -163,6 +163,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         modelFile = request.Model
 
         self.cfg_scale = 7
+        self.PipelineType = request.PipelineType
 
         if request.CFGScale != 0:
             self.cfg_scale = request.CFGScale
 
@ -244,6 +246,30 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 torch_dtype=torchType,
                 use_safetensors=True,
                 variant=variant)
+        elif request.PipelineType == "FluxPipeline":
+            self.pipe = FluxPipeline.from_pretrained(
+                request.Model,
+                torch_dtype=torch.bfloat16)
+            if request.LowVRAM:
+                self.pipe.enable_model_cpu_offload()
+        elif request.PipelineType == "FluxTransformer2DModel":
+            dtype = torch.bfloat16
+            # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
+            bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
+
+            transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
+            quantize(transformer, weights=qfloat8)
+            freeze(transformer)
+            text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
+            quantize(text_encoder_2, weights=qfloat8)
+            freeze(text_encoder_2)
+
+            self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
+            self.pipe.transformer = transformer
+            self.pipe.text_encoder_2 = text_encoder_2
+
+            if request.LowVRAM:
+                self.pipe.enable_model_cpu_offload()
 
         if CLIPSKIP and request.CLIPSkip != 0:
             self.clip_skip = request.CLIPSkip
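The new FluxTransformer2DModel branch is the interesting part of this hunk: it quantizes the Flux transformer and the T5 text encoder to qfloat8 with optimum-quanto before wiring them into a FluxPipeline. A minimal standalone sketch of that flow follows; it is not LocalAI code, the checkpoint path and repo id are placeholders, and it assumes optimum-quanto and a recent diffusers are installed.

import torch
from diffusers import FluxPipeline, FluxTransformer2DModel
from optimum.quanto import freeze, qfloat8, quantize
from transformers import T5EncoderModel

dtype = torch.bfloat16
bfl_repo = "ChuckMcSneed/FLUX.1-dev"           # same default repo the backend falls back to
single_file = "/models/flux1-dev.safetensors"  # placeholder path to a single-file checkpoint

# Quantize the two heavy components to 8-bit floats and freeze them so the
# quantized weights are what inference actually uses.
transformer = FluxTransformer2DModel.from_single_file(single_file, torch_dtype=dtype)
quantize(transformer, weights=qfloat8)
freeze(transformer)

text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)

# Build the pipeline without those two components, then plug the quantized ones back in.
pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
pipe.transformer = transformer
pipe.text_encoder_2 = text_encoder_2
pipe.enable_model_cpu_offload()  # what the LowVRAM option toggles in the backend

Quantizing both pieces is what keeps the FLUX.1 weights within reach of smaller GPUs, which is why the branch pairs it with CPU offload when LowVRAM is requested.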
@ -399,6 +425,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 request.seed
             )
 
+        if self.PipelineType == "FluxPipeline":
+            kwargs["max_sequence_length"] = 256
+
+        if self.PipelineType == "FluxTransformer2DModel":
+            kwargs["output_type"] = "pil"
+            kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
+
         if self.img2vid:
             # Load the conditioning image
             image = load_image(request.src)
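For reference, a hedged sketch of how those kwargs end up in the generation call. Only the three keys come from the hunk above; the prompt, step count and the pipe object are placeholders (pipe is assumed to be the FluxPipeline built in the previous sketch), and in the backend max_sequence_length applies to the FluxPipeline path while output_type and generator apply to the FluxTransformer2DModel path.

import torch

generation_kwargs = {
    "max_sequence_length": 256,                          # FluxPipeline: cap the T5 prompt length
    "output_type": "pil",                                # FluxTransformer2DModel path: return PIL images
    "generator": torch.Generator("cpu").manual_seed(0),  # fixed CPU-side seed
}
image = pipe(prompt="an astronaut riding a horse", num_inference_steps=28, **generation_kwargs).images[0]
image.save("flux.png")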
9  backend/python/diffusers/requirements-cpu.txt  Normal file
@ -0,0 +1,9 @@
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
torch
optimum-quanto

10  backend/python/diffusers/requirements-cublas11.txt  Normal file
@ -0,0 +1,10 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

9  backend/python/diffusers/requirements-cublas12.txt  Normal file
@ -0,0 +1,9 @@
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

@ -1,3 +1,11 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-torchvision
+torch==2.3.1+rocm6.0
+torchvision==0.18.1+rocm6.0
+diffusers
+opencv-python
+transformers
+accelerate
+compel
+peft
+sentencepiece
+optimum-quanto

@ -4,3 +4,11 @@ torch
 torchvision
 optimum[openvino]
 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+diffusers
+opencv-python
+transformers
+accelerate
+compel
+peft
+sentencepiece
+optimum-quanto

@ -1,13 +1,5 @@
 setuptools
-accelerate
-compel
-peft
-diffusers
-grpcio==1.65.1
-opencv-python
+grpcio==1.65.4
 pillow
 protobuf
-sentencepiece
-torch
-transformers
 certifi
3  backend/python/exllama/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

4  backend/python/exllama/requirements-cublas11.txt  Normal file
@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate

3  backend/python/exllama/requirements-cublas12.txt  Normal file
@ -0,0 +1,3 @@
torch
transformers
accelerate

@ -1,6 +1,4 @@
-grpcio==1.65.0
+grpcio==1.65.5
 protobuf
-torch
-transformers
 certifi
 setuptools
3  backend/python/exllama2/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

4  backend/python/exllama2/requirements-cublas11.txt  Normal file
@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate

3  backend/python/exllama2/requirements-cublas12.txt  Normal file
@ -0,0 +1,3 @@
torch
transformers
accelerate

@ -1,7 +1,5 @@
-accelerate
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
-torch
 wheel
 setuptools
2  backend/python/mamba/requirements-after.txt  Normal file
@ -0,0 +1,2 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2

2  backend/python/mamba/requirements-cpu.txt  Normal file
@ -0,0 +1,2 @@
torch
transformers

3  backend/python/mamba/requirements-cublas11.txt  Normal file
@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers

2  backend/python/mamba/requirements-cublas12.txt  Normal file
@ -0,0 +1,2 @@
torch
transformers

@ -4,4 +4,3 @@
 packaging
 setuptools
 wheel
-torch==2.3.1

@ -1,6 +1,3 @@
-causal-conv1d==1.4.0
-mamba-ssm==2.2.2
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
-transformers
1  backend/python/openvoice/requirements-cpu.txt  Normal file
@ -0,0 +1 @@
torch

2  backend/python/openvoice/requirements-cublas11.txt  Normal file
@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

1  backend/python/openvoice/requirements-cublas12.txt  Normal file
@ -0,0 +1 @@
torch

@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3

@ -1,4 +1,4 @@
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 librosa
 faster-whisper
@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
 
 # Download checkpoints if not present
 if [ ! -d "checkpoints_v2" ]; then
-    wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
+    wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
     unzip checkpoints_v2.zip
 fi
 
1  backend/python/parler-tts/requirements-after.txt  Normal file
@ -0,0 +1 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17

3  backend/python/parler-tts/requirements-cpu.txt  Normal file
@ -0,0 +1,3 @@
transformers
accelerate
torch

5  backend/python/parler-tts/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

4  backend/python/parler-tts/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate

@ -1,3 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
 torchaudio
+transformers
+accelerate

@ -3,4 +3,6 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+transformers
+accelerate

@ -1,7 +1,4 @@
-accelerate
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
-torch
-git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
 certifi
-transformers
+llvmlite==0.43.0
@ -1,31 +0,0 @@
.PHONY: petals
petals: protogen
	@echo "Creating virtual environment..."
	bash install.sh "petals.yml"
	@echo "Virtual environment created."

.PHONY: run
run: protogen
	@echo "Running petals..."
	bash run.sh
	@echo "petals run."

.PHONY: test
test: protogen
	@echo "Testing petals..."
	bash test.sh
	@echo "petals tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__
@ -1,140 +0,0 @@
#!/usr/bin/env python3
from concurrent import futures
import time
import argparse
import signal
import sys
import os

import backend_pb2
import backend_pb2_grpc

import grpc
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer that implements the Backend service defined in backend.proto.
    """
    def Health(self, request, context):
        """
        Returns a health check message.

        Args:
            request: The health check request.
            context: The gRPC context.

        Returns:
            backend_pb2.Reply: The health check reply.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Loads a language model.

        Args:
            request: The load model request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The load model result.
        """
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False)
            self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model)
            self.cuda = False
            if request.CUDA:
                self.model = self.model.cuda()
                self.cuda = True

        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters.

        Args:
            request: The predict request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The predict result.
        """
        inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"]
        if self.cuda:
            inputs = inputs.cuda()

        if request.Tokens == 0:
            # Max to max value if tokens are not specified
            request.Tokens = 8192

        # TODO: kwargs and map all parameters
        outputs = self.model.generate(inputs, max_new_tokens=request.Tokens)

        generated_text = self.tokenizer.decode(outputs[0])
        # Remove prompt from response if present
        if request.Prompt in generated_text:
            generated_text = generated_text.replace(request.Prompt, "")

        return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8'))

    def PredictStream(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters, and streams the results.

        Args:
            request: The predict stream request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The predict stream result.
        """
        # Implement PredictStream RPC
        #for reply in some_data_generator():
        #    yield reply
        # Not implemented yet
        return self.Predict(request, context)

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)
@ -1,14 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements

@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch

@ -1,5 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@ -1,3 +0,0 @@
git+https://github.com/bigscience-workshop/petals
certifi
transformers

@ -1,4 +0,0 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

startBackend $@
@ -1,58 +0,0 @@
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc

import unittest
import subprocess
import time
import grpc
import backend_pb2_grpc
import backend_pb2

class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service.

    This class contains methods to test the startup and shutdown of the gRPC service.
    """
    def setUp(self):
        self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m"))
                print(response)
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()
@ -1,6 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

runUnittests
4  backend/python/rerankers/requirements-cpu.txt  Normal file
@ -0,0 +1,4 @@
transformers
accelerate
torch
rerankers[transformers]

5  backend/python/rerankers/requirements-cublas11.txt  Normal file
@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
transformers
accelerate
torch
rerankers[transformers]

4  backend/python/rerankers/requirements-cublas12.txt  Normal file
@ -0,0 +1,4 @@
transformers
accelerate
torch
rerankers[transformers]

@ -1,2 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
+transformers
+accelerate
 torch
+rerankers[transformers]

@ -1,5 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
+transformers
+accelerate
 torch
+rerankers[transformers]
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

@ -1,6 +1,3 @@
-accelerate
-rerankers[transformers]
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
-transformers
6  backend/python/sentencetransformers/requirements-cpu.txt  Normal file
@ -0,0 +1,6 @@
torch
accelerate
transformers
bitsandbytes
sentence-transformers==3.0.1
transformers

@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
accelerate
sentence-transformers==3.0.1
transformers

@ -0,0 +1,4 @@
torch
accelerate
sentence-transformers==3.0.1
transformers

@ -1,2 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch
+accelerate
+sentence-transformers==3.0.1
+transformers

@ -3,3 +3,6 @@ intel-extension-for-pytorch
 torch
 optimum[openvino]
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+accelerate
+sentence-transformers==3.0.1
+transformers

@ -1,6 +1,3 @@
-accelerate
-sentence-transformers==3.0.1
-transformers
-grpcio==1.65.1
+grpcio==1.65.5
 protobuf
 certifi
@ -0,0 +1,3 @@
transformers
accelerate
torch

@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
transformers
accelerate
torch

@ -0,0 +1,3 @@
transformers
accelerate
torch

@ -1,2 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
+transformers
+accelerate
 torch
Some files were not shown because too many files have changed in this diff.