Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-20 10:35:01 +00:00)
Compare commits (263 commits)
Commit SHA1s:

04a3d8e5ac 9af09b3f8c 0d590a4044 e0a54de4f5 6bc2ae5467 8caaf49f5d 1db51044bb ec21b58008
996259b529 f2942cc0e1 f8fbfd4fa3 41e239c67e 587827e779 456b4982ef 159388cce8 cfc73c7773
6d5bde860b 6ef383033b cd494089d9 3033845f94 0f365ac204 525cf198be 658c2a4f55 c987de090d
04365843e6 1dc5781679 30704292de e52c66c76e cb28aef93b 029f97c2a2 3be71be696 6adb019f8f
fcaa0a2f01 fd17a3312c 12d0fe610b 11c67d16b8 63f7c86c4d ac89bf77bf 0395cc02fb 616972fca0
942fbff62d 2612a0c910 2dcb6d7247 6978eec69f 2fcfe54466 4e7506a3be 2a46217f90 31ff9dbd52
9483abef03 ce3e8b3e31 f3bb84c9a7 ecb1297582 73fc702b3c e3af62ae1a dc21604741 5433f1a70e
d5e032bdcd de786f6586 8b9bc4aa6e e6cea7d28e 7d7d56f2ce 1caae91ab6 e90f2cb0ca 5a4291fadd
91ef58ee5a a86e8c78f1 adb24214c6 f03a0430aa 73bc12abc0 7fa437bbcc 4a27c99928 6ce94834b6
84a26458dc 7aa377b6a9 64e66dda4a a085f61fdc 21bdfe5fa4 7ebd7b2454 6984749ea1 c0a206bc7a
01bbb31fb3 72111c597d b2f9fc870b 1fc6d469ac 05848b2027 1da0644aa3 c087cd1377 c621412f6a
5a8b1892cd 5b20426863 5c6cd50ed6 bace6516f1 3baadf6f27 8804c701b8 7b3ceb19bb e7f3effea1
61694a2ffb 573a3f104c 0e8af53a5b 960ffa808c 92719568e5 163939af71 399f1241dc 58c9ade2e8
6e1c93d84f 4076ea0494 26cbf77c0d 640790d628 4132adea2f 2b2d907a3a 6e8f4f584b 662cfc2b48
a25d355d66 6d1cfdbefc 5ecc478968 aef5c4291b c059f912b9 bc1e059259 38dc07793a da6ef0967d
7a011e60bd e13dd5b09f 86ee303bd6 978ee96fd3 3ad5691db6 0027681090 8cba990edc 88857696d4
23f347e687 b6e3dc5f02 69667521e2 2a92effc5d a65e012aa2 8e9b41d05f 078da5c2f0 c5af5d139c
2c9279a542 a67d22f5f2 dc7c51dcc7 98df65c7aa 1559b6b522 a0244e3fb4 d66396201a 9628860c0e
cae9bf1308 5bb5da0760 867973a850 701cd6b6d5 7f61d397d5 1ae0b896fa 3937407cb3 0e34ae4f3f
a38b99ecb6 a4a4358182 4bc39c2db3 cc3df759f8 378161060c f2f788fe60 9fa8ed6b1e 7fc37c5e29
4bc4b1e8bc e495b89f18 ba09eaea1b 61cc76c455 8abecb4a18 8b3f76d8e6 4e0497f1a6 ba88c9f451
a598285825 cb7a172897 771be28dfb 7d6b3eb42d 0bb33fab55 e3bf7f77f7 bd1707d339 0474804541
72693b3917 a03b70010f e3717e5c1a c8f6858218 06d7cc43ae f2147cb850 75bb9f4c28 a2ef4b1e07
161c9fe2db 7547463f81 32e4dfd47b f67e5dec68 297d54acea 56f44d448c 0f0fafacd9 4f239bac89
04d74ac648 18c3dc33ee 508cfa7369 1f94cddbae 21ae7b4cd4 bef22ab547 eb04e8cdcf 17e533a086
4fc68409ff e587044449 1f09db5161 05b744f086 89ca4bc02d e626aa48a4 752b5e0339 637d72d6e3
f3bfec580a 165c1ddff3 fb83238e9e 700bfa41c7 25bdc350df 1b899e1a68 3bf13f8c69 7a00729374
d484028532 0eb7fc2c41 a69e30e0c9 9c018e6bff 281e818047 270f0e2157 673e59e76c 5a8a2adb44
a7317d23bf 2bab9b5fe2 081be3ba7d 25e6f21322 b4df1c9cf3 4fbd6609f2 7387932f89 59c37e67b2
c09d227647 547d322b28 a6f0bb410f 710f624ecd 5018452be7 ece239966f 3b8bc7e64c fc73b2b430
901dba6063 b88a7a4550 106e40845f 0064bec8f5 9e6dbb0b5a d26e61388b 31a7084c75 128612a6fc
6af3f46bc3 d2cf8ef070 259ad3cfe6 18b320d577 89e151f035 22060f6410 7ee3288460 cbbc954a8c
2c425e9c69 c59975ab05 05f7004487 2f9203cd2a f09b33f2ef 65470b0ab1 9a23fe662b
190 changed files with 6310 additions and 28870 deletions
.env (5 changes)

@@ -29,6 +29,9 @@
 ## Enable/Disable single backend (useful if only one GPU is available)
 # LOCALAI_SINGLE_ACTIVE_BACKEND=true
 
+# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
+# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
+
 ## Specify a build type. Available: cublas, openblas, clblas.
 ## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
 ## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
@@ -73,7 +76,7 @@
 
 ### Define a list of GRPC Servers for llama-cpp workers to distribute the load
 # https://github.com/ggerganov/llama.cpp/pull/6829
-# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
+# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
 # LLAMACPP_GRPC_SERVERS=""
 
 ### Enable to run parallel requests
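The hunks above add a forced-shutdown switch next to the existing single-backend option and point the worker documentation at the relocated llama.cpp RPC README. A minimal sketch of how these .env entries might be filled in; the host addresses are illustrative, and the comma-separated host:port format is an assumption based on the linked llama.cpp RPC README:

```bash
# Illustrative .env overrides; adjust the worker addresses to your own setup.
LOCALAI_SINGLE_ACTIVE_BACKEND=true
# Only takes effect when LOCALAI_SINGLE_ACTIVE_BACKEND is set:
LOCALAI_FORCE_BACKEND_SHUTDOWN=true
# llama.cpp RPC workers to spread the load across (illustrative addresses):
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052"
```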
.github/dependabot.yml (vendored, 4 changes)

@@ -29,10 +29,6 @@ updates:
 schedule:
 # Check for updates to GitHub Actions every weekday
 interval: "weekly"
-- package-ecosystem: "pip"
-directory: "/backend/python/autogptq"
-schedule:
-interval: "weekly"
 - package-ecosystem: "pip"
 directory: "/backend/python/bark"
 schedule:
.github/workflows/bump_deps.yaml (vendored, 2 changes)

@@ -12,7 +12,7 @@ jobs:
 - repository: "ggml-org/llama.cpp"
 variable: "CPPLLAMA_VERSION"
 branch: "master"
-- repository: "ggerganov/whisper.cpp"
+- repository: "ggml-org/whisper.cpp"
 variable: "WHISPER_CPP_VERSION"
 branch: "master"
 - repository: "PABannier/bark.cpp"
.github/workflows/dependabot_auto.yml (vendored, 2 changes)

@@ -14,7 +14,7 @@ jobs:
 steps:
 - name: Dependabot metadata
 id: metadata
-uses: dependabot/fetch-metadata@v2.3.0
+uses: dependabot/fetch-metadata@v2.4.0
 with:
 github-token: "${{ secrets.GITHUB_TOKEN }}"
 skip-commit-verification: true
.github/workflows/deploy-explorer.yaml (vendored, 2 changes)

@@ -42,7 +42,7 @@ jobs:
 script: |
 sudo rm -rf local-ai/ || true
 - name: copy file via ssh
-uses: appleboy/scp-action@v0.1.7
+uses: appleboy/scp-action@v1.0.0
 with:
 host: ${{ secrets.EXPLORER_SSH_HOST }}
 username: ${{ secrets.EXPLORER_SSH_USERNAME }}
.github/workflows/generate_intel_image.yaml (vendored, 2 changes)

@@ -15,7 +15,7 @@ jobs:
 strategy:
 matrix:
 include:
-- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
+- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
 runs-on: 'ubuntu-latest'
 platforms: 'linux/amd64'
 runs-on: ${{matrix.runs-on}}
.github/workflows/image-pr.yml (vendored, 50 changes)

@@ -33,6 +33,7 @@ jobs:
 # Pushing with all jobs in parallel
 # eats the bandwidth of all the nodes
 max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
+fail-fast: false
 matrix:
 include:
 # This is basically covered by the AIO test
@@ -56,26 +57,35 @@
 runs-on: 'arc-runner-set'
 base-image: "ubuntu:22.04"
 makeflags: "--jobs=3 --output-sync=target"
-# - build-type: 'hipblas'
-# platforms: 'linux/amd64'
-# tag-latest: 'false'
-# tag-suffix: '-hipblas'
-# ffmpeg: 'false'
-# image-type: 'extras'
-# base-image: "rocm/dev-ubuntu-22.04:6.1"
-# grpc-base-image: "ubuntu:22.04"
-# runs-on: 'arc-runner-set'
-# makeflags: "--jobs=3 --output-sync=target"
-# - build-type: 'sycl_f16'
-# platforms: 'linux/amd64'
-# tag-latest: 'false'
-# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-# grpc-base-image: "ubuntu:22.04"
-# tag-suffix: 'sycl-f16-ffmpeg'
-# ffmpeg: 'true'
-# image-type: 'extras'
-# runs-on: 'arc-runner-set'
-# makeflags: "--jobs=3 --output-sync=target"
+- build-type: 'hipblas'
+platforms: 'linux/amd64'
+tag-latest: 'false'
+tag-suffix: '-hipblas'
+ffmpeg: 'false'
+image-type: 'extras'
+base-image: "rocm/dev-ubuntu-22.04:6.1"
+grpc-base-image: "ubuntu:22.04"
+runs-on: 'arc-runner-set'
+makeflags: "--jobs=3 --output-sync=target"
+- build-type: 'sycl_f16'
+platforms: 'linux/amd64'
+tag-latest: 'false'
+base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+grpc-base-image: "ubuntu:22.04"
+tag-suffix: 'sycl-f16-ffmpeg'
+ffmpeg: 'true'
+image-type: 'extras'
+runs-on: 'arc-runner-set'
+makeflags: "--jobs=3 --output-sync=target"
+- build-type: 'vulkan'
+platforms: 'linux/amd64'
+tag-latest: 'false'
+tag-suffix: '-vulkan-ffmpeg-core'
+ffmpeg: 'true'
+image-type: 'core'
+runs-on: 'ubuntu-latest'
+base-image: "ubuntu:22.04"
+makeflags: "--jobs=4 --output-sync=target"
 # core-image-build:
 # uses: ./.github/workflows/image_build.yml
 # with:
.github/workflows/image.yml (vendored, 167 changes)

@@ -45,13 +45,13 @@ jobs:
 - build-type: 'hipblas'
 platforms: 'linux/amd64'
 tag-latest: 'auto'
-tag-suffix: '-hipblas-ffmpeg'
+tag-suffix: '-hipblas-extras'
 ffmpeg: 'true'
 image-type: 'extras'
 aio: "-aio-gpu-hipblas"
 base-image: "rocm/dev-ubuntu-22.04:6.1"
 grpc-base-image: "ubuntu:22.04"
-latest-image: 'latest-gpu-hipblas'
+latest-image: 'latest-gpu-hipblas-extras'
 latest-image-aio: 'latest-aio-gpu-hipblas'
 runs-on: 'arc-runner-set'
 makeflags: "--jobs=3 --output-sync=target"
@@ -59,32 +59,13 @@
 platforms: 'linux/amd64'
 tag-latest: 'false'
 tag-suffix: '-hipblas'
-ffmpeg: 'false'
-image-type: 'extras'
-base-image: "rocm/dev-ubuntu-22.04:6.1"
-grpc-base-image: "ubuntu:22.04"
-runs-on: 'arc-runner-set'
-makeflags: "--jobs=3 --output-sync=target"
-- build-type: 'hipblas'
-platforms: 'linux/amd64'
-tag-latest: 'false'
-tag-suffix: '-hipblas-ffmpeg-core'
 ffmpeg: 'true'
 image-type: 'core'
 base-image: "rocm/dev-ubuntu-22.04:6.1"
 grpc-base-image: "ubuntu:22.04"
 runs-on: 'arc-runner-set'
 makeflags: "--jobs=3 --output-sync=target"
-- build-type: 'hipblas'
-platforms: 'linux/amd64'
-tag-latest: 'false'
-tag-suffix: '-hipblas-core'
-ffmpeg: 'false'
-image-type: 'core'
-base-image: "rocm/dev-ubuntu-22.04:6.1"
-grpc-base-image: "ubuntu:22.04"
-runs-on: 'arc-runner-set'
-makeflags: "--jobs=3 --output-sync=target"
+latest-image: 'latest-gpu-hipblas'
 self-hosted-jobs:
 uses: ./.github/workflows/image_build.yml
 with:
@@ -114,110 +95,58 @@
 max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
 matrix:
 include:
-# Extra images
-- build-type: ''
-#platforms: 'linux/amd64,linux/arm64'
-platforms: 'linux/amd64'
-tag-latest: 'auto'
-tag-suffix: ''
-ffmpeg: ''
-image-type: 'extras'
-runs-on: 'arc-runner-set'
-base-image: "ubuntu:22.04"
-makeflags: "--jobs=3 --output-sync=target"
-- build-type: ''
-platforms: 'linux/amd64'
-tag-latest: 'auto'
-tag-suffix: '-ffmpeg'
-ffmpeg: 'true'
-image-type: 'extras'
-runs-on: 'arc-runner-set'
-base-image: "ubuntu:22.04"
-makeflags: "--jobs=3 --output-sync=target"
 - build-type: 'cublas'
 cuda-major-version: "11"
 cuda-minor-version: "7"
 platforms: 'linux/amd64'
 tag-latest: 'false'
-tag-suffix: '-cublas-cuda11'
-ffmpeg: ''
-image-type: 'extras'
-runs-on: 'arc-runner-set'
-base-image: "ubuntu:22.04"
-makeflags: "--jobs=3 --output-sync=target"
-- build-type: 'cublas'
-cuda-major-version: "12"
-cuda-minor-version: "0"
-platforms: 'linux/amd64'
-tag-latest: 'false'
-tag-suffix: '-cublas-cuda12'
-ffmpeg: ''
-image-type: 'extras'
-runs-on: 'arc-runner-set'
-base-image: "ubuntu:22.04"
-makeflags: "--jobs=3 --output-sync=target"
-- build-type: 'cublas'
-cuda-major-version: "11"
-cuda-minor-version: "7"
-platforms: 'linux/amd64'
-tag-latest: 'auto'
-tag-suffix: '-cublas-cuda11-ffmpeg'
+tag-suffix: '-cublas-cuda11-extras'
 ffmpeg: 'true'
 image-type: 'extras'
 runs-on: 'arc-runner-set'
 base-image: "ubuntu:22.04"
 aio: "-aio-gpu-nvidia-cuda-11"
-latest-image: 'latest-gpu-nvidia-cuda-11'
+latest-image: 'latest-gpu-nvidia-cuda-11-extras'
 latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
 makeflags: "--jobs=3 --output-sync=target"
 - build-type: 'cublas'
 cuda-major-version: "12"
 cuda-minor-version: "0"
 platforms: 'linux/amd64'
-tag-latest: 'auto'
-tag-suffix: '-cublas-cuda12-ffmpeg'
+tag-latest: 'false'
+tag-suffix: '-cublas-cuda12-extras'
 ffmpeg: 'true'
 image-type: 'extras'
 runs-on: 'arc-runner-set'
 base-image: "ubuntu:22.04"
 aio: "-aio-gpu-nvidia-cuda-12"
-latest-image: 'latest-gpu-nvidia-cuda-12'
+latest-image: 'latest-gpu-nvidia-cuda-12-extras'
 latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
 makeflags: "--jobs=3 --output-sync=target"
-- build-type: ''
-#platforms: 'linux/amd64,linux/arm64'
-platforms: 'linux/amd64'
-tag-latest: 'auto'
-tag-suffix: ''
-ffmpeg: ''
-image-type: 'extras'
-base-image: "ubuntu:22.04"
-runs-on: 'arc-runner-set'
-makeflags: "--jobs=3 --output-sync=target"
 - build-type: 'sycl_f16'
 platforms: 'linux/amd64'
-tag-latest: 'auto'
+tag-latest: 'false'
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 grpc-base-image: "ubuntu:22.04"
-tag-suffix: '-sycl-f16-ffmpeg'
+tag-suffix: '-sycl-f16-extras'
 ffmpeg: 'true'
 image-type: 'extras'
 runs-on: 'arc-runner-set'
 aio: "-aio-gpu-intel-f16"
-latest-image: 'latest-gpu-intel-f16'
+latest-image: 'latest-gpu-intel-f16-extras'
 latest-image-aio: 'latest-aio-gpu-intel-f16'
 makeflags: "--jobs=3 --output-sync=target"
 - build-type: 'sycl_f32'
 platforms: 'linux/amd64'
-tag-latest: 'auto'
+tag-latest: 'false'
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 grpc-base-image: "ubuntu:22.04"
-tag-suffix: '-sycl-f32-ffmpeg'
+tag-suffix: '-sycl-f32-extras'
 ffmpeg: 'true'
 image-type: 'extras'
 runs-on: 'arc-runner-set'
 aio: "-aio-gpu-intel-f32"
-latest-image: 'latest-gpu-intel-f32'
+latest-image: 'latest-gpu-intel-f32-extras'
 latest-image-aio: 'latest-aio-gpu-intel-f32'
 makeflags: "--jobs=3 --output-sync=target"
 # Core images
@@ -226,41 +155,23 @@
 tag-latest: 'false'
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 grpc-base-image: "ubuntu:22.04"
-tag-suffix: '-sycl-f16-core'
-ffmpeg: 'false'
+tag-suffix: '-sycl-f16'
+ffmpeg: 'true'
 image-type: 'core'
 runs-on: 'arc-runner-set'
 makeflags: "--jobs=3 --output-sync=target"
+latest-image: 'latest-gpu-intel-f16'
 - build-type: 'sycl_f32'
 platforms: 'linux/amd64'
 tag-latest: 'false'
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 grpc-base-image: "ubuntu:22.04"
-tag-suffix: '-sycl-f32-core'
-ffmpeg: 'false'
-image-type: 'core'
-runs-on: 'arc-runner-set'
-makeflags: "--jobs=3 --output-sync=target"
-- build-type: 'sycl_f16'
-platforms: 'linux/amd64'
-tag-latest: 'false'
-base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-grpc-base-image: "ubuntu:22.04"
-tag-suffix: '-sycl-f16-ffmpeg-core'
-ffmpeg: 'true'
-image-type: 'core'
-runs-on: 'arc-runner-set'
-makeflags: "--jobs=3 --output-sync=target"
-- build-type: 'sycl_f32'
-platforms: 'linux/amd64'
-tag-latest: 'false'
-base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-grpc-base-image: "ubuntu:22.04"
-tag-suffix: '-sycl-f32-ffmpeg-core'
+tag-suffix: '-sycl-f32'
 ffmpeg: 'true'
 image-type: 'core'
 runs-on: 'arc-runner-set'
 makeflags: "--jobs=3 --output-sync=target"
+latest-image: 'latest-gpu-intel-f32'
 
 core-image-build:
 uses: ./.github/workflows/image_build.yml
@@ -293,7 +204,7 @@
 - build-type: ''
 platforms: 'linux/amd64,linux/arm64'
 tag-latest: 'auto'
-tag-suffix: '-ffmpeg-core'
+tag-suffix: ''
 ffmpeg: 'true'
 image-type: 'core'
 base-image: "ubuntu:22.04"
@@ -308,60 +219,38 @@
 cuda-minor-version: "7"
 platforms: 'linux/amd64'
 tag-latest: 'false'
-tag-suffix: '-cublas-cuda11-core'
-ffmpeg: ''
-image-type: 'core'
-base-image: "ubuntu:22.04"
-runs-on: 'arc-runner-set'
-makeflags: "--jobs=4 --output-sync=target"
-skip-drivers: 'false'
-- build-type: 'cublas'
-cuda-major-version: "12"
-cuda-minor-version: "0"
-platforms: 'linux/amd64'
-tag-latest: 'false'
-tag-suffix: '-cublas-cuda12-core'
-ffmpeg: ''
-image-type: 'core'
-base-image: "ubuntu:22.04"
-runs-on: 'arc-runner-set'
-makeflags: "--jobs=4 --output-sync=target"
-skip-drivers: 'false'
-- build-type: 'cublas'
-cuda-major-version: "11"
-cuda-minor-version: "7"
-platforms: 'linux/amd64'
-tag-latest: 'false'
-tag-suffix: '-cublas-cuda11-ffmpeg-core'
+tag-suffix: '-cublas-cuda11'
 ffmpeg: 'true'
 image-type: 'core'
 runs-on: 'arc-runner-set'
 base-image: "ubuntu:22.04"
 makeflags: "--jobs=4 --output-sync=target"
 skip-drivers: 'false'
+latest-image: 'latest-gpu-nvidia-cuda-12'
 - build-type: 'cublas'
 cuda-major-version: "12"
 cuda-minor-version: "0"
 platforms: 'linux/amd64'
 tag-latest: 'false'
-tag-suffix: '-cublas-cuda12-ffmpeg-core'
+tag-suffix: '-cublas-cuda12'
 ffmpeg: 'true'
 image-type: 'core'
 runs-on: 'arc-runner-set'
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 makeflags: "--jobs=4 --output-sync=target"
+latest-image: 'latest-gpu-nvidia-cuda-12'
 - build-type: 'vulkan'
 platforms: 'linux/amd64'
 tag-latest: 'false'
-tag-suffix: '-vulkan-ffmpeg-core'
-latest-image: 'latest-vulkan-ffmpeg-core'
+tag-suffix: '-vulkan'
 ffmpeg: 'true'
 image-type: 'core'
 runs-on: 'arc-runner-set'
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 makeflags: "--jobs=4 --output-sync=target"
+latest-image: 'latest-gpu-vulkan'
 gh-runner:
 uses: ./.github/workflows/image_build.yml
 with:
@@ -394,8 +283,8 @@
 cuda-minor-version: "0"
 platforms: 'linux/arm64'
 tag-latest: 'false'
-tag-suffix: '-nvidia-l4t-arm64-core'
-latest-image: 'latest-nvidia-l4t-arm64-core'
+tag-suffix: '-nvidia-l4t-arm64'
+latest-image: 'latest-nvidia-l4t-arm64'
 ffmpeg: 'true'
 image-type: 'core'
 base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
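The net effect of this workflow rewrite is a simpler tag scheme: ffmpeg is always included, the '-ffmpeg' and '-core' suffixes disappear, and the Python-extras builds gain an '-extras' suffix. A hedged sketch of pulling the renamed tags; the repository name is an assumption, only the tag names come from the workflow above:

```bash
# Assumed repository name; the tags mirror the new tag-suffix/latest-image values above.
docker pull localai/localai:latest-gpu-nvidia-cuda-12          # core image (ffmpeg included by default)
docker pull localai/localai:latest-gpu-nvidia-cuda-12-extras   # extras image with the Python backends
docker pull localai/localai:latest-gpu-hipblas-extras
```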
.github/workflows/notify-models.yaml (vendored, 10 changes)

@@ -8,7 +8,7 @@ jobs:
 notify-discord:
 if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
 env:
-MODEL_NAME: hermes-2-theta-llama-3-8b
+MODEL_NAME: gemma-3-12b-it
 runs-on: ubuntu-latest
 steps:
 - uses: actions/checkout@v4
@@ -16,7 +16,7 @@
 fetch-depth: 0 # needed to checkout all branches for this Action to work
 - uses: mudler/localai-github-action@v1
 with:
-model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
+model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
 # Check the PR diff using the current branch and the base branch of the PR
 - uses: GrantBirki/git-diff-action@v2.8.0
 id: git-diff-action
@@ -79,7 +79,7 @@
 args: ${{ steps.summarize.outputs.message }}
 - name: Setup tmate session if fails
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -87,7 +87,7 @@
 notify-twitter:
 if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
 env:
-MODEL_NAME: hermes-2-theta-llama-3-8b
+MODEL_NAME: gemma-3-12b-it
 runs-on: ubuntu-latest
 steps:
 - uses: actions/checkout@v4
@@ -161,7 +161,7 @@
 TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
 - name: Setup tmate session if fails
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
.github/workflows/notify-releases.yaml (vendored, 2 changes)

@@ -14,7 +14,7 @@ jobs:
 steps:
 - uses: mudler/localai-github-action@v1
 with:
-model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
+model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
 - name: Summarize
 id: summarize
 run: |
.github/workflows/release.yaml (vendored, 16 changes)

@@ -36,6 +36,7 @@ jobs:
 sudo apt-get update
 sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
 sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
+make install-go-tools
 - name: Install CUDA Dependencies
 run: |
 curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
@@ -123,7 +124,7 @@
 release/*
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -151,6 +152,7 @@
 run: |
 sudo apt-get update
 sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+make install-go-tools
 - name: Intel Dependencies
 run: |
 wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -232,7 +234,7 @@
 release/*
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -253,8 +255,7 @@
 - name: Dependencies
 run: |
 brew install protobuf grpc
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+make install-go-tools
 - name: Build
 id: build
 run: |
@@ -275,7 +276,7 @@
 release/*
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -295,8 +296,7 @@
 - name: Dependencies
 run: |
 brew install protobuf grpc libomp llvm
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+make install-go-tools
 - name: Build
 id: build
 run: |
@@ -317,7 +317,7 @@
 release/*
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
.github/workflows/secscan.yaml (vendored, 2 changes)

@@ -18,7 +18,7 @@ jobs:
 if: ${{ github.actor != 'dependabot[bot]' }}
 - name: Run Gosec Security Scanner
 if: ${{ github.actor != 'dependabot[bot]' }}
-uses: securego/gosec@v2.22.0
+uses: securego/gosec@v2.22.4
 with:
 # we let the report trigger content trigger a failure using the GitHub Security features.
 args: '-no-fail -fmt sarif -out results.sarif ./...'
.github/workflows/test-extra.yml (vendored, 20 changes)

@@ -78,6 +78,26 @@ jobs:
 make --jobs=5 --output-sync=target -C backend/python/diffusers
 make --jobs=5 --output-sync=target -C backend/python/diffusers test
 
+#tests-vllm:
+# runs-on: ubuntu-latest
+# steps:
+# - name: Clone
+# uses: actions/checkout@v4
+# with:
+# submodules: true
+# - name: Dependencies
+# run: |
+# sudo apt-get update
+# sudo apt-get install -y build-essential ffmpeg
+# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+# sudo apt-get install -y libopencv-dev
+# # Install UV
+# curl -LsSf https://astral.sh/uv/install.sh | sh
+# pip install --user --no-cache-dir grpcio-tools==1.64.1
+# - name: Test vllm backend
+# run: |
+# make --jobs=5 --output-sync=target -C backend/python/vllm
+# make --jobs=5 --output-sync=target -C backend/python/vllm test
 # tests-transformers-musicgen:
 # runs-on: ubuntu-latest
 # steps:
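The diffusers job above (and the newly commented-out vllm job) reduce to plain make targets, so the same tests can be reproduced outside CI. A rough local sketch, assuming an Ubuntu host and borrowing the dependency list from the workflow steps shown above:

```bash
# Reproduce an extra-backend test suite locally (mirrors the workflow steps above).
sudo apt-get update
sudo apt-get install -y build-essential ffmpeg ca-certificates cmake curl patch python3-pip
curl -LsSf https://astral.sh/uv/install.sh | sh     # uv, as installed in the commented vllm job
pip install --user --no-cache-dir grpcio-tools
make --jobs=5 --output-sync=target -C backend/python/diffusers       # build the backend
make --jobs=5 --output-sync=target -C backend/python/diffusers test  # run its test suite
```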
.github/workflows/test.yml (vendored, 11 changes)

@@ -71,7 +71,7 @@ jobs:
 run: |
 sudo apt-get update
 sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
-sudo apt-get install -y libgmock-dev
+sudo apt-get install -y libgmock-dev clang
 curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
 sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
 gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -96,6 +96,7 @@
 
 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install github.com/GeertJohan/go.rice/rice@latest
 
 # The python3-grpc-tools package in 22.04 is too old
 pip install --user grpcio-tools
@@ -130,7 +131,7 @@
 PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -183,6 +184,7 @@
 rm protoc.zip
 go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install github.com/GeertJohan/go.rice/rice@latest
 PATH="$PATH:$HOME/go/bin" make protogen-go
 - name: Build images
 run: |
@@ -194,7 +196,7 @@
 make run-e2e-aio
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -222,6 +224,7 @@
 run: |
 brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
 pip install --user --no-cache-dir grpcio-tools
+go install github.com/GeertJohan/go.rice/rice@latest
 - name: Test
 run: |
 export C_INCLUDE_PATH=/usr/local/include
@@ -232,7 +235,7 @@
 BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.19
+uses: mxschmitt/action-tmate@v3.22
 with:
 detached: true
 connect-timeout-seconds: 180
Dockerfile (17 changes)

@@ -15,7 +15,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
 
 RUN apt-get update && \
 apt-get install -y --no-install-recommends \
@@ -46,9 +46,10 @@ EOT
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
 ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
 
-# Install grpc compilers
+# Install grpc compilers and rice
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
+go install github.com/GeertJohan/go.rice/rice@latest
 
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
@@ -300,10 +301,9 @@ COPY .git .
 RUN make prepare
 
 ## Build the binary
-## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
-## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
-## (both will use CUDA or hipblas for the actual computation)
-RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
+## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
+## Otherwise just run the normal build
+RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
 SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
 else \
 make build; \
@@ -431,9 +431,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
 make -C backend/python/vllm \
 ; fi && \
-if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-make -C backend/python/autogptq \
-; fi && \
 if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
 make -C backend/python/bark \
 ; fi && \
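Since autogptq is removed from both EXTERNAL_GRPC_BACKENDS and the EXTRA_BACKENDS install loop, an extras image now selects only the remaining Python backends. A sketch of a local image build, assuming BUILD_TYPE, IMAGE_TYPE and EXTRA_BACKENDS are exposed as build arguments; the RUN conditions above suggest this, but the ARG declarations are outside this excerpt:

```bash
# Hypothetical build invocation; the build-arg names mirror the variables used in the RUN steps above.
docker build \
  --build-arg BUILD_TYPE=cublas \
  --build-arg IMAGE_TYPE=extras \
  --build-arg EXTRA_BACKENDS="vllm diffusers bark" \
  -t local-ai:custom .
```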
102
Makefile
102
Makefile
|
@ -6,11 +6,11 @@ BINARY_NAME=local-ai
|
||||||
DETECT_LIBS?=true
|
DETECT_LIBS?=true
|
||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
CPPLLAMA_VERSION?=4663bd353c61c1136cd8a97b9908755e4ab30cec
|
CPPLLAMA_VERSION?=6a2bc8bfb7cd502e5ebc72e36c97a6f848c21c2c
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
WHISPER_CPP_VERSION?=d1f114da61b1ae1e70b03104fad42c9dd666feeb
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
|
@ -21,8 +21,11 @@ BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||||
BARKCPP_VERSION?=v1.0.0
|
BARKCPP_VERSION?=v1.0.0
|
||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=19d876ee300a055629926ff836489901f734f2b7
|
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
|
||||||
|
|
||||||
|
# ONEAPI variables for SYCL
|
||||||
|
export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
ONNX_VERSION?=1.20.0
|
ONNX_VERSION?=1.20.0
|
||||||
ONNX_ARCH?=x64
|
ONNX_ARCH?=x64
|
||||||
|
@ -30,8 +33,12 @@ ONNX_OS?=linux
|
||||||
|
|
||||||
export BUILD_TYPE?=
|
export BUILD_TYPE?=
|
||||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||||
export CMAKE_ARGS?=
|
export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||||
export BACKEND_LIBS?=
|
export BACKEND_LIBS?=
|
||||||
|
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
|
||||||
|
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
|
||||||
|
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
|
||||||
|
|
||||||
CGO_LDFLAGS?=
|
CGO_LDFLAGS?=
|
||||||
CGO_LDFLAGS_WHISPER?=
|
CGO_LDFLAGS_WHISPER?=
|
||||||
|
@ -81,6 +88,7 @@ endif
|
||||||
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
||||||
ifeq ($(NATIVE),false)
|
ifeq ($(NATIVE),false)
|
||||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Detect if we are running on arm64
|
# Detect if we are running on arm64
|
||||||
|
@ -108,13 +116,31 @@ ifeq ($(OS),Darwin)
|
||||||
# disable metal if on Darwin and any other value is explicitly passed.
|
# disable metal if on Darwin and any other value is explicitly passed.
|
||||||
else ifneq ($(BUILD_TYPE),metal)
|
else ifneq ($(BUILD_TYPE),metal)
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
export GGML_NO_ACCELERATE=1
|
export GGML_NO_ACCELERATE=1
|
||||||
export GGML_NO_METAL=1
|
export GGML_NO_METAL=1
|
||||||
|
GO_LDFLAGS_WHISPER+=-lggml-blas
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
# -lcblas removed: it seems to always be listed as a duplicate flag.
|
|
||||||
CGO_LDFLAGS += -framework Accelerate
|
CGO_LDFLAGS += -framework Accelerate
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
|
else
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-blas
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
CGO_LDFLAGS_WHISPER+=-lgomp
|
CGO_LDFLAGS_WHISPER+=-lgomp
|
||||||
|
@ -126,21 +152,29 @@ ifeq ($(BUILD_TYPE),openblas)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
|
||||||
export GGML_CUDA=1
|
export GGML_CUDA=1
|
||||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
|
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),vulkan)
|
ifeq ($(BUILD_TYPE),vulkan)
|
||||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
export GGML_SYCL=1
|
export GGML_SYCL=1
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL=ON
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
export GGML_SYCL_F16=1
|
export GGML_SYCL_F16=1
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),hipblas)
|
ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
@ -151,7 +185,7 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
export STABLE_BUILD_TYPE=
|
export STABLE_BUILD_TYPE=
|
||||||
export GGML_HIP=1
|
export GGML_HIP=1
|
||||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
|
||||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||||
	$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a

backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
	$(MAKE) -C backend/go/image/stablediffusion-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" stablediffusion-ggml
sources/onnxruntime:
	mkdir -p sources/onnxruntime

sources/whisper.cpp:
	git checkout $(WHISPER_CPP_VERSION) && \
	git submodule update --init --recursive --depth 1 --single-branch

sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
	cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
	cd sources/whisper.cpp/build && cmake --build . --config Release
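For reference, the same configure-and-build flow can be reproduced by hand. This is a minimal sketch assuming a macOS/Metal checkout of `sources/whisper.cpp`; it simply mirrors the `WHISPER_CMAKE_ARGS` values set earlier in this Makefile:

```bash
cd sources/whisper.cpp
cmake -B build \
  -DGGML_METAL=ON \
  -DGGML_METAL_USE_BF16=ON \
  -DGGML_METAL_EMBED_LIBRARY=ON \
  -DWHISPER_BUILD_EXAMPLES=OFF \
  -DWHISPER_BUILD_TESTS=OFF \
  -DWHISPER_BUILD_SERVER=OFF \
  -DGGML_OPENMP=OFF
cmake --build build --config Release
# the static library should land under build/src/libwhisper.a
```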
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp

clean-dc: clean
	cp -r /build/backend-assets /workspace/backend-assets

## Install Go tools
install-go-tools:
	go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
	go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
	go install github.com/GeertJohan/go.rice/rice@latest
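A quick sanity check after `make install-go-tools` — a sketch assuming the tools are installed into `$(go env GOPATH)/bin` and that directory is on your `PATH`:

```bash
export PATH="$PATH:$(go env GOPATH)/bin"
# all three binaries must resolve for protogen-go and the build to work
command -v protoc-gen-go protoc-gen-go-grpc rice || echo "Go tools missing from PATH"
```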
## Build:
build: prepare backend-assets grpcs install-go-tools ## Build the project
	$(info ${GREEN}I local-ai build info:${RESET})
	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
	$(MAKE) backend-assets/lib
	cp -f $(BACKEND_LIBS) backend-assets/lib/
endif
	rm -rf $(BINARY_NAME) || true
	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
	rice append --exec $(BINARY_NAME)

build-minimal:
	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
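As an illustration, a typical invocation selects an acceleration backend through `BUILD_TYPE` (values such as `cublas`, `hipblas`, `vulkan` or `sycl_f16` appear in the conditionals above); the exact combination depends on your toolchain:

```bash
# CPU-only build
make build

# CUDA build (assumes the CUDA toolkit is installed and CUDA_LIBPATH points at it)
make BUILD_TYPE=cublas build

# Smaller build that only ships the llama-cpp-avx2 backend
make build-minimal
```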
prepare-test: grpcs
	cp -rf backend-assets core/http
	cp tests/models_fixtures/* test-models

## Test targets
test: prepare test-models/testmodel.ggml grpcs
	@echo 'Running tests'
	export GO_TAGS="tts debug"
protogen: protogen-go protogen-python
protogen-clean: protogen-go-clean protogen-python-clean

.PHONY: protogen-go
protogen-go: install-go-tools
	mkdir -p pkg/grpc/proto
	protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
		backend/backend.proto

protogen-go-clean:
	$(RM) bin/*

.PHONY: protogen-python
protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen

.PHONY: protogen-python-clean
protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean

.PHONY: bark-protogen
bark-protogen:
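To regenerate the stubs after touching `backend/backend.proto`, the targets above can be driven individually — a sketch, assuming `protoc` and the Python backend environments are already set up:

```bash
# Go stubs (install-go-tools is pulled in automatically)
make protogen-go

# All Python backends, or a single one
make protogen-python
make -C backend/python/bark protogen
```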
## GRPC
# Note: it is duplicated in the Dockerfile
prepare-extra-conda-environments: protogen-python
	$(MAKE) -C backend/python/bark
	$(MAKE) -C backend/python/coqui
	$(MAKE) -C backend/python/diffusers

prepare-test-extra: protogen-python
	$(MAKE) -C backend/python/transformers
	$(MAKE) -C backend/python/diffusers
	$(MAKE) -C backend/python/vllm

test-extra: prepare-test-extra
	$(MAKE) -C backend/python/transformers test
	$(MAKE) -C backend/python/diffusers test
	$(MAKE) -C backend/python/vllm test

backend-assets:
	mkdir -p backend-assets
ifneq ($(UPX),)
	$(UPX) backend-assets/grpc/silero-vad
endif

backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
ifneq ($(UPX),)
	$(UPX) backend-assets/grpc/whisper
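Building a single gRPC backend is often enough while iterating; a sketch assuming the whisper sources were already fetched with `make get-sources`:

```bash
# Build only the whisper transcription backend (pick BUILD_TYPE for your hardware)
make BUILD_TYPE=metal backend-assets/grpc/whisper
```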
docker-image-intel:
	docker build \
		--progress plain \
		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
		--build-arg GO_TAGS="none" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

docker-image-intel-xpu:
	docker build \
		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
		--build-arg GO_TAGS="none" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
README.md

<h1 align="center">
<br>
<img height="300" src="./core/http/static/logo.png"> <br>
LocalAI
<br>
</h1>
<p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank">
<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/>
</a>
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
</a>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
[](https://t.me/localaiofficial_bot)

[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml) [](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml) [](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml) [](https://artifacthub.io/packages/search?repo=localai)

**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
## 📚🆕 Local Stack Family

🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:

<table>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalAGI">
<img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
<p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
</td>
</tr>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalRecall">
<img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
<p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
</td>
</tr>
</table>

## Screenshots

| Talk Interface | Generate Audio |
| --- | --- |
|  |  |

| Models Overview | Generate Images |
| --- | --- |
|  |  |

| Chat Interface | Home |
| --- | --- |
|  |  |

| Login | Swarm |
| --- | --- |
|  |  |
## 💻 Quickstart

Run the installer script:

```bash
# Basic installation
curl https://localai.io/install.sh | sh
```

For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).

Or run with docker:

### CPU only image:

```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
```
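Once the container is up you can check that the API answers; a minimal sketch (the `/readyz` health endpoint used here is an assumption — adjust to your deployment):

```bash
# returns success once the server is ready to accept requests
curl -sf http://localhost:8080/readyz && echo "LocalAI is ready"
```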
### NVIDIA GPU Images:

```bash
# CUDA 12.0 with core features
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

# CUDA 12.0 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12-extras

# CUDA 11.7 with core features
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11

# CUDA 11.7 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11-extras

# NVIDIA Jetson (L4T) ARM64
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
```
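To keep downloaded models across container restarts you can mount a host directory; the in-container models path `/build/models` is an assumption here — adjust it to your image's configuration:

```bash
docker run -ti --name local-ai -p 8080:8080 --gpus all \
  -v $PWD/models:/build/models \
  localai/localai:latest-gpu-nvidia-cuda-12
```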
### AMD GPU Images (ROCm):

```bash
# ROCm with core features
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas

# ROCm with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas-extras
```
### Intel GPU Images (oneAPI):

```bash
# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16

# Intel GPU with FP16 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16-extras

# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32

# Intel GPU with FP32 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32-extras
```
### Vulkan GPU Images:

```bash
# Vulkan with core features
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
### AIO Images (pre-downloaded models):

```bash
# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12

# NVIDIA CUDA 11 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11

# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16

# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
```

For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
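The AIO images download their model set on first start; a quick sketch to watch the progress and then see what was configured (the OpenAI-compatible `/v1/models` endpoint is assumed to list them):

```bash
# follow the first-boot download progress
docker logs -f local-ai

# list the configured models once the server is up
curl -s http://localhost:8080/v1/models
```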
To load models:

```bash
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
local-ai run oci://localai/phi-2:latest
```

For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
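After a model is loaded, the OpenAI-compatible endpoints can be used directly; a minimal sketch assuming the model was installed under the name `phi-2`:

```bash
curl -s http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "phi-2",
    "messages": [{"role": "user", "content": "Hello, how are you?"}]
  }'
```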
## 📰 Latest project news

- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OuteTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

## 🚀 [Features](https://localai.io/features/)

- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!

### 🔗 Community and integrations
<|im_start|>assistant

download_files:
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
  sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
  uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
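The `sha256` field lets you verify the downloaded weights; a small sketch assuming the file was fetched into the current directory:

```bash
echo "4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4  localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf" | sha256sum -c -
```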
assets.go

package main

import (
	rice "github.com/GeertJohan/go.rice"
)

var backendAssets *rice.Box

func init() {
	var err error
	backendAssets, err = rice.FindBox("backend-assets")
	if err != nil {
		panic(err)
	}
}
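With go.rice the assets are no longer embedded at compile time via `go:embed`; instead the box is appended to the binary after the build, as the `build` target does with `rice append`. A sketch of the manual steps (the binary name `local-ai` is assumed here):

```bash
go install github.com/GeertJohan/go.rice/rice@latest
go build -o local-ai ./
rice append --exec local-ai   # bundles backend-assets/ into the binary
```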
service Backend {
  rpc PredictStream(PredictOptions) returns (stream Reply) {}
  rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
  rpc GenerateImage(GenerateImageRequest) returns (Result) {}
  rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
  rpc TTS(TTSRequest) returns (Result) {}
  rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
message ModelOptions {
  int32 NGQA = 20;
  string ModelFile = 21;

  // Diffusers
  string PipelineType = 26;
message GenerateImageRequest {
  int32 CLIPSkip = 11;
}

message GenerateVideoRequest {
  string prompt = 1;
  string start_image = 2; // Path or base64 encoded image for the start frame
  string end_image = 3;   // Path or base64 encoded image for the end frame
  int32 width = 4;
  int32 height = 5;
  int32 num_frames = 6;   // Number of frames to generate
  int32 fps = 7;          // Frames per second
  int32 seed = 8;
  float cfg_scale = 9;    // Classifier-free guidance scale
  string dst = 10;        // Output path for the generated video
}

message TTSRequest {
  string text = 1;
  string model = 2;
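For illustration, the new RPC could be exercised with `grpcurl` once a backend process is listening; the address, the `backend` proto package name and the standalone-backend setup are assumptions here, not something this change prescribes:

```bash
grpcurl -plaintext \
  -import-path backend -proto backend.proto \
  -d '{"prompt":"a rotating globe","width":512,"height":512,"num_frames":24,"fps":8,"dst":"/tmp/out.mp4"}' \
  localhost:50051 backend.Backend/GenerateVideo
```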
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
# set(TARGET myclip)
# add_library(${TARGET} clip.cpp clip.h clip-impl.h llava.cpp llava.h)
# install(TARGETS ${TARGET} LIBRARY)
# target_include_directories(myclip PUBLIC .)
# target_include_directories(myclip PUBLIC ../..)
# target_include_directories(myclip PUBLIC ../../common)
# target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
# target_compile_features(${TARGET} PRIVATE cxx_std_11)
# if (NOT MSVC)
#     target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
# endif()
# END CLIP hack
add_library(hw_grpc_proto
  ${hw_proto_srcs}
  ${hw_proto_hdrs} )

add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)

target_include_directories(${TARGET} PRIVATE ../llava)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})

target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
  absl::flags_parse
  gRPC::${_REFLECTION}
  gRPC::${_GRPC_GRPCPP}
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF

ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON \
	-DCMAKE_C_COMPILER=icx \
	-DCMAKE_CXX_COMPILER=icpx \
	-DCMAKE_CXX_FLAGS="-fsycl" \
	-DGGML_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
	-DCMAKE_C_COMPILER=icx \
	-DCMAKE_CXX_COMPILER=icpx \
	-DCMAKE_CXX_FLAGS="-fsycl"
endif

llama.cpp:
	git checkout -b build $(LLAMA_VERSION) && \
	git submodule update --init --recursive --depth 1 --single-branch

llama.cpp/tools/grpc-server: llama.cpp
	mkdir -p llama.cpp/tools/grpc-server
	bash prepare.sh

purge:
	rm -rf llama.cpp/build
	rm -rf llama.cpp/tools/grpc-server
	rm -rf grpc-server

clean: purge
	rm -rf llama.cpp

grpc-server: llama.cpp llama.cpp/tools/grpc-server
	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
	+bash -c "source $(ONEAPI_VARS); \
backend/cpp/llama/json.hpp (vendored; diff suppressed because it is too large)
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 3cd0d2fa..6c5e811a 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
 struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
 int* patches_data = (int*)malloc(ggml_nbytes(patches));
for patch in $(ls patches); do
    patch -d llama.cpp/ -p1 < patches/$patch
done

set -e

cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/common/json.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/tools/server/httplib.h llama.cpp/tools/grpc-server/

set +e
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
    echo "grpc-server already added"
else
    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
fi
set -e

# Now to keep maximum compatibility with the original server.cpp, we need to remove the index.html.gz.hpp and loading.html.hpp includes
# and remove the main function
# TODO: upstream this to the original server.cpp by extracting the upstream main function to a separate file
awk '
/int[ \t]+main[ \t]*\(/ {            # If the line starts the main function
    in_main=1;                       # Set a flag
    open_braces=0;                   # Track number of open braces
}
in_main {
    open_braces += gsub(/\{/, "{");  # Count opening braces
    open_braces -= gsub(/\}/, "}");  # Count closing braces
    if (open_braces == 0) {          # If all braces are closed
        in_main=0;                   # End skipping
    }
    next;                            # Skip lines inside main
}
!in_main                             # Print lines not inside main
' "llama.cpp/tools/server/server.cpp" > llama.cpp/tools/grpc-server/server.cpp

# remove index.html.gz.hpp and loading.html.hpp includes
if [[ "$OSTYPE" == "darwin"* ]]; then
    # macOS
    sed -i '' '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
else
    # Linux and others
    sed -i '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
fi
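The awk filter above drops everything from the opening of `main` to its matching closing brace by counting braces per line; a tiny self-contained demonstration of the same idea (the sample file is hypothetical):

```bash
cat > /tmp/sample.c <<'EOF'
#include <stdio.h>
static int twice(int x) { return 2 * x; }
int main(void) {
    printf("%d\n", twice(21));
    return 0;
}
EOF

awk '
/int[ \t]+main[ \t]*\(/ { in_main=1; open_braces=0; }
in_main {
    open_braces += gsub(/\{/, "{");
    open_braces -= gsub(/\}/, "}");
    if (open_braces == 0) { in_main=0; }
    next;
}
!in_main
' /tmp/sample.c
# prints only the include line and the twice() helper; main() is stripped
```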
backend/cpp/llama/utils.hpp (vendored; the whole 483-line file is removed in this change)
// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <set>
|
|
||||||
#include <mutex>
|
|
||||||
#include <condition_variable>
|
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
#include "json.hpp"
|
|
||||||
|
|
||||||
#include "../llava/clip.h"
|
|
||||||
|
|
||||||
using json = nlohmann::json;
|
|
||||||
|
|
||||||
extern bool server_verbose;
|
|
||||||
|
|
||||||
#ifndef SERVER_VERBOSE
|
|
||||||
#define SERVER_VERBOSE 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if SERVER_VERBOSE != 1
|
|
||||||
#define LOG_VERBOSE(MSG, ...)
|
|
||||||
#else
|
|
||||||
#define LOG_VERBOSE(MSG, ...) \
|
|
||||||
do \
|
|
||||||
{ \
|
|
||||||
if (server_verbose) \
|
|
||||||
{ \
|
|
||||||
server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
|
|
||||||
#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__)
|
|
||||||
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
|
|
||||||
|
|
||||||
//
|
|
||||||
// parallel
|
|
||||||
//
|
|
||||||
|
|
||||||
enum server_state {
|
|
||||||
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
|
|
||||||
SERVER_STATE_READY, // Server is ready and model is loaded
|
|
||||||
SERVER_STATE_ERROR // An error occurred, load_model failed
|
|
||||||
};
|
|
||||||
|
|
||||||
enum task_type {
|
|
||||||
TASK_TYPE_COMPLETION,
|
|
||||||
TASK_TYPE_CANCEL,
|
|
||||||
TASK_TYPE_NEXT_RESPONSE
|
|
||||||
};
|
|
||||||
|
|
||||||
struct task_server {
|
|
||||||
int id = -1; // to be filled by llama_server_queue
|
|
||||||
int target_id;
|
|
||||||
task_type type;
|
|
||||||
json data;
|
|
||||||
bool infill_mode = false;
|
|
||||||
bool embedding_mode = false;
|
|
||||||
int multitask_id = -1;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct task_result {
|
|
||||||
int id;
|
|
||||||
int multitask_id = -1;
|
|
||||||
bool stop;
|
|
||||||
bool error;
|
|
||||||
json result_json;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct task_multi {
|
|
||||||
int id;
|
|
||||||
std::set<int> subtasks_remaining{};
|
|
||||||
std::vector<task_result> results{};
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: can become bool if we can't find use of more states
|
|
||||||
enum slot_state
|
|
||||||
{
|
|
||||||
IDLE,
|
|
||||||
PROCESSING,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum slot_command
|
|
||||||
{
|
|
||||||
NONE,
|
|
||||||
LOAD_PROMPT,
|
|
||||||
RELEASE,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct slot_params
|
|
||||||
{
|
|
||||||
bool stream = true;
|
|
||||||
bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
|
|
||||||
|
|
||||||
uint32_t seed = -1; // RNG seed
|
|
||||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
|
||||||
int32_t n_predict = -1; // new tokens to predict
|
|
||||||
|
|
||||||
std::vector<std::string> antiprompt;
|
|
||||||
|
|
||||||
json input_prefix;
|
|
||||||
json input_suffix;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct slot_image
|
|
||||||
{
|
|
||||||
int32_t id;
|
|
||||||
|
|
||||||
bool request_encode_image = false;
|
|
||||||
float * image_embedding = nullptr;
|
|
||||||
int32_t image_tokens = 0;
|
|
||||||
|
|
||||||
clip_image_u8 * img_data;
|
|
||||||
|
|
||||||
std::string prefix_prompt; // before of this image
|
|
||||||
};
|
|
||||||
|
|
||||||
// completion token output with probabilities
|
|
||||||
struct completion_token_output
|
|
||||||
{
|
|
||||||
struct token_prob
|
|
||||||
{
|
|
||||||
llama_token tok;
|
|
||||||
float prob;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<token_prob> probs;
|
|
||||||
llama_token tok;
|
|
||||||
std::string text_to_send;
|
|
||||||
};
|
|
||||||
|
|
||||||
static inline void server_log(const char *level, const char *function, int line,
|
|
||||||
const char *message, const nlohmann::ordered_json &extra)
|
|
||||||
{
|
|
||||||
nlohmann::ordered_json log
|
|
||||||
{
|
|
||||||
{"timestamp", time(nullptr)},
|
|
||||||
{"level", level},
|
|
||||||
{"function", function},
|
|
||||||
{"line", line},
|
|
||||||
{"message", message},
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!extra.empty())
|
|
||||||
{
|
|
||||||
log.merge_patch(extra);
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
|
|
||||||
printf("%.*s\n", (int)str.size(), str.data());
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// server utils
|
|
||||||
//
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static T json_value(const json &body, const std::string &key, const T &default_value)
|
|
||||||
{
|
|
||||||
// Fallback null to default value
|
|
||||||
return body.contains(key) && !body.at(key).is_null()
|
|
||||||
? body.value(key, default_value)
|
|
||||||
: default_value;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline std::string format_chatml(std::vector<json> messages)
|
|
||||||
{
|
|
||||||
std::ostringstream chatml_msgs;
|
|
||||||
|
|
||||||
for (auto it = messages.begin(); it != messages.end(); ++it) {
|
|
||||||
chatml_msgs << "<|im_start|>"
|
|
||||||
<< json_value(*it, "role", std::string("user")) << '\n';
|
|
||||||
chatml_msgs << json_value(*it, "content", std::string(""))
|
|
||||||
<< "<|im_end|>\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
chatml_msgs << "<|im_start|>assistant" << '\n';
|
|
||||||
|
|
||||||
return chatml_msgs.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// work queue utils
|
|
||||||
//
|
|
||||||
|
|
||||||
struct llama_server_queue {
|
|
||||||
int id = 0;
|
|
||||||
std::mutex mutex_tasks;
|
|
||||||
// queues
|
|
||||||
std::vector<task_server> queue_tasks;
|
|
||||||
std::vector<task_server> queue_tasks_deferred;
|
|
||||||
std::vector<task_multi> queue_multitasks;
|
|
||||||
std::condition_variable condition_tasks;
|
|
||||||
// callback functions
|
|
||||||
std::function<void(task_server&)> callback_new_task;
|
|
||||||
std::function<void(task_multi&)> callback_finish_multitask;
|
|
||||||
std::function<void(void)> callback_all_task_finished;
|
|
||||||
|
|
||||||
// Add a new task to the end of the queue
|
|
||||||
int post(task_server task) {
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
|
||||||
if (task.id == -1) {
|
|
||||||
task.id = id++;
|
|
||||||
}
|
|
||||||
queue_tasks.push_back(std::move(task));
|
|
||||||
condition_tasks.notify_one();
|
|
||||||
return task.id;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add a new task, but defer until one slot is available
|
|
||||||
void defer(task_server task) {
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
|
||||||
queue_tasks_deferred.push_back(std::move(task));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the next id for creating anew task
|
|
||||||
int get_new_id() {
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
|
||||||
return id++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register function to process a new task
|
|
||||||
void on_new_task(std::function<void(task_server&)> callback) {
|
|
||||||
callback_new_task = callback;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register function to process a multitask
|
|
||||||
void on_finish_multitask(std::function<void(task_multi&)> callback) {
|
|
||||||
callback_finish_multitask = callback;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register the function to be called when the batch of tasks is finished
|
|
||||||
void on_all_tasks_finished(std::function<void(void)> callback) {
|
|
||||||
callback_all_task_finished = callback;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Call when the state of one slot is changed
|
|
||||||
void notify_slot_changed() {
|
|
||||||
// move deferred tasks back to main loop
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
|
||||||
for (auto & task : queue_tasks_deferred) {
|
|
||||||
queue_tasks.push_back(std::move(task));
|
|
||||||
}
|
|
||||||
queue_tasks_deferred.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start the main loop. This call is blocking
|
|
||||||
[[noreturn]]
|
|
||||||
void start_loop() {
|
|
||||||
while (true) {
|
|
||||||
// new task arrived
|
|
||||||
LOG_VERBOSE("have new task", {});
|
|
||||||
{
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
|
||||||
if (queue_tasks.empty()) {
|
|
||||||
lock.unlock();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
task_server task = queue_tasks.front();
|
|
||||||
queue_tasks.erase(queue_tasks.begin());
|
|
||||||
lock.unlock();
|
|
||||||
LOG_VERBOSE("callback_new_task", {});
|
|
||||||
callback_new_task(task);
|
|
||||||
}
|
|
||||||
LOG_VERBOSE("callback_all_task_finished", {});
|
|
||||||
// process and update all the multitasks
|
|
||||||
auto queue_iterator = queue_multitasks.begin();
|
|
||||||
while (queue_iterator != queue_multitasks.end())
|
|
||||||
{
|
|
||||||
if (queue_iterator->subtasks_remaining.empty())
|
|
||||||
{
|
|
||||||
// all subtasks done == multitask is done
|
|
||||||
task_multi current_multitask = *queue_iterator;
|
|
||||||
callback_finish_multitask(current_multitask);
|
|
||||||
// remove this multitask
|
|
||||||
queue_iterator = queue_multitasks.erase(queue_iterator);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
++queue_iterator;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// all tasks in the current loop is finished
|
|
||||||
callback_all_task_finished();
|
|
||||||
}
|
|
||||||
LOG_VERBOSE("wait for new task", {});
|
|
||||||
// wait for new task
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
|
||||||
if (queue_tasks.empty()) {
|
|
||||||
condition_tasks.wait(lock, [&]{
|
|
||||||
return !queue_tasks.empty();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// functions to manage multitasks
|
|
||||||
//
|
|
||||||
|
|
||||||
// add a multitask by specifying the id of all subtask (subtask is a task_server)
|
|
||||||
void add_multitask(int multitask_id, std::vector<int>& sub_ids)
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(mutex_tasks);
|
|
||||||
task_multi multi;
|
|
||||||
multi.id = multitask_id;
|
|
||||||
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
|
|
||||||
queue_multitasks.push_back(multi);
|
|
||||||
}
|
|
||||||
|
|
||||||
// updatethe remaining subtasks, while appending results to multitask
|
|
||||||
void update_multitask(int multitask_id, int subtask_id, task_result& result)
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(mutex_tasks);
|
|
||||||
for (auto& multitask : queue_multitasks)
|
|
||||||
{
|
|
||||||
if (multitask.id == multitask_id)
|
|
||||||
{
|
|
||||||
multitask.subtasks_remaining.erase(subtask_id);
|
|
||||||
multitask.results.push_back(result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct llama_server_response {
|
|
||||||
typedef std::function<void(int, int, task_result&)> callback_multitask_t;
|
|
||||||
callback_multitask_t callback_update_multitask;
|
|
||||||
// for keeping track of all tasks waiting for the result
|
|
||||||
std::set<int> waiting_task_ids;
|
|
||||||
// the main result queue
|
|
||||||
std::vector<task_result> queue_results;
|
|
||||||
std::mutex mutex_results;
|
|
||||||
std::condition_variable condition_results;
|
|
||||||
|
|
||||||
void add_waiting_task_id(int task_id) {
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
|
||||||
waiting_task_ids.insert(task_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
void remove_waiting_task_id(int task_id) {
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
|
||||||
waiting_task_ids.erase(task_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
// This function blocks the thread until there is a response for this task_id
|
|
||||||
task_result recv(int task_id) {
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
|
||||||
condition_results.wait(lock, [&]{
|
|
||||||
return !queue_results.empty();
|
|
||||||
});
|
|
||||||
LOG_VERBOSE("condition_results unblock", {});
|
|
||||||
|
|
||||||
for (int i = 0; i < (int) queue_results.size(); i++)
|
|
||||||
{
|
|
||||||
if (queue_results[i].id == task_id)
|
|
||||||
{
|
|
||||||
assert(queue_results[i].multitask_id == -1);
|
|
||||||
task_result res = queue_results[i];
|
|
||||||
queue_results.erase(queue_results.begin() + i);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// should never reach here
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register the function to update multitask
|
|
||||||
void on_multitask_update(callback_multitask_t callback) {
|
|
||||||
callback_update_multitask = callback;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send a new result to a waiting task_id
|
|
||||||
void send(task_result result) {
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
|
||||||
LOG_VERBOSE("send new result", {});
|
|
||||||
for (auto& task_id : waiting_task_ids) {
|
|
||||||
// LOG_TEE("waiting task id %i \n", task_id);
|
|
||||||
// for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
|
|
||||||
if (result.multitask_id == task_id)
|
|
||||||
{
|
|
||||||
LOG_VERBOSE("callback_update_multitask", {});
|
|
||||||
callback_update_multitask(task_id, result.id, result);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result.id == task_id)
|
|
||||||
{
|
|
||||||
LOG_VERBOSE("queue_results.push_back", {});
|
|
||||||
queue_results.push_back(result);
|
|
||||||
condition_results.notify_one();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//
|
|
||||||
// base64 utils (TODO: move to common in the future)
|
|
||||||
//
|
|
||||||
|
|
||||||
static const std::string base64_chars =
|
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
||||||
"abcdefghijklmnopqrstuvwxyz"
|
|
||||||
"0123456789+/";
|
|
||||||
|
|
||||||
static inline bool is_base64(uint8_t c)
|
|
||||||
{
|
|
||||||
return (isalnum(c) || (c == '+') || (c == '/'));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string)
|
|
||||||
{
|
|
||||||
int i = 0;
|
|
||||||
int j = 0;
|
|
||||||
int in_ = 0;
|
|
||||||
|
|
||||||
int in_len = encoded_string.size();
|
|
||||||
|
|
||||||
uint8_t char_array_4[4];
|
|
||||||
uint8_t char_array_3[3];
|
|
||||||
|
|
||||||
std::vector<uint8_t> ret;
|
|
||||||
|
|
||||||
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
|
|
||||||
{
|
|
||||||
char_array_4[i++] = encoded_string[in_]; in_++;
|
|
||||||
if (i == 4)
|
|
||||||
{
|
|
||||||
for (i = 0; i <4; i++)
|
|
||||||
{
|
|
||||||
char_array_4[i] = base64_chars.find(char_array_4[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
|
|
||||||
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
|
||||||
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
|
||||||
|
|
||||||
for (i = 0; (i < 3); i++)
|
|
||||||
{
|
|
||||||
ret.push_back(char_array_3[i]);
|
|
||||||
}
|
|
||||||
i = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i)
|
|
||||||
{
|
|
||||||
for (j = i; j <4; j++)
|
|
||||||
{
|
|
||||||
char_array_4[j] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (j = 0; j <4; j++)
|
|
||||||
{
|
|
||||||
char_array_4[j] = base64_chars.find(char_array_4[j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
|
|
||||||
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
|
||||||
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
|
||||||
|
|
||||||
for (j = 0; (j < i - 1); j++)
|
|
||||||
{
|
|
||||||
ret.push_back(char_array_3[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
|
int tts(char *text,int threads, char *dst ) {

	// generate audio
	if (!bark_generate_audio(c, text, threads)) {
		fprintf(stderr, "%s: An error occurred. If the problem persists, feel free to open an issue to report it.\n", __func__);
		return 1;
	}
@@ -8,12 +8,19 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 # keep standard at C11 and C++11
 CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
 
+GOCMD?=go
+CGO_LDFLAGS?=
+# Avoid parent make file overwriting CGO_LDFLAGS which is needed for hipblas
+CGO_LDFLAGS_SYCL=
+GO_TAGS?=
+LD_FLAGS?=
 
 # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
 
 # If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DGGML_CUDA=ON
+	CMAKE_ARGS+=-DSD_CUDA=ON
 # If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
 # to CMAKE_ARGS automatically
 else ifeq ($(BUILD_TYPE),openblas)

@@ -23,29 +30,48 @@ else ifeq ($(BUILD_TYPE),clblas)
 	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DGGML_HIP=ON
+	CMAKE_ARGS+=-DSD_HIPBLAS=ON
 # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
 else ifeq ($(OS),Darwin)
 	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
+		CMAKE_ARGS+=-DSD_METAL=OFF
 	else
-		CMAKE_ARGS+=-DGGML_METAL=ON
+		CMAKE_ARGS+=-DSD_METAL=ON
 		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
 		TARGET+=--target ggml-metal
 	endif
 endif
 
-# ifeq ($(BUILD_TYPE),sycl_f16)
-# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
-# endif
+ifeq ($(BUILD_TYPE),sycl_f16)
+	CMAKE_ARGS+=-DGGML_SYCL=ON \
+		-DCMAKE_C_COMPILER=icx \
+		-DCMAKE_CXX_COMPILER=icpx \
+		-DSD_SYCL=ON \
+		-DGGML_SYCL_F16=ON
+	CC=icx
+	CXX=icpx
+	CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
+	CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
+	CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
+	CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
+endif
 
-# ifeq ($(BUILD_TYPE),sycl_f32)
-# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
-# endif
+ifeq ($(BUILD_TYPE),sycl_f32)
+	CMAKE_ARGS+=-DGGML_SYCL=ON \
+		-DCMAKE_C_COMPILER=icx \
+		-DCMAKE_CXX_COMPILER=icpx \
+		-DSD_SYCL=ON
+	CC=icx
+	CXX=icpx
+	CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
+	CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
+	CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
+	CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
+endif
 
 # warnings
-CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
 
 # Find all .a archives in ARCHIVE_DIR
 # (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)

@@ -86,11 +112,24 @@ endif
 	$(MAKE) $(COMBINED_LIB)
 
 gosd.o:
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+	+bash -c "source $(ONEAPI_VARS); \
+	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c"
+else
 	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
+endif
 
 libsd.a: gosd.o
 	cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
 	$(AR) rcs libsd.a gosd.o
 
+stablediffusion-ggml:
+	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
+	CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
+ifneq ($(UPX),)
+	$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
+endif
 
 clean:
 	rm -rf gosd.o libsd.a build $(COMBINED_LIB)
@@ -74,7 +74,7 @@ func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.Transcript
 		context.SetTranslate(true)
 	}
 
-	if err := context.Process(data, nil, nil); err != nil {
+	if err := context.Process(data, nil, nil, nil); err != nil {
 		return pb.TranscriptResult{}, err
 	}
 
@@ -1,17 +0,0 @@
-.PHONY: autogptq
-autogptq: protogen
-	bash install.sh
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
@@ -1,5 +0,0 @@
-# Creating a separate environment for the autogptq project
-
-```
-make autogptq
-```
@@ -1,153 +0,0 @@
-#!/usr/bin/env python3
-from concurrent import futures
-import argparse
-import signal
-import sys
-import os
-import time
-import base64
-
-import grpc
-import backend_pb2
-import backend_pb2_grpc
-
-from auto_gptq import AutoGPTQForCausalLM
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from transformers import TextGenerationPipeline
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    def Health(self, request, context):
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-    def LoadModel(self, request, context):
-        try:
-            device = "cuda:0"
-            if request.Device != "":
-                device = request.Device
-
-            # support loading local model files
-            model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model)
-            tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode)
-
-            # support model `Qwen/Qwen-VL-Chat-Int4`
-            if "qwen-vl" in request.Model.lower():
-                self.model_name = "Qwen-VL-Chat"
-                model = AutoModelForCausalLM.from_pretrained(model_path,
-                                                             trust_remote_code=request.TrustRemoteCode,
-                                                             device_map="auto").eval()
-            else:
-                model = AutoGPTQForCausalLM.from_quantized(model_path,
-                                                           model_basename=request.ModelBaseName,
-                                                           use_safetensors=True,
-                                                           trust_remote_code=request.TrustRemoteCode,
-                                                           device=device,
-                                                           use_triton=request.UseTriton,
-                                                           quantize_config=None)
-
-            self.model = model
-            self.tokenizer = tokenizer
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def Predict(self, request, context):
-        penalty = 1.0
-        if request.Penalty != 0.0:
-            penalty = request.Penalty
-        tokens = 512
-        if request.Tokens != 0:
-            tokens = request.Tokens
-        top_p = 0.95
-        if request.TopP != 0.0:
-            top_p = request.TopP
-
-        prompt_images = self.recompile_vl_prompt(request)
-        compiled_prompt = prompt_images[0]
-        print(f"Prompt: {compiled_prompt}", file=sys.stderr)
-
-        # Implement Predict RPC
-        pipeline = TextGenerationPipeline(
-            model=self.model,
-            tokenizer=self.tokenizer,
-            max_new_tokens=tokens,
-            temperature=request.Temperature,
-            top_p=top_p,
-            repetition_penalty=penalty,
-        )
-        t = pipeline(compiled_prompt)[0]["generated_text"]
-        print(f"generated_text: {t}", file=sys.stderr)
-
-        if compiled_prompt in t:
-            t = t.replace(compiled_prompt, "")
-        # house keeping. Remove the image files from /tmp folder
-        for img_path in prompt_images[1]:
-            try:
-                os.remove(img_path)
-            except Exception as e:
-                print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
-
-        return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
-
-    def PredictStream(self, request, context):
-        # Implement PredictStream RPC
-        #for reply in some_data_generator():
-        #    yield reply
-        # Not implemented yet
-        return self.Predict(request, context)
-
-    def recompile_vl_prompt(self, request):
-        prompt = request.Prompt
-        image_paths = []
-
-        if "qwen-vl" in self.model_name.lower():
-            # request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to /tmp folder with a random filename.
-            # Then, save the image file paths to an array "image_paths".
-            # read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt".
-            for i, img in enumerate(request.Images):
-                timestamp = str(int(time.time() * 1000))  # Generate timestamp
-                img_path = f"/tmp/vl-{timestamp}.jpg"  # Use timestamp in filename
-                with open(img_path, "wb") as f:
-                    f.write(base64.b64decode(img))
-                image_paths.append(img_path)
-                prompt = prompt.replace(f"[img-{i}]", "<img>" + img_path + "</img>,")
-        else:
-            prompt = request.Prompt
-        return (prompt, image_paths)
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-
-    serve(args.addr)
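Note: the only non-obvious part of the removed backend above is its `recompile_vl_prompt` step, which decodes the base64 images in the request into temporary files and splices their paths into `[img-N]` placeholders for Qwen-VL. A minimal, standalone sketch of that behaviour (hypothetical prompt and image payload, and `tempfile` used instead of hard-coded /tmp paths):

import base64
import tempfile

def recompile_vl_prompt(prompt, images_b64):
    # Decode each base64 image to a temp file and replace the matching
    # [img-N] placeholder with Qwen-VL's <img>path</img> markup.
    image_paths = []
    for i, img in enumerate(images_b64):
        f = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
        f.write(base64.b64decode(img))
        f.close()
        image_paths.append(f.name)
        prompt = prompt.replace(f"[img-{i}]", f"<img>{f.name}</img>,")
    return prompt, image_paths

# Fake payload, only to exercise the path handling.
fake_img = base64.b64encode(b"\x89PNG not a real image").decode()
new_prompt, paths = recompile_vl_prompt("Describe [img-0] please", [fake_img])
print(new_prompt)  # Describe <img>/tmp/...jpg</img>, please
print(paths)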
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
@@ -1,2 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
@@ -1 +0,0 @@
-torch==2.4.1
@@ -1,2 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
@@ -1,6 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-setuptools
@@ -1,6 +0,0 @@
-accelerate
-auto-gptq==0.7.1
-grpcio==1.71.0
-protobuf
-certifi
-transformers
@@ -1,4 +0,0 @@
-#!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
@@ -61,7 +61,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.Result(success=True)
 
 def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                         options=[
+                             ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                         ])
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
     server.add_insecure_port(address)
     server.start()
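Note: the 50 MB options above only raise the server-side limits; a caller that ships large base64 payloads (audio, images) also has to raise its own channel limits, or gRPC rejects the message before it reaches the backend. A minimal client-side sketch under that assumption, reusing the generated stubs from this repository and a hypothetical local address:

import grpc

import backend_pb2
import backend_pb2_grpc

MAX_MSG = 50 * 1024 * 1024  # keep in sync with the server-side options above

channel = grpc.insecure_channel(
    "localhost:50051",  # hypothetical address; in LocalAI the model loader picks the port
    options=[
        ("grpc.max_send_message_length", MAX_MSG),
        ("grpc.max_receive_message_length", MAX_MSG),
    ],
)
stub = backend_pb2_grpc.BackendStub(channel)
# Any RPC defined in backend.proto now goes through the enlarged channel, e.g.:
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
print(response.success)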
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
@@ -1,3 +1,3 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 grpcio-tools
@@ -86,7 +86,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.Result(success=True)
 
 def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                         options=[
+                             ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                         ])
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 packaging==24.1
@@ -19,7 +19,7 @@ import grpc
 
 from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
     EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
-from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
+from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline, Lumina2Text2ImgPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image, export_to_video
 from compel import Compel, ReturnedEmbeddingsType

@@ -168,9 +168,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         # We are storing all the options in a dict so we can use it later when
         # generating the images
         for opt in options:
+            if ":" not in opt:
+                continue
             key, value = opt.split(":")
             self.options[key] = value
 
+        print(f"Options: {self.options}", file=sys.stderr)
 
         local = False
         modelFile = request.Model
 

@@ -287,6 +291,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
             if request.LowVRAM:
                 self.pipe.enable_model_cpu_offload()
+        elif request.PipelineType == "Lumina2Text2ImgPipeline":
+            self.pipe = Lumina2Text2ImgPipeline.from_pretrained(
+                request.Model,
+                torch_dtype=torch.bfloat16)
+            if request.LowVRAM:
+                self.pipe.enable_model_cpu_offload()
         elif request.PipelineType == "SanaPipeline":
             self.pipe = SanaPipeline.from_pretrained(
                 request.Model,
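Note: outside of the backend plumbing, the new Lumina2 branch above is a plain diffusers call. A minimal sketch, assuming the Alpha-VLLM/Lumina-Image-2.0 checkpoint id (any Lumina Image 2.0 repo in diffusers layout should behave the same) and either enough VRAM or CPU offload:

import torch
from diffusers import Lumina2Text2ImgPipeline

# Checkpoint id is an assumption for illustration; the backend passes request.Model here.
pipe = Lumina2Text2ImgPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Image-2.0",
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()  # same call the backend makes when request.LowVRAM is set

image = pipe(
    prompt="a watercolor lighthouse at dawn",
    num_inference_steps=30,
).images[0]
image.save("lumina2.png")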
@@ -516,7 +526,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
 
 def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                         options=[
+                             ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                         ])
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
     server.add_insecure_port(address)
     server.start()
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.71.0
+grpcio==1.72.0
 pillow
 protobuf
 certifi
@@ -105,7 +105,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
 
 def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                         options=[
+                             ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                         ])
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
     server.add_insecure_port(address)
     server.start()
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 wheel
@@ -62,7 +62,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
 
 def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                         options=[
+                             ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                         ])
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
     server.add_insecure_port(address)
     server.start()
@@ -1,3 +1,3 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 grpcio-tools
@@ -99,7 +99,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.Result(success=True)
 
 def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                         options=[
+                             ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                         ])
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
     server.add_insecure_port(address)
     server.start()
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 phonemizer
 scipy
|
@ -91,7 +91,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
return backend_pb2.RerankResult(usage=usage, results=results)
|
return backend_pb2.RerankResult(usage=usage, results=results)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
|
|
@@ -1,3 +1,3 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
@@ -559,7 +559,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
 async def serve(address):
     # Start asyncio gRPC server
-    server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                             options=[
+                                 ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                                 ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                                 ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                             ])
     # Add the servicer to the server
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
     # Bind the server to the address
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 setuptools
@@ -194,27 +194,40 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             await iterations.aclose()
 
     async def _predict(self, request, context, streaming=False):
+        # Build the sampling parameters
+        # NOTE: this must stay in sync with the vllm backend
+        request_to_sampling_params = {
+            "N": "n",
+            "PresencePenalty": "presence_penalty",
+            "FrequencyPenalty": "frequency_penalty",
+            "RepetitionPenalty": "repetition_penalty",
+            "Temperature": "temperature",
+            "TopP": "top_p",
+            "TopK": "top_k",
+            "MinP": "min_p",
+            "Seed": "seed",
+            "StopPrompts": "stop",
+            "StopTokenIds": "stop_token_ids",
+            "BadWords": "bad_words",
+            "IncludeStopStrInOutput": "include_stop_str_in_output",
+            "IgnoreEOS": "ignore_eos",
+            "Tokens": "max_tokens",
+            "MinTokens": "min_tokens",
+            "Logprobs": "logprobs",
+            "PromptLogprobs": "prompt_logprobs",
+            "SkipSpecialTokens": "skip_special_tokens",
+            "SpacesBetweenSpecialTokens": "spaces_between_special_tokens",
+            "TruncatePromptTokens": "truncate_prompt_tokens",
+            "GuidedDecoding": "guided_decoding",
+        }
 
-        # Build sampling parameters
         sampling_params = SamplingParams(top_p=0.9, max_tokens=200)
-        if request.TopP != 0:
-            sampling_params.top_p = request.TopP
-        if request.Tokens > 0:
-            sampling_params.max_tokens = request.Tokens
-        if request.Temperature != 0:
-            sampling_params.temperature = request.Temperature
-        if request.TopK != 0:
-            sampling_params.top_k = request.TopK
-        if request.PresencePenalty != 0:
-            sampling_params.presence_penalty = request.PresencePenalty
-        if request.FrequencyPenalty != 0:
-            sampling_params.frequency_penalty = request.FrequencyPenalty
-        if request.StopPrompts:
-            sampling_params.stop = request.StopPrompts
-        if request.IgnoreEOS:
-            sampling_params.ignore_eos = request.IgnoreEOS
-        if request.Seed != 0:
-            sampling_params.seed = request.Seed
+        for request_field, param_field in request_to_sampling_params.items():
+            if hasattr(request, request_field):
+                value = getattr(request, request_field)
+                if value not in (None, 0, [], False):
+                    setattr(sampling_params, param_field, value)
 
         # Extract image paths and process images
         prompt = request.Prompt
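Note: the table-driven loop above replaces the per-field if statements with a generic getattr/setattr mapping; the falsy-value guard means a field left at its protobuf default (0, 0.0, empty list, False) is treated as "not set" and leaves the SamplingParams default untouched. A self-contained sketch of that behaviour with stand-in objects (the real code uses the gRPC PredictOptions message and vllm.SamplingParams):

from types import SimpleNamespace

request = SimpleNamespace(TopP=0.8, Tokens=50, Temperature=0.0, Seed=0)
sampling_params = SimpleNamespace(top_p=0.9, max_tokens=200, temperature=1.0, seed=None)

request_to_sampling_params = {
    "TopP": "top_p",
    "Tokens": "max_tokens",
    "Temperature": "temperature",
    "Seed": "seed",
}

for request_field, param_field in request_to_sampling_params.items():
    if hasattr(request, request_field):
        value = getattr(request, request_field)
        # Falsy values (0, 0.0, [], False, None) are skipped, so Temperature=0.0
        # and Seed=0 below leave the defaults in place.
        if value not in (None, 0, [], False):
            setattr(sampling_params, param_field, value)

print(sampling_params)  # top_p=0.8, max_tokens=50, temperature=1.0, seed=None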
@@ -320,7 +333,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
 async def serve(address):
     # Start asyncio gRPC server
-    server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                             options=[
+                                 ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                                 ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                                 ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                             ])
     # Add the servicer to the server
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
     # Bind the server to the address
@@ -1,4 +1,4 @@
-grpcio==1.71.0
+grpcio==1.72.0
 protobuf
 certifi
 setuptools
@@ -75,6 +75,53 @@ class TestBackendServicer(unittest.TestCase):
         finally:
             self.tearDown()
 
+    def test_sampling_params(self):
+        """
+        This method tests if all sampling parameters are correctly processed
+        NOTE: this does NOT test for correctness, just that we received a compatible response
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
+                self.assertTrue(response.success)
+
+                req = backend_pb2.PredictOptions(
+                    Prompt="The capital of France is",
+                    TopP=0.8,
+                    Tokens=50,
+                    Temperature=0.7,
+                    TopK=40,
+                    PresencePenalty=0.1,
+                    FrequencyPenalty=0.2,
+                    RepetitionPenalty=1.1,
+                    MinP=0.05,
+                    Seed=42,
+                    StopPrompts=["\n"],
+                    StopTokenIds=[50256],
+                    BadWords=["badword"],
+                    IncludeStopStrInOutput=True,
+                    IgnoreEOS=True,
+                    MinTokens=5,
+                    Logprobs=5,
+                    PromptLogprobs=5,
+                    SkipSpecialTokens=True,
+                    SpacesBetweenSpecialTokens=True,
+                    TruncatePromptTokens=10,
+                    GuidedDecoding=True,
+                    N=2,
+                )
+                resp = stub.Predict(req)
+                self.assertIsNotNone(resp.message)
+                self.assertIsNotNone(resp.logprobs)
+        except Exception as err:
+            print(err)
+            self.fail("sampling params service failed")
+        finally:
+            self.tearDown()
+
     def test_embedding(self):
         """
         This method tests if the embeddings are generated successfully
@@ -16,7 +16,7 @@ type Application struct {
 func newApplication(appConfig *config.ApplicationConfig) *Application {
 	return &Application{
 		backendLoader:      config.NewBackendConfigLoader(appConfig.ModelPath),
-		modelLoader:        model.NewModelLoader(appConfig.ModelPath),
+		modelLoader:        model.NewModelLoader(appConfig.ModelPath, appConfig.SingleBackend),
 		applicationConfig:  appConfig,
 		templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
 	}
@@ -43,18 +43,12 @@ func New(opts ...config.AppOption) (*Application, error) {
 	if err != nil {
 		return nil, fmt.Errorf("unable to create ModelPath: %q", err)
 	}
-	if options.ImageDir != "" {
-		err := os.MkdirAll(options.ImageDir, 0750)
+	if options.GeneratedContentDir != "" {
+		err := os.MkdirAll(options.GeneratedContentDir, 0750)
 		if err != nil {
 			return nil, fmt.Errorf("unable to create ImageDir: %q", err)
 		}
 	}
-	if options.AudioDir != "" {
-		err := os.MkdirAll(options.AudioDir, 0750)
-		if err != nil {
-			return nil, fmt.Errorf("unable to create AudioDir: %q", err)
-		}
-	}
 	if options.UploadDir != "" {
 		err := os.MkdirAll(options.UploadDir, 0750)
 		if err != nil {

@@ -143,7 +137,7 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}()
 	}
 
-	if options.LoadToMemory != nil {
+	if options.LoadToMemory != nil && !options.SingleBackend {
 		for _, m := range options.LoadToMemory {
 			cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
 			if err != nil {
@@ -17,6 +17,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendCo
 	if err != nil {
 		return nil, err
 	}
+	defer loader.Close()
 
 	var fn func() ([]float32, error)
 	switch model := inferenceModel.(type) {
@@ -16,6 +16,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 	if err != nil {
 		return nil, err
 	}
+	defer loader.Close()
 
 	fn := func() error {
 		_, err := inferenceModel.GenerateImage(
@@ -53,6 +53,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 	if err != nil {
 		return nil, err
 	}
+	defer loader.Close()
 
 	var protoMessages []*proto.Message
 	// if we are using the tokenizer template, we need to convert the messages to proto messages
@@ -40,10 +40,6 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
 	grpcOpts := grpcModelOpts(c)
 	defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
 
-	if so.SingleBackend {
-		defOpts = append(defOpts, model.WithSingleActiveBackend())
-	}
-
 	if so.ParallelBackendRequests {
 		defOpts = append(defOpts, model.EnableParallelRequests)
 	}

@@ -103,7 +99,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		mmap = *c.MMap
 	}
 
-	ctxSize := 1024
+	ctxSize := 4096
 	if c.ContextSize != nil {
 		ctxSize = *c.ContextSize
 	}

@@ -188,11 +184,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		MainGPU:     c.MainGPU,
 		Threads:     int32(*c.Threads),
 		TensorSplit: c.TensorSplit,
-		// AutoGPTQ
-		ModelBaseName:    c.AutoGPTQ.ModelBaseName,
-		Device:           c.AutoGPTQ.Device,
-		UseTriton:        c.AutoGPTQ.Triton,
-		UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
 		// RWKV
 		Tokenizer: c.Tokenizer,
 	}
@@ -12,10 +12,10 @@ import (
 func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
 	opts := ModelOptions(backendConfig, appConfig)
 	rerankModel, err := loader.Load(opts...)
-
 	if err != nil {
 		return nil, err
 	}
+	defer loader.Close()
 
 	if rerankModel == nil {
 		return nil, fmt.Errorf("could not load rerank model")
@@ -26,21 +26,26 @@ func SoundGeneration(
 
 	opts := ModelOptions(backendConfig, appConfig)
 	soundGenModel, err := loader.Load(opts...)
-
 	if err != nil {
 		return "", nil, err
 	}
+	defer loader.Close()
 
 	if soundGenModel == nil {
 		return "", nil, fmt.Errorf("could not load sound generation model")
 	}
 
-	if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
+	if err := os.MkdirAll(appConfig.GeneratedContentDir, 0750); err != nil {
 		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
 	}
 
-	fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
-	filePath := filepath.Join(appConfig.AudioDir, fileName)
+	audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
+	if err := os.MkdirAll(audioDir, 0750); err != nil {
+		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
+	}
+
+	fileName := utils.GenerateUniqueFileName(audioDir, "sound_generation", ".wav")
+	filePath := filepath.Join(audioDir, fileName)
 
 	res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
 		Text: text,
@@ -20,6 +20,7 @@ func TokenMetrics(
 	if err != nil {
 		return nil, err
 	}
+	defer loader.Close()
 
 	if model == nil {
 		return nil, fmt.Errorf("could not loadmodel model")
@@ -14,10 +14,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
 
 	opts := ModelOptions(backendConfig, appConfig)
 	inferenceModel, err = loader.Load(opts...)
-
 	if err != nil {
 		return schema.TokenizeResponse{}, err
 	}
+	defer loader.Close()
 
 	predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
 	predictOptions.Prompt = s
@@ -24,6 +24,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
 	if err != nil {
 		return nil, err
 	}
+	defer ml.Close()
 
 	if transcriptionModel == nil {
 		return nil, fmt.Errorf("could not load transcription model")
@@ -23,21 +23,22 @@ func ModelTTS(
 ) (string, *proto.Result, error) {
 	opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
 	ttsModel, err := loader.Load(opts...)
-
 	if err != nil {
 		return "", nil, err
 	}
+	defer loader.Close()
 
 	if ttsModel == nil {
 		return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
 	}
 
-	if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
+	audioDir := filepath.Join(appConfig.GeneratedContentDir, "audio")
+	if err := os.MkdirAll(audioDir, 0750); err != nil {
 		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
 	}
 
-	fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
-	filePath := filepath.Join(appConfig.AudioDir, fileName)
+	fileName := utils.GenerateUniqueFileName(audioDir, "tts", ".wav")
+	filePath := filepath.Join(audioDir, fileName)
 
 	// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
 	// This should be addressed in a follow up PR soon.
@@ -19,6 +19,8 @@ func VAD(request *schema.VADRequest,
 	if err != nil {
 		return nil, err
 	}
+	defer ml.Close()
+
 	req := proto.VADRequest{
 		Audio: request.Audio,
 	}
core/backend/video.go (new file, 36 lines)
@@ -0,0 +1,36 @@
+package backend
+
+import (
+	"github.com/mudler/LocalAI/core/config"
+
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	model "github.com/mudler/LocalAI/pkg/model"
+)
+
+func VideoGeneration(height, width int32, prompt, startImage, endImage, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
+
+	opts := ModelOptions(backendConfig, appConfig)
+	inferenceModel, err := loader.Load(
+		opts...,
+	)
+	if err != nil {
+		return nil, err
+	}
+	defer loader.Close()
+
+	fn := func() error {
+		_, err := inferenceModel.GenerateVideo(
+			appConfig.Context,
+			&proto.GenerateVideoRequest{
+				Height:     height,
+				Width:      width,
+				Prompt:     prompt,
+				StartImage: startImage,
+				EndImage:   endImage,
+				Dst:        dst,
+			})
+		return err
+	}
+
+	return fn, nil
+}
@@ -1,11 +1,13 @@
 package cliContext
 
-import "embed"
+import (
+	rice "github.com/GeertJohan/go.rice"
+)
 
 type Context struct {
 	Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
 	LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
 
 	// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
-	BackendAssets embed.FS `kong:"-"`
+	BackendAssets *rice.Box `kong:"-"`
 }
@@ -21,8 +21,7 @@ type RunCMD struct {
 
 	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
-	ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
-	AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
+	GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
 	UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
 	ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
 	LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`

@@ -47,7 +46,7 @@ type RunCMD struct {
 	CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
 	UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
 	APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
-	DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
+	DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"`
 	DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
 	OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
 	UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`

@@ -81,8 +80,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithModelPath(r.ModelsPath),
 		config.WithContextSize(r.ContextSize),
 		config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
-		config.WithImageDir(r.ImagePath),
-		config.WithAudioDir(r.AudioPath),
+		config.WithGeneratedContentDir(r.GeneratedContentPath),
 		config.WithUploadDir(r.UploadPath),
 		config.WithConfigsDir(r.ConfigPath),
 		config.WithDynamicConfigDir(r.LocalaiConfigDir),
@@ -70,11 +70,11 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
 	opts := &config.ApplicationConfig{
 		ModelPath:            t.ModelsPath,
 		Context:              context.Background(),
-		AudioDir:             outputDir,
+		GeneratedContentDir:  outputDir,
 		AssetsDestination:    t.BackendAssetsPath,
 		ExternalGRPCBackends: externalBackends,
 	}
-	ml := model.NewModelLoader(opts.ModelPath)
+	ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
 
 	defer func() {
 		err := ml.StopAllGRPC()
@@ -32,7 +32,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
 	}
 
 	cl := config.NewBackendConfigLoader(t.ModelsPath)
-	ml := model.NewModelLoader(opts.ModelPath)
+	ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
 	if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil {
 		return err
 	}
@@ -38,10 +38,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
 	opts := &config.ApplicationConfig{
 		ModelPath:           t.ModelsPath,
 		Context:             context.Background(),
-		AudioDir:            outputDir,
+		GeneratedContentDir: outputDir,
 		AssetsDestination:   t.BackendAssetsPath,
 	}
-	ml := model.NewModelLoader(opts.ModelPath)
+	ml := model.NewModelLoader(opts.ModelPath, opts.SingleBackend)
 
 	defer func() {
 		err := ml.StopAllGRPC()
@@ -7,11 +7,11 @@ import (
 
 	"github.com/rs/zerolog/log"
 
+	gguf "github.com/gpustack/gguf-parser-go"
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/pkg/downloader"
-	gguf "github.com/thxcode/gguf-parser-go"
 )
 
 type UtilCMD struct {

@@ -51,7 +51,7 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
 	log.Info().
 		Any("eosTokenID", f.Tokenizer().EOSTokenID).
 		Any("bosTokenID", f.Tokenizer().BOSTokenID).
-		Any("modelName", f.Model().Name).
+		Any("modelName", f.Metadata().Name).
 		Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
 
 	log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
@@ -2,11 +2,11 @@ package config

 import (
 	"context"
-	"embed"
 	"encoding/json"
 	"regexp"
 	"time"

+	rice "github.com/GeertJohan/go.rice"
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 )

@@ -19,10 +19,11 @@ type ApplicationConfig struct {
 	UploadLimitMB, Threads, ContextSize int
 	F16 bool
 	Debug bool
-	ImageDir string
-	AudioDir string
-	UploadDir string
+	GeneratedContentDir string
 	ConfigsDir string
+	UploadDir string

 	DynamicConfigsDir string
 	DynamicConfigsDirPollInterval time.Duration
 	CORS bool

@@ -46,7 +47,7 @@ type ApplicationConfig struct {

 	Galleries []Gallery

-	BackendAssets embed.FS
+	BackendAssets *rice.Box
 	AssetsDestination string

 	ExternalGRPCBackends map[string]string

@@ -197,7 +198,7 @@ func WithBackendAssetsOutput(out string) AppOption {
 	}
 }

-func WithBackendAssets(f embed.FS) AppOption {
+func WithBackendAssets(f *rice.Box) AppOption {
 	return func(o *ApplicationConfig) {
 		o.BackendAssets = f
 	}

@@ -279,15 +280,9 @@ func WithDebug(debug bool) AppOption {
 	}
 }

-func WithAudioDir(audioDir string) AppOption {
+func WithGeneratedContentDir(generatedContentDir string) AppOption {
 	return func(o *ApplicationConfig) {
-		o.AudioDir = audioDir
-	}
-}
-
-func WithImageDir(imageDir string) AppOption {
-	return func(o *ApplicationConfig) {
-		o.ImageDir = imageDir
+		o.GeneratedContentDir = generatedContentDir
 	}
 }
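Note: with WithAudioDir and WithImageDir removed, callers point LocalAI at a single generated-content root. A minimal sketch of the new wiring; the directory is an example, not a project default, and both options shown appear in this diff:

    // Sketch only.
    opts := []config.AppOption{
        config.WithModelPath("/models"),
        config.WithGeneratedContentDir("/tmp/localai/generated"), // replaces WithAudioDir / WithImageDir
    }

Audio, image and video outputs then land in audio/, images/ and videos/ subdirectories of that root, matching the HTTP routes set up further down in this diff.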
@@ -50,9 +50,6 @@ type BackendConfig struct {
 	// LLM configs (GPT4ALL, Llama.cpp, ...)
 	LLMConfig `yaml:",inline"`

-	// AutoGPTQ specifics
-	AutoGPTQ AutoGPTQ `yaml:"autogptq"`
-
 	// Diffusers
 	Diffusers Diffusers `yaml:"diffusers"`
 	Step int `yaml:"step"`

@@ -176,14 +173,6 @@ type LimitMMPerPrompt struct {
 	LimitAudioPerPrompt int `yaml:"audio"`
 }

-// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
-type AutoGPTQ struct {
-	ModelBaseName    string `yaml:"model_base_name"`
-	Device           string `yaml:"device"`
-	Triton           bool   `yaml:"triton"`
-	UseFastTokenizer bool   `yaml:"use_fast_tokenizer"`
-}
-
 // TemplateConfig is a struct that holds the configuration of the templating system
 type TemplateConfig struct {
 	// Chat is the template used in the chat completion endpoint

@@ -315,9 +304,6 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	defaultTFZ := 1.0
 	defaultZero := 0

-	// Try to offload all GPU layers (if GPU is found)
-	defaultHigh := 99999999
-
 	trueV := true
 	falseV := false

@@ -377,9 +363,6 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	if cfg.MirostatTAU == nil {
 		cfg.MirostatTAU = &defaultMirostatTAU
 	}
-	if cfg.NGPULayers == nil {
-		cfg.NGPULayers = &defaultHigh
-	}

 	if cfg.LowVRAM == nil {
 		cfg.LowVRAM = &falseV
@@ -447,18 +430,19 @@ func (c *BackendConfig) HasTemplate() bool {
 type BackendConfigUsecases int

 const (
-	FLAG_ANY              BackendConfigUsecases = 0b00000000000
-	FLAG_CHAT             BackendConfigUsecases = 0b00000000001
-	FLAG_COMPLETION       BackendConfigUsecases = 0b00000000010
-	FLAG_EDIT             BackendConfigUsecases = 0b00000000100
-	FLAG_EMBEDDINGS       BackendConfigUsecases = 0b00000001000
-	FLAG_RERANK           BackendConfigUsecases = 0b00000010000
-	FLAG_IMAGE            BackendConfigUsecases = 0b00000100000
-	FLAG_TRANSCRIPT       BackendConfigUsecases = 0b00001000000
-	FLAG_TTS              BackendConfigUsecases = 0b00010000000
-	FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
-	FLAG_TOKENIZE         BackendConfigUsecases = 0b01000000000
-	FLAG_VAD              BackendConfigUsecases = 0b10000000000
+	FLAG_ANY              BackendConfigUsecases = 0b000000000000
+	FLAG_CHAT             BackendConfigUsecases = 0b000000000001
+	FLAG_COMPLETION       BackendConfigUsecases = 0b000000000010
+	FLAG_EDIT             BackendConfigUsecases = 0b000000000100
+	FLAG_EMBEDDINGS       BackendConfigUsecases = 0b000000001000
+	FLAG_RERANK           BackendConfigUsecases = 0b000000010000
+	FLAG_IMAGE            BackendConfigUsecases = 0b000000100000
+	FLAG_TRANSCRIPT       BackendConfigUsecases = 0b000001000000
+	FLAG_TTS              BackendConfigUsecases = 0b000010000000
+	FLAG_SOUND_GENERATION BackendConfigUsecases = 0b000100000000
+	FLAG_TOKENIZE         BackendConfigUsecases = 0b001000000000
+	FLAG_VAD              BackendConfigUsecases = 0b010000000000
+	FLAG_VIDEO            BackendConfigUsecases = 0b100000000000

 	// Common Subsets
 	FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT

@@ -479,6 +463,7 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
 		"FLAG_TOKENIZE": FLAG_TOKENIZE,
 		"FLAG_VAD":      FLAG_VAD,
 		"FLAG_LLM":      FLAG_LLM,
+		"FLAG_VIDEO":    FLAG_VIDEO,
 	}
 }

@@ -543,6 +528,17 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
 			return false
 		}

+	}
+	if (u & FLAG_VIDEO) == FLAG_VIDEO {
+		videoBackends := []string{"diffusers", "stablediffusion"}
+		if !slices.Contains(videoBackends, c.Backend) {
+			return false
+		}
+
+		if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
+			return false
+		}
+
 	}
 	if (u & FLAG_RERANK) == FLAG_RERANK {
 		if c.Backend != "rerankers" {
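Note: the constants above simply grow the usecase bitmask by one bit; a configuration advertises video support when the FLAG_VIDEO bit is set. A minimal sketch of the masking pattern used throughout GuessUsecases (variable names are illustrative):

    // Sketch only: FLAG_* values are the constants defined above, in package config.
    wanted := config.FLAG_VIDEO | config.FLAG_IMAGE
    hasVideo := (wanted & config.FLAG_VIDEO) == config.FLAG_VIDEO // true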
@@ -555,7 +551,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
 		}
 	}
 	if (u & FLAG_TTS) == FLAG_TTS {
-		ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
+		ttsBackends := []string{"bark-cpp", "parler-tts", "piper", "transformers-musicgen"}
 		if !slices.Contains(ttsBackends, c.Backend) {
 			return false
 		}
@@ -3,9 +3,10 @@ package config

 import (
 	"strings"

+	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"

-	gguf "github.com/thxcode/gguf-parser-go"
+	gguf "github.com/gpustack/gguf-parser-go"
 )

 type familyType uint8

@@ -23,6 +24,7 @@ const (

 const (
 	defaultContextSize = 1024
+	defaultNGPULayers  = 99999999
 )

 type settingsConfig struct {

@@ -147,7 +149,7 @@ var knownTemplates = map[string]familyType{
 func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {

 	if defaultCtx == 0 && cfg.ContextSize == nil {
-		ctxSize := f.EstimateLLaMACppUsage().ContextSize
+		ctxSize := f.EstimateLLaMACppRun().ContextSize
 		if ctxSize > 0 {
 			cSize := int(ctxSize)
 			cfg.ContextSize = &cSize

@@ -157,6 +159,46 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
 		}
 	}

+	// GPU options
+	if cfg.Options == nil {
+		if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
+			cfg.Options = []string{"gpu"}
+		}
+	}
+
+	// vram estimation
+	vram, err := xsysinfo.TotalAvailableVRAM()
+	if err != nil {
+		log.Error().Msgf("guessDefaultsFromFile(TotalAvailableVRAM): %s", err)
+	} else if vram > 0 {
+		estimate, err := xsysinfo.EstimateGGUFVRAMUsage(f, vram)
+		if err != nil {
+			log.Error().Msgf("guessDefaultsFromFile(EstimateGGUFVRAMUsage): %s", err)
+		} else {
+			if estimate.IsFullOffload {
+				log.Warn().Msgf("guessDefaultsFromFile: %s", "full offload is recommended")
+			}
+
+			if estimate.EstimatedVRAM > vram {
+				log.Warn().Msgf("guessDefaultsFromFile: %s", "estimated VRAM usage is greater than available VRAM")
+			}
+
+			if cfg.NGPULayers == nil && estimate.EstimatedLayers > 0 {
+				log.Debug().Msgf("guessDefaultsFromFile: %d layers estimated", estimate.EstimatedLayers)
+				cfg.NGPULayers = &estimate.EstimatedLayers
+			}
+		}
+	}
+
+	if cfg.NGPULayers == nil {
+		// we assume we want to offload all layers
+		defaultHigh := defaultNGPULayers
+		cfg.NGPULayers = &defaultHigh
+	}
+
+	log.Debug().Any("NGPULayers", cfg.NGPULayers).Msgf("guessDefaultsFromFile: %s", "NGPULayers set")
+
+	// template estimations
 	if cfg.HasTemplate() {
 		// nothing to guess here
 		log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")

@@ -166,12 +208,12 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
 	log.Debug().
 		Any("eosTokenID", f.Tokenizer().EOSTokenID).
 		Any("bosTokenID", f.Tokenizer().BOSTokenID).
-		Any("modelName", f.Model().Name).
+		Any("modelName", f.Metadata().Name).
 		Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())

 	// guess the name
 	if cfg.Name == "" {
-		cfg.Name = f.Model().Name
+		cfg.Name = f.Metadata().Name
 	}

 	family := identifyFamily(f)

@@ -207,6 +249,7 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
 		cfg.TemplateConfig.JinjaTemplate = true
 		cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
 	}
+
 }

 func identifyFamily(f *gguf.GGUFFile) familyType {

@@ -231,7 +274,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
 	commandR := arch == "command-r" && eosTokenID == 255001
 	qwen2 := arch == "qwen2"
 	phi3 := arch == "phi-3"
-	gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
+	gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma")
 	deepseek2 := arch == "deepseek2"

 	switch {
@@ -4,8 +4,8 @@ import (
 	"os"
 	"path/filepath"

+	gguf "github.com/gpustack/gguf-parser-go"
 	"github.com/rs/zerolog/log"
-	gguf "github.com/thxcode/gguf-parser-go"
 )

 func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {
@@ -5,6 +5,8 @@ import (
 	"errors"
 	"fmt"
 	"net/http"
+	"os"
+	"path/filepath"

 	"github.com/dave-gray101/v2keyauth"
 	"github.com/mudler/LocalAI/pkg/utils"

@@ -142,9 +144,9 @@ func API(application *application.Application) (*fiber.App, error) {
 	httpFS := http.FS(embedDirStatic)

 	router.Use(favicon.New(favicon.Config{
-		URL:        "/favicon.ico",
+		URL:        "/favicon.svg",
 		FileSystem: httpFS,
-		File:       "static/favicon.ico",
+		File:       "static/favicon.svg",
 	}))

 	router.Use("/static", filesystem.New(filesystem.Config{

@@ -153,12 +155,19 @@ func API(application *application.Application) (*fiber.App, error) {
 		Browse: true,
 	}))

-	if application.ApplicationConfig().ImageDir != "" {
-		router.Static("/generated-images", application.ApplicationConfig().ImageDir)
-	}
-
-	if application.ApplicationConfig().AudioDir != "" {
-		router.Static("/generated-audio", application.ApplicationConfig().AudioDir)
+	if application.ApplicationConfig().GeneratedContentDir != "" {
+		os.MkdirAll(application.ApplicationConfig().GeneratedContentDir, 0750)
+		audioPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "audio")
+		imagePath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "images")
+		videoPath := filepath.Join(application.ApplicationConfig().GeneratedContentDir, "videos")
+
+		os.MkdirAll(audioPath, 0750)
+		os.MkdirAll(imagePath, 0750)
+		os.MkdirAll(videoPath, 0750)
+
+		router.Static("/generated-audio", audioPath)
+		router.Static("/generated-images", imagePath)
+		router.Static("/generated-videos", videoPath)
 	}

 	// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
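Note: once these routes are registered, anything written under the generated-content root is served over plain HTTP. A minimal sketch of fetching a generated image; host, port and filename are placeholders, and the path would normally come from the URL field of a generation response:

    package main

    import (
        "io"
        "log"
        "net/http"
        "os"
    )

    func main() {
        // Placeholder URL: use the path returned by the image/video endpoint.
        resp, err := http.Get("http://localhost:8080/generated-images/example.png")
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()

        out, err := os.Create("example.png")
        if err != nil {
            log.Fatal(err)
        }
        defer out.Close()

        if _, err := io.Copy(out, resp.Body); err != nil {
            log.Fatal(err)
        }
    }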
@@ -3,7 +3,6 @@ package http_test
 import (
 	"bytes"
 	"context"
-	"embed"
 	"encoding/json"
 	"fmt"
 	"io"

@@ -24,6 +23,7 @@ import (
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"

+	rice "github.com/GeertJohan/go.rice"
 	openaigo "github.com/otiai10/openaigo"
 	"github.com/sashabaranov/go-openai"
 	"github.com/sashabaranov/go-openai/jsonschema"

@@ -264,8 +264,15 @@ func getRequest(url string, header http.Header) (error, int, []byte) {

 const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

-//go:embed backend-assets/*
-var backendAssets embed.FS
+var backendAssets *rice.Box
+
+func init() {
+	var err error
+	backendAssets, err = rice.FindBox("backend-assets")
+	if err != nil {
+		panic(err)
+	}
+}

 var _ = Describe("API test", func() {

@@ -629,8 +636,7 @@ var _ = Describe("API test", func() {
 			application, err := application.New(
 				append(commonOpts,
 					config.WithContext(c),
-					config.WithAudioDir(tmpdir),
-					config.WithImageDir(tmpdir),
+					config.WithGeneratedContentDir(tmpdir),
 					config.WithGalleries(galleries),
 					config.WithModelPath(modelDir),
 					config.WithBackendAssets(backendAssets),
@@ -122,15 +122,15 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
 			"id":          modalName(m),
 			"tabindex":    "-1",
 			"aria-hidden": "true",
-			"class":       "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
+			"class":       "hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-full max-h-full bg-gray-900/50",
 		},
 		elem.Div(
 			attrs.Props{
-				"class": "relative p-4 w-full max-w-2xl max-h-full",
+				"class": "relative p-4 w-full max-w-2xl h-[90vh] mx-auto mt-[5vh]",
 			},
 			elem.Div(
 				attrs.Props{
-					"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
+					"class": "relative bg-white rounded-lg shadow dark:bg-gray-700 h-full flex flex-col",
 				},
 				// header
 				elem.Div(

@@ -164,14 +164,13 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
 				// body
 				elem.Div(
 					attrs.Props{
-						"class": "p-4 md:p-5 space-y-4",
+						"class": "p-4 md:p-5 space-y-4 overflow-y-auto flex-1 min-h-0",
 					},
 					elem.Div(
 						attrs.Props{
 							"class": "flex justify-center items-center",
 						},
 						elem.Img(attrs.Props{
-							// "class": "rounded-t-lg object-fit object-center h-96",
 							"class":   "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
 							"src":     m.Icon,
 							"loading": "lazy",

@@ -232,7 +231,6 @@ func modelModal(m *gallery.GalleryModel) elem.Node {
 			),
 		),
 	)
-
 }

 func modelDescription(m *gallery.GalleryModel) elem.Node {
@@ -32,7 +32,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 		return fiber.ErrBadRequest
 	}

-	log.Debug().Str("modelName", input.ModelID).Msg("elevenlabs TTS request recieved")
+	log.Debug().Str("modelName", input.ModelID).Msg("elevenlabs TTS request received")

 	filePath, _, err := backend.ModelTTS(input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg)
 	if err != nil {

@@ -30,7 +30,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 		return fiber.ErrBadRequest
 	}

-	log.Debug().Str("model", input.Model).Msg("JINA Rerank Request recieved")
+	log.Debug().Str("model", input.Model).Msg("JINA Rerank Request received")

 	request := &proto.RerankRequest{
 		Query: input.Query,

@@ -120,6 +120,7 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *f
 	models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
 	if err != nil {
+		log.Error().Err(err).Msg("could not list models from galleries")
 		return err
 	}
@@ -21,6 +21,7 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
 		if err != nil {
 			return err
 		}
+		defer sl.Close()

 		vals := make([][]byte, len(input.Values))
 		for i, v := range input.Values {

@@ -48,6 +49,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
 		if err != nil {
 			return err
 		}
+		defer sl.Close()

 		if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
 			return err

@@ -69,6 +71,7 @@ func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
 		if err != nil {
 			return err
 		}
+		defer sl.Close()

 		keys, vals, err := store.GetCols(c.Context(), sb, input.Keys)
 		if err != nil {

@@ -100,6 +103,7 @@ func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConf
 		if err != nil {
 			return err
 		}
+		defer sl.Close()

 		keys, vals, similarities, err := store.Find(c.Context(), sb, input.Key, input.Topk)
 		if err != nil {
@@ -34,7 +34,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 		return fiber.ErrBadRequest
 	}

-	log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request recieved")
+	log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request received")

 	if cfg.Backend == "" {
 		if input.Backend != "" {

@@ -28,7 +28,7 @@ func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 		return fiber.ErrBadRequest
 	}

-	log.Debug().Str("model", input.Model).Msg("LocalAI VAD Request recieved")
+	log.Debug().Str("model", input.Model).Msg("LocalAI VAD Request received")

 	resp, err := backend.VAD(input, c.Context(), ml, appConfig, *cfg)
core/http/endpoints/localai/video.go (new file, 205 lines)
@@ -0,0 +1,205 @@
package localai

import (
	"bufio"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http/middleware"
	"github.com/mudler/LocalAI/core/schema"

	"github.com/mudler/LocalAI/core/backend"

	"github.com/gofiber/fiber/v2"
	model "github.com/mudler/LocalAI/pkg/model"
	"github.com/rs/zerolog/log"
)

func downloadFile(url string) (string, error) {
	// Get the data
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Create the file
	out, err := os.CreateTemp("", "video")
	if err != nil {
		return "", err
	}
	defer out.Close()

	// Write the body to file
	_, err = io.Copy(out, resp.Body)
	return out.Name(), err
}

//

/*
*
	curl http://localhost:8080/v1/images/generations \
		-H "Content-Type: application/json" \
		-d '{
			"prompt": "A cute baby sea otter",
			"n": 1,
			"size": "512x512"
		}'

*
*/
// VideoEndpoint
// @Summary Creates a video given a prompt.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /video [post]
func VideoEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VideoRequest)
		if !ok || input.Model == "" {
			log.Error().Msg("Video Endpoint - Invalid Input")
			return fiber.ErrBadRequest
		}

		config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
		if !ok || config == nil {
			log.Error().Msg("Video Endpoint - Invalid Config")
			return fiber.ErrBadRequest
		}

		src := ""
		if input.StartImage != "" {

			var fileData []byte
			var err error
			// check if input.File is an URL, if so download it and save it
			// to a temporary file
			if strings.HasPrefix(input.StartImage, "http://") || strings.HasPrefix(input.StartImage, "https://") {
				out, err := downloadFile(input.StartImage)
				if err != nil {
					return fmt.Errorf("failed downloading file:%w", err)
				}
				defer os.RemoveAll(out)

				fileData, err = os.ReadFile(out)
				if err != nil {
					return fmt.Errorf("failed reading file:%w", err)
				}

			} else {
				// base 64 decode the file and write it somewhere
				// that we will cleanup
				fileData, err = base64.StdEncoding.DecodeString(input.StartImage)
				if err != nil {
					return err
				}
			}

			// Create a temporary file
			outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
			if err != nil {
				return err
			}
			// write the base64 result
			writer := bufio.NewWriter(outputFile)
			_, err = writer.Write(fileData)
			if err != nil {
				outputFile.Close()
				return err
			}
			outputFile.Close()
			src = outputFile.Name()
			defer os.RemoveAll(src)
		}

		log.Debug().Msgf("Parameter Config: %+v", config)

		switch config.Backend {
		case "stablediffusion":
			config.Backend = model.StableDiffusionGGMLBackend
		case "":
			config.Backend = model.StableDiffusionGGMLBackend
		}

		width := input.Width
		height := input.Height

		if width == 0 {
			width = 512
		}
		if height == 0 {
			height = 512
		}

		b64JSON := input.ResponseFormat == "b64_json"

		tempDir := ""
		if !b64JSON {
			tempDir = filepath.Join(appConfig.GeneratedContentDir, "videos")
		}
		// Create a temporary file
		outputFile, err := os.CreateTemp(tempDir, "b64")
		if err != nil {
			return err
		}
		outputFile.Close()

		// TODO: use mime type to determine the extension
		output := outputFile.Name() + ".mp4"

		// Rename the temporary file
		err = os.Rename(outputFile.Name(), output)
		if err != nil {
			return err
		}

		baseURL := c.BaseURL()

		fn, err := backend.VideoGeneration(height, width, input.Prompt, src, input.EndImage, output, ml, *config, appConfig)
		if err != nil {
			return err
		}
		if err := fn(); err != nil {
			return err
		}

		item := &schema.Item{}

		if b64JSON {
			defer os.RemoveAll(output)
			data, err := os.ReadFile(output)
			if err != nil {
				return err
			}
			item.B64JSON = base64.StdEncoding.EncodeToString(data)
		} else {
			base := filepath.Base(output)
			item.URL = baseURL + "/generated-videos/" + base
		}

		id := uuid.New().String()
		created := int(time.Now().Unix())
		resp := &schema.OpenAIResponse{
			ID:      id,
			Created: created,
			Data:    []schema.Item{*item},
		}

		jsonResult, _ := json.Marshal(resp)
		log.Debug().Msgf("Response: %s", jsonResult)

		// Return the prediction in the response body
		return c.JSON(resp)
	}
}
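Note: a request against the new /video route might look like the sketch below. The JSON field names are assumptions; this diff only shows the Go struct fields of schema.VideoRequest (Model, Prompt, StartImage, EndImage, Width, Height, ResponseFormat), not the wire format:

    package main

    import (
        "bytes"
        "fmt"
        "net/http"
    )

    func main() {
        // Assumed field names; adjust to the actual schema.VideoRequest JSON tags.
        body := []byte(`{"model": "stablediffusion", "prompt": "a sailing boat at sunset", "width": 512, "height": 512}`)
        resp, err := http.Post("http://localhost:8080/video", "application/json", bytes.NewReader(body))
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        fmt.Println(resp.Status)
    }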
@@ -40,7 +40,7 @@ func TestAssistantEndpoints(t *testing.T) {
 	cl := &config.BackendConfigLoader{}
 	//configsDir := "/tmp/localai/configs"
 	modelPath := "/tmp/localai/model"
-	var ml = model.NewModelLoader(modelPath)
+	var ml = model.NewModelLoader(modelPath, false)

 	appConfig := &config.ApplicationConfig{
 		ConfigsDir: configsDir,
@@ -108,7 +108,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
 	}

 	// Create a temporary file
-	outputFile, err := os.CreateTemp(appConfig.ImageDir, "b64")
+	outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
 	if err != nil {
 		return err
 	}

@@ -184,7 +184,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
 	tempDir := ""
 	if !b64JSON {
-		tempDir = appConfig.ImageDir
+		tempDir = filepath.Join(appConfig.GeneratedContentDir, "images")
 	}
 	// Create a temporary file
 	outputFile, err := os.CreateTemp(tempDir, "b64")

@@ -192,6 +192,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
 		return err
 	}
 	outputFile.Close()
+
 	output := outputFile.Name() + ".png"
 	// Rename the temporary file
 	err = os.Rename(outputFile.Name(), output)
@@ -29,9 +29,9 @@ func Explorer(db *explorer.Database) *fiber.App {
 	httpFS := http.FS(embedDirStatic)

 	app.Use(favicon.New(favicon.Config{
-		URL:        "/favicon.ico",
+		URL:        "/favicon.svg",
 		FileSystem: httpFS,
-		File:       "static/favicon.ico",
+		File:       "static/favicon.svg",
 	}))

 	app.Use("/static", filesystem.New(filesystem.Config{
@@ -203,18 +203,10 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
 		config.Diffusers.ClipSkip = input.ClipSkip
 	}

-	if input.ModelBaseName != "" {
-		config.AutoGPTQ.ModelBaseName = input.ModelBaseName
-	}
-
 	if input.NegativePromptScale != 0 {
 		config.NegativePromptScale = input.NegativePromptScale
 	}

-	if input.UseFastTokenizer {
-		config.UseFastTokenizer = input.UseFastTokenizer
-	}
-
 	if input.NegativePrompt != "" {
 		config.NegativePrompt = input.NegativePrompt
 	}
@@ -50,16 +50,20 @@ func RegisterLocalAIRoutes(router *fiber.App,
 	router.Post("/v1/vad", vadChain...)

 	// Stores
-	sl := model.NewModelLoader("")
-	router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
-	router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
-	router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
-	router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
+	router.Post("/stores/set", localai.StoresSetEndpoint(ml, appConfig))
+	router.Post("/stores/delete", localai.StoresDeleteEndpoint(ml, appConfig))
+	router.Post("/stores/get", localai.StoresGetEndpoint(ml, appConfig))
+	router.Post("/stores/find", localai.StoresFindEndpoint(ml, appConfig))

 	if !appConfig.DisableMetrics {
 		router.Get("/metrics", localai.LocalAIMetricsEndpoint())
 	}

+	router.Post("/video",
+		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)),
+		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VideoRequest) }),
+		localai.VideoEndpoint(cl, ml, appConfig))
+
 	// Backend Statistics Module
 	// TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
 	backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
Some files were not shown because too many files have changed in this diff.