chore(stablediffusion-ncn): drop in favor of ggml implementation (#4652)

* chore(stablediffusion-ncn): drop in favor of ggml implementation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(ci): drop stablediffusion build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): add

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): try to fixup current tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Try to fix tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tests improvements

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): use quality to specify step

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): switch to sd-1.5

also increase prep time for downloading models

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto, 2025-01-22 19:34:16 +01:00, committed by GitHub
parent 10675ac28e
commit e15d29aba2
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
22 changed files with 123 additions and 302 deletions


@@ -7,7 +7,7 @@ services:
       args:
         - FFMPEG=true
         - IMAGE_TYPE=extras
-        - GO_TAGS=stablediffusion p2p tts
+        - GO_TAGS=p2p tts
       env_file:
         - ../.env
       ports:

.env

@@ -38,12 +38,12 @@
 ## Uncomment and set to true to enable rebuilding from source
 # REBUILD=true
-## Enable go tags, available: stablediffusion, tts
-## stablediffusion: image generation with stablediffusion
+## Enable go tags, available: p2p, tts
+## p2p: enable distributed inferencing
 ## tts: enables text-to-speech with go-piper
 ## (requires REBUILD=true)
 #
-# GO_TAGS=stablediffusion
+# GO_TAGS=p2p
 ## Path where to store generated images
 # LOCALAI_IMAGE_PATH=/tmp/generated/images


@@ -237,40 +237,7 @@ jobs:
           detached: true
           connect-timeout-seconds: 180
           limit-access-to-actor: true
-  build-stablediffusion:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-      - name: Build stablediffusion
-        run: |
-          export PATH=$PATH:$GOPATH/bin
-          make backend-assets/grpc/stablediffusion
-          mkdir -p release && cp backend-assets/grpc/stablediffusion release
-        env:
-          GO_TAGS: stablediffusion
-      - uses: actions/upload-artifact@v4
-        with:
-          name: stablediffusion
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
   build-macOS-x86_64:
     runs-on: macos-13


@@ -105,9 +105,7 @@ jobs:
           # Pre-build piper before we start tests in order to have shared libraries in place
           make sources/go-piper && \
           GO_TAGS="tts" make -C sources/go-piper piper.o && \
-          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
-          # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
         env:
           CUDA_VERSION: 12-4
       - name: Cache grpc
@@ -129,7 +127,7 @@ jobs:
           cd grpc && cd cmake/build && sudo make --jobs 5 install
       - name: Test
         run: |
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
+          PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
         uses: mxschmitt/action-tmate@v3.19

.vscode/launch.json

@@ -26,7 +26,7 @@
         "LOCALAI_P2P": "true",
         "LOCALAI_FEDERATED": "true"
       },
-      "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
+      "buildFlags": ["-tags", "p2p tts", "-v"],
       "envFile": "${workspaceFolder}/.env",
       "cwd": "${workspaceRoot}"
     }


@@ -69,14 +69,10 @@ ENV PATH=/opt/rocm/bin:${PATH}
 # OpenBLAS requirements and stable diffusion
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        libopenblas-dev \
-        libopencv-dev && \
+        libopenblas-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
-# Set up OpenCV
-RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 WORKDIR /build
 ###################################
@@ -251,7 +247,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 FROM requirements-drivers AS builder-base
-ARG GO_TAGS="stablediffusion tts p2p"
+ARG GO_TAGS="tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
 ARG LD_FLAGS="-s -w"
@@ -285,35 +281,12 @@ RUN <<EOT bash
     fi
 EOT
-###################################
-###################################
-# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
-# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
-FROM builder-base AS builder-sd
-# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
-COPY Makefile .
-COPY go.mod .
-COPY go.sum .
-COPY backend/backend.proto ./backend/backend.proto
-COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
-COPY pkg/grpc ./pkg/grpc
-COPY pkg/stablediffusion ./pkg/stablediffusion
-RUN git init
-RUN make sources/go-stable-diffusion
-RUN touch prepare-sources
-# Actually build the backend
-RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
 ###################################
 ###################################
 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
-FROM builder-sd AS builder
+FROM builder-base AS builder
 # Install the pre-built GRPC
 COPY --from=grpc /opt/grpc /usr/local
@@ -353,8 +326,6 @@ ARG FFMPEG
 COPY --from=grpc /opt/grpc /usr/local
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
 COPY .devcontainer-scripts /.devcontainer-scripts
 # Add FFmpeg
@@ -427,9 +398,6 @@ COPY --from=builder /build/local-ai ./
 # Copy shared libraries for piper
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
-# do not let stablediffusion rebuild (requires an older version of absl)
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
 # Change the shell to bash so we can use [[ tests below
 SHELL ["/bin/bash", "-c"]
 # We try to strike a balance between individual layer size (as that affects total push time) and total image size


@@ -18,10 +18,6 @@ WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
 PIPER_REPO?=https://github.com/mudler/go-piper
 PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
-# stablediffusion version
-STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
-STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
 # bark.cpp
 BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
 BARKCPP_VERSION?=v1.0.0
@@ -179,11 +175,6 @@ ifeq ($(STATIC),true)
     LD_FLAGS+=-linkmode external -extldflags -static
 endif
-ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
-#   OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
-    OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
-endif
 ifeq ($(findstring tts,$(GO_TAGS)),tts)
 #   OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
 #   OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
@@ -273,19 +264,6 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
     $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
-## stable diffusion (onnx)
-sources/go-stable-diffusion:
-    mkdir -p sources/go-stable-diffusion
-    cd sources/go-stable-diffusion && \
-    git init && \
-    git remote add origin $(STABLEDIFFUSION_REPO) && \
-    git fetch origin && \
-    git checkout $(STABLEDIFFUSION_VERSION) && \
-    git submodule update --init --recursive --depth 1 --single-branch
-sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
-    CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
 ## stablediffusion (ggml)
 sources/stablediffusion-ggml.cpp:
     git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
@@ -331,20 +309,18 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
     cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
-get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
 replace:
     $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
     $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
     $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
-    $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
     $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
 dropreplace:
     $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
     $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
     $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
-    $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
     $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
 prepare-sources: get-sources replace
@@ -355,7 +331,6 @@ rebuild: ## Rebuilds the project
     $(GOCMD) clean -cache
     $(MAKE) -C sources/go-llama.cpp clean
    $(MAKE) -C sources/whisper.cpp clean
-    $(MAKE) -C sources/go-stable-diffusion clean
     $(MAKE) -C sources/go-piper clean
     $(MAKE) build
@@ -470,7 +445,7 @@ prepare-test: grpcs
 test: prepare test-models/testmodel.ggml grpcs
     @echo 'Running tests'
-    export GO_TAGS="tts stablediffusion debug"
+    export GO_TAGS="tts debug"
     $(MAKE) prepare-test
     HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
     $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
@@ -816,13 +791,6 @@ ifneq ($(UPX),)
     $(UPX) backend-assets/grpc/piper
 endif
-backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
-    CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
-    $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
-ifneq ($(UPX),)
-    $(UPX) backend-assets/grpc/stablediffusion
-endif
 backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
     CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
     $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero


@@ -1,56 +1,17 @@
 name: stablediffusion
-backend: stablediffusion
+backend: stablediffusion-ggml
+cfg_scale: 4.5
+options:
+  - sampler:euler
 parameters:
-  model: stablediffusion_assets
-license: "BSD-3"
-urls:
-  - https://github.com/EdVince/Stable-Diffusion-NCNN
-  - https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
-description: |
-  Stable Diffusion in NCNN with c++, supported txt2img and img2img
+  model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
+step: 25
 download_files:
-  - filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
-    sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
-  - filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
-    sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
-  - filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
-    sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
-  - filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
-    sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
-    uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
-  - filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
-    sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
-    uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
-  - filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
-    sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
-    uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
-  - filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
-    sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
-  - filename: "stablediffusion_assets/log_sigmas.bin"
-    sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
-    uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
-  - filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
-    sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
-  - filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
-    sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
-  - filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
-    sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
-  - filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
-    sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
-    uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
-  - filename: "stablediffusion_assets/vocab.txt"
-    sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
-    uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
+  - filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
+    sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
+    uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
 usage: |
   curl http://localhost:8080/v1/images/generations \
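
The usage snippet above is cut off by the diff view. As a rough orientation only, the Go sketch below exercises the replaced gallery entry the same way the updated core API test further down in this commit does (prompt, step, seed and size values are taken from that test); the port, the "model" field and the error handling are illustrative assumptions rather than part of this change.

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    // Body mirrors the updated API test: the ggml backend only needs a prompt,
    // a step count and a size; the NCNN "mode" and asset directory are gone.
    body := []byte(`{
        "model": "stablediffusion",
        "prompt": "a lovely cat",
        "step": 1,
        "seed": 9000,
        "size": "256x256"
    }`)

    // Port 8080 matches the truncated usage snippet; the tests use 9090.
    resp, err := http.Post("http://localhost:8080/v1/images/generations",
        "application/json", bytes.NewBuffer(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    out, _ := io.ReadAll(resp.Body)
    fmt.Println(string(out)) // response data contains a URL (or b64_json) entry
}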


@@ -1,21 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-
-import (
-    "flag"
-
-    grpc "github.com/mudler/LocalAI/pkg/grpc"
-)
-
-var (
-    addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-    flag.Parse()
-
-    if err := grpc.StartServer(*addr, &Image{}); err != nil {
-        panic(err)
-    }
-}


@@ -1,33 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-    "github.com/mudler/LocalAI/pkg/grpc/base"
-    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-    "github.com/mudler/LocalAI/pkg/stablediffusion"
-)
-
-type Image struct {
-    base.SingleThread
-    stablediffusion *stablediffusion.StableDiffusion
-}
-
-func (image *Image) Load(opts *pb.ModelOptions) error {
-    var err error
-    // Note: the Model here is a path to a directory containing the model files
-    image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
-    return err
-}
-
-func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
-    return image.stablediffusion.GenerateImage(
-        int(opts.Height),
-        int(opts.Width),
-        int(opts.Mode),
-        int(opts.Step),
-        int(opts.Seed),
-        opts.PositivePrompt,
-        opts.NegativePrompt,
-        opts.Dst)
-}


@@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
         }
     }
     if (u & FLAG_IMAGE) == FLAG_IMAGE {
-        imageBackends := []string{"diffusers", "stablediffusion"}
+        imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"}
         if !slices.Contains(imageBackends, c.Backend) {
             return false
         }


@@ -48,5 +48,66 @@ var _ = Describe("Test cases for config related functions", func() {
             // config should includes whisper-1 models's api.config
             Expect(loadedModelNames).To(ContainElements("whisper-1"))
         })
+
+        It("Test new loadconfig", func() {
+
+            bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH"))
+            err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH"))
+            Expect(err).To(BeNil())
+            configs := bcl.GetAllBackendConfigs()
+            loadedModelNames := []string{}
+            for _, v := range configs {
+                loadedModelNames = append(loadedModelNames, v.Name)
+            }
+            Expect(configs).ToNot(BeNil())
+            totalModels := len(loadedModelNames)
+
+            Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001"))
+
+            // config should includes text-embedding-ada-002 models's api.config
+            Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002"))
+            // config should includes rwkv_test models's api.config
+            Expect(loadedModelNames).To(ContainElements("rwkv_test"))
+            // config should includes whisper-1 models's api.config
+            Expect(loadedModelNames).To(ContainElements("whisper-1"))
+
+            // create a temp directory and store a temporary model
+            tmpdir, err := os.MkdirTemp("", "test")
+            Expect(err).ToNot(HaveOccurred())
+            defer os.RemoveAll(tmpdir)
+
+            // create a temporary model
+            model := `name: "test-model"
+description: "test model"
+options:
+- foo
+- bar
+- baz
+`
+            modelFile := tmpdir + "/test-model.yaml"
+            err = os.WriteFile(modelFile, []byte(model), 0644)
+            Expect(err).ToNot(HaveOccurred())
+
+            err = bcl.LoadBackendConfigsFromPath(tmpdir)
+            Expect(err).ToNot(HaveOccurred())
+
+            configs = bcl.GetAllBackendConfigs()
+            Expect(len(configs)).ToNot(Equal(totalModels))
+
+            loadedModelNames = []string{}
+            var testModel BackendConfig
+            for _, v := range configs {
+                loadedModelNames = append(loadedModelNames, v.Name)
+                if v.Name == "test-model" {
+                    testModel = v
+                }
+            }
+            Expect(loadedModelNames).To(ContainElements("test-model"))
+            Expect(testModel.Description).To(Equal("test model"))
+            Expect(testModel.Options).To(ContainElements("foo", "bar", "baz"))
+        })
     })
 })


@@ -687,6 +687,10 @@ var _ = Describe("API test", func() {
                     Name: "model-gallery",
                     URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
                 },
+                {
+                    Name: "localai",
+                    URL:  "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml",
+                },
             }
             application, err := application.New(
@@ -764,10 +768,8 @@ var _ = Describe("API test", func() {
             }
             response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-                ID: "model-gallery@stablediffusion",
-                Overrides: map[string]interface{}{
-                    "parameters": map[string]interface{}{"model": "stablediffusion_assets"},
-                },
+                ID:   "localai@sd-1.5-ggml",
+                Name: "stablediffusion",
             })
             Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -778,14 +780,14 @@
                 response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
                 fmt.Println(response)
                 return response["processed"].(bool)
-            }, "360s", "10s").Should(Equal(true))
+            }, "1200s", "10s").Should(Equal(true))
             resp, err := http.Post(
                 "http://127.0.0.1:9090/v1/images/generations",
                 "application/json",
                 bytes.NewBuffer([]byte(`{
-                    "prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text",
-                    "mode": 2, "seed":9000,
+                    "prompt": "a lovely cat",
+                    "step": 1, "seed":9000,
                     "size": "256x256", "n":2}`)))
             // The response should contain an URL
             Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
@@ -794,6 +796,7 @@
             imgUrlResp := &schema.OpenAIResponse{}
             err = json.Unmarshal(dat, imgUrlResp)
+            Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat))
             Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero()))
             imgUrl := imgUrlResp.Data[0].URL
             Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl)


@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
         }
         if m == "" {
-            m = model.StableDiffusionBackend
+            m = "stablediffusion"
         }
         log.Debug().Msgf("Loading model: %+v", m)
@@ -129,9 +129,9 @@
         switch config.Backend {
         case "stablediffusion":
-            config.Backend = model.StableDiffusionBackend
+            config.Backend = model.StableDiffusionGGMLBackend
         case "":
-            config.Backend = model.StableDiffusionBackend
+            config.Backend = model.StableDiffusionGGMLBackend
         }
         if !strings.Contains(input.Size, "x") {


@@ -4,6 +4,7 @@ import (
     "context"
     "encoding/json"
     "fmt"
+    "strconv"
     "github.com/gofiber/fiber/v2"
     "github.com/google/uuid"
@@ -296,6 +297,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
             }
         }
     }
+
+    // If a quality was defined as number, convert it to step
+    if input.Quality != "" {
+        q, err := strconv.Atoi(input.Quality)
+        if err == nil {
+            config.Step = q
+        }
+    }
 }
 func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
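
The quality-to-step conversion above lets OpenAI-compatible clients steer the diffusion step count without a LocalAI-specific field. A minimal sketch using the github.com/sashabaranov/go-openai client; the base URL, the placeholder key and the step value of 25 are assumptions for illustration, not part of this commit.

package main

import (
    "context"
    "fmt"

    openai "github.com/sashabaranov/go-openai"
)

func main() {
    cfg := openai.DefaultConfig("sk-placeholder") // key only matters if API keys are configured
    cfg.BaseURL = "http://127.0.0.1:8080/v1"      // assumed local instance
    client := openai.NewClientWithConfig(cfg)

    resp, err := client.CreateImage(context.Background(), openai.ImageRequest{
        Prompt:  "a lovely cat",
        Size:    openai.CreateImageSize256x256,
        Quality: "25", // numeric string: parsed with strconv.Atoi and applied as config.Step
    })
    if err != nil {
        panic(err)
    }
    fmt.Println(resp.Data[0].URL)
}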


@@ -191,8 +191,9 @@ type OpenAIRequest struct {
     Stream bool `json:"stream"`
     // Image (not supported by OpenAI)
     Mode int `json:"mode"`
-    Step int `json:"step"`
+    Quality string `json:"quality"`
+    Step int `json:"step"`
     // A grammar to constrain the LLM output
     Grammar string `json:"grammar" yaml:"grammar"`


@@ -29,6 +29,7 @@ var Aliases map[string]string = map[string]string{
     "langchain-huggingface": LCHuggingFaceBackend,
     "transformers-musicgen": TransformersBackend,
     "sentencetransformers":  TransformersBackend,
+    "stablediffusion":       StableDiffusionGGMLBackend,
 }
 var TypeAlias map[string]string = map[string]string{
@@ -54,10 +55,10 @@
     LLamaCPPGRPC = "llama-cpp-grpc"
     WhisperBackend = "whisper"
-    StableDiffusionBackend = "stablediffusion"
+    StableDiffusionGGMLBackend = "stablediffusion-ggml"
     PiperBackend = "piper"
     LCHuggingFaceBackend = "huggingface"
     TransformersBackend = "transformers"
     LocalStoreBackend = "local-store"
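
Because of the "stablediffusion" alias added above, existing model configs that still declare backend: stablediffusion transparently load the ggml implementation. A self-contained sketch of the lookup; the real map and the StableDiffusionGGMLBackend constant live in the model loader package, and the subset below is illustrative only.

package main

import "fmt"

// Illustrative subset of the alias table; the real map covers many more names.
var aliases = map[string]string{
    "stablediffusion": "stablediffusion-ggml",
}

// resolveBackend mimics how an alias is applied before a backend process is started.
func resolveBackend(name string) string {
    if alias, ok := aliases[name]; ok {
        return alias
    }
    return name
}

func main() {
    fmt.Println(resolveBackend("stablediffusion")) // stablediffusion-ggml
    fmt.Println(resolveBackend("piper"))           // piper
}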


@@ -1,35 +0,0 @@
-//go:build stablediffusion
-// +build stablediffusion
-
-package stablediffusion
-
-import (
-    stableDiffusion "github.com/mudler/go-stable-diffusion"
-)
-
-func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
-    if height > 512 || width > 512 {
-        return stableDiffusion.GenerateImageUpscaled(
-            height,
-            width,
-            step,
-            seed,
-            positive_prompt,
-            negative_prompt,
-            dst,
-            asset_dir,
-        )
-    }
-    return stableDiffusion.GenerateImage(
-        height,
-        width,
-        mode,
-        step,
-        seed,
-        positive_prompt,
-        negative_prompt,
-        dst,
-        "",
-        asset_dir,
-    )
-}


@@ -1,10 +0,0 @@
-//go:build !stablediffusion
-// +build !stablediffusion
-
-package stablediffusion
-
-import "fmt"
-
-func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
-    return fmt.Errorf("This version of LocalAI was built without the stablediffusion tag")
-}


@@ -1,20 +0,0 @@
-package stablediffusion
-
-import "os"
-
-type StableDiffusion struct {
-    assetDir string
-}
-
-func New(assetDir string) (*StableDiffusion, error) {
-    if _, err := os.Stat(assetDir); err != nil {
-        return nil, err
-    }
-    return &StableDiffusion{
-        assetDir: assetDir,
-    }, nil
-}
-
-func (s *StableDiffusion) GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string) error {
-    return GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst, s.assetDir)
-}


@@ -54,7 +54,7 @@ var _ = BeforeSuite(func() {
     Eventually(func() error {
         _, err := client.ListModels(context.TODO())
         return err
-    }, "20m").ShouldNot(HaveOccurred())
+    }, "50m").ShouldNot(HaveOccurred())
 })
 var _ = AfterSuite(func() {


@@ -123,8 +123,9 @@ var _ = Describe("E2E test", func() {
         It("correctly", func() {
             resp, err := client.CreateImage(context.TODO(),
                 openai.ImageRequest{
                     Prompt: "test",
-                    Size:   openai.CreateImageSize512x512,
+                    Quality: "1",
+                    Size:    openai.CreateImageSize256x256,
                 },
             )
             Expect(err).ToNot(HaveOccurred())
@@ -135,7 +136,8 @@
             resp, err := client.CreateImage(context.TODO(),
                 openai.ImageRequest{
                     Prompt:         "test",
-                    Size:           openai.CreateImageSize512x512,
+                    Size:           openai.CreateImageSize256x256,
+                    Quality:        "1",
                     ResponseFormat: openai.CreateImageResponseFormatURL,
                 },
             )
@@ -147,7 +149,8 @@
             resp, err := client.CreateImage(context.TODO(),
                 openai.ImageRequest{
                     Prompt:         "test",
-                    Size:           openai.CreateImageSize512x512,
+                    Size:           openai.CreateImageSize256x256,
+                    Quality:        "1",
                     ResponseFormat: openai.CreateImageResponseFormatB64JSON,
                 },
             )