feat(llama.cpp): add distributed llama.cpp inferencing (#2324)

* feat(llama.cpp): support distributed llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: let tweak how chat messages are merged together

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactor

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Makefile: register to ALL_GRPC_BACKENDS

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactoring, allow disable auto-detection of backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* minor fixups

Signed-off-by: mudler <mudler@localai.io>

* feat: add cmd to start rpc-server from llama.cpp

Signed-off-by: mudler <mudler@localai.io>

* ci: add ccache

Signed-off-by: mudler <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: mudler <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-05-15 01:17:02 +02:00 committed by GitHub
parent 29909666c3
commit c89271b2e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 222 additions and 82 deletions

View file

@ -0,0 +1,37 @@
package cli
import (
"os"
"syscall"
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/rs/zerolog/log"
)
type LLAMACPPWorkerCMD struct {
Args []string `arg:"" optional:"" name:"models" help:"Worker arguments: host port"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
}
func (r *LLAMACPPWorkerCMD) Run(ctx *Context) error {
// Extract files from the embedded FS
err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
if err != nil {
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
}
return syscall.Exec(
assets.ResolvePath(
r.BackendAssetsPath,
"util",
"llama-cpp-rpc-server",
),
append([]string{
assets.ResolvePath(
r.BackendAssetsPath,
"util",
"llama-cpp-rpc-server",
)}, r.Args...),
os.Environ())
}