feat(llama.cpp): add distributed llama.cpp inferencing (#2324)

* feat(llama.cpp): support distributed llama.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: let tweak how chat messages are merged together Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactor Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Makefile: register to ALL_GRPC_BACKENDS Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring, allow disable auto-detection of backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * minor fixups Signed-off-by: mudler <mudler@localai.io> * feat: add cmd to start rpc-server from llama.cpp Signed-off-by: mudler <mudler@localai.io> * ci: add ccache Signed-off-by: mudler <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: mudler <mudler@localai.io>
2025-05-23 12:05:00 +00:00 · 2024-05-15 01:17:02 +02:00 · 2024-05-15 01:17:02 +02:00 · c89271b2e4
commit c89271b2e4
parent 29909666c3
11 changed files with 222 additions and 82 deletions
--- a/core/cli/llamacppworker.go
+++ b/core/cli/llamacppworker.go
@ -0,0 +1,37 @@
+package cli
+
+import (
+	"os"
+	"syscall"
+
+	"github.com/go-skynet/LocalAI/pkg/assets"
+	"github.com/rs/zerolog/log"
+)
+
+// LLAMACPPWorkerCMD is the CLI command whose Run method extracts the embedded
+// backend assets and then executes the "llama-cpp-rpc-server" binary found
+// under the "util" directory of the extracted assets.
+type LLAMACPPWorkerCMD struct {
+	// Args are appended verbatim after argv[0] when the rpc-server binary is
+	// executed.
+	Args              []string `arg:"" optional:"" name:"models" help:"Worker arguments: host port"`
+	// BackendAssetsPath is the directory the embedded backend assets are
+	// extracted into, and the root from which the rpc-server binary is resolved.
+	BackendAssetsPath string   `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+}
+
+// Run extracts the embedded backend assets into r.BackendAssetsPath and then
+// replaces the current process with the "llama-cpp-rpc-server" binary located
+// under the "util" directory of those assets, forwarding r.Args as its
+// arguments and the current process environment.
+//
+// A failed extraction is only logged as a warning; execution is still
+// attempted. When the exec succeeds this call does not return, so the returned
+// error is always the failure reported by syscall.Exec.
+func (r *LLAMACPPWorkerCMD) Run(ctx *Context) error {
+	// Log before extracting so the message reflects the real order of events
+	// and is still emitted if the extraction never completes.
+	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
+	if err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath); err != nil {
+		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+	}
+
+	// Resolve the binary once: the same path is both the executable to run
+	// and, by convention, argv[0] of the new process.
+	serverPath := assets.ResolvePath(r.BackendAssetsPath, "util", "llama-cpp-rpc-server")
+	argv := append([]string{serverPath}, r.Args...)
+
+	return syscall.Exec(serverPath, argv, os.Environ())
+}