From a9559fa8ce802455ec3b8dfd3d21e5aab1194b4c Mon Sep 17 00:00:00 2001
From: mudler <mudler@localai.io>
Date: Sun, 18 Jun 2023 00:16:08 +0200
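Subject: [PATCH] Bump llama.cpp to support full CUDA offload

Makefile: point GOLLAMA_VERSION at
7ad833b67070fd3ec46d838f5e38d21111013f98.

Dockerfile: remove the separate `make get-sources` / `make prepare`
steps that ran before `COPY . .` and `make build`. In the later stage,
copy the full tree and run `make prepare-sources` before copying the
binary from the builder stage; this replaces the standalone
`COPY entrypoint.sh .`.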

---
 Dockerfile | 9 +++------
 Makefile   | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b379a5f3..854186b7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,10 +41,6 @@ ENV NVIDIA_VISIBLE_DEVICES=all
 
 WORKDIR /build
 
-COPY Makefile .
-RUN make get-sources
-COPY go.mod .
-RUN make prepare
 COPY . .
 RUN make build
 
@@ -62,12 +58,13 @@ RUN if [ "${FFMPEG}" = "true" ]; then \
 
 WORKDIR /build
 
+COPY . .
+RUN make prepare-sources
 COPY --from=builder /build/local-ai ./
-COPY entrypoint.sh .
 
 # Define the health check command
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
   CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
 
 EXPOSE 8080
-ENTRYPOINT [ "/build/entrypoint.sh" ]
+ENTRYPOINT [ "/build/entrypoint.sh" ]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 75cfaca1..ec1760a2 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-GOLLAMA_VERSION?=5f1620443a59c5531b5a15a16cd68f600a8437e9
+GOLLAMA_VERSION?=7ad833b67070fd3ec46d838f5e38d21111013f98
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=b004c53a7bba182cd4483d95ba9e1f68d8e56da3
 GOGGMLTRANSFORMERS_VERSION?=01b8436f44294d0e1267430f9eda4460458cec54