mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-28 22:44:59 +00:00
feat: add bert.cpp embeddings (#222)
This commit is contained in:
parent
e6db14e2f1
commit
f8ee20991c
14 changed files with 104 additions and 53 deletions
|
@ -12,11 +12,7 @@ Summary of the steps:
|
|||
|
||||
## Requirements
|
||||
|
||||
For this to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This will not work with gpt4all; however, you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it).
|
||||
|
||||
The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files).
|
||||
|
||||
You will also need a training data set. Copy it into the `data` directory.
|
||||
You will need a training data set. Copy it into the `data` directory.
|
||||
|
||||
## Setup
|
||||
|
||||
|
@ -28,7 +24,8 @@ git clone https://github.com/go-skynet/LocalAI
|
|||
|
||||
cd LocalAI/examples/query_data
|
||||
|
||||
# Copy your models, edit config files accordingly
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
|
|
|
@ -1,18 +1,6 @@
|
|||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: HERE
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
model: bert
|
||||
threads: 14
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: HERE
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
threads: 14
|
||||
embeddings: true
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
|
@ -15,4 +14,4 @@ roles:
|
|||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: wizardlm
|
||||
chat: gpt4all
|
|
@ -1,3 +0,0 @@
|
|||
{{.Input}}
|
||||
|
||||
### Response:
|
|
@ -13,7 +13,7 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
|||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 1024
|
||||
max_input_size = 500
|
||||
num_output = 256
|
||||
max_chunk_overlap = 20
|
||||
|
||||
|
|
|
@ -13,15 +13,15 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
|||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 512
|
||||
num_output = 512
|
||||
max_input_size = 400
|
||||
num_output = 400
|
||||
max_chunk_overlap = 30
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
documents = SimpleDirectoryReader('data').load_data()
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512)
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
|
||||
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
|
||||
index.storage_context.persist(persist_dir="./storage")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue