chainlit example (#1238)

2025-05-20 10:35:01 +00:00 · 2023-11-02 17:56:46 -04:00 · 2023-11-02 17:56:46 -04:00 · 23c7fbfe6b
commit 23c7fbfe6b
parent 035fea676a
5 changed files with 146 additions and 0 deletions
--- a/examples/chainlit/Dockerfile
+++ b/examples/chainlit/Dockerfile
@ -0,0 +1,16 @@
 # Use the official Python slim image from Docker Hub as the parent image
 # (a private registry mirror is not reachable for other users of this example)
 FROM python:3.9-slim
 # Set the working directory in the container
 WORKDIR /app
 # Copy only the requirements file first so the dependency layer stays cached
 # until requirements.txt itself changes
 COPY requirements.txt /app
 # Install the packages specified in requirements.txt; skip pip's download
 # cache so it does not end up in the image layer
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy the rest of the example (main.py, config.yaml, ...) into /app
 COPY . /app
 # Run main.py with Chainlit when the container launches:
 # -h starts headless (no browser), --host 0.0.0.0 listens on all interfaces
 CMD ["chainlit", "run", "-h", "--host", "0.0.0.0", "main.py"]
--- a/examples/chainlit/README.md
+++ b/examples/chainlit/README.md
@ -0,0 +1,25 @@
 # LocalAI Demonstration with Embeddings and Chainlit
 This demonstration shows you how to use embeddings with existing data in `LocalAI`, and how to integrate it with Chainlit for an interactive querying experience. We are using the `llama_index` library to facilitate the embedding and querying processes, and `chainlit` to provide an interactive interface. The `Weaviate` client is used as the embedding source.
 ## Prerequisites
 Before proceeding, make sure you have the following installed:
 - Weaviate client
 - LocalAI and its dependencies
 - Chainlit and its dependencies
 ## Getting Started
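 1. Clone this repository.
 2. Navigate to the example directory: `cd examples/chainlit`
 3. Install the Python dependencies: `pip install -r requirements.txt`
 4. Adjust `config.yaml` so that it points at your LocalAI and Weaviate endpoints.
 5. Run the example: `chainlit run main.py`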
 ## Highlights: `llama_index` and `chainlit`
 `llama_index` is the key library that facilitates the process of embedding and querying data in LocalAI. It provides a seamless interface to integrate various components, such as `WeaviateVectorStore`, `LocalAI`, `ServiceContext`, and more, for a smooth querying experience.
 `chainlit` is used to provide an interactive interface for users to query the data and see the results in real-time. It integrates with llama_index to handle the querying process and display the results to the user.
 In this example, `llama_index` is used to set up the `VectorStoreIndex` and `QueryEngine`, and `chainlit` is used to handle the user interactions with `LocalAI` and display the results.
--- a/examples/chainlit/config.yaml
+++ b/examples/chainlit/config.yaml
@ -0,0 +1,16 @@
 # Settings for the LocalAI LLM client created in main.py
 localAI:
  temperature: 0
  modelName: gpt-3.5-turbo
  # Base URL and API key passed to the LocalAI client
  apiBase: http://local-ai.default
  apiKey: stub
  streaming: True
 # Weaviate connection settings.
 # NOTE: main.py looks this section up under the exact key "weviate",
 # so the spelling here must match the code.
 weviate:
  url: http://weviate.local
  # Name of the Weaviate index handed to the vector store
  index: AIChroma
 # Query-engine settings
 query:
  # Passed as vector_store_query_mode
  mode: hybrid
  # Passed as similarity_top_k
  topK: 1
  alpha: 0.0
  # Passed as chunk_size to the service context
  chunkSize: 1024
 # HuggingFace embedding model used to embed queries
 embedding:
  model: BAAI/bge-small-en-v1.5
--- a/examples/chainlit/main.py
+++ b/examples/chainlit/main.py
@ -0,0 +1,82 @@
 import os
 import weaviate
 from llama_index.storage.storage_context import StorageContext
 from llama_index.vector_stores import WeaviateVectorStore
 from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
 from llama_index.callbacks.base import CallbackManager
 from llama_index import (
    LLMPredictor,
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
 )
 import chainlit as cl
 from llama_index.llms import LocalAI
 from llama_index.embeddings import HuggingFaceEmbedding
 import yaml
 # Load the configuration file (looked up relative to the current working directory)
 with open("config.yaml", "r", encoding="utf-8") as ymlfile:
    # An empty YAML document parses to None; fall back to an empty mapping so
    # the defaults below still apply instead of raising a TypeError.
    cfg = yaml.safe_load(ymlfile) or {}
 # A missing or empty section falls back to an empty mapping, so every setting
 # below really does have a default (previously a missing section raised KeyError).
 # NOTE: the 'weviate' key is spelled exactly as it appears in config.yaml.
 local_ai_cfg = cfg.get('localAI') or {}
 weaviate_cfg = cfg.get('weviate') or {}
 query_cfg = cfg.get('query') or {}
 embedding_cfg = cfg.get('embedding') or {}
 # Get the values from the configuration file or set the default values
 temperature = local_ai_cfg.get('temperature', 0)
 model_name = local_ai_cfg.get('modelName', "gpt-3.5-turbo")
 api_base = local_ai_cfg.get('apiBase', "http://local-ai.default")
 api_key = local_ai_cfg.get('apiKey', "stub")
 streaming = local_ai_cfg.get('streaming', True)
 weaviate_url = weaviate_cfg.get('url', "http://weviate.default")
 index_name = weaviate_cfg.get('index', "AIChroma")
 query_mode = query_cfg.get('mode', "hybrid")
 topK = query_cfg.get('topK', 1)
 alpha = query_cfg.get('alpha', 0.0)
 embed_model_name = embedding_cfg.get('model', "BAAI/bge-small-en-v1.5")
 chunk_size = query_cfg.get('chunkSize', 1024)
 # Shared, module-level clients reused by every chat session
 embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
 llm = LocalAI(temperature=temperature, model_name=model_name, api_base=api_base, api_key=api_key, streaming=streaming)
 llm.globally_use_chat_completions = True
 client = weaviate.Client(weaviate_url)
 vector_store = WeaviateVectorStore(weaviate_client=client, index_name=index_name)
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
@cl.on_chat_start
 async def factory():
    llm_predictor = LLMPredictor(
        llm=llm
    )
    service_context = ServiceContext.from_defaults(embed_model=embed_model, callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]), llm_predictor=llm_predictor, chunk_size=chunk_size)
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        storage_context=storage_context,
        service_context=service_context
    )
    query_engine = index.as_query_engine(vector_store_query_mode=query_mode, similarity_top_k=topK, alpha=alpha, streaming=True)
    cl.user_session.set("query_engine", query_engine)
@cl.on_message
 async def main(message: cl.Message):
    query_engine = cl.user_session.get("query_engine")
    response = await cl.make_async(query_engine.query)(message.content)
    response_message = cl.Message(content="")
    for token in response.response_gen:
        await response_message.stream_token(token=token)
    if response.response_txt:
        response_message.content = response.response_txt
    await response_message.send()
--- a/examples/chainlit/requirements.txt
+++ b/examples/chainlit/requirements.txt
@ -0,0 +1,7 @@
 llama_hub==0.0.41
 llama_index==0.8.55
 Requests==2.31.0
 weaviate_client==3.25.1
 transformers
 torch
 chainlit
 # main.py imports yaml directly; declare it instead of relying on a transitive install
 PyYAML