from dotenv import load_dotenv  # Loads environment variables from a .env file
import os
import json

import chainlit as cl
from langchain.chains import RetrievalQA
from langchain.schema import Document
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.embeddings import OpenAIEmbeddings  # only needed for the OpenAI embedding alternative below
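
# How to run (sketch; the filename "app.py" is an assumption, substitute this file's actual name,
# and exact package names may vary with your environment):
#   pip install chainlit langchain langchain-community faiss-cpu sentence-transformers python-dotenv openai
#   chainlit run app.py
#
# Expected .env entry (required by the check below):
#   OPENAI_API_KEY=sk-...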

# Load environment variables from .env file
load_dotenv()

# Get the OpenAI API key from the environment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is not set. Please add it to your .env file.")

# Global variables for vector store and QA chain
vector_store = None
qa_chain = None

# Load and process the JSON knowledge base
def load_json_file(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data
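
# Assumed structure of football_players.json (a sketch, not verified against the real file):
# each item must expose the "content" and "url" keys read in setup_vector_store_from_json below, e.g.
# [
#   {"url": "https://example.com/player-1", "content": "Biography and career summary of player 1 ..."},
#   {"url": "https://example.com/player-2", "content": "Biography and career summary of player 2 ..."}
# ]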

def setup_vector_store_from_json(json_data):
    # Create Document objects with URLs and content
    documents = [Document(page_content=item["content"], metadata={"url": item["url"]}) for item in json_data]

    # Create embeddings and store them in FAISS.
    # Alternative: OpenAI embeddings (also uncomment the matching import above).
    # embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store

def setup_qa_chain(vector_store):
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
    llm = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPENAI_API_KEY)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
    return qa_chain

# Initialize Chainlit: Preload data when the chat starts
@cl.on_chat_start
async def chat_start():
    global vector_store, qa_chain

    # Load and preprocess the JSON file
    json_data = load_json_file("football_players.json")
    vector_store = setup_vector_store_from_json(json_data)
    qa_chain = setup_qa_chain(vector_store)

    # Send a welcome message
    await cl.Message(content="Welcome to the RAG app! Ask me any question based on the knowledge base.").send()

# Process user queries
@cl.on_message
async def main(message: cl.Message):
    global qa_chain

    # Ensure the QA chain is ready
    if qa_chain is None:
        await cl.Message(content="The app is still initializing. Please wait a moment and try again.").send()
        return

    # Get the query from the user and run the QA chain
    # (invoke() replaces calling the chain directly, which is deprecated in recent LangChain versions)
    query = message.content
    response = qa_chain.invoke({"query": query})

    # Extract the answer and source documents
    answer = response["result"]
    sources = response["source_documents"]

    # Format and send the response
    await cl.Message(content=f"**Answer:** {answer}").send()
    if sources:
        await cl.Message(content="**Sources:**").send()
        for i, doc in enumerate(sources, 1):
            url = doc.metadata.get("url", "No URL available")
            await cl.Message(content=f"**Source {i}:** {doc.page_content}\n**URL:** {url}").send()