bmv2021 committed on
Commit
69ec05a
·
1 Parent(s): 6c1e5e3

updating app.py; author: Enrico Collautti enricoll@bu.edu

Browse files
Files changed (1) hide show
  1. app1.1.py +83 -0
app1.1.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv # Import dotenv to load environment variables
2
+ import os
3
+ import chainlit as cl
4
+ from langchain.chains import RetrievalQA
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_community.embeddings import OpenAIEmbeddings
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+ from langchain.chat_models import ChatOpenAI
9
+ from langchain.schema import Document
10
+ import json
11
+
12
# Load environment variables from the project's .env file
load_dotenv()

# Read the OpenAI API key from the environment; refuse to start without it
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is not set. Please add it to your .env file.")

# Module-level state shared by the Chainlit handlers below;
# populated lazily in chat_start().
vector_store = None
qa_chain = None
24
+
25
# Step 1: Load and Process JSON Data
def load_json_file(file_path):
    """Read the JSON document at *file_path* and return the parsed data."""
    with open(file_path, "r", encoding="utf-8") as fh:
        return json.load(fh)
30
+
31
def setup_vector_store_from_json(json_data):
    """Build a FAISS vector store from a list of {"url", "content"} records.

    Each record becomes a Document whose page content is the item's
    "content" field, with the source "url" kept in the metadata so it
    can be surfaced alongside answers later.
    """
    docs = []
    for item in json_data:
        docs.append(
            Document(page_content=item["content"], metadata={"url": item["url"]})
        )

    # Embed the documents and index them in FAISS
    embedder = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    return FAISS.from_documents(docs, embedder)
39
+
40
def setup_qa_chain(vector_store):
    """Wrap *vector_store* in a RetrievalQA chain backed by gpt-3.5-turbo.

    The retriever returns the top 3 matches per query, and the chain is
    configured to return its source documents for citation.
    """
    chat_model = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPENAI_API_KEY)
    return RetrievalQA.from_chain_type(
        llm=chat_model,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
    )
45
+
46
# Initialize Chainlit: Preload data when the chat starts
@cl.on_chat_start
async def chat_start():
    """Build the retrieval pipeline and greet the user at session start."""
    global vector_store, qa_chain

    # Load the bundled JSON knowledge base, index it, and wire up the QA chain
    raw_records = load_json_file("football_players.json")
    vector_store = setup_vector_store_from_json(raw_records)
    qa_chain = setup_qa_chain(vector_store)

    # Let the user know the app is ready for questions
    await cl.Message(
        content="Welcome to the RAG app! Ask me any question based on the knowledge base."
    ).send()
58
+
59
# Process user queries
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message via the QA chain and cite its source documents."""
    global qa_chain

    # Guard: chat_start() may not have finished building the chain yet
    if qa_chain is None:
        await cl.Message(
            content="The app is still initializing. Please wait a moment and try again."
        ).send()
        return

    # NOTE(review): this call is synchronous and blocks the event loop while
    # the chain runs — consider an async wrapper; verify against chainlit docs.
    response = qa_chain({"query": message.content})

    answer = response["result"]
    sources = response["source_documents"]

    # Send the answer first, then one message per supporting document
    await cl.Message(content=f"**Answer:** {answer}").send()
    if not sources:
        return
    await cl.Message(content="**Sources:**").send()
    for idx, doc in enumerate(sources, 1):
        source_url = doc.metadata.get("url", "No URL available")
        await cl.Message(
            content=f"**Source {idx}:** {doc.page_content}\n**URL:** {source_url}"
        ).send()