bmv2021 committed
Commit 540424a · 1 Parent(s): a0b734e

Changing from Chainlit to Streamlit

Files changed (1)
  1. streamlit-rag-app.py +96 -0
streamlit-rag-app.py ADDED
@@ -0,0 +1,96 @@
+ import streamlit as st
+ import os
+ import json
+ from dotenv import load_dotenv
+
+ from langchain.chains import RetrievalQA
+ from langchain_community.vectorstores import FAISS
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.chat_models import ChatOpenAI
+ from langchain.schema import Document
+ from langchain.embeddings import HuggingFaceEmbeddings
+
+ # Load environment variables
+ load_dotenv()
+
+ # Get the OpenAI API key from the environment
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ if not OPENAI_API_KEY:
+     st.error("OPENAI_API_KEY is not set. Please add it to your .env file.")
+
+ # Initialize session state variables
+ if 'vector_store' not in st.session_state:
+     st.session_state.vector_store = None
+ if 'qa_chain' not in st.session_state:
+     st.session_state.qa_chain = None
+
+ def load_json_file(file_path):
+     """Load JSON data from a file."""
+     with open(file_path, "r", encoding="utf-8") as file:
+         data = json.load(file)
+     return data
+
+ def setup_vector_store_from_json(json_data):
+     """Create a vector store from JSON data."""
+     documents = [Document(page_content=item["content"], metadata={"url": item["url"]}) for item in json_data]
+
+     # Use HuggingFace embeddings
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+
+     vector_store = FAISS.from_documents(documents, embeddings)
+     return vector_store
+
+ def setup_qa_chain(vector_store):
+     """Set up the QA chain with a retriever."""
+     retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+     llm = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPENAI_API_KEY)
+     qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
+     return qa_chain
+
+ def main():
+     # Set page title and header
+     st.set_page_config(page_title="Football Players RAG App", page_icon="⚽")
+     st.title("Football Players Knowledge Base 🏆")
+
+     # Sidebar for initialization
+     st.sidebar.header("Initialize Knowledge Base")
+     if st.sidebar.button("Load Data"):
+         try:
+             # Load and preprocess the JSON file
+             json_data = load_json_file("football_players.json")
+             st.session_state.vector_store = setup_vector_store_from_json(json_data)
+             st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store)
+             st.sidebar.success("Knowledge base loaded successfully!")
+         except Exception as e:
+             st.sidebar.error(f"Error loading data: {e}")
+
+     # Query input and processing
+     st.header("Ask a Question")
+     query = st.text_input("Enter your question about football players:")
+
+     if query:
+         # Check if vector store and QA chain are initialized
+         if st.session_state.qa_chain is None:
+             st.warning("Please load the knowledge base first using the sidebar.")
+         else:
+             # Run the query
+             try:
+                 response = st.session_state.qa_chain({"query": query})
+
+                 # Display answer
+                 st.subheader("Answer")
+                 st.write(response["result"])
+
+                 # Display sources
+                 st.subheader("Sources")
+                 sources = response["source_documents"]
+                 for i, doc in enumerate(sources, 1):
+                     with st.expander(f"Source {i}"):
+                         st.write(f"**Content:** {doc.page_content}")
+                         st.write(f"**URL:** {doc.metadata.get('url', 'No URL available')}")
+
+             except Exception as e:
+                 st.error(f"An error occurred: {e}")
+
+ if __name__ == "__main__":
+     main()
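
The app expects a local football_players.json next to the script, shaped as a list of objects each carrying a "content" and a "url" field, since that is what setup_vector_store_from_json reads. A minimal sketch of producing such a file is below; the player entries and URLs are placeholders for illustration, not data included in this commit.

import json

# Hypothetical sample records matching the keys the app reads:
# each entry needs a "content" string and a "url" string.
sample_players = [
    {"content": "Placeholder biography for player one.", "url": "https://example.com/player-one"},
    {"content": "Placeholder biography for player two.", "url": "https://example.com/player-two"},
]

# Write the file where the app looks for it.
with open("football_players.json", "w", encoding="utf-8") as f:
    json.dump(sample_players, f, ensure_ascii=False, indent=2)

With that file and a .env containing OPENAI_API_KEY in the working directory, the app can be started with: streamlit run streamlit-rag-app.py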