Update app.py
app.py
CHANGED
@@ -1,3 +1,102 @@
+# import streamlit as st
+# from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
+# from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
+# import os
+# import nltk
+# import io
+# import fitz
+# nltk.download("punkt")
+
+# st.title(':blue[Langchain:] A Rag System on “Leave No Context Behind” Paper')
+# st.header("AI Chatbot :robot_face:")
+
+# os.environ["GOOGLE_API_KEY"] = os.getenv("k4")
+# # Creating a template
+
+# chat_template = ChatPromptTemplate.from_messages([
+#     # System Message establishes bot's role and general behavior guidelines
+#     SystemMessage(content="""You are a Helpful AI Bot.
+#     You take the context and question from user. Your answer should be based on the specific context."""),
+#     # Human Message Prompt Template
+#     HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
+#     Context:
+#     {context}
+
+#     Question:
+#     {question}
+
+#     Answer: """)
+# ])
+
+# #user's question.
+# #how many results we want to print.
+
+# from langchain_google_genai import ChatGoogleGenerativeAI
+
+# chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
+
+# from langchain_core.output_parsers import StrOutputParser
+
+# output_parser = StrOutputParser()
+
+# chain = chat_template | chat_model | output_parser
+
+# from langchain_community.document_loaders import PDFMinerLoader
+# from langchain_text_splitters import NLTKTextSplitter
+# from langchain_google_genai import GoogleGenerativeAIEmbeddings
+# from langchain_community.vectorstores import Chroma
+# from langchain_core.runnables import RunnablePassthrough
+
+# def extract_text_from_pdf(pdf_file):
+#     document = fitz.open(stream=pdf_file, filetype="pdf")
+#     text = ""
+#     for page_num in range(len(document)):
+#         page = document.load_page(page_num)
+#         text += page.get_text()
+#     return text
+
+
+# uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
+
+# if uploaded_file is not None:
+
+#     pdf_file = io.BytesIO(uploaded_file.read())
+#     text = extract_text_from_pdf(pdf_file)
+#     #pdf_loader = PDFMinerLoader(pdf_file)
+#     #dat_nik = pdf_loader.load()
+#     text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
+#     chunks = text_splitter.split_documents([text])
+
+#     embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+
+#     db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
+
+#     db.persist()
+
+#     db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)
+
+#     retriever = db_connection.as_retriever(search_kwargs={"k": 5})
+
+#     def format_docs(docs):
+#         return "\n\n".join(doc.page_content for doc in docs)
+
+#     rag_chain = (
+#         {"context": retriever | format_docs, "question": RunnablePassthrough()}
+#         | chat_template
+#         | chat_model
+#         | output_parser
+#     )
+
+#     user_input = st.text_area("Ask Questions to AI")
+#     if st.button("Submit"):
+#         st.subheader(":green[Query:]")
+#         st.subheader(user_input)
+#         response = rag_chain.invoke(user_input)
+#         st.subheader(":green[Response:-]")
+#         st.write(response)
+
+##################################################### chatgpt code model #############################################
+
 import streamlit as st
 from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
 from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
@@ -10,14 +109,13 @@ nltk.download("punkt")
 st.title(':blue[Langchain:] A Rag System on “Leave No Context Behind” Paper')
 st.header("AI Chatbot :robot_face:")
 
-os.environ["GOOGLE_API_KEY"] = os.getenv("k4")
-# Creating a template
+# Set up environment variables
+os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
 
+# Creating a template
 chat_template = ChatPromptTemplate.from_messages([
-    # System Message establishes bot's role and general behavior guidelines
     SystemMessage(content="""You are a Helpful AI Bot.
     You take the context and question from user. Your answer should be based on the specific context."""),
-    # Human Message Prompt Template
     HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
     Context:
     {context}
@@ -28,19 +126,18 @@ chat_template = ChatPromptTemplate.from_messages([
     Answer: """)
 ])
 
-#user's question.
-#how many results we want to print.
-
+# Initialize chat model
 from langchain_google_genai import ChatGoogleGenerativeAI
-
 chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
 
+# Initialize output parser
 from langchain_core.output_parsers import StrOutputParser
-
 output_parser = StrOutputParser()
 
+# Initialize the chain
 chain = chat_template | chat_model | output_parser
 
+# Initialize document loaders and splitters
 from langchain_community.document_loaders import PDFMinerLoader
 from langchain_text_splitters import NLTKTextSplitter
 from langchain_google_genai import GoogleGenerativeAIEmbeddings
@@ -55,26 +152,25 @@ def extract_text_from_pdf(pdf_file):
         text += page.get_text()
     return text
 
-
-uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
+# Streamlit file uploader
+uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
 
 if uploaded_file is not None:
-
+    # Extract text from the uploaded PDF
     pdf_file = io.BytesIO(uploaded_file.read())
     text = extract_text_from_pdf(pdf_file)
-    #pdf_loader = PDFMinerLoader(pdf_file)
-    #dat_nik = pdf_loader.load()
-    text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
+
+    # Split the document into chunks
+    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
     chunks = text_splitter.split_documents([text])
 
+    # Initialize embeddings and vectorstore
     embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
-
-    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
+    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db")
 
    db.persist()
 
-    db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)
-
+    db_connection = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
     retriever = db_connection.as_retriever(search_kwargs={"k": 5})
 
     def format_docs(docs):
@@ -91,31 +187,8 @@ if uploaded_file is not None:
     if st.button("Submit"):
         st.subheader(":green[Query:]")
         st.subheader(user_input)
-        response = rag_chain.invoke(user_input)
-        st.subheader(":green[Response:-]")
+        response = rag_chain.invoke({"question": user_input})
+        st.subheader(":green[Response:]")
         st.write(response)
-
-
-# dat_nik =dat.load()
-# # Split the document into chunks
-
-
-# text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
-
-# chunks = text_splitter.split_documents(dat_nik)
-# Creating Chunks Embedding
-# We are just loading OpenAIEmbeddings
-
-
-
-
-# vectors = embeddings.embed_documents(chunks)
-# Store the chunks in vector store
-
-
-# Creating a New Chroma Database
-
-
-
-#takes user's question.
-
+else:
+    st.write("Please upload a PDF file to get started.")
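A note on how this chain expects to be invoked, as a minimal sketch; it reuses `rag_chain`, `retriever`, and `format_docs` exactly as defined in app.py above, and the example question is hypothetical. In LCEL, the dict `{"context": retriever | format_docs, "question": RunnablePassthrough()}` coerces to a `RunnableParallel`, which fans the same input out to every branch: the retriever receives it as the search query, and `RunnablePassthrough()` forwards it unchanged into the prompt's `{question}` slot. Since a Chroma retriever expects a plain string query, this wiring is invoked with the bare string (as the commented-out version does with `rag_chain.invoke(user_input)`); invoking with a dict would hand the whole dict to the retriever.

# Minimal usage sketch, assuming the objects defined in app.py above.
# The parallel dict stage sends one input to both branches:
#   "context":  query -> retriever -> format_docs   (retriever needs a string)
#   "question": query -> RunnablePassthrough()      (forwarded unchanged)
query = "What does the paper propose?"  # hypothetical example question
response = rag_chain.invoke(query)      # StrOutputParser yields a plain string
print(response)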