yashasgupta committed on
Commit
2a18488
·
verified ·
1 Parent(s): c36d1bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -46
app.py CHANGED
@@ -1,3 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
3
  from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
@@ -10,14 +109,13 @@ nltk.download("punkt")
10
  st.title(':blue[Langchain:] A Rag System on “Leave No Context Behind” Paper')
11
  st.header("AI Chatbot :robot_face:")
12
 
13
- os.environ["GOOGLE_API_KEY"] = os.getenv("k4")
14
- # Creating a template
15
 
 
16
  chat_template = ChatPromptTemplate.from_messages([
17
- # System Message establishes bot's role and general behavior guidelines
18
  SystemMessage(content="""You are a Helpful AI Bot.
19
  You take the context and question from user. Your answer should be based on the specific context."""),
20
- # Human Message Prompt Template
21
  HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
22
  Context:
23
  {context}
@@ -28,19 +126,18 @@ chat_template = ChatPromptTemplate.from_messages([
28
  Answer: """)
29
  ])
30
 
31
- #user's question.
32
- #how many results we want to print.
33
-
34
  from langchain_google_genai import ChatGoogleGenerativeAI
35
-
36
  chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
37
 
 
38
  from langchain_core.output_parsers import StrOutputParser
39
-
40
  output_parser = StrOutputParser()
41
 
 
42
  chain = chat_template | chat_model | output_parser
43
 
 
44
  from langchain_community.document_loaders import PDFMinerLoader
45
  from langchain_text_splitters import NLTKTextSplitter
46
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
@@ -55,26 +152,25 @@ def extract_text_from_pdf(pdf_file):
55
  text += page.get_text()
56
  return text
57
 
58
-
59
- uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
60
 
61
  if uploaded_file is not None:
62
-
63
  pdf_file = io.BytesIO(uploaded_file.read())
64
  text = extract_text_from_pdf(pdf_file)
65
- #pdf_loader = PDFMinerLoader(pdf_file)
66
- #dat_nik = pdf_loader.load()
67
- text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
68
  chunks = text_splitter.split_documents([text])
69
 
 
70
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
71
-
72
- db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
73
 
74
  db.persist()
75
 
76
- db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)
77
-
78
  retriever = db_connection.as_retriever(search_kwargs={"k": 5})
79
 
80
  def format_docs(docs):
@@ -91,31 +187,8 @@ if uploaded_file is not None:
91
  if st.button("Submit"):
92
  st.subheader(":green[Query:]")
93
  st.subheader(user_input)
94
- response = rag_chain.invoke(user_input)
95
- st.subheader(":green[Response:-]")
96
  st.write(response)
97
-
98
- # dat = PDFMinerLoader("2404.07143.pdf")
99
- # dat_nik =dat.load()
100
- # # Split the document into chunks
101
-
102
-
103
- # text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
104
-
105
- # chunks = text_splitter.split_documents(dat_nik)
106
- # Creating Chunks Embedding
107
- # We are just loading OpenAIEmbeddings
108
-
109
-
110
-
111
-
112
- # vectors = embeddings.embed_documents(chunks)
113
- # Store the chunks in vector store
114
-
115
-
116
- # Creating a New Chroma Database
117
-
118
-
119
-
120
- #takes user's question.
121
-
 
1
+ # import streamlit as st
2
+ # from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
3
+ # from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
4
+ # import os
5
+ # import nltk
6
+ # import io
7
+ # import fitz
8
+ # nltk.download("punkt")
9
+
10
+ # st.title(':blue[Langchain:] A Rag System on “Leave No Context Behind” Paper')
11
+ # st.header("AI Chatbot :robot_face:")
12
+
13
+ # os.environ["GOOGLE_API_KEY"] = os.getenv("k4")
14
+ # # Creating a template
15
+
16
+ # chat_template = ChatPromptTemplate.from_messages([
17
+ # # System Message establishes bot's role and general behavior guidelines
18
+ # SystemMessage(content="""You are a Helpful AI Bot.
19
+ # You take the context and question from user. Your answer should be based on the specific context."""),
20
+ # # Human Message Prompt Template
21
+ # HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
22
+ # Context:
23
+ # {context}
24
+
25
+ # Question:
26
+ # {question}
27
+
28
+ # Answer: """)
29
+ # ])
30
+
31
+ # #user's question.
32
+ # #how many results we want to print.
33
+
34
+ # from langchain_google_genai import ChatGoogleGenerativeAI
35
+
36
+ # chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
37
+
38
+ # from langchain_core.output_parsers import StrOutputParser
39
+
40
+ # output_parser = StrOutputParser()
41
+
42
+ # chain = chat_template | chat_model | output_parser
43
+
44
+ # from langchain_community.document_loaders import PDFMinerLoader
45
+ # from langchain_text_splitters import NLTKTextSplitter
46
+ # from langchain_google_genai import GoogleGenerativeAIEmbeddings
47
+ # from langchain_community.vectorstores import Chroma
48
+ # from langchain_core.runnables import RunnablePassthrough
49
+
50
+ # def extract_text_from_pdf(pdf_file):
51
+ # document = fitz.open(stream=pdf_file, filetype="pdf")
52
+ # text = ""
53
+ # for page_num in range(len(document)):
54
+ # page = document.load_page(page_num)
55
+ # text += page.get_text()
56
+ # return text
57
+
58
+
59
+ # uploaded_file = st.file_uploader("Choose a pdf file",type = "pdf")
60
+
61
+ # if uploaded_file is not None:
62
+
63
+ # pdf_file = io.BytesIO(uploaded_file.read())
64
+ # text = extract_text_from_pdf(pdf_file)
65
+ # #pdf_loader = PDFMinerLoader(pdf_file)
66
+ # #dat_nik = pdf_loader.load()
67
+ # text_splitter = NLTKTextSplitter(chunk_size = 500,chunk_overlap = 100)
68
+ # chunks = text_splitter.split_documents([text])
69
+
70
+ # embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
71
+
72
+ # db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
73
+
74
+ # db.persist()
75
+
76
+ # db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)
77
+
78
+ # retriever = db_connection.as_retriever(search_kwargs={"k": 5})
79
+
80
+ # def format_docs(docs):
81
+ # return "\n\n".join(doc.page_content for doc in docs)
82
+
83
+ # rag_chain = (
84
+ # {"context": retriever | format_docs, "question": RunnablePassthrough()}
85
+ # | chat_template
86
+ # | chat_model
87
+ # | output_parser
88
+ # )
89
+
90
+ # user_input = st.text_area("Ask Questions to AI")
91
+ # if st.button("Submit"):
92
+ # st.subheader(":green[Query:]")
93
+ # st.subheader(user_input)
94
+ # response = rag_chain.invoke(user_input)
95
+ # st.subheader(":green[Response:-]")
96
+ # st.write(response)
97
+
98
+ ##################################################### chatgpt code model #############################################
99
+
100
  import streamlit as st
101
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
102
  from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
 
109
  st.title(':blue[Langchain:] A Rag System on “Leave No Context Behind” Paper')
110
  st.header("AI Chatbot :robot_face:")
111
 
112
+ # Set up environment variables
113
+ os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
114
 
115
+ # Creating a template
116
  chat_template = ChatPromptTemplate.from_messages([
 
117
  SystemMessage(content="""You are a Helpful AI Bot.
118
  You take the context and question from user. Your answer should be based on the specific context."""),
 
119
  HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
120
  Context:
121
  {context}
 
126
  Answer: """)
127
  ])
128
 
129
+ # Initialize chat model
 
 
130
  from langchain_google_genai import ChatGoogleGenerativeAI
 
131
  chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
132
 
133
+ # Initialize output parser
134
  from langchain_core.output_parsers import StrOutputParser
 
135
  output_parser = StrOutputParser()
136
 
137
+ # Initialize the chain
138
  chain = chat_template | chat_model | output_parser
139
 
140
+ # Initialize document loaders and splitters
141
  from langchain_community.document_loaders import PDFMinerLoader
142
  from langchain_text_splitters import NLTKTextSplitter
143
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
152
  text += page.get_text()
153
  return text
154
 
155
+ # Streamlit file uploader
156
+ uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
157
 
158
  if uploaded_file is not None:
159
+ # Extract text from the uploaded PDF
160
  pdf_file = io.BytesIO(uploaded_file.read())
161
  text = extract_text_from_pdf(pdf_file)
162
+
163
+ # Split the document into chunks
164
+ text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
165
  chunks = text_splitter.split_documents([text])
166
 
167
+ # Initialize embeddings and vectorstore
168
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
169
+ db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db")
 
170
 
171
  db.persist()
172
 
173
+ db_connection = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
 
174
  retriever = db_connection.as_retriever(search_kwargs={"k": 5})
175
 
176
  def format_docs(docs):
 
187
  if st.button("Submit"):
188
  st.subheader(":green[Query:]")
189
  st.subheader(user_input)
190
+ response = rag_chain.invoke({"question": user_input})
191
+ st.subheader(":green[Response:]")
192
  st.write(response)
193
+ else:
194
+ st.write("Please upload a PDF file to get started.")