isayahc committed on
Commit 8ae7246 · verified · 1 Parent(s): ff2dd67

refactored for readability

Files changed (1): app.py +91 -55
app.py CHANGED
@@ -33,7 +33,10 @@ dotenv.load_dotenv()
 
 
 
-text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
+text_splitter = CharacterTextSplitter(
+    chunk_size=350,
+    chunk_overlap=0
+)
 
 # flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
 # flan_ul2 = OpenAI()
@@ -48,7 +51,7 @@ flan_ul2 = chat = ChatOpenAI(
 
 global qa
 
-# embeddings = HuggingFaceHubEmbeddings()
+
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 embeddings = CohereEmbeddings(
     model="embed-english-v3.0",
@@ -57,17 +60,14 @@ embeddings = CohereEmbeddings(
 
 
 
-
-
 def loading_pdf():
     return "Loading..."
 def pdf_changes(pdf_doc):
-    # embeddings = OpenAIEmbeddings()
-    # embeddings = HuggingFaceHubEmbeddings()
+
 
     embeddings = CohereEmbeddings(
         model="embed-english-light-v3.0",
-        # cohere_api_key=COHERE_API_KEY
+
     )
 
     loader = PyPDFLoader(pdf_doc.name)
@@ -75,44 +75,52 @@ def pdf_changes(pdf_doc):
     texts = text_splitter.split_documents(documents)
     db = Chroma.from_documents(texts, embeddings)
     retriever = db.as_retriever()
-    # memory = VectorStoreRetrieverMemory(retriever=retriever)
-    memory = ConversationBufferMemory(memory_key="chat_history", input_key="human_input")
-
-    # prompt_template = """You have been given a pdf or pdfs. You must search these pdfs.
-    # If you don't know the answer, just say that you don't know, don't try to make up an answer.
-    # Only answer the question.
-
+
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        input_key="human_input"
+    )
 
-    # Question: {query}
-    # Answer:"""
-    # PROMPT = PromptTemplate(
-    #     template=prompt_template, input_variables=["context", "question"]
-    # )
 
-    prompt = PromptTemplate(input_variables=["chat_history", "human_input", "context"], template=template)
+
+    prompt = PromptTemplate(
+        input_variables=[
+            "chat_history",
+            "human_input",
+            "context"
+        ],
+        template=template
+    )
+
     chain_type_kwargs = {"prompt": prompt}
+
     global qa
-    # qa = RetrievalQA.from_chain_type(
-    #     llm=flan_ul2,
-    #     memory=memory,
-    #     chain_type="stuff",
-    #     retriever=retriever,
-    #     return_source_documents=True,
-    #     chain_type_kwargs=chain_type_kwargs,
-    # )
+
 
     prompt = PromptTemplate(
-        input_variables=["history", "context", "question"],
+        input_variables=[
+            "history",
+            "context",
+            "question"
+        ],
         template=template,
     )
-    memory = ConversationBufferMemory(memory_key="history", input_key="question")
-
-    qa = RetrievalQAWithSourcesChain.from_chain_type(llm=flan_ul2, retriever=retriever, return_source_documents=True, verbose=True, chain_type_kwargs={
-        "verbose": True,
-        "memory": memory,
-        "prompt": prompt,
-        "document_variable_name": "context"
-        }
+    memory = ConversationBufferMemory(
+        memory_key="history",
+        input_key="question"
+    )
+
+    qa = RetrievalQAWithSourcesChain.from_chain_type(
+        llm=flan_ul2,
+        retriever=retriever,
+        return_source_documents=True,
+        verbose=True,
+        chain_type_kwargs={
+            "verbose": True,
+            "memory": memory,
+            "prompt": prompt,
+            "document_variable_name": "context"
+        }
     )
 
     return "Ready"
@@ -126,21 +134,14 @@ def bot(history):
     history[-1][1] = response['answer']
     return history
 
-# def bot(history):
-#     response = infer(history[-1][0], history)
-#     sources = [doc.metadata.get("source") for doc in response['source_documents']]
-#     src_list = '\n'.join(sources)
-#     print_this = response['answer'] + "\n\n\n Sources: \n\n\n" + src_list
-#     return print_this
+
 
 def infer(question, history) -> dict:
 
     query = question
-    # result = qa({"query": query, "context":""})
-    # result = qa({"query": query, })
+
     result = qa({"query": query, "history": history, "question": question})
 
-    # result = result['answer']
     return result
 
 css="""
@@ -159,22 +160,57 @@ title = """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.HTML(title)
-        # with gr.Blocks() as demo:
+
 
         with gr.Column():
             pdf_doc = gr.File()
-            # pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="filepath") #try filepath for type if binary does not work
+
         with gr.Row():
-            langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
+            langchain_status = gr.Textbox(
+                label="Status",
+                placeholder="",
+                interactive=False
+            )
             load_pdf = gr.Button("Load pdf to langchain")
 
-        chatbot = gr.Chatbot([], elem_id="chatbot") #.style(height=350)
+        chatbot = gr.Chatbot(
+            [],
+            elem_id="chatbot"
+        ) #.style(height=350)
+
         with gr.Row():
-            question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
-    load_pdf.click(loading_pdf, None, langchain_status, queue=False)
-    load_pdf.click(pdf_changes, pdf_doc, langchain_status, queue=False)
-    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
-        bot, chatbot, chatbot
+            question = gr.Textbox(
+                label="Question",
+                placeholder="Type your question and hit Enter "
+            )
+
+    load_pdf.click(
+        loading_pdf,
+        None,
+        langchain_status,
+        queue=False
+    )
+
+    load_pdf.click(
+        pdf_changes,
+        pdf_doc,
+        langchain_status,
+        queue=False
+    )
+
+    question.submit(
+        add_text,
+        [
+            chatbot,
+            question
+        ],
+        [
+            chatbot,
+            question]
+    ).then(
+        bot,
+        chatbot,
+        chatbot
    )
 
 demo.launch()
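The reformatted wiring at the bottom of app.py relies on Gradio's event chaining: .submit() returns an event object, and .then() schedules a follow-up callback on it, so the user's message is rendered before the slower QA step runs. A minimal runnable sketch of that pattern follows, assuming a recent Gradio release; the handler bodies here are hypothetical stand-ins for the app's add_text and bot:

import gradio as gr

def add_text(history, text):
    # Append the user turn with an empty slot for the bot reply; clear the box.
    return history + [[text, None]], ""

def bot(history):
    # Stand-in for the app's QA call: fill the bot slot of the last turn.
    history[-1][1] = "echo: " + history[-1][0]
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot([], elem_id="chatbot")
    question = gr.Textbox(label="Question")
    # .submit() returns an event; .then() chains a second callback onto it.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

demo.launch()

Splitting a turn into two chained events keeps the UI responsive: the first callback is cheap and updates the chat immediately, while the second performs the expensive model call.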
 