ppsingh committed on
Commit
ba9c9b0
1 Parent(s): 796ab9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -2
app.py CHANGED
@@ -138,13 +138,74 @@ async def chat(query,history,sources,reports,subtype,year):
138
  search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3, "filter":filter})
139
 
140
  context_retrieved = retriever.invoke(question)
 
 
141
 
142
  def format_docs(docs):
143
- return "\n\n".join(doc.page_content for doc in docs)
144
 
145
  context_retrieved_formatted = format_docs(context_retrieved)
146
  context_retrieved_lst.append(context_retrieved_formatted)
147
- print(context_retrieved_lst)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  yield history,docs_html
150
  #process_pdf()
 
138
  search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3, "filter":filter})
139
 
140
  context_retrieved = retriever.invoke(question)
141
+ for doc in context_retrieved:
142
+ print(doc.metadata)
143
 
144
  def format_docs(docs):
145
+ return "|".join(doc.page_content for doc in docs)
146
 
147
  context_retrieved_formatted = format_docs(context_retrieved)
148
  context_retrieved_lst.append(context_retrieved_formatted)
149
+
150
+ ##-------------------Prompt---------------------------------------------------------------
151
+ SYSTEM_PROMPT = """
152
+ You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
153
+ Guidelines:
154
+ - If the passages have useful facts or numbers, use them in your answer.
155
+ - Documents are separated by "|"
156
+ - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
157
+ - Do not use the sentence 'Doc i says ...' to say where information came from.
158
+ - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
159
+ - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
160
+ - If it makes sense, use bullet points and lists to make your answers easier to understand.
161
+ - You do not need to use every passage. Only use the ones that help answer the question.
162
+ - If the documents do not have the information needed to answer the question, just say you do not have enough information.
163
+ """
164
+
165
+ USER_PROMPT = """Passages:
166
+ {context}
167
+ -----------------------
168
+ Question: {question} - Explained to audit expert
169
+ Answer in english with the passages citations:
170
+ """.format(context = context_retrieved_lst, question=query)
171
+
172
+ messages = [
173
+ SystemMessage(content=SYSTEM_PROMPT),
174
+ HumanMessage(
175
+ content=USER_PROMPT
176
+ ),]
177
+
178
+ ###-----------------getting inference endpoints------------------------------
179
+ llm_qa = HuggingFaceEndpoint(
180
+ endpoint_url="https://nhe9phsr2zhs0e36.eu-west-1.aws.endpoints.huggingface.cloud",
181
+ max_new_tokens=512,
182
+ top_k=10,
183
+ top_p=0.95,
184
+ typical_p=0.95,
185
+ temperature=0.01,
186
+ repetition_penalty=1.03,)
187
+
188
+ # create rag chain
189
+ chat_model = ChatHuggingFace(llm=llm_qa)
190
+ chain = chat_model | StrOutputParser()
191
+
192
+ ###-------------------------- get answers ---------------------------------------
193
+ answer_lst = []
194
+ for question, context in zip(question_lst , context_retrieved_lst):
195
+ answer = chain.invoke(messages)
196
+ answer_lst.append(answer)
197
+ docs_html = []
198
+ for i, d in enumerate(context_retrieved, 1):
199
+ docs_html.append(make_html_source(d, i))
200
+ docs_html = "".join(docs_html)
201
+
202
+ previous_answer = history[-1][1]
203
+ previous_answer = previous_answer if previous_answer is not None else ""
204
+ answer_yet = previous_answer + answer_lst[0]
205
+ answer_yet = parse_output_llm_with_sources(answer_yet)
206
+ history[-1] = (query,answer_yet)
207
+
208
+ history = [tuple(x) for x in history]
209
 
210
  yield history,docs_html
211
  #process_pdf()