Ritesh-hf committed
Commit 4451f57 · verified · 1 Parent(s): ab52889

Update app.py

Files changed (1)
  1. app.py +4 -37
app.py CHANGED
@@ -84,20 +84,7 @@ retriever = PineconeHybridSearchRetriever(
 )
 
 
-from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
-
-llm = HuggingFaceEndpoint(
-    repo_id="meta-llama/Llama-3.1-70B-Instruct",
-    task="text-generation",
-    max_new_tokens=512,
-    do_sample=False,
-    repetition_penalty=1.03,
-    huggingfacehub_api_token=HUGGINGFACE_TOKEN
-)
-
-llm = ChatHuggingFace(llm=llm, verbose=True)
-
-
+llm = ChatPerplexity(temperature=0, pplx_api_key=GROQ_API_KEY, model="llama-3.1-sonar-large-128k-chat", max_tokens=512, max_retries=2)
 
 
 # Initialize LLM
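
Note: the added line relies on ChatPerplexity, whose import is not part of this hunk. A minimal sketch of the setup the change appears to assume (the import path and the environment lookup for GROQ_API_KEY are assumptions; only the ChatPerplexity(...) call itself comes from the diff):

    # Sketch only: assumes the import and key loading live elsewhere in app.py.
    import os
    from langchain_community.chat_models import ChatPerplexity

    # The diff passes a variable named GROQ_API_KEY as pplx_api_key; despite the
    # name, it presumably holds a Perplexity API key.
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

    llm = ChatPerplexity(
        temperature=0,
        pplx_api_key=GROQ_API_KEY,
        model="llama-3.1-sonar-large-128k-chat",
        max_tokens=512,
        max_retries=2,
    )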
@@ -141,10 +128,9 @@ When responding to queries, follow these guidelines:
    - Use structured Markdown elements such as headings, subheadings, lists, tables, and links.
    - Use emphasis on headings, important texts, and phrases.
 
-3. Proper Citations:
+3. Proper References:
    - Always use inline citations with embedded source URLs.
-   - The inline citations should be in the format [1], [2], etc.
-   - DO NOT INCLUDE THE 'References' SECTION IN THE RESPONSE.
+   - INCLUDE THE 'References' SECTION IN THE RESPONSE TO GIVE SOURCES URL TO USERS TO REFER.
 
 FOLLOW ALL THE GIVEN INSTRUCTIONS, FAILURE TO DO SO WILL RESULT IN THE TERMINATION OF THE CHAT.
 == CONTEXT ==
@@ -158,7 +144,7 @@ qa_prompt = ChatPromptTemplate.from_messages(
     ]
 )
 
-document_prompt = PromptTemplate(input_variables=["page_content", "source"], template="{page_content} \n\n Source: {source}")
+document_prompt = PromptTemplate(input_variables=["page_content"], template="{page_content} \n\n")
 question_answer_chain = create_stuff_documents_chain(llm, qa_prompt, document_prompt=document_prompt)
 
 # Retrieval and Generative (RAG) Chain
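
Note: the new document_prompt keeps only {page_content}, so retrieved documents are stuffed into the prompt without their source URL; URLs are instead expected to surface through the 'References' section requested in the system prompt. A small before/after sketch of how a single retrieved document is rendered (the example Document and URL are illustrative, not from the app):

    from langchain_core.documents import Document
    from langchain_core.prompts import PromptTemplate

    doc = Document(page_content="Some retrieved passage.",
                   metadata={"source": "https://example.com/page"})

    old_prompt = PromptTemplate(input_variables=["page_content", "source"],
                                template="{page_content} \n\n Source: {source}")
    new_prompt = PromptTemplate(input_variables=["page_content"],
                                template="{page_content} \n\n")

    # Before this commit: passage text followed by "Source: https://example.com/page"
    print(old_prompt.format(page_content=doc.page_content, source=doc.metadata["source"]))
    # After this commit: passage text only.
    print(new_prompt.format(page_content=doc.page_content))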
@@ -204,7 +190,6 @@ async def websocket_endpoint(websocket: WebSocket):
             # Define an async generator for streaming
             async def stream_response():
                 complete_response = ""
-                context = {}
                 async for chunk in conversational_rag_chain.astream(
                     {"input": question, 'language': language},
                     config={"configurable": {"session_id": session_id}}
@@ -216,24 +201,6 @@ async def websocket_endpoint(websocket: WebSocket):
                     complete_response += chunk['answer']
                     await websocket.send_json({'response': chunk['answer']})
 
-                if context:
-                    citations = re.findall(r'\[(\d+)\]', complete_response)
-                    citation_numbers = list(map(int, citations))
-                    sources = dict()
-                    backup = dict()
-                    i=1
-                    for index, doc in enumerate(context):
-                        if (index+1) in citation_numbers:
-                            sources[f"[{index+1}]"] = doc.metadata["source"]
-                        else:
-                            if doc.metadata["source"] not in backup.values():
-                                backup[f"[{i}]"] = doc.metadata["source"]
-                                i += 1
-                    if sources:
-                        await websocket.send_json({'sources': sources})
-                    else:
-                        await websocket.send_json({'sources': backup})
-
             await stream_response()
         except Exception as e:
             print(f"Error during message handling: {e}")
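
Note: with context = {} and the sources post-processing removed, the generator only streams 'response' chunks and never sends a separate 'sources' message over the WebSocket. A sketch of how the slimmed-down generator reads after this commit, assuming the unchanged lines between the two hunks simply close the astream(...) call and guard on the chunk contents (the 'answer' check is an assumption):

    async def stream_response():
        complete_response = ""
        async for chunk in conversational_rag_chain.astream(
            {"input": question, 'language': language},
            config={"configurable": {"session_id": session_id}}
        ):
            # Forward each answer fragment to the client as it arrives.
            if 'answer' in chunk:
                complete_response += chunk['answer']
                await websocket.send_json({'response': chunk['answer']})

    await stream_response()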
 