Update app.py
app.py CHANGED
@@ -84,20 +84,7 @@ retriever = PineconeHybridSearchRetriever(
 )
 
 
-
-
-llm = HuggingFaceEndpoint(
-    repo_id="meta-llama/Llama-3.1-70B-Instruct",
-    task="text-generation",
-    max_new_tokens=512,
-    do_sample=False,
-    repetition_penalty=1.03,
-    huggingfacehub_api_token=HUGGINGFACE_TOKEN
-)
-
-llm = ChatHuggingFace(llm=llm, verbose=True)
-
-
+llm = ChatPerplexity(temperature=0, pplx_api_key=GROQ_API_KEY, model="llama-3.1-sonar-large-128k-chat", max_tokens=512, max_retries=2)
 
 
 # Initialize LLM
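Note: this hunk swaps the HuggingFaceEndpoint / ChatHuggingFace setup for a single ChatPerplexity call, but the import section of app.py is not shown. A minimal sketch of the assumed surrounding code follows; the import path and the reuse of the GROQ_API_KEY variable are assumptions taken from common LangChain usage and from the added line, not from the rest of the file.

# Sketch only, not part of the commit.
from langchain_community.chat_models import ChatPerplexity

llm = ChatPerplexity(
    temperature=0,
    pplx_api_key=GROQ_API_KEY,  # despite the name, this presumably holds a Perplexity API key
    model="llama-3.1-sonar-large-128k-chat",
    max_tokens=512,
    max_retries=2,
)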
@@ -141,10 +128,9 @@ When responding to queries, follow these guidelines:
 - Use structured Markdown elements such as headings, subheadings, lists, tables, and links.
 - Use emphasis on headings, important texts, and phrases.
 
-3. Proper
+3. Proper References:
 - Always use inline citations with embedded source URLs.
--
-- DO NOT INCLUDE THE 'References' SECTION IN THE RESPONSE.
+- INCLUDE THE 'References' SECTION IN THE RESPONSE TO GIVE SOURCES URL TO USERS TO REFER.
 
 FOLLOW ALL THE GIVEN INSTRUCTIONS, FAILURE TO DO SO WILL RESULT IN THE TERMINATION OF THE CHAT.
 == CONTEXT ==
@@ -158,7 +144,7 @@ qa_prompt = ChatPromptTemplate.from_messages(
     ]
 )
 
-document_prompt = PromptTemplate(input_variables=["page_content"
+document_prompt = PromptTemplate(input_variables=["page_content"], template="{page_content} \n\n")
 question_answer_chain = create_stuff_documents_chain(llm, qa_prompt, document_prompt=document_prompt)
 
 # Retrieval and Generative (RAG) Chain
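Note: the added document_prompt controls how each retrieved document is rendered before create_stuff_documents_chain stuffs the results into the {context} variable of qa_prompt. A minimal sketch of that formatting step, using a made-up passage:

# Sketch only, not part of the commit.
from langchain_core.prompts import PromptTemplate

document_prompt = PromptTemplate(
    input_variables=["page_content"],
    template="{page_content} \n\n",
)
# Prints the passage followed by a trailing space and two newlines.
print(document_prompt.format(page_content="Example passage from a retrieved chunk."))

Because the template renders only page_content, document metadata such as doc.metadata["source"] is not injected into the stuffed context by this step.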
@@ -204,7 +190,6 @@ async def websocket_endpoint(websocket: WebSocket):
             # Define an async generator for streaming
             async def stream_response():
                 complete_response = ""
-                context = {}
                 async for chunk in conversational_rag_chain.astream(
                     {"input": question, 'language': language},
                     config={"configurable": {"session_id": session_id}}
@@ -216,24 +201,6 @@ async def websocket_endpoint(websocket: WebSocket):
                     complete_response += chunk['answer']
                     await websocket.send_json({'response': chunk['answer']})
 
-                if context:
-                    citations = re.findall(r'\[(\d+)\]', complete_response)
-                    citation_numbers = list(map(int, citations))
-                    sources = dict()
-                    backup = dict()
-                    i=1
-                    for index, doc in enumerate(context):
-                        if (index+1) in citation_numbers:
-                            sources[f"[{index+1}]"] = doc.metadata["source"]
-                        else:
-                            if doc.metadata["source"] not in backup.values():
-                                backup[f"[{i}]"] = doc.metadata["source"]
-                                i += 1
-                    if sources:
-                        await websocket.send_json({'sources': sources})
-                    else:
-                        await websocket.send_json({'sources': backup})
-
             await stream_response()
         except Exception as e:
             print(f"Error during message handling: {e}")
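Note: with the sources post-processing removed, the handler now only accumulates and forwards chunk['answer'] pieces. A minimal sketch of consuming the same stream without the websocket plumbing, assuming conversational_rag_chain and the session store are configured as in the rest of app.py:

# Sketch only, not part of the commit.
async def collect_answer(question: str, language: str, session_id: str) -> str:
    complete_response = ""
    async for chunk in conversational_rag_chain.astream(
        {"input": question, "language": language},
        config={"configurable": {"session_id": session_id}},
    ):
        # Retrieval chains stream partial dicts; not every chunk carries an 'answer' piece.
        if "answer" in chunk:
            complete_response += chunk["answer"]
    return complete_response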