Ritesh-hf committed
Commit 4451f57 · verified · 1 Parent(s): ab52889

Update app.py

Files changed (1)
  1. app.py +4 -37
app.py CHANGED
@@ -84,20 +84,7 @@ retriever = PineconeHybridSearchRetriever(
 )
 
 
-from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
-
-llm = HuggingFaceEndpoint(
-    repo_id="meta-llama/Llama-3.1-70B-Instruct",
-    task="text-generation",
-    max_new_tokens=512,
-    do_sample=False,
-    repetition_penalty=1.03,
-    huggingfacehub_api_token=HUGGINGFACE_TOKEN
-)
-
-llm = ChatHuggingFace(llm=llm, verbose=True)
-
-
+llm = ChatPerplexity(temperature=0, pplx_api_key=GROQ_API_KEY, model="llama-3.1-sonar-large-128k-chat", max_tokens=512, max_retries=2)
 
 
 # Initialize LLM
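
Note: the added line relies on ChatPerplexity, whose import is not part of this hunk. A minimal sketch of the setup the change appears to assume (the import path and the environment lookup for GROQ_API_KEY are assumptions; only the ChatPerplexity(...) call itself comes from the diff):

    # Sketch only: assumes the import and key loading live elsewhere in app.py.
    import os
    from langchain_community.chat_models import ChatPerplexity

    # The diff passes a variable named GROQ_API_KEY as pplx_api_key; despite the
    # name, it presumably holds a Perplexity API key.
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

    llm = ChatPerplexity(
        temperature=0,
        pplx_api_key=GROQ_API_KEY,
        model="llama-3.1-sonar-large-128k-chat",
        max_tokens=512,
        max_retries=2,
    )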
@@ -141,10 +128,9 @@ When responding to queries, follow these guidelines:
    - Use structured Markdown elements such as headings, subheadings, lists, tables, and links.
    - Use emphasis on headings, important texts, and phrases.
 
-3. Proper Citations:
+3. Proper References:
    - Always use inline citations with embedded source URLs.
-   - The inline citations should be in the format [1], [2], etc.
-   - DO NOT INCLUDE THE 'References' SECTION IN THE RESPONSE.
+   - INCLUDE THE 'References' SECTION IN THE RESPONSE TO GIVE SOURCES URL TO USERS TO REFER.
 
 FOLLOW ALL THE GIVEN INSTRUCTIONS, FAILURE TO DO SO WILL RESULT IN THE TERMINATION OF THE CHAT.
 == CONTEXT ==
@@ -158,7 +144,7 @@ qa_prompt = ChatPromptTemplate.from_messages(
     ]
 )
 
-document_prompt = PromptTemplate(input_variables=["page_content", "source"], template="{page_content} \n\n Source: {source}")
+document_prompt = PromptTemplate(input_variables=["page_content"], template="{page_content} \n\n")
 question_answer_chain = create_stuff_documents_chain(llm, qa_prompt, document_prompt=document_prompt)
 
 # Retrieval and Generative (RAG) Chain
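
Note: the new document_prompt keeps only {page_content}, so retrieved documents are stuffed into the prompt without their source URL; URLs are instead expected to surface through the 'References' section requested in the system prompt. A small before/after sketch of how a single retrieved document is rendered (the example Document and URL are illustrative, not from the app):

    from langchain_core.documents import Document
    from langchain_core.prompts import PromptTemplate

    doc = Document(page_content="Some retrieved passage.",
                   metadata={"source": "https://example.com/page"})

    old_prompt = PromptTemplate(input_variables=["page_content", "source"],
                                template="{page_content} \n\n Source: {source}")
    new_prompt = PromptTemplate(input_variables=["page_content"],
                                template="{page_content} \n\n")

    # Before this commit: passage text followed by "Source: https://example.com/page"
    print(old_prompt.format(page_content=doc.page_content, source=doc.metadata["source"]))
    # After this commit: passage text only.
    print(new_prompt.format(page_content=doc.page_content))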
@@ -204,7 +190,6 @@ async def websocket_endpoint(websocket: WebSocket):
             # Define an async generator for streaming
             async def stream_response():
                 complete_response = ""
-                context = {}
                 async for chunk in conversational_rag_chain.astream(
                     {"input": question, 'language': language},
                     config={"configurable": {"session_id": session_id}}
@@ -216,24 +201,6 @@ async def websocket_endpoint(websocket: WebSocket):
                     complete_response += chunk['answer']
                     await websocket.send_json({'response': chunk['answer']})
 
-                if context:
-                    citations = re.findall(r'\[(\d+)\]', complete_response)
-                    citation_numbers = list(map(int, citations))
-                    sources = dict()
-                    backup = dict()
-                    i=1
-                    for index, doc in enumerate(context):
-                        if (index+1) in citation_numbers:
-                            sources[f"[{index+1}]"] = doc.metadata["source"]
-                        else:
-                            if doc.metadata["source"] not in backup.values():
-                                backup[f"[{i}]"] = doc.metadata["source"]
-                                i += 1
-                    if sources:
-                        await websocket.send_json({'sources': sources})
-                    else:
-                        await websocket.send_json({'sources': backup})
-
             await stream_response()
         except Exception as e:
             print(f"Error during message handling: {e}")
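
Note: with context = {} and the sources post-processing removed, the generator only streams 'response' chunks and never sends a separate 'sources' message over the WebSocket. A sketch of how the slimmed-down generator reads after this commit, assuming the unchanged lines between the two hunks simply close the astream(...) call and guard on the chunk contents (the 'answer' check is an assumption):

    async def stream_response():
        complete_response = ""
        async for chunk in conversational_rag_chain.astream(
            {"input": question, 'language': language},
            config={"configurable": {"session_id": session_id}}
        ):
            # Forward each answer fragment to the client as it arrives.
            if 'answer' in chunk:
                complete_response += chunk['answer']
                await websocket.send_json({'response': chunk['answer']})

    await stream_response()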
 