edithram23 commited on
Commit
52addb4
·
1 Parent(s): d5326c3
Files changed (3) hide show
  1. app.py +5 -5
  2. retriever.py +8 -64
  3. setup.py +18 -24
app.py CHANGED
@@ -32,17 +32,17 @@ def process(audio, input_text, pdfs, chat_history: list[ChatMessage]):
32
  pdf_uploaded = True
33
  pdf_path = pdfs.name
34
  output_id = vector.upload_pdfs_user(pdf_path)
35
- # print(output_id)
36
  if pdfs is None:
37
  pdf_uploaded = False
38
  output_id = None
39
- # print(output_id)
40
  if audio is not None:
41
  transcript = transcriptor.get_transcript(audio)
42
  chat_history.append({"role": "user", "content": transcript})
43
 
44
  elif input_text:
45
- # print(input_text)
46
  chat_history.append({"role": "user", "content": input_text})
47
 
48
  else:
@@ -65,7 +65,7 @@ with gr.Blocks() as demo:
65
  with gr.Row():
66
  with gr.Column(scale=1, min_width=300):
67
  input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
68
- gr.Markdown("_Upload a PDF to chat with it!_", visible=not pdf_uploaded)
69
 
70
  with gr.Row():
71
  chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
84
  )
85
 
86
  if __name__ == "__main__":
87
- demo.launch(server_port='9000')
 
32
  pdf_uploaded = True
33
  pdf_path = pdfs.name
34
  output_id = vector.upload_pdfs_user(pdf_path)
35
+ print(output_id)
36
  if pdfs is None:
37
  pdf_uploaded = False
38
  output_id = None
39
+ print(output_id)
40
  if audio is not None:
41
  transcript = transcriptor.get_transcript(audio)
42
  chat_history.append({"role": "user", "content": transcript})
43
 
44
  elif input_text:
45
+ print(input_text)
46
  chat_history.append({"role": "user", "content": input_text})
47
 
48
  else:
 
65
  with gr.Row():
66
  with gr.Column(scale=1, min_width=300):
67
  input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
68
+ gr.Markdown("_Use a PDF to enhance the chatbot's knowledge!_", visible=not pdf_uploaded)
69
 
70
  with gr.Row():
71
  chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
 
84
  )
85
 
86
  if __name__ == "__main__":
87
+ demo.launch()
retriever.py CHANGED
@@ -3,9 +3,6 @@ from langchain_openai import OpenAIEmbeddings
3
  from qdrant_client import QdrantClient
4
  from langchain_qdrant import QdrantVectorStore
5
  from qdrant_client.http import models
6
- from langchain_groq import ChatGroq
7
- from langchain_core.output_parsers import StrOutputParser
8
- from langchain_core.prompts import PromptTemplate
9
 
10
  from dotenv import load_dotenv
11
 
@@ -36,71 +33,19 @@ class Retriever():
36
  'Indirect Tax Laws',
37
  'INDIAN Income Tax ACTS',
38
  'ONLINESITES']
39
- self.groq = ChatGroq(model='llama3-70b-8192')
40
-
41
-
42
-
43
- def multi_questions(self,user_prompt):
44
- llm = self.groq
45
- prompt = f'''
46
- # You are an excellent Query Decomposer for database retrieval optimization.
47
- # You are given a user_query.
48
- ===============================
49
- # TASK:
50
- -> Your task is to provide a structured and hierarchical breakdown of the user query.
51
- -> This breakdown should be in the form of an ordered sequence that helps in extracting the right context from the database.
52
- -> Build the user query from the bottom level (basic requirements) to the top level (more specific details), ensuring the retrieval context improves at each level.
53
- ===============================
54
- # USER_QUERY: {{user}}
55
- ===============================
56
- # EXAMPLE:
57
- 1. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
58
- -> #EXPECTED OUTPUT: | I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? | What type of taxes should I pay on the purchase of a car for 5 lakh? |
59
 
60
- 2. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
61
- -> #EXPECTED OUTPUT: | NEW TAX REGIME and Income tax. | My income is 5 lakh. What type of taxes should I pay and how much should I pay? |
62
-
63
- ===============================
64
- # OUTPUT FORMAT:
65
- -> Provide the formatted output separated with the pipe '|' enclosed as: |...|...|
66
- -> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
67
- -> Do not answer the user question directly. Your job is to provide the decomposed queries in the format shown in the examples.
68
- '''
69
-
70
- rag_prompt = PromptTemplate.from_template(prompt)
71
- l = (rag_prompt | llm | StrOutputParser())
72
- stream = l.invoke({"user":user_prompt})
73
- return stream
74
-
75
- def multiple_contexts(self,user_prompt):
76
- questions = self.filters
77
- contexts = []
78
- for i in questions:
79
- contexts+=self.filter_multiple(user_prompt,i,18)
80
- print(len(contexts))
81
- return contexts
82
-
83
- def filter_multiple(self,query,mapper,k1=10):
84
  retriever1 = self.vector_store.as_retriever(
85
  search_type="similarity_score_threshold",
86
- search_kwargs={"k": k1,
87
- 'score_threshold':0.75,
88
- 'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=mapper),)])
89
- },
90
- )
91
- ret = retriever1.invoke(query)
92
- return ret
93
-
94
- def filter(self,query,k1=10,k2=17):
95
- retriever1 = self.vector_store.as_retriever(
96
- search_type="mmr",
97
- search_kwargs={"k": k1,
98
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
99
  },
100
  )
101
  retriever2 = self.vector_store.as_retriever(
102
- search_type="mmr",
103
- search_kwargs={"k": k2,
 
104
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
105
  },
106
  )
@@ -119,8 +64,7 @@ class Retriever():
119
  return ret
120
 
121
  def data_retrieve(self, query=''):
122
- retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
123
  return [doc for doc, _ in retrieved_docs]
124
 
125
- # ret = Retriever()
126
- # print(ret.multiple_contexts("i'm purchasing a car for 5Lack, what type of taxes should I pay and how much?"))
 
3
  from qdrant_client import QdrantClient
4
  from langchain_qdrant import QdrantVectorStore
5
  from qdrant_client.http import models
 
 
 
6
 
7
  from dotenv import load_dotenv
8
 
 
33
  'Indirect Tax Laws',
34
  'INDIAN Income Tax ACTS',
35
  'ONLINESITES']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ def filter(self,query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  retriever1 = self.vector_store.as_retriever(
39
  search_type="similarity_score_threshold",
40
+ search_kwargs={"k": 7,
41
+ 'score_threshold':0.7,
 
 
 
 
 
 
 
 
 
 
42
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
43
  },
44
  )
45
  retriever2 = self.vector_store.as_retriever(
46
+ search_type="similarity_score_threshold",
47
+ search_kwargs={"k": 17,
48
+ 'score_threshold':0.7,
49
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
50
  },
51
  )
 
64
  return ret
65
 
66
  def data_retrieve(self, query=''):
67
+ retrieved_docs = self.vector_store.similarity_search_with_score(query, k=20)
68
  return [doc for doc, _ in retrieved_docs]
69
 
70
+
 
setup.py CHANGED
@@ -25,9 +25,8 @@ load_dotenv('.env')
25
  class Script():
26
  def __init__(self):
27
  self.retriever = Retriever()
28
- self.openai_client = ChatOpenAI(model="gpt-4o-mini",temperature=0.1)
29
  self.groq = ChatGroq(model='llama3-70b-8192')
30
- self.groq1 = ChatGroq(model='llama3-8b-8192')
31
 
32
 
33
  def format_docs(self,format_results,id=False):
@@ -50,45 +49,40 @@ class Script():
50
 
51
  def gpt_loaders(self,query:str,history:str):
52
  template= f"""
53
- # You are an excellent Question & Answering BOT based on Context.
54
- # TASK : Given a question and the context, you are required to answer the question..
55
- # User questions may be given as a user_query (or) User_question (or) User_scenario.
56
  ===============================
57
  #USER_QUERY : {{question}}
58
  ===============================
59
- #METADATA_OF_CONTEXT :
60
- -> The context given is related to INDIAN-TAXATION.
 
 
61
  #CONTEXT : {{context}}
62
  ===============================
63
- You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
64
- --# When to take the history as CONTEXT: Only if the history is relevant to the current question, you are permitted to take the chat history as a context.
65
- --# If it is not relevant to the current question, do not take it.
66
  #Chat History : {{history}}
67
  ===============================
 
68
  -> Don't provide your own answer that is not in the given context.
69
- -> If you can provide a similar answer from the context that may be relevant but not exactly correct for the question, you can provide that answer.
70
- -> Try to provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
 
71
  ===============================
72
  # OUTPUT FORMAT:
73
- -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
74
- -> Don't provide any further explanation apart from the answer output.
75
- # STEP 1 : Generate a output for the query from the context:
76
- # STEP 2 : -> Based on the current output check if it is relevant to the question again.
77
- -> If you are not 100% able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
78
-
79
  """
80
- # template = f"""ANSWER THE USER QUESTION BASED ON THE GIVEN CONTEXT ALONE.
81
- # UESR QUESTION : {{question}}
82
- # CONTEXT : {{context}}
83
- # {{history}}
84
- # """
85
  rag_prompt = PromptTemplate.from_template(template)
86
  rag_chain = (
87
  rag_prompt
88
  | self.openai_client
89
  | StrOutputParser()
90
  )
91
- question ={"context": self.format_docs(self.retriever.multiple_contexts(query)), "question": query, "history": history}
92
  return rag_chain,question
93
 
94
  def gpt_loaders_id(self,query:str,history:str,id:str):
 
25
  class Script():
26
  def __init__(self):
27
  self.retriever = Retriever()
28
+ self.openai_client = ChatOpenAI(model="gpt-4o")
29
  self.groq = ChatGroq(model='llama3-70b-8192')
 
30
 
31
 
32
  def format_docs(self,format_results,id=False):
 
49
 
50
  def gpt_loaders(self,query:str,history:str):
51
  template= f"""
52
+ # You are an excellent Question & Answering BOT. Given a question and the context you will answer the question only based on the given context.
53
+ # You will be given a user_query (or) User_question (or) User_scenario.
54
+ # TASK: Your task is to provide an Answer to the USER_QUERY with the given CONTEXT_DATA.
55
  ===============================
56
  #USER_QUERY : {{question}}
57
  ===============================
58
+ #METADATA_OF_CONTEXT : -> The context given is related to INDIAN-TAXATIONS.
59
+ -> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc anything related to INDIAN TAXES.
60
+ -> Based on the user_query use the context accordingly.
61
+ -> You can also provide a rough calculation for an example if asked for tax calculations related from the CONTEXT (if it is available in the CONTEXT).
62
  #CONTEXT : {{context}}
63
  ===============================
64
+ You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
65
+ --# When to take the history as CONTEXT : Only if the history is relevant to the current question you are permitted to take the chat history as a context.
66
+ --# If it is not relevant to the current question do not take it.
67
  #Chat History : {{history}}
68
  ===============================
69
+ -> You are allowed to provide the answer only from the given context.
70
  -> Don't provide your own answer that is not in the given context.
71
+ -> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
72
+ -> Try to be precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
73
+ -> Provide answer only to the question that is asked.
74
  ===============================
75
  # OUTPUT FORMAT:
76
+ -> Your output may be given to a voice model for speech output. Try to be precise with your words while still fully answering the user's question.
77
+ -> Don't provide any further explanation apart from the answer output.
 
 
 
 
78
  """
 
 
 
 
 
79
  rag_prompt = PromptTemplate.from_template(template)
80
  rag_chain = (
81
  rag_prompt
82
  | self.openai_client
83
  | StrOutputParser()
84
  )
85
+ question ={"context": self.format_docs(self.retriever.data_retrieve(query)), "question": query, "history": history}
86
  return rag_chain,question
87
 
88
  def gpt_loaders_id(self,query:str,history:str,id:str):