edithram23 commited on
Commit
52addb4
·
1 Parent(s): d5326c3
Files changed (3) hide show
  1. app.py +5 -5
  2. retriever.py +8 -64
  3. setup.py +18 -24
app.py CHANGED
@@ -32,17 +32,17 @@ def process(audio, input_text, pdfs, chat_history: list[ChatMessage]):
32
  pdf_uploaded = True
33
  pdf_path = pdfs.name
34
  output_id = vector.upload_pdfs_user(pdf_path)
35
- # print(output_id)
36
  if pdfs is None:
37
  pdf_uploaded = False
38
  output_id = None
39
- # print(output_id)
40
  if audio is not None:
41
  transcript = transcriptor.get_transcript(audio)
42
  chat_history.append({"role": "user", "content": transcript})
43
 
44
  elif input_text:
45
- # print(input_text)
46
  chat_history.append({"role": "user", "content": input_text})
47
 
48
  else:
@@ -65,7 +65,7 @@ with gr.Blocks() as demo:
65
  with gr.Row():
66
  with gr.Column(scale=1, min_width=300):
67
  input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
68
- gr.Markdown("_Upload a PDF to chat with it!_", visible=not pdf_uploaded)
69
 
70
  with gr.Row():
71
  chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
84
  )
85
 
86
  if __name__ == "__main__":
87
- demo.launch(server_port='9000')
 
32
  pdf_uploaded = True
33
  pdf_path = pdfs.name
34
  output_id = vector.upload_pdfs_user(pdf_path)
35
+ print(output_id)
36
  if pdfs is None:
37
  pdf_uploaded = False
38
  output_id = None
39
+ print(output_id)
40
  if audio is not None:
41
  transcript = transcriptor.get_transcript(audio)
42
  chat_history.append({"role": "user", "content": transcript})
43
 
44
  elif input_text:
45
+ print(input_text)
46
  chat_history.append({"role": "user", "content": input_text})
47
 
48
  else:
 
65
  with gr.Row():
66
  with gr.Column(scale=1, min_width=300):
67
  input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
68
+ gr.Markdown("_Use a PDF to enhance the chatbot's knowledge!_", visible=not pdf_uploaded)
69
 
70
  with gr.Row():
71
  chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
 
84
  )
85
 
86
  if __name__ == "__main__":
87
+ demo.launch()
retriever.py CHANGED
@@ -3,9 +3,6 @@ from langchain_openai import OpenAIEmbeddings
3
  from qdrant_client import QdrantClient
4
  from langchain_qdrant import QdrantVectorStore
5
  from qdrant_client.http import models
6
- from langchain_groq import ChatGroq
7
- from langchain_core.output_parsers import StrOutputParser
8
- from langchain_core.prompts import PromptTemplate
9
 
10
  from dotenv import load_dotenv
11
 
@@ -36,71 +33,19 @@ class Retriever():
36
  'Indirect Tax Laws',
37
  'INDIAN Income Tax ACTS',
38
  'ONLINESITES']
39
- self.groq = ChatGroq(model='llama3-70b-8192')
40
-
41
-
42
-
43
- def multi_questions(self,user_prompt):
44
- llm = self.groq
45
- prompt = f'''
46
- # You are an excellent Query Decomposer for database retrieval optimization.
47
- # You are given a user_query.
48
- ===============================
49
- # TASK:
50
- -> Your task is to provide a structured and hierarchical breakdown of the user query.
51
- -> This breakdown should be in the form of an ordered sequence that helps in extracting the right context from the database.
52
- -> Build the user query from the bottom level (basic requirements) to the top level (more specific details), ensuring the retrieval context improves at each level.
53
- ===============================
54
- # USER_QUERY: {{user}}
55
- ===============================
56
- # EXAMPLE:
57
- 1. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
58
- -> #EXPECTED OUTPUT: | I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? | What type of taxes should I pay on the purchase of a car for 5 lakh? |
59
 
60
- 2. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
61
- -> #EXPECTED OUTPUT: | NEW TAX REGIME and Income tax. | My income is 5 lakh. What type of taxes should I pay and how much should I pay? |
62
-
63
- ===============================
64
- # OUTPUT FORMAT:
65
- -> Provide the formatted output separated with the pipe '|' enclosed as: |...|...|
66
- -> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
67
- -> Do not answer the user question directly. Your job is to provide the decomposed queries in the format shown in the examples.
68
- '''
69
-
70
- rag_prompt = PromptTemplate.from_template(prompt)
71
- l = (rag_prompt | llm | StrOutputParser())
72
- stream = l.invoke({"user":user_prompt})
73
- return stream
74
-
75
- def multiple_contexts(self,user_prompt):
76
- questions = self.filters
77
- contexts = []
78
- for i in questions:
79
- contexts+=self.filter_multiple(user_prompt,i,18)
80
- print(len(contexts))
81
- return contexts
82
-
83
- def filter_multiple(self,query,mapper,k1=10):
84
  retriever1 = self.vector_store.as_retriever(
85
  search_type="similarity_score_threshold",
86
- search_kwargs={"k": k1,
87
- 'score_threshold':0.75,
88
- 'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=mapper),)])
89
- },
90
- )
91
- ret = retriever1.invoke(query)
92
- return ret
93
-
94
- def filter(self,query,k1=10,k2=17):
95
- retriever1 = self.vector_store.as_retriever(
96
- search_type="mmr",
97
- search_kwargs={"k": k1,
98
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
99
  },
100
  )
101
  retriever2 = self.vector_store.as_retriever(
102
- search_type="mmr",
103
- search_kwargs={"k": k2,
 
104
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
105
  },
106
  )
@@ -119,8 +64,7 @@ class Retriever():
119
  return ret
120
 
121
  def data_retrieve(self, query=''):
122
- retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
123
  return [doc for doc, _ in retrieved_docs]
124
 
125
- # ret = Retriever()
126
- # print(ret.multiple_contexts("i'm purchasing a car for 5Lack, what type of taxes should I pay and how much?"))
 
3
  from qdrant_client import QdrantClient
4
  from langchain_qdrant import QdrantVectorStore
5
  from qdrant_client.http import models
 
 
 
6
 
7
  from dotenv import load_dotenv
8
 
 
33
  'Indirect Tax Laws',
34
  'INDIAN Income Tax ACTS',
35
  'ONLINESITES']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ def filter(self,query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  retriever1 = self.vector_store.as_retriever(
39
  search_type="similarity_score_threshold",
40
+ search_kwargs={"k": 7,
41
+ 'score_threshold':0.7,
 
 
 
 
 
 
 
 
 
 
42
  'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
43
  },
44
  )
45
  retriever2 = self.vector_store.as_retriever(
46
+ search_type="similarity_score_threshold",
47
+ search_kwargs={"k": 17,
48
+ 'score_threshold':0.7,
49
  'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
50
  },
51
  )
 
64
  return ret
65
 
66
  def data_retrieve(self, query=''):
67
+ retrieved_docs = self.vector_store.similarity_search_with_score(query, k=20)
68
  return [doc for doc, _ in retrieved_docs]
69
 
70
+
 
setup.py CHANGED
@@ -25,9 +25,8 @@ load_dotenv('.env')
25
  class Script():
26
  def __init__(self):
27
  self.retriever = Retriever()
28
- self.openai_client = ChatOpenAI(model="gpt-4o-mini",temperature=0.1)
29
  self.groq = ChatGroq(model='llama3-70b-8192')
30
- self.groq1 = ChatGroq(model='llama3-8b-8192')
31
 
32
 
33
  def format_docs(self,format_results,id=False):
@@ -50,45 +49,40 @@ class Script():
50
 
51
  def gpt_loaders(self,query:str,history:str):
52
  template= f"""
53
- # You are an excellent Question & Answering BOT based on Context.
54
- # TASK : Given a question and the context, you are required to answer the question..
55
- # User questions may be given as a user_query (or) User_question (or) User_scenario.
56
  ===============================
57
  #USER_QUERY : {{question}}
58
  ===============================
59
- #METADATA_OF_CONTEXT :
60
- -> The context given is related to INDIAN-TAXATION.
 
 
61
  #CONTEXT : {{context}}
62
  ===============================
63
- You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
64
- --# When to take the history as CONTEXT: Only if the history is relevant to the current question, you are permitted to take the chat history as a context.
65
- --# If it is not relevant to the current question, do not take it.
66
  #Chat History : {{history}}
67
  ===============================
 
68
  -> Don't provide your own answer that is not in the given context.
69
- -> If you can provide a similar answer from the context that may be relevant but not exactly correct for the question, you can provide that answer.
70
- -> Try to provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
 
71
  ===============================
72
  # OUTPUT FORMAT:
73
- -> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer.
74
- -> Don't provide any further explanation apart from the answer output.
75
- # STEP 1 : Generate a output for the query from the context:
76
- # STEP 2 : -> Based on the current output check if it is relevant to the question again.
77
- -> If you are not 100% able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
78
-
79
  """
80
- # template = f"""ANSWER THE USER QUESTION BASED ON THE GIVEN CONTEXT ALONE.
81
- # UESR QUESTION : {{question}}
82
- # CONTEXT : {{context}}
83
- # {{history}}
84
- # """
85
  rag_prompt = PromptTemplate.from_template(template)
86
  rag_chain = (
87
  rag_prompt
88
  | self.openai_client
89
  | StrOutputParser()
90
  )
91
- question ={"context": self.format_docs(self.retriever.multiple_contexts(query)), "question": query, "history": history}
92
  return rag_chain,question
93
 
94
  def gpt_loaders_id(self,query:str,history:str,id:str):
 
25
  class Script():
26
  def __init__(self):
27
  self.retriever = Retriever()
28
+ self.openai_client = ChatOpenAI(model="gpt-4o")
29
  self.groq = ChatGroq(model='llama3-70b-8192')
 
30
 
31
 
32
  def format_docs(self,format_results,id=False):
 
49
 
50
  def gpt_loaders(self,query:str,history:str):
51
  template= f"""
52
+ # You are an excellent Question & Answering BOT. Given a question and the context you will answer the question only based on the given context.
53
+ # You will be given a user_query (or) User_question (or) User_scenario.
54
+ # TASK: Your task is to provide an Answer to the USER_QUERY with the given CONTEXT_DATA.
55
  ===============================
56
  #USER_QUERY : {{question}}
57
  ===============================
58
+ #METADATA_OF_CONTEXT : -> The context given is related to INDIAN-TAXATIONS.
59
+ -> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc anything related to INDIAN TAXES.
60
+ -> Based on the user_query use the context accordingly.
61
+ -> You can also provide a rough calculation for an example if asked for tax calculations related from the CONTEXT (if it is available in the CONTEXT).
62
  #CONTEXT : {{context}}
63
  ===============================
64
+ You are also given previous ChatHistories (User question and corresponding AI answer) to you as extra data.
65
+ --# When to take the history as CONTEXT : Only if the history is relevant to the current question you are permitted to take the chat history as a context.
66
+ --# If it is not relevant to the current question do not take it.
67
  #Chat History : {{history}}
68
  ===============================
69
+ -> You are allowed to provide the answer only from the given context.
70
  -> Don't provide your own answer that is not in the given context.
71
+ -> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
72
+ -> Try to be precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
73
+ -> Provide answer only to the question that is asked.
74
  ===============================
75
  # OUTPUT FORMAT:
76
+ -> Your output may be given to a voice model for speech output. Try to be precise with your words while still fully answering the user's question.
77
+ -> Don't provide any further explanation apart from the answer output.
 
 
 
 
78
  """
 
 
 
 
 
79
  rag_prompt = PromptTemplate.from_template(template)
80
  rag_chain = (
81
  rag_prompt
82
  | self.openai_client
83
  | StrOutputParser()
84
  )
85
+ question ={"context": self.format_docs(self.retriever.data_retrieve(query)), "question": query, "history": history}
86
  return rag_chain,question
87
 
88
  def gpt_loaders_id(self,query:str,history:str,id:str):