Spaces:
Runtime error
Runtime error
Commit
·
52addb4
1
Parent(s):
d5326c3
roll back
Browse files
- app.py +5 -5
- retriever.py +8 -64
- setup.py +18 -24
app.py
CHANGED
@@ -32,17 +32,17 @@ def process(audio, input_text, pdfs, chat_history: list[ChatMessage]):
|
|
32 |
pdf_uploaded = True
|
33 |
pdf_path = pdfs.name
|
34 |
output_id = vector.upload_pdfs_user(pdf_path)
|
35 |
-
|
36 |
if pdfs is None:
|
37 |
pdf_uploaded = False
|
38 |
output_id = None
|
39 |
-
|
40 |
if audio is not None:
|
41 |
transcript = transcriptor.get_transcript(audio)
|
42 |
chat_history.append({"role": "user", "content": transcript})
|
43 |
|
44 |
elif input_text:
|
45 |
-
|
46 |
chat_history.append({"role": "user", "content": input_text})
|
47 |
|
48 |
else:
|
@@ -65,7 +65,7 @@ with gr.Blocks() as demo:
|
|
65 |
with gr.Row():
|
66 |
with gr.Column(scale=1, min_width=300):
|
67 |
input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
|
68 |
-
gr.Markdown("
|
69 |
|
70 |
with gr.Row():
|
71 |
chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
|
@@ -84,4 +84,4 @@ with gr.Blocks() as demo:
|
|
84 |
)
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
-
demo.launch(
|
|
|
32 |
pdf_uploaded = True
|
33 |
pdf_path = pdfs.name
|
34 |
output_id = vector.upload_pdfs_user(pdf_path)
|
35 |
+
print(output_id)
|
36 |
if pdfs is None:
|
37 |
pdf_uploaded = False
|
38 |
output_id = None
|
39 |
+
print(output_id)
|
40 |
if audio is not None:
|
41 |
transcript = transcriptor.get_transcript(audio)
|
42 |
chat_history.append({"role": "user", "content": transcript})
|
43 |
|
44 |
elif input_text:
|
45 |
+
print(input_text)
|
46 |
chat_history.append({"role": "user", "content": input_text})
|
47 |
|
48 |
else:
|
|
|
65 |
with gr.Row():
|
66 |
with gr.Column(scale=1, min_width=300):
|
67 |
input_pdf = gr.File(label="Upload PDF", file_types=[".pdf"], file_count='single')
|
68 |
+
gr.Markdown("_Use a PDF to enhance the chatbot's knowledge!_", visible=not pdf_uploaded)
|
69 |
|
70 |
with gr.Row():
|
71 |
chatbot = gr.Chatbot(label="Chatbot Conversation", type="messages", bubble_full_width=True, show_copy_button=True, autoscroll=True)
|
|
|
84 |
)
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
+
demo.launch()
|
retriever.py
CHANGED
@@ -3,9 +3,6 @@ from langchain_openai import OpenAIEmbeddings
|
|
3 |
from qdrant_client import QdrantClient
|
4 |
from langchain_qdrant import QdrantVectorStore
|
5 |
from qdrant_client.http import models
|
6 |
-
from langchain_groq import ChatGroq
|
7 |
-
from langchain_core.output_parsers import StrOutputParser
|
8 |
-
from langchain_core.prompts import PromptTemplate
|
9 |
|
10 |
from dotenv import load_dotenv
|
11 |
|
@@ -36,71 +33,19 @@ class Retriever():
|
|
36 |
'Indirect Tax Laws',
|
37 |
'INDIAN Income Tax ACTS',
|
38 |
'ONLINESITES']
|
39 |
-
self.groq = ChatGroq(model='llama3-70b-8192')
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
def multi_questions(self,user_prompt):
|
44 |
-
llm = self.groq
|
45 |
-
prompt = f'''
|
46 |
-
# You are an excellent Query Decomposer for database retrieval optimization.
|
47 |
-
# You are given a user_query.
|
48 |
-
===============================
|
49 |
-
# TASK:
|
50 |
-
-> Your task is to provide a structured and hierarchical breakdown of the user query.
|
51 |
-
-> This breakdown should be in the form of an ordered sequence that helps in extracting the right context from the database.
|
52 |
-
-> Build the user query from the bottom level (basic requirements) to the top level (more specific details), ensuring the retrieval context improves at each level.
|
53 |
-
===============================
|
54 |
-
# USER_QUERY: {{user}}
|
55 |
-
===============================
|
56 |
-
# EXAMPLE:
|
57 |
-
1. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
|
58 |
-
-> #EXPECTED OUTPUT: | I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? | What type of taxes should I pay on the purchase of a car for 5 lakh? |
|
59 |
|
60 |
-
|
61 |
-
-> #EXPECTED OUTPUT: | NEW TAX REGIME and Income tax. | My income is 5 lakh. What type of taxes should I pay and how much should I pay? |
|
62 |
-
|
63 |
-
===============================
|
64 |
-
# OUTPUT FORMAT:
|
65 |
-
-> Provide the formatted output separated with the pipe '|' enclosed as: |...|...|
|
66 |
-
-> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
|
67 |
-
-> Do not answer the user question directly. Your job is to provide the decomposed queries in the format shown in the examples.
|
68 |
-
'''
|
69 |
-
|
70 |
-
rag_prompt = PromptTemplate.from_template(prompt)
|
71 |
-
l = (rag_prompt | llm | StrOutputParser())
|
72 |
-
stream = l.invoke({"user":user_prompt})
|
73 |
-
return stream
|
74 |
-
|
75 |
-
def multiple_contexts(self,user_prompt):
|
76 |
-
questions = self.filters
|
77 |
-
contexts = []
|
78 |
-
for i in questions:
|
79 |
-
contexts+=self.filter_multiple(user_prompt,i,18)
|
80 |
-
print(len(contexts))
|
81 |
-
return contexts
|
82 |
-
|
83 |
-
def filter_multiple(self,query,mapper,k1=10):
|
84 |
retriever1 = self.vector_store.as_retriever(
|
85 |
search_type="similarity_score_threshold",
|
86 |
-
search_kwargs={"k":
|
87 |
-
'score_threshold':0.
|
88 |
-
'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=mapper),)])
|
89 |
-
},
|
90 |
-
)
|
91 |
-
ret = retriever1.invoke(query)
|
92 |
-
return ret
|
93 |
-
|
94 |
-
def filter(self,query,k1=10,k2=17):
|
95 |
-
retriever1 = self.vector_store.as_retriever(
|
96 |
-
search_type="mmr",
|
97 |
-
search_kwargs={"k": k1,
|
98 |
'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
99 |
},
|
100 |
)
|
101 |
retriever2 = self.vector_store.as_retriever(
|
102 |
-
search_type="
|
103 |
-
search_kwargs={"k":
|
|
|
104 |
'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
105 |
},
|
106 |
)
|
@@ -119,8 +64,7 @@ class Retriever():
|
|
119 |
return ret
|
120 |
|
121 |
def data_retrieve(self, query=''):
|
122 |
-
retrieved_docs = self.vector_store.similarity_search_with_score(query, k=
|
123 |
return [doc for doc, _ in retrieved_docs]
|
124 |
|
125 |
-
|
126 |
-
# print(ret.multiple_contexts("i'm purchasing a car for 5Lack, what type of taxes should I pay and how much?"))
|
|
|
3 |
from qdrant_client import QdrantClient
|
4 |
from langchain_qdrant import QdrantVectorStore
|
5 |
from qdrant_client.http import models
|
|
|
|
|
|
|
6 |
|
7 |
from dotenv import load_dotenv
|
8 |
|
|
|
33 |
'Indirect Tax Laws',
|
34 |
'INDIAN Income Tax ACTS',
|
35 |
'ONLINESITES']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
+
def filter(self,query):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
retriever1 = self.vector_store.as_retriever(
|
39 |
search_type="similarity_score_threshold",
|
40 |
+
search_kwargs={"k": 7,
|
41 |
+
'score_threshold':0.7,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
43 |
},
|
44 |
)
|
45 |
retriever2 = self.vector_store.as_retriever(
|
46 |
+
search_type="similarity_score_threshold",
|
47 |
+
search_kwargs={"k": 17,
|
48 |
+
'score_threshold':0.7,
|
49 |
'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
|
50 |
},
|
51 |
)
|
|
|
64 |
return ret
|
65 |
|
66 |
def data_retrieve(self, query=''):
|
67 |
+
retrieved_docs = self.vector_store.similarity_search_with_score(query, k=20)
|
68 |
return [doc for doc, _ in retrieved_docs]
|
69 |
|
70 |
+
|
|
setup.py
CHANGED
@@ -25,9 +25,8 @@ load_dotenv('.env')
|
|
25 |
class Script():
|
26 |
def __init__(self):
|
27 |
self.retriever = Retriever()
|
28 |
-
self.openai_client = ChatOpenAI(model="gpt-4o
|
29 |
self.groq = ChatGroq(model='llama3-70b-8192')
|
30 |
-
self.groq1 = ChatGroq(model='llama3-8b-8192')
|
31 |
|
32 |
|
33 |
def format_docs(self,format_results,id=False):
|
@@ -50,45 +49,40 @@ class Script():
|
|
50 |
|
51 |
def gpt_loaders(self,query:str,history:str):
|
52 |
template= f"""
|
53 |
-
# You are an excellent Question & Answering BOT based on
|
54 |
-
#
|
55 |
-
#
|
56 |
===============================
|
57 |
#USER_QUERY : {{question}}
|
58 |
===============================
|
59 |
-
#METADATA_OF_CONTEXT :
|
60 |
-
|
|
|
|
|
61 |
#CONTEXT : {{context}}
|
62 |
===============================
|
63 |
-
You are also given previous ChatHistories (User question and
|
64 |
-
--# When to take the history as CONTEXT: Only if the history is relevant to the current question
|
65 |
-
--# If it is not relevant to the current question
|
66 |
#Chat History : {{history}}
|
67 |
===============================
|
|
|
68 |
-> Don't provide your own answer that is not in the given context.
|
69 |
-
-> If you
|
70 |
-
-> Try to provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
|
|
|
71 |
===============================
|
72 |
# OUTPUT FORMAT:
|
73 |
-
|
74 |
-
|
75 |
-
# STEP 1 : Generate a output for the query from the context:
|
76 |
-
# STEP 2 : -> Based on the current output check if it is relevant to the question again.
|
77 |
-
-> If you are not 100% able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
|
78 |
-
|
79 |
"""
|
80 |
-
# template = f"""ANSWER THE USER QUESTION BASED ON THE GIVEN CONTEXT ALONE.
|
81 |
-
# UESR QUESTION : {{question}}
|
82 |
-
# CONTEXT : {{context}}
|
83 |
-
# {{history}}
|
84 |
-
# """
|
85 |
rag_prompt = PromptTemplate.from_template(template)
|
86 |
rag_chain = (
|
87 |
rag_prompt
|
88 |
| self.openai_client
|
89 |
| StrOutputParser()
|
90 |
)
|
91 |
-
question ={"context": self.format_docs(self.retriever.
|
92 |
return rag_chain,question
|
93 |
|
94 |
def gpt_loaders_id(self,query:str,history:str,id:str):
|
|
|
25 |
class Script():
|
26 |
def __init__(self):
|
27 |
self.retriever = Retriever()
|
28 |
+
self.openai_client = ChatOpenAI(model="gpt-4o")
|
29 |
self.groq = ChatGroq(model='llama3-70b-8192')
|
|
|
30 |
|
31 |
|
32 |
def format_docs(self,format_results,id=False):
|
|
|
49 |
|
50 |
def gpt_loaders(self,query:str,history:str):
|
51 |
template= f"""
|
52 |
+
# You are an excellent Question & Answering BOT. Given a question and the context you will answer the question only based on the given context.
|
53 |
+
# You will be given a user_query (or) User_question (or) User_scenario.
|
54 |
+
# TASK: Your task is to provide an Answer to the USER_QUERY with the given CONTEXT_DATA.
|
55 |
===============================
|
56 |
#USER_QUERY : {{question}}
|
57 |
===============================
|
58 |
+
#METADATA_OF_CONTEXT : -> The context given is related to INDIAN-TAXATIONS.
|
59 |
+
-> It may contain how to calculate tax for GOODS/SERVICES/INDIVIDUAL/CARS/TRAINS/etc anything related to INDIAN TAXES.
|
60 |
+
-> Based on the user_query use the context accordingly.
|
61 |
+
-> You can also provide a rough calculation for an example if asked for tax calculations related from the CONTEXT (if it is available in the CONTEXT).
|
62 |
#CONTEXT : {{context}}
|
63 |
===============================
|
64 |
+
You are also given previous ChatHistories (User question and corressponding AI answer) to you as an extra data.
|
65 |
+
--# When to take the history as CONTEXT : Only if the history is relevant to the current question you are permitted to take the chat history as a context.
|
66 |
+
--# If it is not relevant to the current question do not take it.
|
67 |
#Chat History : {{history}}
|
68 |
===============================
|
69 |
+
-> You are allowed to provide the answer only from the given context.
|
70 |
-> Don't provide your own answer that is not in the given context.
|
71 |
+
-> If you are not able to answer the given question from the context => PROVIDE "Sorry! Unable to find an answer for your question. Try Again."
|
72 |
+
-> Try to be a precise and provide a proper output for the question. Don't explain any questions too lengthy max[100 words].
|
73 |
+
-> Provide answer only to the question that is asked.
|
74 |
===============================
|
75 |
# OUTPUT FORMAT:
|
76 |
+
-> Your output may be given to a voice model for a speech output. Try to be precise with your words. At the same time, fill the user with your answer
|
77 |
+
-> Don't provide any etc explanation apart from the answer output.
|
|
|
|
|
|
|
|
|
78 |
"""
|
|
|
|
|
|
|
|
|
|
|
79 |
rag_prompt = PromptTemplate.from_template(template)
|
80 |
rag_chain = (
|
81 |
rag_prompt
|
82 |
| self.openai_client
|
83 |
| StrOutputParser()
|
84 |
)
|
85 |
+
question ={"context": self.format_docs(self.retriever.data_retrieve(query)), "question": query, "history": history}
|
86 |
return rag_chain,question
|
87 |
|
88 |
def gpt_loaders_id(self,query:str,history:str,id:str):
|