import os

from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http import models

# Load environment variables
load_dotenv('.env')
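# Expected in .env: QDRANT_URL and QDRANT_API_KEY (read explicitly below), plus
# OPENAI_API_KEY for OpenAIEmbeddings and GROQ_API_KEY for ChatGroq, both of
# which the langchain integrations read from the environment.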
class Retriever:
    def __init__(self):
        # Initialize Qdrant client
        qdrant_client = QdrantClient(
            url=os.getenv("QDRANT_URL"),
            api_key=os.getenv("QDRANT_API_KEY"),
        )
        # Vector store over the main tax-law corpus
        self.vector_store = QdrantVectorStore(
            client=qdrant_client,
            collection_name="siel-ai-assignment",
            embedding=OpenAIEmbeddings(),
        )
        # Vector store over per-user documents
        self.vector_store_user = QdrantVectorStore(
            client=qdrant_client,
            collection_name="siel-ai-user",
            embedding=OpenAIEmbeddings(),
        )
        # Values taken by the DOCUMENT_IS_ABOUT metadata field, used for filtered retrieval
        self.filters = [
            'Taxation-Goods-and-service-Tax',
            'Taxation-INCOME-TAX-LAW',
            'Direct Tax Laws and International Taxation',
            'Indirect Tax Laws',
            'INDIAN Income Tax ACTS',
            'ONLINESITES',
        ]
        self.groq = ChatGroq(model='llama3-70b-8192')
    def multi_questions(self, user_prompt):
        """Decompose a user query into a pipe-separated sequence of sub-queries."""
        llm = self.groq
        prompt = '''
        # You are an excellent Query Decomposer for database retrieval optimization.
        # You are given a user_query.
        ===============================
        # TASK:
        -> Your task is to provide a structured and hierarchical breakdown of the user query.
        -> This breakdown should be in the form of an ordered sequence that helps in extracting the right context from the database.
        -> Build the user query from the bottom level (basic requirements) to the top level (more specific details), ensuring the retrieval context improves at each level.
        ===============================
        # USER_QUERY: {user}
        ===============================
        # EXAMPLES:
        1. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
        -> #EXPECTED OUTPUT: | I'm purchasing a car for 5 lakh. | What type of taxes should I pay on the purchase of automobiles? | What type of taxes should I pay on the purchase of a car for 5 lakh? |
        2. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
        -> #EXPECTED OUTPUT: | NEW TAX REGIME and Income tax. | My income is 5 lakh. What type of taxes should I pay and how much should I pay? |
        ===============================
        # OUTPUT FORMAT:
        -> Provide the formatted output separated with the pipe '|' enclosed as: |...|...|
        -> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
        -> Do not answer the user question directly. Your job is to provide the decomposed queries in the format shown in the examples.
        '''
        rag_prompt = PromptTemplate.from_template(prompt)
        chain = rag_prompt | llm | StrOutputParser()
        return chain.invoke({"user": user_prompt})
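    # A minimal sketch of consuming the decomposition, assuming the model honours
    # the pipe-delimited format (the split below is best-effort, not guaranteed):
    #
    #   sub_queries = [q.strip() for q in retriever.multi_questions(query).split('|') if q.strip()]
    #   docs = [d for q in sub_queries for d in retriever.data_retrieve(q)]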
    def multiple_contexts(self, user_prompt):
        """Retrieve contexts for the query under every DOCUMENT_IS_ABOUT topic."""
        contexts = []
        for topic in self.filters:
            contexts += self.filter_multiple(user_prompt, topic, 18)
        print(len(contexts))
        return contexts
    def filter_multiple(self, query, mapper, k1=10):
        """Thresholded similarity search restricted to one DOCUMENT_IS_ABOUT value."""
        retriever1 = self.vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": k1,
                "score_threshold": 0.75,
                "filter": models.Filter(must=[
                    models.FieldCondition(
                        key="metadata.DOCUMENT_IS_ABOUT",
                        match=models.MatchValue(value=mapper),
                    ),
                ]),
            },
        )
        return retriever1.invoke(query)
    def filter(self, query, k1=10, k2=17):
        """MMR retrieval: k1 docs from ONLINESITES plus k2 docs from all other topics."""
        retriever1 = self.vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": k1,
                "filter": models.Filter(must=[
                    models.FieldCondition(
                        key="metadata.DOCUMENT_IS_ABOUT",
                        match=models.MatchValue(value=self.filters[-1]),
                    ),
                ]),
            },
        )
        retriever2 = self.vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": k2,
                "filter": models.Filter(must_not=[
                    models.FieldCondition(
                        key="metadata.DOCUMENT_IS_ABOUT",
                        match=models.MatchValue(value=self.filters[-1]),
                    ),
                ]),
            },
        )
        return retriever1.invoke(query) + retriever2.invoke(query)
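    # Design note: "mmr" (maximal marginal relevance) re-ranks candidates to
    # balance similarity to the query against diversity among the returned
    # chunks, so the two searches above surface fewer near-duplicate passages
    # than plain similarity search would.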
    def id_filter(self, query, user_id):
        """Thresholded similarity search over the user collection for one metadata ID."""
        retriever1 = self.vector_store_user.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": 10,
                "score_threshold": 0.7,
                "filter": models.Filter(must=[
                    models.FieldCondition(
                        key="metadata.ID",
                        match=models.MatchValue(value=user_id),
                    ),
                ]),
            },
        )
        return retriever1.invoke(query)
    def data_retrieve(self, query=''):
        retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
        return [doc for doc, _ in retrieved_docs]
# ret = Retriever()
# print(ret.multiple_contexts("I'm purchasing a car for 5 lakh, what type of taxes should I pay and how much?"))
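# A fuller usage sketch (assumes the two Qdrant collections already exist and all
# four API keys are set in .env; `some_user_id` is a hypothetical metadata.ID value):
#
# retriever = Retriever()
# print(retriever.multi_questions("For 5 lakh, what type of taxes should I pay and how much?"))
# print(retriever.filter("income tax slabs under the new regime"))
# print(retriever.id_filter("my previous filings", some_user_id))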