Spaces:

edithram23
/

Chatbot

Runtime error

App Files Files Community

Chatbot / retriever.py

edithram23

initial commit

e900f80 5 months ago

raw

history blame

6.24 kB

	import os
	from langchain_openai import OpenAIEmbeddings
	from qdrant_client import QdrantClient
	from langchain_qdrant import QdrantVectorStore
	from qdrant_client.http import models
	from langchain_groq import ChatGroq
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.prompts import PromptTemplate

	from dotenv import load_dotenv

	# Load environment variables
	load_dotenv('.env')

	class Retriever():
	def __init__(self):
	# Initialize Qdrant client
	qdrant_client = QdrantClient(
	url=os.getenv("QDRANT_URL"),
	api_key=os.getenv("QDRANT_API_KEY")
	)
	# Initialize Qdrant vector store
	self.vector_store = QdrantVectorStore(
	client=qdrant_client,
	collection_name="siel-ai-assignment",
	embedding=OpenAIEmbeddings(),
	)
	self.vector_store_user = QdrantVectorStore(
	client=qdrant_client,
	collection_name="siel-ai-user",
	embedding=OpenAIEmbeddings(),
	)
	self.filters = ['Taxation-Goods-and-service-Tax',
	'Taxation-INCOME-TAX-LAW',
	'Direct Tax Laws and International Taxation',
	'Indirect Tax Laws',
	'INDIAN Income Tax ACTS',
	'ONLINESITES']
	self.groq = ChatGroq(model='llama3-70b-8192')



	def multi_questions(self,user_prompt):
	llm = self.groq
	prompt = f'''
	# You are an excellent Query Decomposer for database retrieval optimization.
	# You are given a user_query.
	===============================
	# TASK:
	-> Your task is to provide a structured and hierarchical breakdown of the user query.
	-> This breakdown should be in the form of an ordered sequence that helps in extracting the right context from the database.
	-> Build the user query from the bottom level (basic requirements) to the top level (more specific details), ensuring the retrieval context improves at each level.
	===============================
	# USER_QUERY: {{user}}
	===============================
	# EXAMPLE:
	1. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
	-> #EXPECTED OUTPUT: \| I'm purchasing a car for 5 lakh. \| What type of taxes should I pay on the purchase of automobiles? \| What type of taxes should I pay on the purchase of a car for 5 lakh? \|

	2. #USER_QUERY: "For 5 lakh, what type of taxes should I pay and how much?"
	-> #EXPECTED OUTPUT: \| NEW TAX REGIME and Income tax. \| My income is 5 lakh. What type of taxes should I pay and how much should I pay? \|

	===============================
	# OUTPUT FORMAT:
	-> Provide the formatted output separated with the pipe '\|' enclosed as: \|...\|...\|
	-> Stick to the given format without any additional explanation. Your only response must be the formatted sequence of queries.
	-> Do not answer the user question directly. Your job is to provide the decomposed queries in the format shown in the examples.
	'''

	rag_prompt = PromptTemplate.from_template(prompt)
	l = (rag_prompt \| llm \| StrOutputParser())
	stream = l.invoke({"user":user_prompt})
	return stream

	def multiple_contexts(self,user_prompt):
	questions = self.filters
	contexts = []
	for i in questions:
	contexts+=self.filter_multiple(user_prompt,i,18)
	print(len(contexts))
	return contexts

	def filter_multiple(self,query,mapper,k1=10):
	retriever1 = self.vector_store.as_retriever(
	search_type="similarity_score_threshold",
	search_kwargs={"k": k1,
	'score_threshold':0.75,
	'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=mapper),)])
	},
	)
	ret = retriever1.invoke(query)
	return ret

	def filter(self,query,k1=10,k2=17):
	retriever1 = self.vector_store.as_retriever(
	search_type="mmr",
	search_kwargs={"k": k1,
	'filter':models.Filter(must=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
	},
	)
	retriever2 = self.vector_store.as_retriever(
	search_type="mmr",
	search_kwargs={"k": k2,
	'filter':models.Filter(must_not=[models.FieldCondition(key="metadata.DOCUMENT_IS_ABOUT", match=models.MatchValue(value=self.filters[-1]),)])
	},
	)
	ret = retriever1.invoke(query)+retriever2.invoke(query)
	return ret

	def id_filter(self,query,id):
	retriever1 = self.vector_store_user.as_retriever(
	search_type="similarity_score_threshold",
	search_kwargs={"k": 10,
	'score_threshold':0.7,
	'filter':models.Filter(must=[models.FieldCondition(key="metadata.ID", match=models.MatchValue(value=id),)])
	}
	)
	ret = retriever1.invoke(query)
	return ret

	def data_retrieve(self, query=''):
	retrieved_docs = self.vector_store.similarity_search_with_score(query, k=10)
	return [doc for doc, _ in retrieved_docs]

	# ret = Retriever()
	# print(ret.multiple_contexts("i'm purchasing a car for 5Lack, what type of taxes should I pay and how much?"))