import argparse
import os

from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.prompts import ChatPromptTemplate

CHROMA_PATH = "chroma"

PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""


def query_data(query_text):
    # Prepare the DB.
    embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Search the DB and keep the top 3 chunks with their relevance scores.
    results = db.similarity_search_with_relevance_scores(query_text, k=3)
    if len(results) == 0 or results[0][1] < 0.2:
        # Bail out if nothing in the DB matches the query well enough.
        print("Unable to find matching results.")
        return

    # Stitch the retrieved chunks into a single context block for the prompt.
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

    repo_id = "HuggingFaceH4/zephyr-7b-beta"
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        max_new_tokens=512,  # HuggingFaceEndpoint expects max_new_tokens, not max_length
        temperature=0.5,
        huggingfacehub_api_token=os.environ["HF_TOKEN"],
    )

    # Pipe the prompt into the LLM (LCEL), then run the query.
    llm_chain = prompt_template | llm
    response_text = llm_chain.invoke({"question": query_text, "context": context_text})

    sources = [doc.metadata.get("source", None) for doc, _score in results]
    formatted_response = f"{response_text}\nSources: {sources}"
    return formatted_response
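
# Minimal CLI entry point (a sketch): argparse is already imported above, so the
# script can be invoked directly, e.g. `python query_data.py "your question here"`.
# The script filename and argument name are illustrative assumptions, not from
# the original source.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("query_text", type=str, help="The query text.")
    args = parser.parse_args()
    response = query_data(args.query_text)
    if response is not None:
        print(response)


if __name__ == "__main__":
    main()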