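# NOTE(review): the original text here was "Spaces: / Running / Running", which
# appears to be web-page residue captured with the file rather than program
# text; confirm and remove.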
import os

from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
from langchain_pinecone import PineconeVectorStore

from prompt import prompt_template
from src.helper import download_hugging_face_embeddings
# Load variables from a .env file (if present) into the process environment
# before anything below reads them.
load_dotenv()

# Pinecone settings read from the environment; each is None when unset.
# They are not referenced again in this part of the file.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_API_ENV = os.getenv("PINECONE_API_ENV")

# Embedding model used to query the vector index.
embeddings = download_hugging_face_embeddings()
index_name = "llm-chatbot"

# Attach to the existing Pinecone index by name.
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

# Prompt with the two placeholders ("context", "question") declared as inputs.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

# The model file is resolved relative to the current working directory, so the
# process must be started from the directory that contains "saved_models".
current_dir = os.getcwd()
_model_path = os.path.join(
    current_dir, "saved_models/llama-2-7b-chat.ggmlv3.q4_0.bin"
)
llm = CTransformers(
    model=_model_path,
    model_type="llama",
    streaming=True,
    config=dict(max_new_tokens=256, temperature=0.6, context_length=-1),
)

# Retrieval-QA chain: the retriever is built with search_kwargs k=2, and the
# chain is asked to return its source documents alongside the answer.
_retriever = docsearch.as_retriever(search_kwargs={"k": 2})
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=_retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
    verbose=True,
)
def llama_call(input):
    """Run the module-level QA chain on a query and return the answer text.

    Args:
        input: The query, forwarded to the chain under the "query" key.
            (The name shadows the ``input`` builtin inside this function; it
            is kept because it is part of the caller-visible signature.)

    Returns:
        The chain output's "result" entry converted with ``str``. Any other
        entries in the chain output are ignored.
    """
    response = qa.invoke({"query": input})
    answer = response["result"]
    return str(answer)