from helper import download_hugging_face_embeddings
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
from prompt import prompt_template
import os
load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV")
embeddings = download_hugging_face_embeddings()
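# `download_hugging_face_embeddings` comes from helper.py, which is not part of
# this file. A minimal sketch of what it presumably returns, assuming it wraps
# LangChain's HuggingFaceEmbeddings (the model name here is an assumption):
#
#     from langchain_community.embeddings import HuggingFaceEmbeddings
#
#     def download_hugging_face_embeddings():
#         return HuggingFaceEmbeddings(
#             model_name="sentence-transformers/all-MiniLM-L6-v2"
#         )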
index_name = "llm-chatbot"
# Initialize the Pinecone vector store from the existing index
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
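# Besides backing the retriever below, the store can also be queried directly
# if needed, e.g. docs = docsearch.similarity_search("some question", k=2)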
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
chain_type_kwargs = {"prompt": PROMPT}
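# `prompt_template` is imported from prompt.py (not shown here). Whatever its
# exact wording, it must contain the {context} and {question} placeholders
# declared above. A hypothetical example of the expected shape:
#
#     prompt_template = """Use the following pieces of context to answer the
#     question. If you don't know the answer, say that you don't know.
#
#     Context: {context}
#     Question: {question}
#
#     Helpful answer:"""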
def load_llm():
    # Load the quantized Llama 2 chat model via ctransformers.
    # Generation settings belong in the `config` dict that CTransformers expects.
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        config={"max_new_tokens": 512, "temperature": 0.5},
    )
    return llm
llm = load_llm()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
    verbose=True,
)
def llama_call(query):
    # Run the RetrievalQA chain on the user's question and return the answer text.
    # (`query` rather than `input` avoids shadowing the Python built-in.)
    result = qa.invoke({"query": query})
    return str(result["result"])
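
if __name__ == "__main__":
    # Smoke test with a hypothetical query; in the Space this module is
    # presumably imported and llama_call is invoked by the web layer.
    print(llama_call("What topics can this chatbot answer questions about?"))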