Spaces:

izammohammed
/

legal-advisor

Running

legal-advisor / llama_call.py

Update llama_call.py

3a7461f verified 8 months ago

1.35 kB

	from src.helper import download_hugging_face_embeddings
	from langchain_pinecone import PineconeVectorStore
	from langchain.prompts import PromptTemplate
	from langchain_community.llms import CTransformers
	from langchain.chains import RetrievalQA
	from dotenv import load_dotenv
	from prompt import prompt_template
	import os

	# Load settings via python-dotenv (typically from a local .env file) before
	# the environment is read below.
	load_dotenv()

	# Pinecone settings taken from the environment; each is None when unset.
	# NOTE(review): neither constant is referenced again in the visible code --
	# presumably the Pinecone client reads PINECONE_API_KEY from the environment
	# on its own; confirm before removing.
	PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
	PINECONE_API_ENV = os.getenv("PINECONE_API_ENV")


	# Name of the existing Pinecone index that is queried at answer time.
	index_name = "llm-chatbot"

	# Embedding model used for similarity search against that index.
	embeddings = download_hugging_face_embeddings()

	# Attach to the existing Pinecone index (nothing is created or re-ingested
	# by this call; it only wraps the index as a vector store).
	docsearch = PineconeVectorStore.from_existing_index(
	    index_name=index_name,
	    embedding=embeddings,
	)


	# Prompt built from the project's template text; the template is filled
	# through the two placeholders "context" and "question".
	PROMPT = PromptTemplate(
	    input_variables=["context", "question"],
	    template=prompt_template,
	)

	# Extra keyword arguments handed to the QA chain so it uses PROMPT.
	chain_type_kwargs = dict(prompt=PROMPT)

	# The model file is resolved against the process working directory, so the
	# application has to be started from the directory containing saved_models/.
	current_dir = os.getcwd()

	# Local Llama model loaded through CTransformers.
	# NOTE(review): context_length=-1 presumably defers to the library/model
	# default -- confirm against the ctransformers config documentation.
	llm = CTransformers(
	    model_type="llama",
	    model=os.path.join(current_dir, "saved_models/llama-2-7b-chat.ggmlv3.q4_0.bin"),
	    config=dict(max_new_tokens=256, temperature=0.6, context_length=-1),
	    streaming=True,
	)


	# Retrieval-QA chain: looks up passages in the Pinecone store (k=2 per
	# query), feeds them to the LLM with the custom prompt, and keeps the
	# source documents in its output alongside the answer.
	qa = RetrievalQA.from_chain_type(
	    chain_type="stuff",
	    llm=llm,
	    chain_type_kwargs=chain_type_kwargs,
	    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
	    return_source_documents=True,
	    verbose=True,
	)


	def llama_call(input):
	    """Answer a question using the module-level retrieval-QA chain.

	    Args:
	        input: The question, passed to the chain under the "query" key.

	    Returns:
	        The chain output's "result" entry converted to ``str``. Any other
	        entries in the chain output are discarded.
	    """
	    response = qa.invoke({"query": input})
	    answer = response["result"]
	    return str(answer)