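"""Helpers for querying Pinecone-backed llama_index "brains".

Provides question answering over a named Pinecone index (askQuestion),
index existence checks (getBrains), and a chat agent that can fall back
to the index as a LangChain tool (runAgent).
"""
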
from llama_index import GPTPineconeIndex, LLMPredictor, ServiceContext, QuestionAnswerPrompt
from llama_index.langchain_helpers.agents import IndexToolConfig, LlamaIndexTool, LlamaToolkit, create_llama_chat_agent
from langchain import OpenAI
from langchain.chains.conversation.memory import ConversationBufferMemory
import pinecone
import os

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

pinecone_key = os.environ['PINECONE_KEY']


def askQuestion(brain, question, prompt, temperature, maxTokens):
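    """Answer `question` against the Pinecone index ("brain") of the given name.

    `prompt` is prepended to the query inside the QA template. Returns
    (response, memory) on success, or (response, False) when the index
    produced no answer.
    """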
    temperature = float(temperature)
    # like temperature, maxTokens may arrive as a string from the caller
    maxTokens = int(maxTokens)
    finalQuestion = prompt + question
    print(finalQuestion)
    print(temperature, maxTokens)
    # print(type(temperature))
    # print(type(maxTokens))
    brain_name = brain.lower()
    print(brain_name)

    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    pineconeindex = pinecone.Index(brain_name)
    pineconeindex.describe_index_stats()
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)
    # index = GPTSimpleVectorIndex.load_from_disk('index.json')

    # For Q&A set this value to 4; for content generation set it between 7 and 10.
    data_chunks = 5
    QA_PROMPT_TMPL = (
        "We have provided context information below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given this information, please answer the question at the end of this main prompt: " + prompt + " {query_str}\n"
    )
    QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)

    query = question
    # relevant info from brain goes here
    info = ["pdf"]
    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=temperature, model_name="text-davinci-003", max_tokens=maxTokens))
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

    response = index.query(query, service_context=service_context,
                           similarity_top_k=data_chunks, response_mode="compact",
                           text_qa_template=QA_PROMPT)
    print(question)
    print(response)

    if response.response is None:
        return response, False

    memory = ConversationBufferMemory(memory_key="chat_history")
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(response.response)
    return response, memory


def getBrains(name):
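    """Return True if a Pinecone index with the given (lowercased) name exists."""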
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    active_indexes = pinecone.list_indexes()
    print(active_indexes)
    return name.lower() in active_indexes


def runAgent(brainName, memory, question, temperature, maxTokens):
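    """Continue a chat using the memory returned by askQuestion.

    Wraps the `brainName` index in a LlamaToolkit so the agent can query it
    when the chat history alone is not enough. Returns (response, memory),
    or an error string if the chat has not been initiated yet.
    """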
    if memory is False:
        return "Please initiate the chat first."

    temperature = float(temperature)
    maxTokens = int(maxTokens)
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    pineconeindex = pinecone.Index(brainName)
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)
    # memory = ConversationBufferMemory(memory_key="chat_history")
    print(memory.chat_memory)

    llm = OpenAI(
        temperature=temperature, model_name="text-davinci-003", max_tokens=maxTokens)
    tool_config = IndexToolConfig(
        index=index,
        name="Vector Index",
        description="Use this tool if you can't find the required information in the previous message history",
        index_query_kwargs={"similarity_top_k": 4, "response_mode": "compact"},
        tool_kwargs={"return_direct": True}
    )
    toolkit = LlamaToolkit(index_configs=[tool_config])
    agent_chain = create_llama_chat_agent(
        toolkit, llm, memory=memory, verbose=True)
    response = agent_chain.run(question)
    print(memory.chat_memory)
    return response, memory
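

if __name__ == "__main__":
    # Minimal usage sketch. The index name, prompt, and question below are
    # placeholders (assumptions), and PINECONE_KEY / OPENAI_API_KEY must be
    # set in the environment for this to run.
    brain = "my-brain"
    if getBrains(brain):
        response, memory = askQuestion(
            brain, "What is this document about?",
            "Answer using only the provided context. ", "0.2", 256)
        if memory is not False:
            follow_up, memory = runAgent(
                brain, memory, "Can you expand on that?", "0.2", 256)
            print(follow_up)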