from llama_index import GPTPineconeIndex, LLMPredictor, ServiceContext
from llama_index import QuestionAnswerPrompt
from llama_index.langchain_helpers.agents import (
    IndexToolConfig,
    LlamaIndexTool,
    LlamaToolkit,
    create_llama_chat_agent,
)
from langchain import OpenAI
from langchain.chains.conversation.memory import ConversationBufferMemory
import pinecone
import os

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

pinecone_key = os.environ['PINECONE_KEY']


def askQuestion(brain, question, prompt, temperature, maxTokens):
    """Run a one-shot query against the Pinecone-backed index and seed the chat memory."""
    temperature = float(temperature)
    maxTokens = int(maxTokens)  # cast in case the value arrives as a string
    finalQuestion = prompt + question
    print(finalQuestion)
    print(temperature, maxTokens)
    # print(type(temperature))
    # print(type(maxTokens))

    # Pinecone index names are lowercase, so normalise the brain name.
    Brain_Name = brain.lower()
    print(Brain_Name)

    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    pineconeindex = pinecone.Index(Brain_Name)
    pineconeindex.describe_index_stats()

    # Attach to the existing Pinecone index; no documents are inserted here.
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)
    # index = GPTSimpleVectorIndex.load_from_disk('index.json')

    # For Q&A set this value to 4; for content generation set it between 7 and 10.
    data_chunks = 5

    QA_PROMPT_TMPL = (
        "We have provided context information below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given this information, please answer the question at the end of this main prompt: "
        + prompt + " {query_str}\n"
    )
    QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)

    query = question
    # relevant info from brain goes here
    info = ["pdf"]

    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=temperature, model_name="text-davinci-003", max_tokens=maxTokens))
    service_context_gpt4 = ServiceContext.from_defaults(
        llm_predictor=llm_predictor)

    response = index.query(
        query,
        service_context=service_context_gpt4,
        similarity_top_k=data_chunks,
        response_mode="compact",
        text_qa_template=QA_PROMPT,
    )
    print(question)
    print(response)

    # If the index produced no answer, signal the caller that no chat was started.
    if response.response is None:
        return response, False

    # Seed the conversation memory with the first question/answer pair.
    memory = ConversationBufferMemory(memory_key="chat_history")
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(response.response)

    return response, memory


def getBrains(name):
    """Return True if a Pinecone index with the given (lowercased) name exists."""
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    active_indexes = pinecone.list_indexes()
    print(active_indexes)
    name = name.lower()
    return name in active_indexes


def runAgent(brainName, memory, question, temperature, maxTokens):
    """Continue the conversation with a chat agent that can fall back to the index."""
    if memory is False:
        return "Please initiate the chat first."
    temperature = float(temperature)
    maxTokens = int(maxTokens)

    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    # Lowercase to match the index naming used in askQuestion/getBrains.
    pineconeindex = pinecone.Index(brainName.lower())
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)

    # memory = ConversationBufferMemory(memory_key="chat_history")
    print(memory.chat_memory)

    llm = OpenAI(temperature=temperature,
                 model_name="text-davinci-003", max_tokens=maxTokens)

    tool_config = IndexToolConfig(
        index=index,
        name="Vector Index",
        description="Use this tool if you can't find the required information in the previous message history",
        index_query_kwargs={"similarity_top_k": 4, "response_mode": "compact"},
        tool_kwargs={"return_direct": True},
    )
    toolkit = LlamaToolkit(index_configs=[tool_config])

    agent_chain = create_llama_chat_agent(
        toolkit, llm, memory=memory, verbose=True)

    response = agent_chain.run(question)
    print(memory.chat_memory)
    return response, memory
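

# --- Example usage (illustrative sketch, not part of the original flow) ---
# Minimal end-to-end walk-through of the three helpers above. It assumes the
# PINECONE_KEY and OPENAI_API_KEY environment variables are set and that a
# Pinecone index named "demo-brain" already exists; the index name, prompts,
# and questions below are placeholders, not values from the real deployment.
if __name__ == "__main__":
    brain = "demo-brain"  # hypothetical index name

    # Only proceed if the requested "brain" (Pinecone index) exists.
    if getBrains(brain):
        # First turn: query the index directly and seed the chat memory.
        response, memory = askQuestion(
            brain=brain,
            question="What topics does this document cover?",
            prompt="Answer concisely. ",
            temperature="0.2",  # strings are accepted; they are cast inside
            maxTokens=256,
        )
        print(response)

        # Follow-up turn: reuse the memory so the agent sees the chat history.
        if memory is not False:
            answer, memory = runAgent(
                brainName=brain,
                memory=memory,
                question="Can you expand on the first topic?",
                temperature="0.2",
                maxTokens=256,
            )
            print(answer)
    else:
        print(f"No Pinecone index named '{brain}' was found.")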