from llama_index import GPTPineconeIndex, LLMPredictor, ServiceContext
from llama_index import QuestionAnswerPrompt
from llama_index.langchain_helpers.agents import (
    IndexToolConfig,
    LlamaIndexTool,
    LlamaToolkit,
    create_llama_chat_agent,
)
from langchain import OpenAI
from langchain.chains.conversation.memory import ConversationBufferMemory
import pinecone
import os

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

pinecone_key = os.environ['PINECONE_KEY']


def askQuestion(brain, question, prompt, temperature, maxTokens):
    """Run a one-shot query against the Pinecone-backed index and seed the chat memory."""
    temperature = float(temperature)
    maxTokens = int(maxTokens)  # cast in case the value arrives as a string
    finalQuestion = prompt + question
    print(finalQuestion)
    print(temperature, maxTokens)
    # print(type(temperature))
    # print(type(maxTokens))

    # Pinecone index names are lowercase, so normalise the brain name.
    Brain_Name = brain.lower()
    print(Brain_Name)

    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    pineconeindex = pinecone.Index(Brain_Name)
    pineconeindex.describe_index_stats()

    # Attach to the existing Pinecone index; no documents are inserted here.
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)
    # index = GPTSimpleVectorIndex.load_from_disk('index.json')

    # For Q&A set this value to 4; for content generation set it between 7 and 10.
    data_chunks = 5

    QA_PROMPT_TMPL = (
        "We have provided context information below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given this information, please answer the question at the end of this main prompt: "
        + prompt + " {query_str}\n"
    )
    QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)

    query = question
    # relevant info from brain goes here
    info = ["pdf"]

    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=temperature, model_name="text-davinci-003", max_tokens=maxTokens))
    service_context_gpt4 = ServiceContext.from_defaults(
        llm_predictor=llm_predictor)

    response = index.query(
        query,
        service_context=service_context_gpt4,
        similarity_top_k=data_chunks,
        response_mode="compact",
        text_qa_template=QA_PROMPT,
    )
    print(question)
    print(response)

    # If the index produced no answer, signal the caller that no chat was started.
    if response.response is None:
        return response, False

    # Seed the conversation memory with the first question/answer pair.
    memory = ConversationBufferMemory(memory_key="chat_history")
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(response.response)

    return response, memory


def getBrains(name):
    """Return True if a Pinecone index with the given (lowercased) name exists."""
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    active_indexes = pinecone.list_indexes()
    print(active_indexes)
    name = name.lower()
    return name in active_indexes


def runAgent(brainName, memory, question, temperature, maxTokens):
    """Continue the conversation with a chat agent that can fall back to the index."""
    if memory is False:
        return "Please initiate the chat first."
    temperature = float(temperature)
    maxTokens = int(maxTokens)

    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    # Lowercase to match the index naming used in askQuestion/getBrains.
    pineconeindex = pinecone.Index(brainName.lower())
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)

    # memory = ConversationBufferMemory(memory_key="chat_history")
    print(memory.chat_memory)

    llm = OpenAI(temperature=temperature,
                 model_name="text-davinci-003", max_tokens=maxTokens)

    tool_config = IndexToolConfig(
        index=index,
        name="Vector Index",
        description="Use this tool if you can't find the required information in the previous message history",
        index_query_kwargs={"similarity_top_k": 4, "response_mode": "compact"},
        tool_kwargs={"return_direct": True},
    )
    toolkit = LlamaToolkit(index_configs=[tool_config])

    agent_chain = create_llama_chat_agent(
        toolkit, llm, memory=memory, verbose=True)

    response = agent_chain.run(question)
    print(memory.chat_memory)
    return response, memory
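

# --- Example usage (illustrative sketch, not part of the original flow) ---
# Minimal end-to-end walk-through of the three helpers above. It assumes the
# PINECONE_KEY and OPENAI_API_KEY environment variables are set and that a
# Pinecone index named "demo-brain" already exists; the index name, prompts,
# and questions below are placeholders, not values from the real deployment.
if __name__ == "__main__":
    brain = "demo-brain"  # hypothetical index name

    # Only proceed if the requested "brain" (Pinecone index) exists.
    if getBrains(brain):
        # First turn: query the index directly and seed the chat memory.
        response, memory = askQuestion(
            brain=brain,
            question="What topics does this document cover?",
            prompt="Answer concisely. ",
            temperature="0.2",  # strings are accepted; they are cast inside
            maxTokens=256,
        )
        print(response)

        # Follow-up turn: reuse the memory so the agent sees the chat history.
        if memory is not False:
            answer, memory = runAgent(
                brainName=brain,
                memory=memory,
                question="Can you expand on the first topic?",
                temperature="0.2",
                maxTokens=256,
            )
            print(answer)
    else:
        print(f"No Pinecone index named '{brain}' was found.")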