chatgpt-for-pdfs-without-chat-history

Runtime error

File size: 6,348 Bytes

2b0a298
 
 
7aaadbf
2b0a298
 
 
 
 
92a96a7
2b0a298
5ac4db8
 
65e2d88
2b0a298
 
 
 
 
 
 
 
 
 
 
 
e977a64
2b0a298
5ac4db8
 
 
 
 
 
5f4364a
5ac4db8
 
c2c3e9b
65e2d88
2b0a298
 
 
65e2d88
2b0a298
fe57073
a4ce5ba
fe57073
 
 
 
 
 
e7cbe60
 
 
 
 
a4ce5ba
fe57073
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7cbe60
 
a4ce5ba
c76b601
 
 
a4ce5ba
01c0018
fe57073
 
a4ce5ba
fe57073
947e828
ad584a6
947e828
ad584a6
947e828
00e9bd9
ad584a6
947e828
 
fe57073
 
 
65e2d88
1d13553
 
5f4364a
472ec1c
 
06a82b5
44ffff3
 
 
 
 
 
 
868f6c9
44ffff3
618da7e
44ffff3
 
 
06a82b5
9121213
 
 
06a82b5
fdb11b8
874e789
449a709
 
 
7aaadbf
868f6c9
fe57073
 
 
7aaadbf
fe57073
06a82b5
449a709
472ec1c
7aaadbf
fdb11b8
7823c55
65e2d88
fe57073
 
fdb11b8
4aabad0
7823c55
2b0a298
fdb11b8
65e2d88

import gradio as gr
import os
import time
import pandas as pd


from langchain.document_loaders import OnlinePDFLoader # for loading the PDF
from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
from langchain.vectorstores import Chroma # for the vectorization part
from langchain.chains import RetrievalQA # for conversing with chatGPT
from langchain.chat_models import ChatOpenAI # the LLM model we'll use (ChatGPT)
from langchain import PromptTemplate

def load_pdf_and_generate_embeddings(pdf_doc, open_ai_key):
    """Index an uploaded PDF and build the question-answering chain.

    The PDF is loaded and split, embedded with ``OpenAIEmbeddings`` into a
    Chroma vector store, and wrapped in a ``RetrievalQA`` chain. The chain is
    published as the module-level ``pdf_qa`` so the answer callbacks can use it.

    Args:
        pdf_doc: Uploaded file object; its ``name`` attribute is handed to the
            PDF loader.
        open_ai_key: OpenAI API key entered by the user.

    Returns:
        A status string: ``"Ready"`` once the chain is built, otherwise a
        message asking for the missing input.
    """
    global pdf_qa

    # Validate the *parameter*. The previous code tested ``openai_key``, which
    # is the module-level Gradio widget and therefore never None, so a missing
    # key was never reported. A blank string is rejected as well.
    if open_ai_key is None or not open_ai_key.strip():
        return "Please provide an OpenAI API key"
    if pdf_doc is None:
        return "Please upload a PDF"

    # The langchain OpenAI classes below are expected to read the key from the
    # environment, hence it is exported before they are constructed.
    os.environ['OPENAI_API_KEY'] = open_ai_key.strip()

    # Load the pdf file and split it into documents
    loader = OnlinePDFLoader(pdf_doc.name)
    pages = loader.load_and_split()

    # OpenAIEmbeddings is responsible for generating embeddings for text
    embeddings = OpenAIEmbeddings()

    # Build the Chroma vector store from the documents and the embeddings
    # instance (no persist directory is passed here).
    vectordb = Chroma.from_documents(pages, embedding=embeddings)

    # NOTE: the indentation inside this literal is part of the prompt text and
    # is kept exactly as it was.
    prompt_template = """Use the following pieces of context to answer the question at the end. If you do not know the answer, just return the question followed by N/A. If you encounter a date, return it in mm/dd/yyyy format.

        {context}

        Question: {question}
        Return the key fields from the question followed by : and the answer :"""
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain_type_kwargs = {"prompt": PROMPT}

    # Finally, create the bot: a "stuff" RetrievalQA chain over the single
    # best-matching chunk (k=1), answering with gpt-4 at temperature 0.
    pdf_qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0, model_name="gpt-4"),
        chain_type="stuff",
        retriever=vectordb.as_retriever(search_kwargs={"k": 1}),
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=False,
    )

    return "Ready"
        

# Catalogue of (field label, question) pairs for each supported document type.
_PREDEFINED_QUESTIONS = {
    # Relevant fields of a Deed of Trust (DOT) document
    "Deed of Trust": (
        ("Lender", "what is the Lender's Name?"),
        ("Loan Number", "what is the Loan Number?"),
    ),
    # Relevant fields of a TRANSMITTAL SUMMARY document
    "Transmittal Summary": (
        ("Borrower", "who is the Borrower?"),
        ("Property Address", "what is the Property Address?"),
        ("Co-Borrower", "who is the Co-Borrower?"),
        ("Loan Term", "what is the loan term?"),
        ("Base Income", "What is the base income?"),
        ("Original Loan Amount", "what is the original loan amount?"),
        ("Initial P&I Payment", "what is the Initial P&I Payment?"),
        ("Borrower SSN", "what is the borrower's SSN?"),
        ("Co-Borrower SSN", "what is the co-borrower's SSN?"),
        ("Number of units", "Number of units?"),
        ("Seller", "who is the seller?"),
        ("Signed Date", "Document signed date?"),
    ),
}

# Only the first entries of a catalogue are sent per run. This matches the
# previous behaviour (two questions per document type) and keeps the number of
# gpt-4 calls unchanged; raise it to enable the remaining questions.
_QUESTIONS_PER_RUN = 2


def answer_predefined_questions(document_type):
    """Ask the predefined questions for ``document_type`` and report the answers.

    Args:
        document_type: Either ``"Deed of Trust"`` or ``"Transmittal Summary"``.

    Returns:
        One line per question in the form
        ``Field Name:<field>; Question sent to gpt-4: <question>; Response from
        gpt-4:<answer>``, joined with newlines. If the document type is not
        recognised, or no PDF has been loaded yet, a short prompt message is
        returned instead.
    """
    catalogue = _PREDEFINED_QUESTIONS.get(document_type)
    if catalogue is None:
        return "Please choose your Document Type"

    # ``pdf_qa`` only exists as a module global after a PDF has been loaded;
    # look it up defensively instead of failing with a NameError.
    qa_chain = globals().get("pdf_qa")
    if qa_chain is None:
        return "Please load a PDF first"

    # Iterating the pairs directly replaces the index-based while loop, whose
    # counter was never advanced and therefore never terminated.
    records = []
    for field, question in catalogue[:_QUESTIONS_PER_RUN]:
        answer = qa_chain.run(question)
        records.append(
            f"Field Name:{field}; Question sent to gpt-4: {question}; Response from gpt-4:{answer}"
        )
    return "\n".join(records)
        

    
def answer_query(query):
    """Answer a free-form user question against the loaded PDF.

    Args:
        query: The question text typed by the user.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``Field``,
        ``Question`` and ``Answer``. When no PDF has been loaded yet, the
        ``Answer`` cell carries a prompt to load one instead.
    """
    # ``pdf_qa`` only exists as a module global after a PDF has been loaded.
    qa_chain = globals().get("pdf_qa")
    if qa_chain is None:
        answer = "Please load a PDF first"
    else:
        answer = qa_chain.run(query)

    # Report the real question and the chain's answer; previously the chain
    # result was discarded and a hard-coded sample row was returned.
    # NOTE(review): the field label is still the fixed "Location" used by the
    # original row - confirm whether a generic label is wanted here.
    return pd.DataFrame({"Field": ['Location'], "Question": [query], "Answer": [answer]})

# Custom stylesheet passed to gr.Blocks: caps the element with id
# "col-container" at 700px and centres it horizontally.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# HTML banner (heading plus short usage instructions) rendered at the top of
# the page through gr.HTML.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chatbot for PDFs - GPT-4</h1>
    <p style="text-align: center;">Upload a .PDF, click the "Load PDF" button, <br />
    Wait for the Status to show Ready, start typing your questions. <br />
    The app is built on GPT-4 and leverages PromptTemplate</p>
</div>
"""

# Build the Gradio UI: a banner, the API-key / PDF inputs, and three actions
# (load the PDF, run the predefined question set, ask a free-form question).
with gr.Blocks(css=css,theme=gr.themes.Monochrome()) as demo:
    # Banner column; its elem_id is the one targeted by the custom CSS.
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
    
    with gr.Column():
        # NOTE: `openai_key` is the Textbox widget itself; the click handler
        # receives its current value as the `open_ai_key` argument.
        openai_key = gr.Textbox(label="Your GPT-4 OpenAI API key", type="password")
        pdf_doc = gr.File(label="Load a pdf",file_types=['.pdf'],type='file')
        
        # Row 1: read-only status box and the button that triggers indexing.
        # NOTE(review): `.style(...)` and `type='file'` look tied to an older
        # Gradio API - confirm against the pinned Gradio version.
        with gr.Row():
            status = gr.Textbox(label="Status", placeholder="", interactive=False)
            load_pdf = gr.Button("Load PDF").style(full_width=False)
            
        # Row 2: document-type selector, output box and trigger for the
        # predefined question set. The radio choices must match the strings
        # compared in answer_predefined_questions.
        with gr.Row():
            document_type = gr.Radio(['Deed of Trust', 'Transmittal Summary'], label="Select the Document Type")
            answers = gr.Textbox(label="Answers to Predefined Question set")
            answers_for_predefined_question_set = gr.Button("Get Answers to Pre-defined Question set").style(full_width=False)
              
        # Row 3: free-form question, tabular answer and its trigger.
        # NOTE(review): `input` shadows the Python built-in of the same name
        # at module level.
        with gr.Row():
            input = gr.Textbox(label="Type in your question")
            output = gr.Dataframe(label="Answer")
            submit_query = gr.Button("Submit your own question").style(full_width=False)
        
 
    # Wire each button to its callback: (function, inputs, outputs).
    load_pdf.click(load_pdf_and_generate_embeddings, inputs=[pdf_doc, openai_key], outputs=status)

    answers_for_predefined_question_set.click(answer_predefined_questions, document_type, answers)
        
    submit_query.click(answer_query,input,output)


# Start the Gradio app; this runs at import time, as the module has no
# `__main__` guard.
demo.launch()