Spaces:
Runtime error
Runtime error
File size: 8,261 Bytes
2b0a298 7aaadbf 2b0a298 92a96a7 2b0a298 5ac4db8 172abd0 2b0a298 f47c9bf 172abd0 f47c9bf 67fa155 5900010 172abd0 5900010 f47c9bf 0c53820 f47c9bf 2b0a298 c28a184 2b0a298 5ac4db8 c228e38 5ac4db8 c228e38 5ac4db8 66df6a8 65e2d88 2b0a298 66df6a8 65e2d88 2b0a298 fe57073 a4ce5ba fe57073 e12fbca 551a0c0 b68b73b 551a0c0 b68b73b e12fbca fe57073 e12fbca 551a0c0 66df6a8 551a0c0 66df6a8 551a0c0 66df6a8 e12fbca a4ce5ba c76b601 e12fbca 551a0c0 fe57073 a4ce5ba 7800652 551a0c0 e12fbca fe57073 65e2d88 1d13553 551a0c0 06a82b5 44ffff3 66df6a8 618da7e 44ffff3 06a82b5 9121213 06a82b5 fdb11b8 874e789 f6709bf 449a709 37b4a4e 868f6c9 fe57073 551a0c0 6755c1e fe57073 06a82b5 449a709 551a0c0 c575982 fdb11b8 7823c55 c28a184 fe57073 fdb11b8 4aabad0 7823c55 2b0a298 fdb11b8 65e2d88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
import gradio as gr
import os
import time
import pandas as pd
from langchain.document_loaders import OnlinePDFLoader # for loading the PDF
from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
from langchain.vectorstores import Chroma # for the vectorization part
from langchain.chains import RetrievalQA # for conversing with chatGPT
from langchain.chat_models import ChatOpenAI # the LLM model we'll use (ChatGPT)
from langchain import PromptTemplate
# Prompt sent to the chat model. {context} and {question} are the two input
# variables declared on the PromptTemplate built from this text.
_QA_PROMPT_TEMPLATE = (
    "Use the following pieces of context to answer the question at the end. If you do not know the answer, just return N/A. If you encounter a date, return it in mm/dd/yyyy format.\n"
    "{context}\n"
    "Question: {question}\n"
    "Return just the answer :"
)


def _select_pages(pages, relevant_pages):
    """Return the pages named in *relevant_pages*, or every page if none match.

    *relevant_pages* is a comma-separated string of 1-based page numbers.
    Entries that are not plain decimal numbers, or that fall outside the
    document, are ignored.
    """
    selected = []
    for token in (relevant_pages or "").split(","):
        # Tolerate "1, 2, 3": surrounding blanks must not disqualify a number.
        token = token.strip()
        # isdecimal() only accepts characters that int() can parse.
        if not token.isdecimal():
            continue
        page_index = int(token) - 1
        if 0 <= page_index < len(pages):
            selected.append(pages[page_index])
    # When none of the supplied page numbers exist in the PDF (or none were
    # supplied at all), revert to using the entire PDF.
    return selected if selected else list(pages)


def load_pdf_and_generate_embeddings(pdf_doc, open_ai_key, relevant_pages):
    """Index a PDF in a vector store and build the global question-answering chain.

    Args:
        pdf_doc: Uploaded file object; its ``name`` attribute is handed to
            ``OnlinePDFLoader``.
        open_ai_key: OpenAI API key; exported as ``OPENAI_API_KEY``.
        relevant_pages: Optional comma-separated 1-based page numbers to
            index. Blank, or no valid page number, means the whole PDF.

    Returns:
        ``"Ready"`` once the chain is built, otherwise a message describing
        which input is missing.

    Side effects:
        Sets ``os.environ['OPENAI_API_KEY']`` and rebinds the module-level
        ``pdf_qa`` that the answer functions use.
    """
    global pdf_qa

    # Check the *parameter*, and treat an empty string like a missing key.
    if not open_ai_key:
        return "Please provide an OpenAI gpt-4 API key"
    # Without an upload there is no file path to load.
    if pdf_doc is None:
        return "Please upload a PDF file first"

    os.environ['OPENAI_API_KEY'] = open_ai_key

    # Load the pdf file and split it into pages.
    pages = OnlinePDFLoader(pdf_doc.name).load_and_split()

    # OpenAIEmbeddings is responsible for generating embeddings for the text.
    embeddings = OpenAIEmbeddings()

    # The vector store is built from the chosen pages and the embeddings instance.
    vectordb = Chroma.from_documents(
        _select_pages(pages, relevant_pages),
        embedding=embeddings,
    )

    # Finally, create the bot using the RetrievalQA class.
    prompt = PromptTemplate(
        template=_QA_PROMPT_TEMPLATE,
        input_variables=["context", "question"],
    )
    pdf_qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0, model_name="gpt-4"),
        chain_type="stuff",
        retriever=vectordb.as_retriever(search_kwargs={"k": 5}),
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=False,
    )
    return "Ready"
# For each supported document type: (field label, question put to the model),
# in the order the rows appear in the result table.
_PREDEFINED_QUESTIONS = {
    # Questions around the relevant fields of a Deed of Trust (DOT) document.
    "Deed of Trust": [
        ("Loan Number", "what is the Loan Number?"),
        ("Borrower", "Who is the Borrower?"),
        ("Case Number", "what is the Case Number?"),
        ("MIN Number", "what is the Mortgage Identification number?"),
        ("Signed Date", "DOT signed date?"),
        ("Lender", "Who is the Lender?"),
        ("VA/FHA Number", "what is the VA/FHA Number?"),
        ("Co-Borrower", "Who is the Co-Borrower?"),
        ("Property Type", "What is the property type - single family, multi family?"),
        ("Property Address", "what is the Property Address?"),
        ("Property County", "In what County is the property located?"),
        ("Electronic Recording Date", "what is the Electronically recorded date"),
    ],
    # Questions around the relevant fields of a TRANSMITTAL SUMMARY document.
    "Transmittal Summary": [
        ("Borrower", "Who is the Borrower?"),
        ("Property Address", "what is the Property Address?"),
        ("Loan Term", "what is the Loan Term?"),
        ("Base Income", "What is the Base Income?"),
        ("Borrower's SSN", "what is the Borrower's SSN?"),
        ("Co-Borrower", "Who is the Co-Borrower?"),
        ("Original Loan Amount", "What is the Original Loan Amount?"),
        ("Initial P&I payment", "What is the Initial P&I payment?"),
        ("Co-Borrower’s SSN", "What is the Co-Borrower's SSN?"),
        ("Units#", "Number of units?"),
        ("Seller", "Who is the Seller?"),
        ("Signed Date", "Document signed date?"),
    ],
}


def answer_predefined_questions(document_type):
    """Run the predefined question set for *document_type* against the loaded PDF.

    Args:
        document_type: ``"Deed of Trust"`` or ``"Transmittal Summary"``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Field``,
        ``Question to gpt-4`` and ``Response from gpt-4`` (one row per
        question), or a message string when the document type is not
        recognised or no question-answering chain has been built yet.
    """
    questions = _PREDEFINED_QUESTIONS.get(document_type)
    if questions is None:
        # NOTE(review): this string is routed to a Dataframe output in the UI;
        # confirm the component displays it as intended.
        return "Please choose your Document Type"

    # pdf_qa only exists as a module global after a PDF has been loaded;
    # report that instead of failing with a NameError.
    qa_chain = globals().get("pdf_qa")
    if qa_chain is None:
        return "Please upload a PDF and wait for the Status to show Ready"

    fields = [field for field, _ in questions]
    queries = [query for _, query in questions]
    # One model call per question, in table order.
    responses = [qa_chain.run(query) for query in queries]

    return pd.DataFrame({
        "Field": fields,
        "Question to gpt-4": queries,
        "Response from gpt-4": responses,
    })
def answer_query(query):
    """Answer a free-form question about the loaded PDF.

    Args:
        query: The question text to pass to the question-answering chain.

    Returns:
        The chain's answer, or a message string when no chain has been
        built yet.
    """
    # pdf_qa only exists as a module global after a PDF has been loaded;
    # report that instead of failing with a NameError.
    qa_chain = globals().get("pdf_qa")
    if qa_chain is None:
        return "Please upload a PDF and wait for the Status to show Ready"
    return qa_chain.run(query)
# Custom stylesheet passed to gr.Blocks: caps the width of the element with
# id "col-container" at 700px and centres it horizontally.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
# HTML header (page title and usage instructions) rendered through gr.HTML.
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chatbot for PDFs - GPT-4</h1>
<p style="text-align: center;">Upload a .PDF, click the "Upload PDF and generate embeddings" button, <br />
Wait for the Status to show Ready. You can chose to get answers to the pre-defined question set OR ask your own question <br />
The app is built on GPT-4 and leverages PromptTemplate</p>
</div>
"""
# Build the Gradio page: header, inputs for key / PDF / page list, and three
# action rows (load the PDF, run the predefined questions, ask a free question).
# NOTE(review): the row/column nesting below was reconstructed from a copy of
# this file that had lost its indentation — confirm it matches the intended layout.
with gr.Blocks(css=css,theme=gr.themes.Monochrome()) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            openai_key = gr.Textbox(label="Your GPT-4 OpenAI API key", type="password")
            pdf_doc = gr.File(label="Load a pdf",file_types=['.pdf'],type='file')
            relevant_pages = gr.Textbox(label="*Optional - List comma separated page numbers to load or leave this field blank to use the entire PDF")

            # Status line plus the button that indexes the uploaded PDF.
            with gr.Row():
                status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Upload PDF and generate embeddings").style(full_width=False)

            # Predefined question set, chosen by document type.
            with gr.Row():
                document_type = gr.Radio(['Deed of Trust', 'Transmittal Summary'], label="Select the Document Type")
                answers = gr.Dataframe(label="Answers to Predefined Question set")
                answers_for_predefined_question_set = gr.Button("Get gpt-4 answers to pre-defined question set").style(full_width=False)

            # Free-form question and its answer. The boxes are deliberately not
            # named `input`/`output`, so the builtin `input` is not shadowed.
            with gr.Row():
                question_box = gr.Textbox(label="Type in your question")
                answer_box = gr.Textbox(label="Answer")
                submit_query = gr.Button("Submit your own question to gpt-4").style(full_width=False)

    # Event wiring: each button calls its handler with the listed inputs and
    # writes the handler's return value to the listed output component.
    load_pdf.click(load_pdf_and_generate_embeddings, inputs=[pdf_doc, openai_key, relevant_pages], outputs=status)
    answers_for_predefined_question_set.click(answer_predefined_questions, document_type, answers)
    submit_query.click(answer_query,question_box,answer_box)

demo.launch()
|