chatgpt-for-pdfs-without-chat-history

Runtime error

File size: 3,236 Bytes

2b0a298
 
 
 
 
 
 
 
92a96a7
2b0a298
5ac4db8
 
 
2b0a298
 
d06abb7
2b0a298
 
 
 
 
 
 
 
 
 
 
 
e977a64
2b0a298
5ac4db8
 
 
 
 
 
 
 
 
 
 
2b0a298
 
 
 
 
 
1d13553
 
de0c29d
06a82b5
 
 
 
 
 
 
 
 
 
 
fdb11b8
874e789
449a709
 
 
 
06a82b5
449a709
 
 
fdb11b8
7823c55
d06abb7
fdb11b8
4aabad0
7823c55
2b0a298
85d6284
fdb11b8

import gradio as gr
import os
import time


from langchain.document_loaders import OnlinePDFLoader # for loading the pdf
from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
from langchain.vectorstores import Chroma # for the vectorization part
from langchain.chains import RetrievalQA # for conversing with chatGPT
from langchain.chat_models import ChatOpenAI # the LLM model we'll use (ChatGPT)
from langchain import PromptTemplate




def load_doc(pdf_doc, open_ai_key):
    """Index an uploaded PDF and build the global question-answering chain.

    On success the retrieval chain is published as the module-level global
    ``pdf_qa`` so that the query handler can use it.

    Args:
        pdf_doc: Uploaded file object; its ``name`` attribute is handed to
            the PDF loader as the location of the document.
        open_ai_key: OpenAI API key typed by the user. It is exported as the
            ``OPENAI_API_KEY`` environment variable.

    Returns:
        A status string for the UI: ``"Ready"`` once the chain is built, or
        a prompt asking for the missing API key / PDF file.
    """
    global pdf_qa

    # Validate the *argument*. The module-level ``openai_key`` name is the
    # Gradio textbox widget, which is never None, and an empty textbox is
    # delivered as "" rather than None.
    if not open_ai_key or not open_ai_key.strip():
        return "Please provide an OpenAI API key"
    if pdf_doc is None:
        return "Please upload a PDF file"

    os.environ['OPENAI_API_KEY'] = open_ai_key.strip()

    # Load the pdf file and split it into page-level documents.
    loader = OnlinePDFLoader(pdf_doc.name)
    pages = loader.load_and_split()

    # OpenAIEmbeddings generates the embedding vectors for the text.
    embeddings = OpenAIEmbeddings()

    # Build the vector store from the pages using those embeddings.
    vectordb = Chroma.from_documents(pages, embedding=embeddings)

    # Prompt text is runtime data sent to the model; kept exactly as-is.
    prompt_template = """Use the following pieces of context to answer the question at the end. If you do not know the answer, just return the question followed by N/A. If you encounter a date, return it in mm/dd/yyyy format.

        {context}

        Question: {question}
        Return the key fields from the question followed by the answer :"""
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain_type_kwargs = {"prompt": PROMPT}

    # Finally, create the bot: a "stuff" RetrievalQA chain over the store.
    pdf_qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0, model_name="gpt-4"),
        chain_type="stuff",
        retriever=vectordb.as_retriever(),
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True,
    )

    return "Ready"


def answer_query(query):
    """Answer a question about the currently loaded PDF.

    Args:
        query: The user's question as plain text.

    Returns:
        The answer text produced by the retrieval chain, or a status
        message when no PDF has been loaded yet.
    """
    # ``load_doc`` publishes the chain as the module global ``pdf_qa``;
    # until it has run successfully that name does not exist at all.
    chain = globals().get("pdf_qa")
    if chain is None:
        return "Please load a PDF first"

    # The chain is created with return_source_documents=True, so it has two
    # output keys ("result" and "source_documents"). Chain.run() refuses
    # multi-output chains, so call the chain directly and pick the answer.
    response = chain({"query": query})
    return response["result"]

# Intro banner shown at the top of the page.
html = """
<div style="text-align:center; max-width: 700px;">
    <h1>ChatPDF</h1>
    <p> Upload a PDF File, then click on Load PDF to LangChain <br>
    Once the document has been loaded you can begin chatting with the PDF =)</p>
</div>"""

# Constrain and centre the main column. The rule targets the column through
# its elem_id; declarations are separated by semicolons.
css = """#col-container{max-width:700px; margin-left:auto; margin-right:auto; padding:20px}"""

with gr.Blocks(css=css,theme=gr.themes.Monochrome()) as demo:
    gr.HTML(html)
    with gr.Column(elem_id="col-container"):
        openai_key = gr.Textbox(label="Your GPT-4 OpenAI API key", type="password")
        # NOTE(review): type='file' is what makes ``pdf_doc.name`` available
        # in load_doc; confirm it is still accepted by the installed Gradio
        # version.
        pdf_doc = gr.File(label="Load a pdf",file_types=['.pdf'],type='file')

        with gr.Row():
            status = gr.Textbox(label="Status", placeholder="", interactive=False)
            load_pdf = gr.Button("Load PDF to LangChain")
        with gr.Row():
            # Named query_input / answer_output so the builtin ``input`` is
            # not shadowed at module level.
            query_input = gr.Textbox(label="Type in your question")
            answer_output = gr.Textbox(label="Answer")
        submit_query = gr.Button("Submit")

    # Wire the buttons: build the chain from the upload, then answer queries.
    load_pdf.click(load_doc, inputs=[pdf_doc, openai_key], outputs=status)

    submit_query.click(answer_query, inputs=query_input, outputs=answer_output)


#forcing a save in order to re-build the container.
demo.launch()