File size: 3,890 Bytes
dee6f05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411ee27
 
 
dee6f05
8626380
 
 
dee6f05
 
 
cbb7dce
dee6f05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import warnings

# Silence third-party deprecation/user warnings so they don't clutter the app logs.
warnings.filterwarnings("ignore")
import os, openai, cohere
import gradio as gr
from pathlib import Path
from langchain.document_loaders import PyMuPDFLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Qdrant
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Required configuration — deliberately read with [] so a missing variable
# fails fast with KeyError at startup rather than midway through a request.
COHERE_API_KEY = os.environ["COHERE_API_KEY"]
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
QDRANT_CLUSTER_URL = os.environ["QDRANT_CLUSTER_URL"]
QDRANT_COLLECTION_NAME = os.environ["QDRANT_COLLECTION_NAME"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
# Prompt template file for the QA chain; expected to use the
# {question} and {context} placeholders (see PromptTemplate below).
prompt_file = "prompt_template.txt"


def pdf_loader(pdf_file):
    """Ingest an uploaded PDF and build the global RetrievalQA chain.

    Pipeline: extract pages -> chunk text -> embed with Cohere -> upload
    to Qdrant -> wire a ChatOpenAI RetrievalQA chain into the module-level
    ``qa`` used by :func:`chat`.

    Implemented as a generator so Gradio streams each yielded status
    message into the status textbox while the work proceeds.

    Args:
        pdf_file: Gradio file object; ``pdf_file.name`` is the temp path
            of the uploaded PDF on disk.

    Yields:
        str: Human-readable progress/status messages.
    """
    yield "Extracting contents from PDF document..."

    pages = PyMuPDFLoader(pdf_file.name).load()
    # Re-wrap each page so metadata["source"] records the 1-based page
    # number (the original loop's str(i + 1)); used for source citation.
    docs = [
        Document(page_content=page.page_content, metadata={"source": str(page_num)})
        for page_num, page in enumerate(pages, start=1)
    ]

    yield "Splitting contents into chunks of text..."
    # Token-based splitting sized for the gpt-3.5-turbo context window;
    # a small overlap preserves continuity across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-3.5-turbo",
        chunk_size=1024,
        chunk_overlap=64,
        separators=["\n\n", "\n", " "],
    )

    docs_splitter = text_splitter.split_documents(docs)
    cohere_embeddings = CohereEmbeddings(model="large", cohere_api_key=COHERE_API_KEY)

    yield "Uploading chunks of text into Qdrant..."
    qdrant = Qdrant.from_documents(
        docs_splitter,
        cohere_embeddings,
        url=QDRANT_CLUSTER_URL,
        prefer_grpc=True,
        api_key=QDRANT_API_KEY,
        collection_name=QDRANT_COLLECTION_NAME,
    )

    # Read the prompt template fresh on every ingest so edits to the file
    # take effect without restarting the app.
    prompt_template = Path(prompt_file).read_text()

    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["question", "context"]
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo", temperature=0, openai_api_key=OPENAI_API_KEY
    )
    # NOTE(review): the chain is published via a module-level global so the
    # separate `chat` handler can reach it — Gradio handlers share no state.
    global qa
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=qdrant.as_retriever(),
        chain_type_kwargs={"prompt": PROMPT},
    )

    yield "Success! You can now click on the 'AI Assistant' tab to interact with your document"


def chat(chat_history, query):
    """Answer ``query`` via the global ``qa`` chain, streaming the reply.

    Yields the chat history one character at a time so the Gradio chatbot
    shows a progressive "typing" effect. Requires ``pdf_loader`` to have
    run first (it creates the module-level ``qa``).

    Args:
        chat_history: List of (user, bot) message pairs from gr.Chatbot.
        query: The user's question.

    Yields:
        Updated history list with the partial response appended as the
        newest (query, partial_answer) pair.
    """
    response = qa.run(query)
    progressive_response = ""

    # `response` is already a str, so iterate it directly — the original
    # "".join(response) wrapper and the `+ ""` suffix were both no-ops.
    for ch in response:
        progressive_response += ch
        yield chat_history + [(query, progressive_response)]


# ---- Gradio UI: two tabs — PDF ingestion, then chat over the indexed doc ----
with gr.Blocks() as demo:
    gr.HTML(
        """<h1>Welcome to AI PDF Assistant</h1>"""
    )
    gr.Markdown(
        "AI Assistant for PDF documents. Upload your pdf document, click 'Process PDF docs' and wait for success confirmation message.<br>"
        "After success confirmation, click on the 'AI Assistant' tab to interact with your document.<br>"
        "Type your query, and  hit enter. Click on 'Clear Chat History' to delete all previous conversations."
    )

    # Tab 1: upload a PDF and run the ingest pipeline; pdf_loader is a
    # generator, so its yielded status strings stream into text_output.
    with gr.Tab("Upload/Process PDF documents"):
        text_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
        text_output = gr.Textbox(label="Status...")
        text_button = gr.Button("Process PDF docs!")
        text_button.click(pdf_loader, text_input, text_output)

    # Tab 2: chat with the processed document (requires Tab 1 to succeed
    # first, since `chat` relies on the global `qa` built by pdf_loader).
    with gr.Tab("AI Assistant"):
        chatbot = gr.Chatbot()
        query = gr.Textbox(
            label="Type your query here, then press 'enter' and scroll up for response"
        )
        clear = gr.Button("Clear Chat History!")
        query.submit(chat, [chatbot, query], chatbot)
        # Resetting the chatbot component to None clears displayed history;
        # queue=False so the clear happens immediately, bypassing the queue.
        clear.click(lambda: None, None, chatbot, queue=False)


# queue() is required for generator-based handlers (streaming updates).
demo.queue().launch()