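"""Smart Assistant: a Gradio app for question answering over an uploaded PDF.

The app indexes the PDF into a Chroma vector store with BGE embeddings, then
answers questions with an OpenAI chat model using multi-query retrieval and
contextual compression. A sketch of the expected setup (package names are the
usual ones for these imports, not pinned here): install gradio, langchain,
langchain-community, chromadb, pypdf, sentence-transformers and openai, and
export OPENAI_API_KEY before launching.
"""
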
import gradio as gr

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain_community.vectorstores import Chroma

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate

# Chat model used both for answering questions and for query expansion /
# contextual compression; reads the OpenAI API key from OPENAI_API_KEY.
chat = ChatOpenAI()

# Local embedding model (CPU); normalized embeddings are recommended for the
# BGE family of models.
embedding_function = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

def add_docs(path):
    """Load a PDF, split it into overlapping chunks, and index it in Chroma."""
    loader = PyPDFLoader(file_path=path)
    docs = loader.load_and_split(
        text_splitter=RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len,
            is_separator_regex=False,
        )
    )
    # Persist the vector store to disk so answer_query can reopen it per question.
    db = Chroma.from_documents(
        documents=docs,
        embedding=embedding_function,
        persist_directory="output/general_knowledge",
    )
    return db


def answer_query(message, chat_history):
    """Answer a question using only the content indexed from the uploaded PDF."""
    # Retrieval pipeline: vector search -> multi-query expansion -> LLM-based
    # contextual compression of the retrieved chunks.
    base_compressor = LLMChainExtractor.from_llm(chat)
    db = Chroma(persist_directory="output/general_knowledge", embedding_function=embedding_function)
    base_retriever = db.as_retriever()
    mq_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=chat)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=base_compressor, base_retriever=mq_retriever
    )

    matched_docs = compression_retriever.get_relevant_documents(query=message)

    # Concatenate the compressed chunks into a single context string.
    context = "\n\n".join(doc.page_content for doc in matched_docs)

    template = """
    Answer the following question using only the context given below in triple backticks; do not use any other information.
    If you can't answer the question with the given context, return an empty string ('').

    Context: ```{context}```
    ----------------------------
    Question: {query}
    ----------------------------
    Answer: """

    human_message_prompt = HumanMessagePromptTemplate.from_template(template=template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(query=message, context=context)
    response = chat.invoke(prompt.to_messages()).content

    # Append the (question, answer) pair and clear the input textbox.
    chat_history.append((message, response))
    return "", chat_history
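
# Minimal programmatic sketch (assumes a PDF has already been indexed via
# add_docs and OPENAI_API_KEY is set; "sample.pdf" is a placeholder path):
#
#   add_docs("sample.pdf")
#   _, history = answer_query("What is the document about?", [])
#   print(history[-1][1])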



# Gradio UI: a file uploader that triggers indexing, plus a chatbot with a
# textbox for questions.
with gr.Blocks() as demo:
    gr.HTML("<h1 align='center'>Smart Assistant</h1>")
    gr.HTML("<h2 align='center'>Upload any PDF and ask your questions.</h2>")

    with gr.Row():
        upload_files = gr.File(label="Upload a PDF", file_types=[".pdf"], file_count="single")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Enter your question here")

    # Index the uploaded PDF; the vector store is persisted to disk and
    # reopened inside answer_query.
    upload_files.upload(add_docs, upload_files)
    # answer_query returns ("", updated_history), which clears the textbox
    # and refreshes the chatbot.
    msg.submit(answer_query, [msg, chatbot], [msg, chatbot])


if __name__ == "__main__":
    demo.launch()