File size: 3,077 Bytes
baf5ad5
 
e9ffa52
baf5ad5
 
 
e9ffa52
14422bb
baf5ad5
e9ffa52
 
 
baf5ad5
 
 
 
e9ffa52
baf5ad5
e9ffa52
baf5ad5
 
 
 
 
 
 
 
 
f856b07
baf5ad5
 
 
 
 
e9ffa52
baf5ad5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9ffa52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baf5ad5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
import os
from langchain import PromptTemplate, LLMChain
from langchain_together import Together
import pdfplumber

# The Together API key is taken from the process environment (for example a
# deployment secret) instead of being embedded in the source file.
# SECURITY: a key used to be hard-coded at this point; it has been exposed in
# source control and should be revoked and replaced.
if not os.environ.get('TOGETHER_API_KEY'):
    import warnings

    warnings.warn(
        "TOGETHER_API_KEY is not set; export it before asking questions, "
        "otherwise the Together client cannot authenticate.",
        RuntimeWarning,
    )

def extract_text_from_pdf(pdf_file, max_pages=16):
    """Return the text of the leading pages of a PDF.

    Args:
        pdf_file: Path or file object passed straight to ``pdfplumber.open``.
        max_pages: Maximum number of pages to read, counted from the first
            page. ``None`` reads every page.

    Returns:
        The text of each page read, with a newline appended after every
        page. A page that yields no text contributes only its newline.
    """
    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for index, page in enumerate(pdf.pages):
            if max_pages is not None and index >= max_pages:
                break
            # extract_text() can come back as None for a page without
            # extractable text (older pdfplumber releases do this), which
            # would make string concatenation raise TypeError.
            page_texts.append(page.extract_text() or "")
    return "".join(f"{page_text}\n" for page_text in page_texts)

def _clean_response(raw_text):
    """Post-process raw model output into the string shown to the user."""
    # Remove every occurrence of the literal "assistant" (presumably a leaked
    # chat-role marker -- TODO confirm against real model output).
    without_marker = (raw_text or "").replace("assistant", "")

    # Keep only the first occurrence of each word, compared case-insensitively.
    # NOTE(review): this drops later legitimate repeats too, e.g.
    # "the cat and the dog" becomes "the cat and dog"; confirm that is intended.
    seen = set()
    kept_words = []
    for word in without_marker.split():
        key = word.lower()
        if key not in seen:
            seen.add(key)
            kept_words.append(word)
    cleaned = ' '.join(kept_words)

    if not cleaned:
        # Nothing usable came back; use the fallback wording the prompt itself
        # asks the model for rather than returning a bare ".".
        return "I don't know."
    # Only add a full stop when the answer is not already terminated.
    if not cleaned.endswith(('.', '!', '?')):
        cleaned += '.'
    return cleaned


def Bot(text, question):
    """Answer ``question`` from ``text`` using the Together-hosted Llama model.

    Args:
        text: Context the answer must be based on (the extracted document text).
        question: The user's question.

    Returns:
        The cleaned model answer, or a string starting with
        "Error in generating response:" if building the chain or calling the
        model raised an exception.
    """
    chat_template = """
    Based on the provided context: {text}
    Please answer the following question: {Questions}
    Only provide answers that are directly related to the context. If the question is unrelated, respond with "I don't know".
    """

    try:
        # Chain construction happens inside the try so that configuration
        # problems (such as a missing API key) are reported the same way as
        # request failures instead of escaping as an unhandled exception.
        prompt = PromptTemplate(
            input_variables=['text', 'Questions'],
            template=chat_template
        )
        llama3 = Together(model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", max_tokens=50)
        generated_chat = LLMChain(llm=llama3, prompt=prompt)

        response = generated_chat.invoke({
            "text": text,
            "Questions": question
        })
        return _clean_response(response['text'])
    except Exception as e:
        # Boundary with the UI: surface the failure as chat text.
        return f"Error in generating response: {e}"

def _is_greeting(message, greetings):
    """Return True if ``message`` is, or begins with, a greeting as a whole word."""
    for greeting in greetings:
        if not message.startswith(greeting):
            continue
        remainder = message[len(greeting):]
        # Require a word boundary after the greeting so that questions such as
        # "history of ..." or "highest ..." are not mistaken for "hi".
        if not remainder or not remainder[0].isalnum():
            return True
    return False


def ChatBot(history, document, question):
    """Handle one chat turn and return the updated chat history.

    Args:
        history: Existing chat history as a list of (user message, bot message)
            pairs, or ``None`` for an empty chat. It is not modified.
        document: Path of the uploaded PDF, or ``None`` if nothing was uploaded.
        question: The text the user submitted.

    Returns:
        A new history list. A blank question returns the history unchanged;
        otherwise one (question, reply) pair is appended, where the reply is a
        fixed greeting, a request to upload a document, or the answer
        produced by ``Bot`` from the document text.
    """
    greetings = ("hi", "hello", "hey", "greetings", "what's up", "howdy")

    # Always work on a copy so every branch treats the caller's list the same.
    updated_history = list(history or [])

    question_lower = (question or "").lower().strip()
    if not question_lower:
        return updated_history

    # Each history entry is one (user message, bot message) pair, which is the
    # shape the tuple-style Gradio Chatbot renders as a user/bot exchange.
    if _is_greeting(question_lower, greetings):
        updated_history.append((question, "Hello! How can I assist you with the document today?"))
        return updated_history

    if not document:
        # Without an upload there is nothing to open; reply instead of crashing.
        updated_history.append((question, "Please upload a PDF document before asking a question."))
        return updated_history

    # Extract text from the uploaded PDF document
    text = extract_text_from_pdf(document)

    # Generate the bot response based on the question and extracted text
    response = Bot(text, question)

    updated_history.append((question, response))
    return updated_history

# Assemble the Blocks UI: a chat display, a PDF upload slot and a question box
# whose submit event drives the chatbot.
with gr.Blocks() as iface:
    chatbot = gr.Chatbot()
    document = gr.File(type="filepath", label="Upload PDF Document")
    question = gr.Textbox(
        placeholder="Type your question here...",
        label="Ask a Question",
    )

    def respond(history, document, question):
        """Pass the submitted turn to ChatBot and hand back the new history."""
        updated_history = ChatBot(history, document, question)
        return updated_history

    # Submitting the textbox sends the current chat, file and question to
    # respond and writes its result back into the chat display.
    question.submit(
        fn=respond,
        inputs=[chatbot, document, question],
        outputs=chatbot,
    )

iface.launch(debug=True)