File size: 2,769 Bytes
720df77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Import required libraries
import PyPDF2
from getpass import getpass
from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser
from haystack.document_stores import InMemoryDocumentStore
from haystack import Document, Pipeline
from haystack.nodes import BM25Retriever
from pprint import pprint
import streamlit as st
import logging
from dotenv import load_dotenv
load_dotenv()
import os
import logging
logging.basicConfig(level=logging.DEBUG)

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        One string holding the extracted text of all pages, joined without a
        separator. A page whose ``extract_text()`` result is falsy
        contributes an empty string, so the result is ``""`` when nothing
        could be extracted from any page.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Join inside the ``with`` block: the generator is consumed lazily,
        # so the file must still be open while the pages are extracted.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Extract text from the PDF file
# The path is relative, so it is resolved against the process's current
# working directory.
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
# Fail fast: with no text there is nothing to index or answer from.
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# Create a Haystack document
# NOTE(review): the entire PDF text is stored as ONE document, so the
# retriever below can return at most this single document regardless of
# top_k. PreProcessor is imported but never used in this file — presumably
# the text was meant to be split into smaller passages first; confirm.
doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})

# Initialize Document Store
# In-memory store with BM25 enabled; the index is rebuilt every time this
# module is executed.
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# Initialize Retriever
# Keyword (BM25) retrieval over the store above, requesting up to 2 documents
# per query.
retriever = BM25Retriever(document_store=document_store, top_k=2)

# Define QA Template
# The prompt text below is runtime content sent to the model; do not reformat
# it. It contains two placeholders: {join(documents)} (presumably expands to
# the text of the retrieved documents — confirm against the Haystack
# PromptTemplate documentation) and {query} (the user's question).
# AnswerParser is attached as the output parser; run_streamlit_app later
# reads the pipeline result through response["answers"][0].answer.
qa_template = PromptTemplate(
    prompt="""
    Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions. 
    I won’t ask any follow-up questions myself. 
    If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
    Context: {join(documents)}; 
    Question: {query}
    Answer:
    """,
    output_parser=AnswerParser()
)

# Get Huggingface token
def load_hf_token(env_var="HF_TOKEN"):
    """Read the Hugging Face API token from an environment variable.

    Args:
        env_var: Name of the environment variable to read.

    Returns:
        The token with surrounding whitespace removed, or ``None`` when the
        variable is unset or blank.
    """
    token = os.environ.get(env_var, "").strip()
    return token or None


# The previous ``HF_TOKEN = HF_TOKEN`` read the name before it was ever bound
# and raised NameError at import time. ``load_dotenv()`` has already been
# called at the top of the file, so a value defined in a local .env file is
# visible through the environment here.
HF_TOKEN = load_hf_token()
if HF_TOKEN is None:
    logging.warning(
        "HF_TOKEN is not set; the PromptNode will be created without an API key."
    )

# Initialize Prompt Node
# Generation step of the pipeline: the Mixtral instruct model, authenticated
# with HF_TOKEN and prompted through qa_template unless a caller overrides it.
# NOTE(review): max_length presumably caps the generated answer and
# model_max_length the total prompt size — confirm against the PromptNode docs.
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    default_prompt_template=qa_template,
    api_key=HF_TOKEN,
    model_kwargs=dict(model_max_length=5000),
    max_length=500,
)

# Build Pipeline
def _assemble_rag_pipeline(retrieval_node, generation_node):
    """Wire a retriever and a prompt node into a two-stage query pipeline.

    The retriever is registered as "retriever" and fed from the pipeline's
    "Query" input; the prompt node is registered as "prompt_node" and fed
    from the retriever's output.
    """
    pipeline = Pipeline()
    pipeline.add_node(component=retrieval_node, name="retriever", inputs=["Query"])
    pipeline.add_node(component=generation_node, name="prompt_node", inputs=["retriever"])
    return pipeline


rag_pipeline = _assemble_rag_pipeline(retriever, prompt_node)

# Streamlit Function for Handling Input and Displaying Output
def run_streamlit_app():
    """Render the question form and show the pipeline's answer on demand.

    Draws the page title, a text input and a "Get Answer" button. When the
    button is pressed with a non-blank question, the question is run through
    ``rag_pipeline`` and the first returned answer is written to the page,
    or "No answer found." when the pipeline returns no answers.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if not st.button("Get Answer"):
        return

    # A blank question would still trigger retrieval and a model call, so ask
    # the user for input instead of running the pipeline.
    question = query_text.strip()
    if not question:
        st.warning("Please enter a question first.")
        return

    response = rag_pipeline.run(query=question)
    answers = response["answers"]
    answer = answers[0].answer if answers else "No answer found."
    st.write(answer)

# Start the Streamlit application
# Only the UI is guarded: the PDF loading, indexing and pipeline construction
# above run at module level, so they execute on import as well.
if __name__ == "__main__":
    run_streamlit_app()