Maxx0 committed on
Commit
720df77
1 Parent(s): 0e431ff

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import required libraries
2
+ import PyPDF2
3
+ from getpass import getpass
4
+ from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser
5
+ from haystack.document_stores import InMemoryDocumentStore
6
+ from haystack import Document, Pipeline
7
+ from haystack.nodes import BM25Retriever
8
+ from pprint import pprint
9
+ import streamlit as st
10
+ import logging
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
+ import os
14
+ import logging
15
+ logging.basicConfig(level=logging.DEBUG)
16
+
17
+ # Function to extract text from a PDF
18
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF file to open (read in binary mode).

    Returns:
        One string holding the text of all pages in page order. A page
        whose ``extract_text()`` result is falsy contributes nothing, so
        the result is ``""`` when no page yields any text.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Walk the pages directly and join once, rather than indexing by
        # position and growing a string with ``+=``.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
26
+
27
# Load the source PDF and pull out its text.
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    # Nothing to index, so stop before building the store.
    raise ValueError("No text extracted from PDF.")

# Wrap the whole PDF text in a single Haystack document.
doc = Document(
    content=pdf_text,
    meta={"name": "MR. MPROFY"},
)

# In-memory document store with BM25 enabled, holding that one document.
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# BM25 retriever over the store, configured with top_k=2.
retriever = BM25Retriever(
    document_store=document_store,
    top_k=2,
)
42
+
43
# Wording of the question-answering prompt. The {join(documents)} and {query}
# placeholders are substituted when the template is rendered.
_QA_PROMPT_TEXT = """
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
"""

# Template used by the prompt node; its output goes through AnswerParser.
qa_template = PromptTemplate(
    prompt=_QA_PROMPT_TEXT,
    output_parser=AnswerParser(),
)
55
+
56
# Get the Hugging Face token from the environment. load_dotenv() has already
# been called above, so a value from a local .env file (if present) is
# picked up here as well.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Make the misconfiguration visible early: this value is what gets
    # passed as the prompt node's api_key.
    logging.warning("HF_TOKEN is not set; no API key will be passed to the prompt node.")
58
+
59
# Prompt node: hosted Mixtral model, authenticated with HF_TOKEN and using
# the QA template by default.
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,
    model_kwargs={"model_max_length": 5000},
)

# Two-stage pipeline: the query goes to the retriever, and the retriever's
# output goes to the prompt node.
rag_pipeline = Pipeline()
rag_pipeline.add_node(
    component=retriever,
    name="retriever",
    inputs=["Query"],
)
rag_pipeline.add_node(
    component=prompt_node,
    name="prompt_node",
    inputs=["retriever"],
)
72
+
73
+ # Streamlit Function for Handling Input and Displaying Output
74
def run_streamlit_app():
    """Render the Streamlit page: a question box, a button, and the answer.

    When the "Get Answer" button is pressed, the entered question is run
    through ``rag_pipeline`` and the first returned answer is written to
    the page ("No answer found." when the answer list is empty). A blank
    question is rejected with a warning instead of being sent to the
    pipeline.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if not st.button("Get Answer"):
        return

    # Skip retrieval and the model call when there is nothing to ask.
    if not query_text or not query_text.strip():
        st.warning("Please enter a question.")
        return

    response = rag_pipeline.run(query=query_text)
    answers = response["answers"]
    answer = answers[0].answer if answers else "No answer found."
    st.write(answer)


# Start the Streamlit application
if __name__ == "__main__":
    run_streamlit_app()