Manoj21k committed
Commit 608ef59
1 Parent(s): 754d0f1

Create app.py

Files changed (1):
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
import streamlit as st
import os
from tempfile import NamedTemporaryFile
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline


# Save the uploaded PDF to a temporary file and return its path
def save_uploaded_file(uploaded_file):
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.read())
        return temp_file.name


# Answer a question using the four most similar PDF chunks as context
def get_answer(question, db, model, tokenizer):
    docs = db.similarity_search(question, k=4)
    context = " ".join(doc.page_content for doc in docs)

    # Build a question-answering pipeline from the already-loaded model & tokenizer
    nlp = pipeline("question-answering", model=model, tokenizer=tokenizer)

    # Get the answer
    result = nlp(question=question, context=context)
    return result["answer"]


# Streamlit UI
st.title("PDF Question Answering App")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Save the uploaded file to a temporary location
    temp_file_path = save_uploaded_file(uploaded_file)

    # Load and split the PDF document using PyPDFLoader
    loader = PyPDFLoader(temp_file_path)
    pages = loader.load_and_split()

    # Initialize embeddings and build the Chroma vector store
    embed = HuggingFaceEmbeddings()
    db = Chroma.from_documents(pages, embed)

    # Load the model & tokenizer for question-answering
    model_name = "deepset/roberta-base-squad2"
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Keep the conversation history across Streamlit reruns
    if "conversation" not in st.session_state:
        st.session_state.conversation = []

    st.write("Ask your questions, and I'll provide answers:")

    question = st.text_input("Enter your question:")
    if st.button("Get Answer") and question:
        answer = get_answer(question, db, model, tokenizer)
        st.write("Answer:")
        st.write(answer)
        st.session_state.conversation.append({"question": question, "answer": answer})

    # End the conversation by clearing the stored history
    if st.button("End Conversation"):
        st.session_state.conversation = []

    # Display the conversation history
    st.write("Conversation History:")
    for entry in st.session_state.conversation:
        st.write(f"Q: {entry['question']}")
        st.write(f"A: {entry['answer']}")

    # Cleanup: delete the temporary file
    os.remove(temp_file_path)
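
For reference, a minimal standalone sketch of the extractive question-answering step the app relies on, assuming the transformers library is installed and the deepset/roberta-base-squad2 checkpoint can be downloaded; the context string here is illustrative only and stands in for the chunks retrieved from Chroma:

from transformers import pipeline

# Build the same extractive question-answering pipeline the app uses
qa = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Illustrative context standing in for the retrieved PDF chunks
context = (
    "Chroma stores embeddings of the PDF pages. At query time the four most "
    "similar chunks are concatenated and passed to the reader model."
)

result = qa(question="What does Chroma store?", context=context)
print(result["answer"], result["score"])

The app itself would be launched with "streamlit run app.py"; based on the import list, likely dependencies include streamlit, langchain, chromadb, sentence-transformers, pypdf, transformers, and torch (an assumption, since no requirements file is part of this commit).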