import os
from tempfile import NamedTemporaryFile

import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Extractive question-answering checkpoint used by the app.
QA_MODEL_NAME = "deepset/roberta-base-squad2"


def save_uploaded_file(uploaded_file):
    """Write the uploaded PDF to a temporary ``.pdf`` file and return its path.

    Args:
        uploaded_file: File-like object whose ``read()`` returns the PDF bytes.

    Returns:
        Path of the temporary file. It is created with ``delete=False``, so
        the caller is responsible for removing it.
    """
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.read())
    return temp_file.name


def get_answer(question, db, model, tokenizer):
    """Answer ``question`` from the passages in ``db`` most similar to it.

    Args:
        question: The user's question.
        db: Vector store exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        model: Question-answering model handed to the pipeline.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The answer text extracted by the pipeline, or an empty string when
        the search returns no passages.
    """
    docs = db.similarity_search(question, k=4)
    if not docs:
        return ""

    # Join whatever came back (possibly fewer than 4 chunks). The newline
    # keeps the last word of one chunk from fusing with the first of the next.
    context = "\n".join(doc.page_content for doc in docs)

    # Use the model and tokenizer supplied by the caller rather than
    # reloading them from disk for every question.
    nlp = pipeline("question-answering", model=model, tokenizer=tokenizer)
    result = nlp(question=question, context=context)
    return result["answer"]


@st.cache_resource
def _load_qa_model():
    """Load the question-answering model and tokenizer (cached by Streamlit)."""
    model = AutoModelForQuestionAnswering.from_pretrained(QA_MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_NAME)
    return model, tokenizer


def _build_index(uploaded_file):
    """Index the uploaded PDF in Chroma; return ``None`` if it yields no pages.

    The temporary copy of the PDF is removed even when loading or embedding
    fails.
    """
    temp_file_path = save_uploaded_file(uploaded_file)
    try:
        pages = PyPDFLoader(temp_file_path).load_and_split()
        if not pages:
            return None
        return Chroma.from_documents(pages, HuggingFaceEmbeddings())
    finally:
        os.remove(temp_file_path)


# Streamlit UI
st.title("PDF Question Answering App")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Streamlit re-executes this script on every interaction, so everything
    # that must survive a rerun (index, history) is kept in session state and
    # rebuilt only when a different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("file_key") != file_key:
        st.session_state["db"] = _build_index(uploaded_file)
        st.session_state["conversation"] = []
        st.session_state["file_key"] = file_key

    db = st.session_state["db"]
    conversation = st.session_state["conversation"]

    if db is None:
        st.error("No text could be extracted from this PDF.")
    else:
        model, tokenizer = _load_qa_model()

        st.write("Ask your questions, and I'll provide answers:")

        # No explicit loop: each button click triggers a rerun of the script,
        # which is what drives the question-answer cycle.
        question = st.text_input("Enter your question:")

        if st.button("Get Answer"):
            if not question.strip():
                st.warning("Please enter a question.")
            else:
                answer = get_answer(question, db, model, tokenizer)
                st.write("Answer:")
                st.write(answer)
                conversation.append({"question": question, "answer": answer})

        # Ending the conversation shows the history once, then resets it so
        # the next question starts a new conversation.
        if st.button("End Conversation"):
            st.write("Conversation History:")
            for entry in conversation:
                st.write(f"Q: {entry['question']}")
                st.write(f"A: {entry['answer']}")
            st.session_state["conversation"] = []