Spaces:
Running
Running
File size: 2,388 Bytes
e4a1f31 7b208e8 0f897d9 6b1590b e4a1f31 0f897d9 7b208e8 0f897d9 7b208e8 f601880 7b208e8 f601880 e4a1f31 7b208e8 e4a1f31 4bd7bfe e4a1f31 6b1590b 7b208e8 e4a1f31 f601880 6b1590b f601880 6b1590b f601880 09ef786 f601880 7b208e8 f601880 7b208e8 f601880 e4a1f31 f601880 7b208e8 f601880 e4a1f31 b06f609 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import os
import streamlit as st
from transformers import pipeline
import re
from PyPDF2 import PdfFileReader
def truncate_to_word_boundary(text, max_words=100):
    """Return the prefix of ``text`` that contains at most ``max_words`` words.

    A word is a run of ``\\w`` characters. The cut is made right after the
    last kept word, so punctuation and whitespace *between* the kept words
    are preserved exactly as they appear in ``text``. When ``text`` has no
    more than ``max_words`` words it is returned unchanged.

    Args:
        text: The string to shorten. Empty or ``None`` yields ``""``.
        max_words: Maximum number of words to keep. Values <= 0 yield ``""``.

    Returns:
        The truncated string.
    """
    if not text or max_words <= 0:
        return ""

    cut = None
    for count, match in enumerate(re.finditer(r"\w+", text), start=1):
        if count == max_words:
            # Remember where the last permitted word ends ...
            cut = match.end()
        elif count > max_words:
            # ... and only truncate once we know another word follows it.
            return text[:cut]
    return text
def _load_question_answerer():
    """Build the Hugging Face question-answering pipeline used by the app."""
    return pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        tokenizer="distilbert-base-cased-distilled-squad",
    )


def question_answering(question, text):
    """Answer ``question`` using ``text`` as the context.

    Args:
        question: The question string passed to the pipeline.
        text: The context string the answer is extracted from.

    Returns:
        Whatever the pipeline returns for a single question/context pair.
        The caller in this file reads the ``'answer'``, ``'score'`` and
        ``'start'`` keys of that result.
    """
    # Constructing the pipeline loads the model and tokenizer, which is far
    # too expensive to repeat on every button click. Streamlit re-executes the
    # script on each interaction, so the pipeline is kept in Streamlit's
    # resource cache when that API exists; older Streamlit releases without
    # ``cache_resource`` fall back to building it per call, as before.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        loader = cache_resource(_load_question_answerer)
    else:
        loader = _load_question_answerer

    question_answerer = loader()
    return question_answerer(question=question, context=text)
def _extract_pdf_pages(uploaded_file):
    """Return the extracted text of every page of the uploaded PDF, in order."""
    pdf_reader = PdfFileReader(uploaded_file)
    return [
        pdf_reader.getPage(page_num).extractText() or ""
        for page_num in range(pdf_reader.getNumPages())
    ]


def _page_number_for_offset(page_texts, offset):
    """Return the 1-based page whose text contains character ``offset``.

    ``offset`` is an index into the concatenation of ``page_texts``. Offsets
    past the end map to the last page.
    """
    consumed = 0
    for page_number, page_text in enumerate(page_texts, start=1):
        consumed += len(page_text)
        if offset < consumed:
            return page_number
    return max(len(page_texts), 1)


def main():
    """Render the Streamlit UI: upload a file, ask a question, show the answer.

    PDF and plain-text uploads are supported. Other extensions accepted by
    the uploader produce an explicit "not supported" message instead of
    failing later on an undefined variable.
    """
    st.title("Question Answering on an Uploaded File")
    uploaded_file = st.file_uploader("Upload a file:", type=["pdf", "txt", "docx", "csv", "json"])
    question = st.text_input("Ask your question:")

    if not st.button("Answer"):
        return
    if uploaded_file is None:
        st.warning("Please upload a file first.")
        return
    if not question.strip():
        # NOTE(review): the QA pipeline is expected to reject an empty
        # question; fail early with a readable message instead.
        st.warning("Please enter a question.")
        return

    file_extension = os.path.splitext(uploaded_file.name)[1].lower()
    page_texts = None  # Only populated for PDFs, where pages exist.
    if file_extension == ".pdf":
        # Hand the untouched stream to PyPDF2 rather than reading it first.
        page_texts = _extract_pdf_pages(uploaded_file)
        context = "".join(page_texts)
    elif file_extension == ".txt":
        # Replace undecodable bytes so a non-UTF-8 file does not crash the app.
        context = uploaded_file.read().decode("utf-8", errors="replace")
    else:
        # docx / csv / json are accepted by the uploader but not parsed yet.
        st.error(f"Files of type '{file_extension}' are not supported yet.")
        return

    if not context.strip():
        st.warning("No text could be extracted from the uploaded file.")
        return

    answer = question_answering(question, context)

    st.write(f"Question: '{question}'")
    st.write("Answer:", answer['answer'])
    st.write("Score:", answer['score'])
    if page_texts is not None:
        # answer['start'] is treated as a character offset into the context
        # (per the Transformers QA pipeline docs — confirm for the installed
        # version), so it is mapped to the page that contains that offset
        # rather than printed as if it were a page number.
        st.write("Page Number:", _page_number_for_offset(page_texts, answer['start']))

    # Display truncated context
    st.write("Context:", truncate_to_word_boundary(context))
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()