Spaces:
Sleeping
Sleeping
# streamlit_app.py | |
import streamlit as st | |
from PIL import Image | |
from transformers import AutoModelForCausalLM, AutoProcessor | |
import torch | |
# Load the model and processor.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction. Loading through st.cache_resource keeps a single copy of the
# model and processor alive in the server process instead of re-creating
# them on each rerun.
MODEL_ID = "sahilnishad/Florence-2-FT-DocVQA"


@st.cache_resource
def _load_model_and_processor(model_id, device_name):
    """Load the model (moved to ``device_name``) and its processor.

    Args:
        model_id: Hugging Face Hub repository id of the checkpoint.
        device_name: Device string such as "cuda" or "cpu". A plain string is
            used so the argument can take part in the cache key.

    Returns:
        A ``(model, processor)`` tuple.
    """
    # NOTE(review): Florence-2 checkpoints commonly ship custom modeling
    # code; if loading fails asking for trust_remote_code, confirm against
    # the model card before enabling it.
    loaded_model = AutoModelForCausalLM.from_pretrained(model_id).to(device_name)
    loaded_processor = AutoProcessor.from_pretrained(model_id)
    return loaded_model, loaded_processor


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, processor = _load_model_and_processor(MODEL_ID, device.type)
# Function to run inference | |
def get_answer(task_prompt, question, image, *, max_new_tokens=1024, num_beams=3):
    """Generate an answer to ``question`` about ``image``.

    Uses the module-level ``processor``, ``model`` and ``device``.

    Args:
        task_prompt: Task token prepended to the question (the UI passes
            "<DocVQA>").
        question: Question text appended directly after ``task_prompt``.
        image: PIL image of the document; converted to RGB when it is in
            any other mode.
        max_new_tokens: Upper bound on the number of tokens generated.
            Defaults to 1024, the value previously hard-coded here.
        num_beams: Beam-search width passed to ``model.generate``.
            Defaults to 3, the value previously hard-coded here.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    prompt = task_prompt + question

    # The processor is fed RGB input; palette/alpha/grayscale uploads are
    # normalised here.
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=max_new_tokens,
            num_beams=num_beams,
        )

    # A single prompt was submitted, so only the first decoded sequence is
    # returned.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Streamlit UI
st.title("Scanned Document Question Answering with Florence-2")
st.write("Upload scanned document image and ask a question")

# File uploader for the document image
uploaded_file = st.file_uploader("Choose a document image...", type=["jpg", "jpeg", "png"])

# Text input for the question
question = st.text_input("Enter your question:")

# Run the model and display the answer.
# A whitespace-only question is treated as "no question yet" so it does not
# trigger an inference run.
if uploaded_file is not None and question.strip():
    try:
        image = Image.open(uploaded_file)
        # Image.open is lazy; force decoding now so a corrupt or truncated
        # upload fails here rather than inside st.image or the model call.
        image.load()
    except OSError:
        # PIL's UnidentifiedImageError is an OSError subclass, so this also
        # covers files that are not images at all.
        st.error("The uploaded file could not be read as an image.")
    else:
        st.image(image, caption="Uploaded Document", use_column_width=True)
        with st.spinner("Generating answer..."):
            answer = get_answer("<DocVQA>", question.strip(), image)
        st.write("**Answer:**", answer)