Spaces:

sahilnishad
/

scanned-doc-chat

Sleeping

App Files Files Community

scanned-doc-chat / app.py

sahilnishad

Update app.py

e435685 verified 18 days ago

raw

history blame contribute delete

1.75 kB

	# streamlit_app.py

	import streamlit as st
	from PIL import Image
	from transformers import AutoModelForCausalLM, AutoProcessor
	import torch

	# Load the model and processor.
	# Streamlit re-executes this script from the top on every widget interaction,
	# so the expensive from_pretrained calls are wrapped in a cached loader and
	# only run once per server process instead of once per rerun.
	MODEL_ID = "sahilnishad/Florence-2-FT-DocVQA"
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


	@st.cache_resource
	def _load_model_and_processor():
		"""Load the checkpoint named by MODEL_ID and return ``(model, processor)``.

		The model is moved to the module-level ``device`` before being returned.
		Decorated with ``st.cache_resource`` so that the same objects are reused
		across script reruns and user sessions.
		"""
		# NOTE(review): trust_remote_code=True lets transformers execute Python
		# code shipped in the model repository -- keep it only for trusted repos.
		loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True).to(device)
		loaded_processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
		return loaded_model, loaded_processor


	model, processor = _load_model_and_processor()

	# Function to run inference
	def get_answer(task_prompt, question, image):
		"""Generate a text answer to ``question`` about ``image``.

		Args:
			task_prompt: Prefix string prepended to the question to form the prompt.
			question: The question text appended to ``task_prompt``.
			image: Image object exposing ``mode`` and ``convert`` (the caller in
				this file passes the result of ``PIL.Image.open``). It is
				converted to RGB when its mode is anything else.

		Returns:
			The first decoded sequence produced by ``model.generate``, with
			special tokens skipped.
		"""
		full_prompt = task_prompt + question
		rgb_image = image if image.mode == "RGB" else image.convert("RGB")

		# Build the model inputs and move them to the same device as the model.
		encoded = processor(text=full_prompt, images=rgb_image, return_tensors="pt")
		encoded = encoded.to(device)

		# Gradients are not needed for generation.
		with torch.no_grad():
			output_ids = model.generate(
				input_ids=encoded["input_ids"],
				pixel_values=encoded["pixel_values"],
				max_new_tokens=1024,
				num_beams=3,
			)

		decoded_batch = processor.batch_decode(output_ids, skip_special_tokens=True)
		return decoded_batch[0]

	# Streamlit UI
	st.title("Scanned Document Question Answering with Florence-2")
	st.write("Upload scanned document image and ask a question")

	# File uploader for the document image
	uploaded_file = st.file_uploader("Choose a document image...", type=["jpg", "jpeg", "png"])

	# Text input for the question
	question = st.text_input("Enter your question:")

	# A whitespace-only string is truthy but is not a question; strip it so that
	# blank input does not trigger an inference run.
	cleaned_question = question.strip()

	# Run the model and display the answer
	if uploaded_file is not None and cleaned_question:
		try:
			image = Image.open(uploaded_file)
		except OSError:
			# Pillow raises UnidentifiedImageError (an OSError subclass) when the
			# upload cannot be identified as an image; report it in the UI
			# rather than letting the exception propagate.
			image = None
			st.error("Could not read the uploaded file as an image. Please upload a valid JPG or PNG.")

		if image is not None:
			st.image(image, caption="Uploaded Document", use_column_width=True)

			with st.spinner("Generating answer..."):
				answer = get_answer("<DocVQA>", cleaned_question, image)

			st.write("Answer:", answer)