Spaces:

sahilnishad
/

scanned-doc-chat

Sleeping

App Files Files Community

sahilnishad committed on Nov 7, 2024

Commit

fa5e37d

verified ·

1 Parent(s): 8f800df

Create app.py

Browse files

Files changed (1) hide show

app.py +48 -0

app.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# app.py - Streamlit app for question answering over scanned document images
+import streamlit as st
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor
+import torch
+# Load the model and processor
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForCausalLM.from_pretrained("sahilnishad/Florence-2-FT-DocVQA").to(device)
+processor = AutoProcessor.from_pretrained("sahilnishad/Florence-2-FT-DocVQA")
+# Function to run inference
+def get_answer(task_prompt, question, image):
+    prompt = task_prompt + question
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
+    with torch.no_grad():
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            num_beams=3
+        )
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_text
+# Streamlit UI
+st.title("Scanned Document Question Answering with Florence-2")
+st.write("Upload scanned document image and ask a question")
+# File uploader for the document image
+uploaded_file = st.file_uploader("Choose a document image...", type=["jpg", "jpeg", "png"])
+# Text input for the question
+question = st.text_input("Enter your question:")
+# Run the model and display the answer
+if uploaded_file is not None and question:
+    image = Image.open(uploaded_file)
+    st.image(image, caption="Uploaded Document", use_column_width=True)
+    with st.spinner("Generating answer..."):
+        answer = get_answer("<DocVQA>", question, image)
+    st.write("**Answer:**", answer)