sahilnishad committed on
Commit
fa5e37d
·
verified ·
1 Parent(s): 8f800df

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import streamlit as st
4
+ from PIL import Image
5
+ from transformers import AutoModelForCausalLM, AutoProcessor
6
+ import torch
7
+
8
+ # Load the model and processor
9
MODEL_ID = "sahilnishad/Florence-2-FT-DocVQA"


@st.cache_resource
def _load_model_and_processor(model_id, device_name):
    """Load the model and its processor once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the checkpoint would be re-read and moved
    to the device on every rerun; ``st.cache_resource`` keeps a single shared
    instance alive across reruns and sessions.

    Args:
        model_id: Hub repository id of the checkpoint to load.
        device_name: Device string (e.g. ``"cpu"`` or ``"cuda"``). A plain
            string is used rather than a ``torch.device`` so the argument is
            trivially hashable for the cache key.

    Returns:
        A ``(model, processor)`` tuple, with the model already moved to
        ``device_name``.
    """
    # NOTE(review): Florence-2 checkpoints have commonly been loaded with
    # trust_remote_code=True; confirm whether this repository needs it with
    # the transformers version in use before enabling it.
    loaded_model = AutoModelForCausalLM.from_pretrained(model_id).to(device_name)
    loaded_processor = AutoProcessor.from_pretrained(model_id)
    return loaded_model, loaded_processor


# Prefer the GPU when one is visible; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, processor = _load_model_and_processor(MODEL_ID, str(device))
12
+
13
+ # Function to run inference
14
def get_answer(task_prompt, question, image):
    """Generate the model's answer to a question about a document image.

    Relies on the module-level ``processor``, ``model`` and ``device``.

    Args:
        task_prompt: Task token placed directly in front of the question.
        question: The user's question text.
        image: A PIL image; it is converted to RGB when its mode is
            anything else.

    Returns:
        The first decoded sequence, with special tokens skipped.
    """
    full_prompt = task_prompt + question
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")

    encoded = processor(
        text=full_prompt,
        images=rgb_image,
        return_tensors="pt",
    ).to(device)

    generation_args = {
        "input_ids": encoded["input_ids"],
        "pixel_values": encoded["pixel_values"],
        "max_new_tokens": 1024,
        "num_beams": 3,
    }
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_ids = model.generate(**generation_args)

    decoded_batch = processor.batch_decode(output_ids, skip_special_tokens=True)
    return decoded_batch[0]
29
+
30
+ # Streamlit UI
31
st.title("Scanned Document Question Answering with Florence-2")
st.write("Upload scanned document image and ask a question")

# File uploader for the document image
uploaded_file = st.file_uploader("Choose a document image...", type=["jpg", "jpeg", "png"])

# Text input for the question
question = st.text_input("Enter your question:")

# Ignore whitespace-only input so a blank question does not trigger a
# full (and expensive) generation pass.
cleaned_question = question.strip() if question else ""

# Run the model and display the answer
if uploaded_file is not None and cleaned_question:
    try:
        image = Image.open(uploaded_file)
        # Force decoding now so truncated/corrupt files fail here, where the
        # error can be reported, rather than later during display or inference.
        image.load()
    except OSError:
        # PIL's UnidentifiedImageError is an OSError subclass, so this also
        # covers files that are not valid images at all.
        image = None
        st.error("The uploaded file could not be read as an image.")

    if image is not None:
        # NOTE(review): recent Streamlit releases deprecate use_column_width in
        # favour of use_container_width; kept as-is for older installs.
        st.image(image, caption="Uploaded Document", use_column_width=True)

        with st.spinner("Generating answer..."):
            answer = get_answer("<DocVQA>", cleaned_question, image)

        st.write("**Answer:**", answer)