mynkchaudhry committed on
Commit
2e36d06
·
1 Parent(s): 575f6eb

upload all the files

Browse files
Files changed (4) hide show
  1. app.py +53 -0
  2. demo.png +0 -0
  3. demo2.jpg +0 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoProcessor
3
+ from PIL import Image
4
+ import torch
5
+
6
# Load the fine-tuned Florence-2 DocVQA model and its processor once at import
# time so every request reuses the same objects.
#
# Both objects come from the same Hub repository. The model is already loaded
# with trust_remote_code=True; the processor needs the same flag, because
# Florence-2 checkpoints ship their processor as custom code on the Hub and
# AutoProcessor refuses to execute it without explicit opt-in.
MODEL_ID = "mynkchaudhry/Florence-2-FT-DocVQA"
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
9
+
10
def generate_response(image, question):
    """Answer a question about an image using the module-level model.

    Args:
        image: A PIL-style image (must expose ``.mode`` and ``.convert``), or
            ``None`` when no image was supplied.
        question: The question text passed to the processor as the prompt.

    Returns:
        The first decoded sequence produced by the model, or a human-readable
        message string when an input is missing or processing fails. The
        function never raises; failures are reported as text so the UI can
        display them.
    """
    # Report missing inputs explicitly instead of letting them surface as an
    # opaque AttributeError from the generic handler below.
    if image is None:
        return "Error processing image: no image was provided."
    if question is None or not question.strip():
        return "Error processing image: no question was provided."

    try:
        # Normalise to 3-channel RGB (e.g. RGBA / grayscale / palette inputs).
        if image.mode != "RGB":
            image = image.convert("RGB")

        inputs = processor(text=question, images=image, return_tensors="pt")

        # Use the GPU when available. model.to() is a no-op once the model
        # already lives on the target device.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # Inference only: avoid tracking gradients for any part of the
        # forward pass (saves memory and time).
        with torch.no_grad():
            generated_ids = model.generate(
                input_ids=inputs["input_ids"],
                pixel_values=inputs["pixel_values"],
                max_length=1024,
                num_beams=3,
                early_stopping=True,
            )

        # A single image/question pair is processed, so take the first result.
        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the error text is
        # shown to the user instead of crashing the request.
        return f"Error processing image: {e}"
33
+
34
# Example (image path, question) pairs shown in the UI.
# Paths are relative to the directory the app is started from. demo.png and
# demo2.jpg are committed alongside app.py, so they are referenced directly
# rather than through an "Image2text/" sub-folder that is not part of the repo.
examples = [
    ["demo.png", "what is the address in the page?"],
    ["demo2.jpg", "what is the date in the page?"],
    ["demo.png", "what is the name in the page?"],
]
41
+
42
# Build the Gradio UI: one image input and one question box feed
# generate_response, whose string result is shown in a single text box.
_image_input = gr.Image(type="pil")
_question_input = gr.Textbox(label="Question")
_response_output = gr.Textbox(label="Response")

iface = gr.Interface(
    title="Image to Text Extractor",
    description=(
        "Upload an image and provide a question. "
        "This tool will extract the relevant information from the image "
        "based on your question."
    ),
    fn=generate_response,
    inputs=[_image_input, _question_input],
    outputs=_response_output,
    examples=examples,
)

# Start serving the interface.
iface.launch()
demo.png ADDED
demo2.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch==2.0.1
2
+ transformers==4.30.2
3
+ Pillow==9.4.0
4
+ gradio==3.23.0