Oysiyl commited on
Commit
5aa8a86
1 Parent(s): 5194991

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from PIL import Image
4
+ from transformers import AutoProcessor, AutoModelForCausalLM
5
+
6
+ #workaround for unnecessary flash_attn requirement
7
+ from unittest.mock import patch
8
+ from transformers.dynamic_module_utils import get_imports
9
+ import numpy as np
10
+
11
def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
    """Return a dynamic module's imports with the ``flash_attn`` entry dropped.

    Florence-2's remote modeling code declares ``flash_attn`` as a hard
    requirement even though the model runs fine with SDPA attention.  This
    function is patched in place of
    ``transformers.dynamic_module_utils.get_imports`` so the checkpoint loads
    on machines without flash-attn installed.

    Args:
        filename: Path of the dynamic module file being inspected.

    Returns:
        The list of imports detected in ``filename``; for
        ``modeling_florence2.py`` the ``"flash_attn"`` entry is removed.
    """
    imports = get_imports(filename)
    # Only the Florence-2 modeling file needs the workaround.  Guard the
    # removal: list.remove() raises ValueError when the entry is absent
    # (e.g. if upstream stops importing flash_attn), which would break loading.
    if str(filename).endswith("modeling_florence2.py") and "flash_attn" in imports:
        imports.remove("flash_attn")
    return imports
17
+
18
# Load the fine-tuned Florence-2 OCR checkpoint.  get_imports is patched for
# the duration of the load so the remote code's flash_attn dependency check is
# skipped; SDPA attention ("sdpa") is used instead.
with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
    model = AutoModelForCausalLM.from_pretrained("Oysiyl/Florence-2-FT-OCR-Cauldron-IAM", attn_implementation="sdpa", trust_remote_code=True)

# Processor handles both image preprocessing and text tokenization/decoding
# for the same checkpoint.
processor = AutoProcessor.from_pretrained("Oysiyl/Florence-2-FT-OCR-Cauldron-IAM", trust_remote_code=True)

# Task prompt understood by this fine-tuned Florence-2 model.
prompt = "OCR"
24
+
25
+
26
def predict(im):
    """Run handwritten-text OCR on a Gradio ImageEditor payload.

    Args:
        im: Dict from ``gr.ImageEditor``; ``im['composite']`` is the merged
            drawing/upload as a numeric array (assumed RGBA with possible
            transparency — TODO confirm against the Gradio version in use).

    Returns:
        The text parsed from the model's generation for the OCR task.
    """
    # Flatten the (possibly transparent) composite onto an opaque white
    # canvas, then drop the alpha channel for the model.
    rgba = Image.fromarray(im['composite'].astype(np.uint8)).convert("RGBA")
    white = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
    rgb = Image.alpha_composite(white, rgba).convert("RGB")

    # Tokenize the task prompt and preprocess the image in one call.
    batch = processor(text=prompt, images=rgb, return_tensors="pt")

    # Deterministic beam-search decoding.
    output_ids = model.generate(
        input_ids=batch["input_ids"],
        pixel_values=batch["pixel_values"],
        max_new_tokens=1024,
        do_sample=False,
        num_beams=3
    )

    # Keep special tokens: post_process_generation relies on them to parse
    # the task-specific output structure.
    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
    answer = processor.post_process_generation(
        decoded, task=prompt, image_size=(rgb.width, rgb.height)
    )
    return answer[prompt]
45
+
46
+
47
# Input widget: users can draw by hand or upload an existing image.
sketchpad = gr.ImageEditor(label="Draw something or upload an image")

# Wire the OCR predictor into a simple Gradio UI: image in, plain text out.
interface = gr.Interface(
    predict,
    inputs=sketchpad,
    outputs='text',
    theme='gradio/monochrome',
    title="Handwritten Recognition using Florence 2 model finetuned on IAM subset from HuggingFace Cauldron dataset",
    description="<p style='text-align: center'>Draw a text or upload an image with handwritten notes and let's model try to guess the text!</p>",
    article = "<p style='text-align: center'>Handwritten Text Recognition | Demo Model</p>")
# debug=True surfaces server-side errors in the console while the app runs.
interface.launch(debug=True)