Spaces:
Runtime error
Runtime error
ivelin
committed on
Commit
·
7b3f48a
1
Parent(s):
72d0321
fix: bugs
Browse files
Signed-off-by: ivelin <ivelin.eth@gmail.com>
app.py
CHANGED
@@ -4,6 +4,7 @@ from PIL import Image, ImageDraw
|
|
4 |
import math
|
5 |
import torch
|
6 |
import html
|
|
|
7 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
8 |
|
9 |
pretrained_repo_name = "ivelin/donut-refexp-draft"
|
@@ -55,6 +56,7 @@ def process_refexp(image: Image, prompt: str):
|
|
55 |
print(
|
56 |
fr"predicted decoder sequence before token2json: {html.escape(sequence)}")
|
57 |
bbox = processor.token2json(sequence)
|
|
|
58 |
print(f"predicted bounding box: {bbox}")
|
59 |
|
60 |
print(f"image object: {image}")
|
@@ -63,10 +65,10 @@ def process_refexp(image: Image, prompt: str):
|
|
63 |
print(f"image width, height: {width, height}")
|
64 |
print(f"processed prompt: {prompt}")
|
65 |
|
66 |
-
xmin = math.floor(width*
|
67 |
-
ymin = math.floor(height*
|
68 |
-
xmax = math.floor(width*
|
69 |
-
ymax = math.floor(height*
|
70 |
|
71 |
print(
|
72 |
f"to image pixel values: xmin, ymin, xmax, ymax: {xmin, ymin, xmax, ymax}")
|
|
|
4 |
import math
|
5 |
import torch
|
6 |
import html
|
7 |
+
import json
|
8 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
9 |
|
10 |
pretrained_repo_name = "ivelin/donut-refexp-draft"
|
|
|
56 |
print(
|
57 |
fr"predicted decoder sequence before token2json: {html.escape(sequence)}")
|
58 |
bbox = processor.token2json(sequence)
|
59 |
+
bbox = json.loads(bbox)
|
60 |
print(f"predicted bounding box: {bbox}")
|
61 |
|
62 |
print(f"image object: {image}")
|
|
|
65 |
print(f"image width, height: {width, height}")
|
66 |
print(f"processed prompt: {prompt}")
|
67 |
|
68 |
+
xmin = math.floor(width*bbox["xmin"]) if bbox.get("xmin") else 0
|
69 |
+
ymin = math.floor(height*bbox["ymin"]) if bbox.get("ymin") else 0
|
70 |
+
xmax = math.floor(width*bbox["xmax"]) if bbox.get("xmax") else 1
|
71 |
+
ymax = math.floor(height*bbox["ymax"]) if bbox.get("ymax") else 1
|
72 |
|
73 |
print(
|
74 |
f"to image pixel values: xmin, ymin, xmax, ymax: {xmin, ymin, xmax, ymax}")
|