Spaces:

ivelin
/

ui-refexp

Runtime error

ivelin committed on Jan 10, 2023

Commit

e8e6698

•

1 Parent(s): 7b3f48a

fix: bugs

Signed-off-by: ivelin <ivelin.eth@gmail.com>

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ from PIL import Image, ImageDraw
 import math
 import torch
 import html
-import json
 from transformers import DonutProcessor, VisionEncoderDecoderModel
 pretrained_repo_name = "ivelin/donut-refexp-draft"
@@ -56,7 +55,6 @@ def process_refexp(image: Image, prompt: str):
     print(
         fr"predicted decoder sequence before token2json: {html.escape(sequence)}")
     bbox = processor.token2json(sequence)
-    bbox = json.loads(bbox)
     print(f"predicted bounding box: {bbox}")
     print(f"image object: {image}")
@@ -65,10 +63,15 @@ def process_refexp(image: Image, prompt: str):
     print(f"image width, height: {width, height}")
     print(f"processed prompt: {prompt}")
-    xmin = math.floor(width*bbox["xmin"]) if bbox.get("xmin") else 0
-    ymin = math.floor(height*bbox["ymin"]) if bbox.get("ymin") else 0
-    xmax = math.floor(width*bbox["xmax"]) if bbox.get("xmax") else 1
-    ymax = math.floor(height*bbox["ymax"]) if bbox.get("ymax") else 1
     print(
         f"to image pixel values: xmin, ymin, xmax, ymax: {xmin, ymin, xmax, ymax}")

 import math
 import torch
 import html
 from transformers import DonutProcessor, VisionEncoderDecoderModel
 pretrained_repo_name = "ivelin/donut-refexp-draft"
     print(
         fr"predicted decoder sequence before token2json: {html.escape(sequence)}")
     bbox = processor.token2json(sequence)
     print(f"predicted bounding box: {bbox}")
     print(f"image object: {image}")
     print(f"image width, height: {width, height}")
     print(f"processed prompt: {prompt}")
+    # safeguard in case text prediction is missing some bounding box coordinates
+    xmin = math.floor(width*float(bbox["xmin"])
+                      ) if bbox.get("xmin") is not None else 0
+    ymin = math.floor(
+        height*float(bbox["ymin"])) if bbox.get("ymin") is not None else 0
+    xmax = math.floor(width*float(bbox["xmax"])
+                      ) if bbox.get("xmax") is not None else 1
+    ymax = math.floor(
+        height*float(bbox["ymax"])) if bbox.get("ymax") is not None else 1
     print(
         f"to image pixel values: xmin, ymin, xmax, ymax: {xmin, ymin, xmax, ymax}")