Tzktz committed on
Commit
920cea2
1 Parent(s): c1de664

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -10
app.py CHANGED
@@ -1,9 +1,3 @@
1
- import os
2
- os.system('git clone https://github.com/facebookresearch/detectron2.git')
3
- os.system('pip install -e detectron2')
4
-
5
- import sys
6
- sys.path.append("detectron2")
7
  from unilm.dit.object_detection.ditod import add_vit_config
8
  import torch
9
  import cv2
@@ -28,20 +22,35 @@ predictor = DefaultPredictor(cfg)
28
 
29
  def analyze_image(img):
30
  md = MetadataCatalog.get(cfg.DATASETS.TEST[0])
 
31
  if cfg.DATASETS.TEST[0] == 'icdar2019_test':
32
  md.set(thing_classes=["table"])
33
  else:
34
  md.set(thing_classes=["text", "title", "list", "table", "figure"])
35
 
36
  output = predictor(img)["instances"]
 
 
 
 
37
  v = Visualizer(img[:, :, ::-1],
38
  md,
39
  scale=1.0,
40
  instance_mode=ColorMode.SEGMENTATION)
41
- result = v.draw_instance_predictions(output.to("cpu"))
 
 
42
  result_image = result.get_image()[:, :, ::-1]
43
 
44
- return result_image
 
 
 
 
 
 
 
 
45
 
46
 
47
  title = " Table Detection with DiT"
@@ -50,9 +59,9 @@ css = ".output-image, .input-image, .image-preview {height: 600px !important}"
50
  iface = gr.Interface(
51
  fn=analyze_image,
52
  inputs=[gr.Image(type="numpy", label="document image")],
53
- outputs=[gr.Image(type="numpy", label="detected tables")],
54
  title=title,
55
 
56
  css=css,
57
  )
58
- iface.launch(debug=True, share=True)
 
 
 
 
 
 
 
1
  from unilm.dit.object_detection.ditod import add_vit_config
2
  import torch
3
  import cv2
 
22
 
23
  def analyze_image(img):
24
  md = MetadataCatalog.get(cfg.DATASETS.TEST[0])
25
+
26
  if cfg.DATASETS.TEST[0] == 'icdar2019_test':
27
  md.set(thing_classes=["table"])
28
  else:
29
  md.set(thing_classes=["text", "title", "list", "table", "figure"])
30
 
31
  output = predictor(img)["instances"]
32
+
33
+ # Filter instances to keep only those corresponding to tables
34
+ table_instances = output[output.pred_classes == md.thing_classes.index("table")]
35
+
36
  v = Visualizer(img[:, :, ::-1],
37
  md,
38
  scale=1.0,
39
  instance_mode=ColorMode.SEGMENTATION)
40
+
41
+ # Draw instance predictions for tables only
42
+ result = v.draw_instance_predictions(table_instances.to("cpu"))
43
  result_image = result.get_image()[:, :, ::-1]
44
 
45
+ # Get bounding box details
46
+ bbox_details = []
47
+ for i in range(len(table_instances)):
48
+ instance = table_instances[i]
49
+ bbox = instance.pred_boxes.tensor.cpu().numpy().tolist()
50
+ score = instance.scores.cpu().numpy().item()
51
+ bbox_details.append({"bbox": bbox, "score": score})
52
+
53
+ return result_image, bbox_details
54
 
55
 
56
  title = " Table Detection with DiT"
 
59
  iface = gr.Interface(
60
  fn=analyze_image,
61
  inputs=[gr.Image(type="numpy", label="document image")],
62
+ outputs=[gr.Image(type="numpy", label="detected tables"), gr.JSON(label="bounding box details")],
63
  title=title,
64
 
65
  css=css,
66
  )
67
+ iface.launch(debug=True, share=True)