Spaces:

nielsr
/

dit-document-layout-analysis

Running

App Files Files Community

nielsr HF staff committed on Mar 10, 2022

Commit

b47dc23

1 Parent(s): b39aaad

Add link to model doc

Browse files

Files changed (1) hide show

app.py +4 -3

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ import cv2
 from unilm.dit.object_detection.ditod import add_vit_config
 from detectron2.config import CfgNode as CN
 from detectron2.config import get_cfg
 from detectron2.utils.visualizer import ColorMode, Visualizer
@@ -27,8 +29,7 @@ cfg.merge_from_file("cascade_dit_base.yml")
 cfg.MODEL.WEIGHTS = "https://layoutlm.blob.core.windows.net/dit/dit-fts/publaynet_dit-b_cascade.pth"
 # Step 3: set device
-# TODO also support GPU
-cfg.MODEL.DEVICE='cpu'
 # Step 4: define model
 predictor = DefaultPredictor(cfg)
@@ -53,7 +54,7 @@ def analyze_image(img):
 title = "Interactive demo: Document Layout Analysis with DiT"
 description = "Demo for Microsoft's DiT, the Document Image Transformer for state-of-the-art document understanding tasks. This particular model is fine-tuned on PubLayNet, a large dataset for document layout analysis (read more at the links below). To use it, simply upload an image or use the example image below and click 'Submit'. Results will show up in a few seconds. If you want to make the output bigger, right-click on it and select 'Open image in new tab'."
-article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2203.02378' target='_blank'>DiT: Self-supervised Pre-training for Document Image Transformer</a> | <a href='https://github.com/microsoft/unilm/dit' target='_blank'>Github Repo</a></p>"
 examples =[['publaynet_example.jpeg']]
 css = ".output-image, .input-image, .image-preview {height: 600px !important}"

 from unilm.dit.object_detection.ditod import add_vit_config
+import torch
 from detectron2.config import CfgNode as CN
 from detectron2.config import get_cfg
 from detectron2.utils.visualizer import ColorMode, Visualizer
 cfg.MODEL.WEIGHTS = "https://layoutlm.blob.core.windows.net/dit/dit-fts/publaynet_dit-b_cascade.pth"
 # Step 3: set device
+cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # Step 4: define model
 predictor = DefaultPredictor(cfg)
 title = "Interactive demo: Document Layout Analysis with DiT"
 description = "Demo for Microsoft's DiT, the Document Image Transformer for state-of-the-art document understanding tasks. This particular model is fine-tuned on PubLayNet, a large dataset for document layout analysis (read more at the links below). To use it, simply upload an image or use the example image below and click 'Submit'. Results will show up in a few seconds. If you want to make the output bigger, right-click on it and select 'Open image in new tab'."
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2203.02378' target='_blank'>DiT: Self-supervised Pre-training for Document Image Transformer</a> | <a href='https://github.com/microsoft/unilm/tree/master/dit' target='_blank'>Github Repo</a></p> | <a href='https://huggingface.co/docs/transformers/master/en/model_doc/dit' target='_blank'>HuggingFace model doc</a></p>"
 examples =[['publaynet_example.jpeg']]
 css = ".output-image, .input-image, .image-preview {height: 600px !important}"