Spaces:

wufan
/

unimer_demo

Runtime error

App Files Files Community

wufan committed on Sep 6, 2024

Commit

a47bf95

verified ·

1 Parent(s): a68694f

Upload 5 files

Browse files

Files changed (5) hide show

app.py +65 -58
cfg_base.yaml +46 -0
cfg_small.yaml +46 -0
cfg_tiny.yaml +46 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,76 +1,69 @@
 import os
 import sys
-import argparse
-import numpy as np
-import cv2
 import torch
 import gradio as gr
 from PIL import Image
-# sys.path.insert(0, os.path.join(os.getcwd(), ".."))
-# from unimernet.common.config import Config
-# import unimernet.tasks as tasks
-# from unimernet.processors import load_processor
-# class ImageProcessor:
-#     def __init__(self, cfg_path):
-#         self.cfg_path = cfg_path
-#         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#         self.model, self.vis_processor = self.load_model_and_processor()
-#     def load_model_and_processor(self):
-#         args = argparse.Namespace(cfg_path=self.cfg_path, options=None)
-#         cfg = Config(args)
-#         task = tasks.setup_task(cfg)
-#         model = task.build_model(cfg).to(self.device)
-#         vis_processor = load_processor('formula_image_eval', cfg.config.datasets.formula_rec_eval.vis_processor.eval)
-#         return model, vis_processor
-#     def process_single_image(self, image_path):
-#         try:
-#             raw_image = Image.open(image_path)
-#         except IOError:
-#             print(f"Error: Unable to open image at {image_path}")
-#             return
-#         # Convert PIL Image to OpenCV format
-#         open_cv_image = np.array(raw_image)
-#         # Convert RGB to BGR
-#         if len(open_cv_image.shape) == 3:
-#             # Convert RGB to BGR
-#             open_cv_image = open_cv_image[:, :, ::-1].copy()
-#         # Display the image using cv2
-#         image = self.vis_processor(raw_image).unsqueeze(0).to(self.device)
-#         output = self.model.generate({"image": image})
-#         pred = output["pred_str"][0]
-#         print(f'Prediction:\n{pred}')
-#         cv2.imshow('Original Image', open_cv_image)
-#         cv2.waitKey(0)
-#         cv2.destroyAllWindows()
-#         return pred
-def recognize_image(input_img):
-    # latex_code = processor.process_single_image(input_img.name)
-    return "100"
 def gradio_reset():
     return gr.update(value=None), gr.update(value=None)
 if __name__ == "__main__":
-    # == init model ==
-    # root_path = os.path.abspath(os.getcwd())
-    # config_path = os.path.join(root_path, "cfg_tiny.yaml")
-    # processor_tiny = ImageProcessor(config_path)
-    # print("== all models init. ==")
-    # == init model ==
     with open("header.html", "r") as file:
         header = file.read()
@@ -79,15 +72,29 @@ if __name__ == "__main__":
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label=" ", interactive=True)
                 with gr.Row():
                     clear = gr.Button("Clear")
                     predict = gr.Button(value="Recognize", interactive=True, variant="primary")
             with gr.Column():
                 gr.Button(value="Predict Latex:", interactive=False)
                 pred_latex = gr.Textbox(label='Latex', interactive=False)
         clear.click(gradio_reset, inputs=None, outputs=[input_img, pred_latex])
-        predict.click(recognize_image, inputs=[input_img], outputs=[pred_latex])
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)

 import os
 import sys
+import shutil
 import torch
+import argparse
 import gradio as gr
+import numpy as np
 from PIL import Image
+from huggingface_hub import snapshot_download
+sys.path.insert(0, os.path.join(os.getcwd(), ".."))
+from unimernet.common.config import Config
+import unimernet.tasks as tasks
+from unimernet.processors import load_processor
+def load_model_and_processor(cfg_path):
+    args = argparse.Namespace(cfg_path=cfg_path, options=None)
+    cfg = Config(args)
+    task = tasks.setup_task(cfg)
+    model = task.build_model(cfg)
+    vis_processor = load_processor('formula_image_eval', cfg.config.datasets.formula_rec_eval.vis_processor.eval)
+    return model, vis_processor
+def recognize_image(input_img, model_type):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if model_type == "base":
+        model = model_base.to(device)
+    elif model_type == "small":
+        model = model_small.to(device)
+    else:
+        model = model_tiny.to(device)
+    if len(input_img.shape) == 3:
+            input_img = input_img[:, :, ::-1].copy()
+    img = Image.fromarray(input_img)
+    image = vis_processor(img).unsqueeze(0).to(device)
+    output = model.generate({"image": image})
+    latex_code = output["pred_str"][0]
+    return latex_code
 def gradio_reset():
     return gr.update(value=None), gr.update(value=None)
 if __name__ == "__main__":
+    root_path = os.path.abspath(os.getcwd())
+    # == download weights ==
+    tiny_model_dir = snapshot_download('wanderkid/unimernet_tiny')
+    small_model_dir = snapshot_download('wanderkid/unimernet_small')
+    base_model_dir = snapshot_download('wanderkid/unimernet_base')
+    os.makedirs(os.path.join(root_path, "models"), exist_ok=True)
+    shutil.move(tiny_model_dir, os.path.join(root_path, "models", "unimernet_tiny"))
+    shutil.move(small_model_dir, os.path.join(root_path, "models", "unimernet_small"))
+    shutil.move(base_model_dir, os.path.join(root_path, "models", "unimernet_base"))
+    # == download weights ==
+    # == load model ==
+    model_tiny, vis_processor = load_model_and_processor(os.path.join(root_path, "cfg_tiny.yaml"))
+    model_small, vis_processor = load_model_and_processor(os.path.join(root_path, "cfg_small.yaml"))
+    model_base, vis_processor = load_model_and_processor(os.path.join(root_path, "cfg_base.yaml"))
+    print("== load all models ==")
+    # == load model ==
     with open("header.html", "r") as file:
         header = file.read()
         with gr.Row():
             with gr.Column():
+                model_type = gr.Radio(
+                        choices=["tiny", "small", "base"],
+                        value="tiny",
+                        label="Model Type",
+                        interactive=True,
+                    )
                 input_img = gr.Image(label=" ", interactive=True)
                 with gr.Row():
                     clear = gr.Button("Clear")
                     predict = gr.Button(value="Recognize", interactive=True, variant="primary")
+                with gr.Accordion("Examples:"):
+                    example_root = os.path.join(os.path.dirname(__file__), "examples")
+                    gr.Examples(
+                        examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
+                                    _.endswith("png")],
+                        inputs=input_img,
+                    )
             with gr.Column():
                 gr.Button(value="Predict Latex:", interactive=False)
                 pred_latex = gr.Textbox(label='Latex', interactive=False)
         clear.click(gradio_reset, inputs=None, outputs=[input_img, pred_latex])
+        predict.click(recognize_image, inputs=[input_img, model_type], outputs=[pred_latex])
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)

cfg_base.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+model:
+  arch: unimernet
+  model_type: unimernet
+  model_config:
+    model_name: ./models/unimernet_base
+    max_seq_len: 1536
+  load_pretrained: True
+  pretrained: './models/unimernet_base/unimernet_base.pth'
+  tokenizer_config:
+    path: ./models/unimernet_base
+datasets:
+  formula_rec_eval:
+    vis_processor:
+      eval:
+        name: "formula_image_eval"
+        image_size:
+          - 192
+          - 672
+run:
+  runner: runner_iter
+  task: unimernet_train
+  batch_size_train: 64
+  batch_size_eval: 64
+  num_workers: 1
+  iters_per_inner_epoch: 2000
+  max_iters: 60000
+  seed: 42
+  output_dir: "../output/demo"
+  evaluate: True
+  test_splits: [ "eval" ]
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+  distributed_type: ddp  # or fsdp when training an LLM
+  generate_cfg:
+    temperature: 0.0

cfg_small.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+model:
+  arch: unimernet
+  model_type: unimernet
+  model_config:
+    model_name: ./models/unimernet_small
+    max_seq_len: 1536
+  load_pretrained: True
+  pretrained: './models/unimernet_small/unimernet_small.pth'
+  tokenizer_config:
+    path: ./models/unimernet_small
+datasets:
+  formula_rec_eval:
+    vis_processor:
+      eval:
+        name: "formula_image_eval"
+        image_size:
+          - 192
+          - 672
+run:
+  runner: runner_iter
+  task: unimernet_train
+  batch_size_train: 64
+  batch_size_eval: 64
+  num_workers: 1
+  iters_per_inner_epoch: 2000
+  max_iters: 60000
+  seed: 42
+  output_dir: "../output/demo"
+  evaluate: True
+  test_splits: [ "eval" ]
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+  distributed_type: ddp  # or fsdp when training an LLM
+  generate_cfg:
+    temperature: 0.0

cfg_tiny.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+model:
+  arch: unimernet
+  model_type: unimernet
+  model_config:
+    model_name: ./models/unimernet_tiny
+    max_seq_len: 1536
+  load_pretrained: True
+  pretrained: './models/unimernet_tiny/unimernet_tiny.pth'
+  tokenizer_config:
+    path: ./models/unimernet_tiny
+datasets:
+  formula_rec_eval:
+    vis_processor:
+      eval:
+        name: "formula_image_eval"
+        image_size:
+          - 192
+          - 672
+run:
+  runner: runner_iter
+  task: unimernet_train
+  batch_size_train: 64
+  batch_size_eval: 64
+  num_workers: 1
+  iters_per_inner_epoch: 2000
+  max_iters: 60000
+  seed: 42
+  output_dir: "../output/demo"
+  evaluate: True
+  test_splits: [ "eval" ]
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+  distributed_type: ddp  # or fsdp when training an LLM
+  generate_cfg:
+    temperature: 0.0

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 unimernet==0.1.6
-gradio

 unimernet==0.1.6
+gradio
+transformers==4.44.2