Spaces:

wufan
/

unimer_demo

Runtime error

App Files Community

wufan committed on Sep 6, 2024

Commit

cc5f7b7

verified ·

1 Parent(s): bb7b589

Upload 4 files

Browse files

Files changed (4) hide show

README.md +9 -13
app.py +77 -130
header.html +109 -0
requirements.txt +2 -6

README.md CHANGED Viewed

@@ -1,13 +1,9 @@
----
-title: Unimer Demo
-emoji: 🖼
-colorFrom: purple
-colorTo: red
-sdk: gradio
-sdk_version: 4.42.0
-app_file: app.py
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: CDM
+emoji: 📈
+colorFrom: indigo
+colorTo: indigo
+sdk: docker
+pinned: false
+license: mit
+---

app.py CHANGED Viewed

@@ -1,146 +1,93 @@
-import gradio as gr
 import numpy as np
-import random
-#import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
 import torch
-import os
-# os.system('pip install "unimernet[full]"')
-os.system('pwd && ls -l')
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-#@spaces.GPU #[uncomment to use ZeroGPU]
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        guidance_scale = guidance_scale,
-        num_inference_steps = num_inference_steps,
-        width = width,
-        height = height,
-        generator = generator
-    ).images[0]
-    return image, seed
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-css="""
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
-}
-"""
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""
-        # Text-to-Image Gradio Template
-        """)
         with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0)
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024, #Replace with defaults that work for your model
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024, #Replace with defaults that work for your model
-                )
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0, #Replace with defaults that work for your model
-                )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=2, #Replace with defaults that work for your model
-                )
-        gr.Examples(
-            examples = examples,
-            inputs = [prompt]
-        )
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result, seed]
-    )
-demo.queue().launch()

+import argparse
+import os
+import sys
 import numpy as np
+import cv2
 import torch
+import gradio as gr
+from PIL import Image
+sys.path.insert(0, os.path.join(os.getcwd(), ".."))
+from unimernet.common.config import Config
+import unimernet.tasks as tasks
+from unimernet.processors import load_processor
+class ImageProcessor:
+    def __init__(self, cfg_path):
+        self.cfg_path = cfg_path
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model, self.vis_processor = self.load_model_and_processor()
+    def load_model_and_processor(self):
+        args = argparse.Namespace(cfg_path=self.cfg_path, options=None)
+        cfg = Config(args)
+        task = tasks.setup_task(cfg)
+        model = task.build_model(cfg).to(self.device)
+        vis_processor = load_processor('formula_image_eval', cfg.config.datasets.formula_rec_eval.vis_processor.eval)
+        return model, vis_processor
+    def process_single_image(self, image_path):
+        try:
+            raw_image = Image.open(image_path)
+        except IOError:
+            print(f"Error: Unable to open image at {image_path}")
+            return
+        # Convert PIL Image to OpenCV format
+        open_cv_image = np.array(raw_image)
+        # Convert RGB to BGR
+        if len(open_cv_image.shape) == 3:
+            # Convert RGB to BGR
+            open_cv_image = open_cv_image[:, :, ::-1].copy()
+        # Display the image using cv2
+        image = self.vis_processor(raw_image).unsqueeze(0).to(self.device)
+        output = self.model.generate({"image": image})
+        pred = output["pred_str"][0]
+        print(f'Prediction:\n{pred}')
+        cv2.imshow('Original Image', open_cv_image)
+        cv2.waitKey(0)
+        cv2.destroyAllWindows()
+        return pred
+def recognize_image(input_img):
+    # latex_code = processor.process_single_image(input_img.name)
+    return "100"
+def gradio_reset():
+    return gr.update(value=None)
+if __name__ == "__main__":
+    # == init model ==
+    # root_path = os.path.abspath(os.getcwd())
+    # config_path = os.path.join(root_path, "cfg_tiny.yaml")
+    # processor_tiny = ImageProcessor(config_path)
+    # print("== all models init. ==")
+    # == init model ==
+    with open("header.html", "r") as file:
+        header = file.read()
+    with gr.Blocks() as demo:
+        gr.HTML(header)
         with gr.Row():
+            with gr.Column():
+                input_img = gr.Image(label=" ", interactive=True)
+                with gr.Row():
+                    clear = gr.Button("Clear")
+                    predict = gr.Button(value="Recognize", interactive=True, variant="primary")
+            with gr.Column():
+                gr.Button(value="Predict Latex:", interactive=False)
+                pred_latex = gr.Textbox(label='Latex', interactive=False)
+        clear.click(gradio_reset, inputs=None, outputs=[input_img, pred_latex])
+        predict.click(recognize_image, inputs=[input_img], outputs=[pred_latex])
+    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)

header.html ADDED Viewed

@@ -0,0 +1,109 @@

+<html><head>
+    <!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css"> -->
+    <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
+  <style>
+    .link-block {
+      border: 1px solid transparent;
+      border-radius: 24px;
+      background-color: rgba(54, 54, 54, 1);
+      cursor: pointer !important;
+    }
+    .link-block:hover {
+      background-color: rgba(54, 54, 54, 0.75) !important;
+      cursor: pointer !important;
+    }
+    .external-link {
+      display: inline-flex;
+      align-items: center;
+      height: 36px;
+      line-height: 36px;
+      padding: 0 16px;
+      cursor: pointer !important;
+    }
+    .external-link,
+    .external-link:hover {
+      cursor: pointer !important;
+    }
+    a {
+      text-decoration: none;
+    }
+  </style></head>
+  <body>
+    <div style="
+        display: flex;
+        flex-direction: column;
+        justify-content: center;
+        align-items: center;
+        text-align: center;
+        background: linear-gradient(45deg, #007bff 0%, #0056b3 100%);
+        padding: 24px;
+        gap: 24px;
+        border-radius: 8px;
+      ">
+      <div style="
+          display: flex;
+          flex-direction: column;
+          align-items: center;
+          gap: 16px;
+        ">
+        <div style="display: flex; flex-direction: column; gap: 8px">
+          <h1 style="
+              font-size: 48px;
+              color: #fafafa;
+              margin: 0;
+              font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
+                'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
+            ">
+            UniMERNet
+          </h1>
+        </div>
+      </div>
+      <p style="
+          margin: 0;
+          line-height: 1.6rem;
+          font-size: 16px;
+          color: #fafafa;
+          opacity: 0.8;
+        ">
+        A Universal Network for Real-World Mathematical Expression Recognition.<br>
+      </p>
+      <style>
+        .link-block {
+          display: inline-block;
+        }
+        .link-block + .link-block {
+          margin-left: 20px;
+        }
+      </style>
+      <div class="column has-text-centered">
+        <div class="publication-links">
+          <!-- Code Link. -->
+          <span class="link-block">
+            <a href="https://github.com/opendatalab/UniMERNet" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
+              <span class="icon" style="margin-right: 4px">
+                <i class="fab fa-github" style="color: white; margin-right: 4px"></i>
+              </span>
+              <span style="color: white">Code</span>
+            </a>
+          </span>
+          <!-- Paper Link. -->
+          <span class="link-block">
+            <a href="https://arxiv.org/pdf/2404.15254" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
+              <span class="icon" style="margin-right: 8px">
+                <i class="fas fa-globe" style="color: white"></i>
+              </span>
+              <span style="color: white">Paper</span>
+            </a>
+          </span>
+        </div>
+      </div>
+      <!-- New Demo Links -->
+    </div>
+  </body></html>

requirements.txt CHANGED Viewed

@@ -1,6 +1,2 @@
-accelerate
-diffusers
-invisible_watermark
-torch
-transformers
-xformers


+unimernet==0.2.0
+gradio==4.16.0