wufan committed on
Commit
cc5f7b7
•
1 Parent(s): bb7b589

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +9 -13
  2. app.py +77 -130
  3. header.html +109 -0
  4. requirements.txt +2 -6
README.md CHANGED
@@ -1,13 +1,9 @@
1
- ---
2
- title: Unimer Demo
3
- emoji: 🖼
4
- colorFrom: purple
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 4.42.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: CDM
3
+ emoji: 📈
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
 
 
 
 
app.py CHANGED
@@ -1,146 +1,93 @@
1
- import gradio as gr
 
 
2
  import numpy as np
3
- import random
4
- #import spaces #[uncomment to use ZeroGPU]
5
- from diffusers import DiffusionPipeline
6
  import torch
7
- import os
 
8
 
9
- # os.system('pip install "unimernet[full]"')
10
- os.system('pwd && ls -l')
 
 
11
 
12
- device = "cuda" if torch.cuda.is_available() else "cpu"
13
- model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use
14
 
15
- if torch.cuda.is_available():
16
- torch_dtype = torch.float16
17
- else:
18
- torch_dtype = torch.float32
 
19
 
20
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
21
- pipe = pipe.to(device)
 
 
 
 
22
 
23
- MAX_SEED = np.iinfo(np.int32).max
24
- MAX_IMAGE_SIZE = 1024
25
 
26
- #@spaces.GPU #[uncomment to use ZeroGPU]
27
- def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- if randomize_seed:
30
- seed = random.randint(0, MAX_SEED)
31
-
32
- generator = torch.Generator().manual_seed(seed)
33
-
34
- image = pipe(
35
- prompt = prompt,
36
- negative_prompt = negative_prompt,
37
- guidance_scale = guidance_scale,
38
- num_inference_steps = num_inference_steps,
39
- width = width,
40
- height = height,
41
- generator = generator
42
- ).images[0]
43
 
44
- return image, seed
45
 
46
- examples = [
47
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
48
- "An astronaut riding a green horse",
49
- "A delicious ceviche cheesecake slice",
50
- ]
 
51
 
52
- css="""
53
- #col-container {
54
- margin: 0 auto;
55
- max-width: 640px;
56
- }
57
- """
58
 
59
- with gr.Blocks(css=css) as demo:
 
 
60
 
61
- with gr.Column(elem_id="col-container"):
62
- gr.Markdown(f"""
63
- # Text-to-Image Gradio Template
64
- """)
65
 
66
  with gr.Row():
67
-
68
- prompt = gr.Text(
69
- label="Prompt",
70
- show_label=False,
71
- max_lines=1,
72
- placeholder="Enter your prompt",
73
- container=False,
74
- )
75
-
76
- run_button = gr.Button("Run", scale=0)
77
-
78
- result = gr.Image(label="Result", show_label=False)
79
-
80
- with gr.Accordion("Advanced Settings", open=False):
81
-
82
- negative_prompt = gr.Text(
83
- label="Negative prompt",
84
- max_lines=1,
85
- placeholder="Enter a negative prompt",
86
- visible=False,
87
- )
88
-
89
- seed = gr.Slider(
90
- label="Seed",
91
- minimum=0,
92
- maximum=MAX_SEED,
93
- step=1,
94
- value=0,
95
- )
96
-
97
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
98
-
99
- with gr.Row():
100
-
101
- width = gr.Slider(
102
- label="Width",
103
- minimum=256,
104
- maximum=MAX_IMAGE_SIZE,
105
- step=32,
106
- value=1024, #Replace with defaults that work for your model
107
- )
108
-
109
- height = gr.Slider(
110
- label="Height",
111
- minimum=256,
112
- maximum=MAX_IMAGE_SIZE,
113
- step=32,
114
- value=1024, #Replace with defaults that work for your model
115
- )
116
-
117
- with gr.Row():
118
-
119
- guidance_scale = gr.Slider(
120
- label="Guidance scale",
121
- minimum=0.0,
122
- maximum=10.0,
123
- step=0.1,
124
- value=0.0, #Replace with defaults that work for your model
125
- )
126
-
127
- num_inference_steps = gr.Slider(
128
- label="Number of inference steps",
129
- minimum=1,
130
- maximum=50,
131
- step=1,
132
- value=2, #Replace with defaults that work for your model
133
- )
134
-
135
- gr.Examples(
136
- examples = examples,
137
- inputs = [prompt]
138
- )
139
- gr.on(
140
- triggers=[run_button.click, prompt.submit],
141
- fn = infer,
142
- inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
143
- outputs = [result, seed]
144
- )
145
-
146
- demo.queue().launch()
 
1
+ import argparse
2
+ import os
3
+ import sys
4
  import numpy as np
5
+
6
+ import cv2
 
7
  import torch
8
+ import gradio as gr
9
+ from PIL import Image
10
 
11
+ sys.path.insert(0, os.path.join(os.getcwd(), ".."))
12
+ from unimernet.common.config import Config
13
+ import unimernet.tasks as tasks
14
+ from unimernet.processors import load_processor
15
 
 
 
16
 
17
+ class ImageProcessor:
18
+ def __init__(self, cfg_path):
19
+ self.cfg_path = cfg_path
20
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
+ self.model, self.vis_processor = self.load_model_and_processor()
22
 
23
+ def load_model_and_processor(self):
24
+ args = argparse.Namespace(cfg_path=self.cfg_path, options=None)
25
+ cfg = Config(args)
26
+ task = tasks.setup_task(cfg)
27
+ model = task.build_model(cfg).to(self.device)
28
+ vis_processor = load_processor('formula_image_eval', cfg.config.datasets.formula_rec_eval.vis_processor.eval)
29
 
30
+ return model, vis_processor
 
31
 
32
+ def process_single_image(self, image_path):
33
+ try:
34
+ raw_image = Image.open(image_path)
35
+ except IOError:
36
+ print(f"Error: Unable to open image at {image_path}")
37
+ return
38
+ # Convert PIL Image to OpenCV format
39
+ open_cv_image = np.array(raw_image)
40
+ # Convert RGB to BGR
41
+ if len(open_cv_image.shape) == 3:
42
+ # Convert RGB to BGR
43
+ open_cv_image = open_cv_image[:, :, ::-1].copy()
44
+ # Display the image using cv2
45
 
46
+ image = self.vis_processor(raw_image).unsqueeze(0).to(self.device)
47
+ output = self.model.generate({"image": image})
48
+ pred = output["pred_str"][0]
49
+ print(f'Prediction:\n{pred}')
50
+
51
+ cv2.imshow('Original Image', open_cv_image)
52
+ cv2.waitKey(0)
53
+ cv2.destroyAllWindows()
54
+
55
+ return pred
 
 
 
 
56
 
 
57
 
58
+ def recognize_image(input_img):
59
+ # latex_code = processor.process_single_image(input_img.name)
60
+ return "100"
61
+
62
+ def gradio_reset():
63
+ return gr.update(value=None)
64
 
65
+
66
+ if __name__ == "__main__":
67
+ # == init model ==
68
+ # root_path = os.path.abspath(os.getcwd())
69
+ # config_path = os.path.join(root_path, "cfg_tiny.yaml")
 
70
 
71
+ # processor_tiny = ImageProcessor(config_path)
72
+ # print("== all models init. ==")
73
+ # == init model ==
74
 
75
+ with open("header.html", "r") as file:
76
+ header = file.read()
77
+ with gr.Blocks() as demo:
78
+ gr.HTML(header)
79
 
80
  with gr.Row():
81
+ with gr.Column():
82
+ input_img = gr.Image(label=" ", interactive=True)
83
+ with gr.Row():
84
+ clear = gr.Button("Clear")
85
+ predict = gr.Button(value="Recognize", interactive=True, variant="primary")
86
+ with gr.Column():
87
+ gr.Button(value="Predict Latex:", interactive=False)
88
+ pred_latex = gr.Textbox(label='Latex', interactive=False)
89
+
90
+ clear.click(gradio_reset, inputs=None, outputs=[input_img, pred_latex])
91
+ predict.click(recognize_image, inputs=[input_img], outputs=[pred_latex])
92
+
93
+ demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
header.html ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <html><head>
2
+ <!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css"> -->
3
+ <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
4
+ <style>
5
+ .link-block {
6
+ border: 1px solid transparent;
7
+ border-radius: 24px;
8
+ background-color: rgba(54, 54, 54, 1);
9
+ cursor: pointer !important;
10
+ }
11
+ .link-block:hover {
12
+ background-color: rgba(54, 54, 54, 0.75) !important;
13
+ cursor: pointer !important;
14
+ }
15
+ .external-link {
16
+ display: inline-flex;
17
+ align-items: center;
18
+ height: 36px;
19
+ line-height: 36px;
20
+ padding: 0 16px;
21
+ cursor: pointer !important;
22
+ }
23
+ .external-link,
24
+ .external-link:hover {
25
+ cursor: pointer !important;
26
+ }
27
+ a {
28
+ text-decoration: none;
29
+ }
30
+ </style></head>
31
+
32
+ <body>
33
+ <div style="
34
+ display: flex;
35
+ flex-direction: column;
36
+ justify-content: center;
37
+ align-items: center;
38
+ text-align: center;
39
+ background: linear-gradient(45deg, #007bff 0%, #0056b3 100%);
40
+ padding: 24px;
41
+ gap: 24px;
42
+ border-radius: 8px;
43
+ ">
44
+ <div style="
45
+ display: flex;
46
+ flex-direction: column;
47
+ align-items: center;
48
+ gap: 16px;
49
+ ">
50
+ <div style="display: flex; flex-direction: column; gap: 8px">
51
+ <h1 style="
52
+ font-size: 48px;
53
+ color: #fafafa;
54
+ margin: 0;
55
+ font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
56
+ 'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
57
+ ">
58
+ UniMERNet
59
+ </h1>
60
+ </div>
61
+ </div>
62
+
63
+ <p style="
64
+ margin: 0;
65
+ line-height: 1.6rem;
66
+ font-size: 16px;
67
+ color: #fafafa;
68
+ opacity: 0.8;
69
+ ">
70
+ A Universal Network for Real-World Mathematical Expression Recognition.<br>
71
+ </p>
72
+ <style>
73
+ .link-block {
74
+ display: inline-block;
75
+ }
76
+ .link-block + .link-block {
77
+ margin-left: 20px;
78
+ }
79
+ </style>
80
+
81
+ <div class="column has-text-centered">
82
+ <div class="publication-links">
83
+ <!-- Code Link. -->
84
+ <span class="link-block">
85
+ <a href="https://github.com/opendatalab/UniMERNet" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
86
+ <span class="icon" style="margin-right: 4px">
87
+ <i class="fab fa-github" style="color: white; margin-right: 4px"></i>
88
+ </span>
89
+ <span style="color: white">Code</span>
90
+ </a>
91
+ </span>
92
+
93
+ <!-- Paper Link. -->
94
+ <span class="link-block">
95
+ <a href="https://arxiv.org/pdf/2404.15254" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
96
+ <span class="icon" style="margin-right: 8px">
97
+ <i class="fas fa-globe" style="color: white"></i>
98
+ </span>
99
+ <span style="color: white">Paper</span>
100
+ </a>
101
+ </span>
102
+ </div>
103
+ </div>
104
+
105
+ <!-- New Demo Links -->
106
+ </div>
107
+
108
+
109
+ </body></html>
requirements.txt CHANGED
@@ -1,6 +1,2 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
- torch
5
- transformers
6
- xformers
 
1
+ unimernet==0.2.0
2
+ gradio==4.16.0