jeduardogruiz committed on
Commit
4929bfb
1 Parent(s): 3bb2155

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +333 -0
app.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import gradio as gr
4
+ import numpy as np
5
+ import spaces
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from gradio.themes.utils import sizes
9
+ from PIL import Image
10
+ from torchvision import transforms
11
+ import tempfile
12
+
13
class Config:
    """Static paths and checkpoint file names used by the demo."""

    # Assets live in an ``assets`` folder located next to this source file.
    ASSETS_DIR = os.path.join(os.path.dirname(__file__), "assets")
    CHECKPOINTS_DIR = os.path.join(ASSETS_DIR, "checkpoints")

    # Normal-estimation checkpoints, keyed by the model-size label.
    CHECKPOINTS = {
        "0.3b": "sapiens_0.3b_normal_render_people_epoch_66_torchscript.pt2",
        "0.6b": "sapiens_0.6b_normal_render_people_epoch_200_torchscript.pt2",
        "1b": "sapiens_1b_normal_render_people_epoch_115_torchscript.pt2",
        "2b": "sapiens_2b_normal_render_people_epoch_70_torchscript.pt2",
    }

    # Segmentation checkpoints, keyed by label. A value of None means there is
    # no checkpoint file for that choice.
    SEG_CHECKPOINTS = {
        "fg-bg-1b (recommended)": "sapiens_1b_seg_foreground_epoch_8_torchscript.pt2",
        "no-bg-removal": None,
        "part-seg-1b": "sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2",
    }
27
+
28
class ModelManager:
    """Stateless helpers for loading TorchScript checkpoints and running them."""

    @staticmethod
    def load_model(checkpoint_name: str):
        """Load a TorchScript checkpoint and place it on the GPU.

        Args:
            checkpoint_name: File name inside ``Config.CHECKPOINTS_DIR``, or
                ``None`` when there is nothing to load.

        Returns:
            The loaded module in eval mode on ``"cuda"``, or ``None`` when
            ``checkpoint_name`` is ``None``.
        """
        if checkpoint_name is None:
            return None

        scripted = torch.jit.load(
            os.path.join(Config.CHECKPOINTS_DIR, checkpoint_name)
        )
        scripted.eval()
        scripted.to("cuda")
        return scripted

    @staticmethod
    def run_model(model, input_tensor, height, width):
        """Run ``model`` on ``input_tensor`` and resize the result.

        Args:
            model: Callable applied to ``input_tensor``.
            input_tensor: Tensor passed to ``model`` unchanged.
            height: Target height of the returned tensor.
            width: Target width of the returned tensor.

        Returns:
            The model output bilinearly interpolated to ``(height, width)``.
        """
        # Gradients are never needed here, so run everything in inference mode.
        with torch.inference_mode():
            raw_output = model(input_tensor)
            return F.interpolate(
                raw_output,
                size=(height, width),
                mode="bilinear",
                align_corners=False,
            )
44
+
45
class ImageProcessor:
    """Turn an input image into a normal-map visualisation and a raw ``.npy`` file."""

    def __init__(self):
        # Resize to a fixed 1024x768 (height, width), convert to a [0, 1]
        # tensor, then normalise per channel. The statistics are written on a
        # 0-255 scale and divided by 255 to match ToTensor's range.
        self.transform_fn = transforms.Compose([
            transforms.Resize((1024, 768)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[123.5 / 255, 116.5 / 255, 103.5 / 255],
                std=[58.5 / 255, 57.0 / 255, 57.5 / 255],
            ),
        ])

    @spaces.GPU
    def process_image(self, image: Image.Image, normal_model_name: str, seg_model_name: str):
        """Estimate surface normals for ``image`` and optionally mask the background.

        Args:
            image: Input PIL image. Non-RGB images are converted to RGB first.
            normal_model_name: Key into ``Config.CHECKPOINTS``.
            seg_model_name: Key into ``Config.SEG_CHECKPOINTS``;
                ``"no-bg-removal"`` skips segmentation entirely.

        Returns:
            A tuple ``(visualisation, npy_path)``: a PIL image of the
            colour-coded normal map, and the path of a temporary ``.npy`` file
            holding the raw normal map (background pixels are NaN when a
            segmentation model was used).
        """
        # The Normalize step is configured with three channel statistics, so
        # RGBA or grayscale input would fail there; coerce to RGB up front.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Load models here instead of storing them as class attributes
        normal_model = ModelManager.load_model(Config.CHECKPOINTS[normal_model_name])
        input_tensor = self.transform_fn(image).unsqueeze(0).to("cuda")

        # Run normal estimation at model resolution, resized back to the
        # original image size; then move channels last: (H, W, C).
        normal_output = ModelManager.run_model(normal_model, input_tensor, image.height, image.width)
        normal_map = normal_output.squeeze().cpu().numpy().transpose(1, 2, 0)

        # Keep a separate copy for visualisation so the NaN background written
        # into the saved array never reaches the uint8 conversion.
        normal_map_vis = normal_map.copy()

        # Run segmentation
        if seg_model_name != "no-bg-removal":
            seg_model = ModelManager.load_model(Config.SEG_CHECKPOINTS[seg_model_name])
            seg_output = ModelManager.run_model(seg_model, input_tensor, image.height, image.width)
            # Any non-zero predicted class counts as foreground.
            seg_mask = (seg_output.argmax(dim=1) > 0).float().cpu().numpy()[0]

            # Apply segmentation mask to normal maps
            normal_map[seg_mask == 0] = np.nan  # Set background to NaN for NPY file
            normal_map_vis[seg_mask == 0] = -1  # Set background to -1 for visualization

        # Normalize and visualize normal map
        normal_map_vis = self.visualize_normal_map(normal_map_vis)

        # Create downloadable .npy file. NamedTemporaryFile creates the file
        # atomically, unlike the deprecated, race-prone tempfile.mktemp.
        # delete=False keeps it on disk so it can be served after we return.
        with tempfile.NamedTemporaryFile(suffix=".npy", delete=False) as npy_file:
            np.save(npy_file, normal_map)
        npy_path = npy_file.name

        return Image.fromarray(normal_map_vis), npy_path

    @staticmethod
    def visualize_normal_map(normal_map):
        """Convert a normal map into a displayable ``uint8`` array.

        Each vector along the last axis is scaled to unit length (with a small
        epsilon to avoid division by zero), then mapped from ``[-1, 1]`` to
        ``[0, 255]``.

        Args:
            normal_map: Array whose last axis holds the normal vector.

        Returns:
            A ``uint8`` array with the same shape as ``normal_map``.
        """
        normal_map_norm = np.linalg.norm(normal_map, axis=-1, keepdims=True)
        normal_map_normalized = normal_map / (normal_map_norm + 1e-5)
        normal_map_vis = ((normal_map_normalized + 1) / 2 * 255).astype(np.uint8)
        return normal_map_vis
91
+
92
class GradioInterface:
    """Builds the Gradio Blocks UI around an ``ImageProcessor``."""

    def __init__(self):
        self.image_processor = ImageProcessor()

    @staticmethod
    def _example_image_paths():
        """Return the bundled example image paths in sorted order.

        ``os.listdir`` returns entries in arbitrary order, so the names are
        sorted to keep the gallery stable between runs. A missing examples
        folder yields an empty list instead of an exception.
        """
        images_dir = os.path.join(Config.ASSETS_DIR, "images")
        try:
            file_names = sorted(os.listdir(images_dir))
        except FileNotFoundError:
            return []
        return [os.path.join(images_dir, file_name) for file_name in file_names]

    def create_interface(self):
        """Assemble and return the Gradio ``Blocks`` app (not yet launched).

        The page consists of a styled HTML header, an input column (image
        upload, model dropdowns, example gallery) and an output column
        (result image, downloadable ``.npy`` file, Run button).
        """
        app_styles = """
        <style>
            /* Global Styles */
            body, #root {
                font-family: Helvetica, Arial, sans-serif;
                background-color: #1a1a1a;
                color: #fafafa;
            }

            /* Header Styles */
            .app-header {
                background: linear-gradient(45deg, #1a1a1a 0%, #333333 100%);
                padding: 24px;
                border-radius: 8px;
                margin-bottom: 24px;
                text-align: center;
            }

            .app-title {
                font-size: 48px;
                margin: 0;
                color: #fafafa;
            }

            .app-subtitle {
                font-size: 24px;
                margin: 8px 0 16px;
                color: #fafafa;
            }

            .app-description {
                font-size: 16px;
                line-height: 1.6;
                opacity: 0.8;
                margin-bottom: 24px;
            }

            /* Button Styles */
            .publication-links {
                display: flex;
                justify-content: center;
                flex-wrap: wrap;
                gap: 8px;
                margin-bottom: 16px;
            }

            .publication-link {
                display: inline-flex;
                align-items: center;
                padding: 8px 16px;
                background-color: #333;
                color: #fff !important;
                text-decoration: none !important;
                border-radius: 20px;
                font-size: 14px;
                transition: background-color 0.3s;
            }

            .publication-link:hover {
                background-color: #555;
            }

            .publication-link i {
                margin-right: 8px;
            }

            /* Content Styles */
            .content-container {
                background-color: #2a2a2a;
                border-radius: 8px;
                padding: 24px;
                margin-bottom: 24px;
            }

            /* Image Styles */
            .image-preview img {
                max-width: 100%;
                max-height: 512px;
                margin: 0 auto;
                border-radius: 4px;
                display: block;
            }

            /* Control Styles */
            .control-panel {
                background-color: #333;
                padding: 16px;
                border-radius: 8px;
                margin-top: 16px;
            }

            /* Gradio Component Overrides */
            .gr-button {
                background-color: #4a4a4a;
                color: #fff;
                border: none;
                border-radius: 4px;
                padding: 8px 16px;
                cursor: pointer;
                transition: background-color 0.3s;
            }

            .gr-button:hover {
                background-color: #5a5a5a;
            }

            .gr-input, .gr-dropdown {
                background-color: #3a3a3a;
                color: #fff;
                border: 1px solid #4a4a4a;
                border-radius: 4px;
                padding: 8px;
            }

            .gr-form {
                background-color: transparent;
            }

            .gr-panel {
                border: none;
                background-color: transparent;
            }

            /* Override any conflicting styles from Bulma */
            .button.is-normal.is-rounded.is-dark {
                color: #fff !important;
                text-decoration: none !important;
            }
        </style>
        """

        # The only interpolated field is {app_styles}; the CSS itself lives in
        # a plain (non-f) string so its braces need no escaping.
        header_html = f"""
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css">
        <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
        {app_styles}
        <div class="app-header">
            <h1 class="app-title">Sapiens: Normal Estimation</h1>
            <h2 class="app-subtitle">ECCV 2024 (Oral)</h2>
            <p class="app-description">
                Meta presents Sapiens, foundation models for human tasks pretrained on 300 million human images.
                This demo showcases the finetuned normal estimation model. <br>
                Checkout other normal estimation baselines to compare: <a href="https://huggingface.co/spaces/Stable-X/normal-estimation-arena" style="color: #3273dc;">normal-estimation-arena</a>
            </p>
            <div class="publication-links">
                <a href="https://arxiv.org/abs/2408.12569" class="publication-link">
                    <i class="fas fa-file-pdf"></i>arXiv
                </a>
                <a href="https://github.com/facebookresearch/sapiens" class="publication-link">
                    <i class="fab fa-github"></i>Code
                </a>
                <a href="https://about.meta.com/realitylabs/codecavatars/sapiens/" class="publication-link">
                    <i class="fas fa-globe"></i>Meta
                </a>
                <a href="https://rawalkhirodkar.github.io/sapiens" class="publication-link">
                    <i class="fas fa-chart-bar"></i>Results
                </a>
            </div>
            <div class="publication-links">
                <a href="https://huggingface.co/spaces/facebook/sapiens_pose" class="publication-link">
                    <i class="fas fa-user"></i>Demo-Pose
                </a>
                <a href="https://huggingface.co/spaces/facebook/sapiens_seg" class="publication-link">
                    <i class="fas fa-puzzle-piece"></i>Demo-Seg
                </a>
                <a href="https://huggingface.co/spaces/facebook/sapiens_depth" class="publication-link">
                    <i class="fas fa-cube"></i>Demo-Depth
                </a>
                <a href="https://huggingface.co/spaces/facebook/sapiens_normal" class="publication-link">
                    <i class="fas fa-vector-square"></i>Demo-Normal
                </a>
            </div>
        </div>
        """

        # NOTE: keep this callback named ``process_image``; Gradio derives the
        # default API endpoint name from the function name.
        def process_image(image, normal_model_name, seg_model_name):
            # Clicking Run with an empty image component passes None, which
            # would otherwise fail deep inside the processor with an opaque
            # error. Surface a readable message in the UI instead.
            if image is None:
                raise gr.Error("Please provide an input image before clicking Run.")
            return self.image_processor.process_image(image, normal_model_name, seg_model_name)

        # Reload the page once with ?__theme=dark if that parameter is not
        # already set.
        js_func = """
        function refresh() {
            const url = new URL(window.location);
            if (url.searchParams.get('__theme') !== 'dark') {
                url.searchParams.set('__theme', 'dark');
                window.location.href = url.href;
            }
        }
        """

        with gr.Blocks(js=js_func, theme=gr.themes.Default()) as demo:
            gr.HTML(header_html)
            with gr.Row(elem_classes="content-container"):
                with gr.Column():
                    input_image = gr.Image(label="Input Image", type="pil", format="png", elem_classes="image-preview")
                    with gr.Row(elem_classes="control-panel"):
                        normal_model_name = gr.Dropdown(
                            label="Normal Model Size",
                            choices=list(Config.CHECKPOINTS.keys()),
                            value="1b",
                        )
                        seg_model_name = gr.Dropdown(
                            label="Background Removal Model",
                            choices=list(Config.SEG_CHECKPOINTS.keys()),
                            value="fg-bg-1b (recommended)",
                        )
                    # Only render the gallery when there is something to show.
                    example_paths = self._example_image_paths()
                    if example_paths:
                        gr.Examples(
                            inputs=input_image,
                            examples_per_page=14,
                            examples=example_paths,
                        )
                with gr.Column():
                    result_image = gr.Image(label="Normal Estimation Result", type="pil", elem_classes="image-preview")
                    npy_output = gr.File(label="Output (.npy). Note: Background normal is NaN.")
                    run_button = gr.Button("Run", elem_classes="gr-button")

            run_button.click(
                fn=process_image,
                inputs=[input_image, normal_model_name, seg_model_name],
                outputs=[result_image, npy_output],
            )

        return demo
321
+
322
def main():
    """Enable TF32 where the GPU supports it, then build and launch the demo."""
    # TF32 is switched on only when CUDA is present and device 0 reports a
    # compute-capability major version of 8 or higher. The second check is
    # skipped entirely when CUDA is unavailable.
    tf32_capable = (
        torch.cuda.is_available()
        and torch.cuda.get_device_properties(0).major >= 8
    )
    if tf32_capable:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

    app = GradioInterface().create_interface()
    app.launch(share=False)


if __name__ == "__main__":
    main()