csuhan committed
Commit 288480f · 1 Parent(s): 930047c

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete list.
Files changed (50)
  1. app.py +59 -10
  2. examples/depth_normal/depth/0036.png +0 -0
  3. examples/depth_normal/depth/0125.png +0 -0
  4. examples/depth_normal/depth/0166.png +0 -0
  5. examples/depth_normal/depth/0168.png +0 -0
  6. examples/depth_normal/depth/0211.png +0 -0
  7. examples/depth_normal/depth/0278.png +0 -0
  8. examples/depth_normal/depth/0282.png +0 -0
  9. examples/depth_normal/depth/0331.png +0 -0
  10. examples/depth_normal/depth/0384.png +0 -0
  11. examples/depth_normal/depth/0432.png +0 -0
  12. examples/depth_normal/depth/0444.png +0 -0
  13. examples/depth_normal/depth/0475.png +0 -0
  14. examples/depth_normal/depth/0476.png +0 -0
  15. examples/depth_normal/depth/0517.png +0 -0
  16. examples/depth_normal/depth/0523.png +0 -0
  17. examples/depth_normal/depth/0524.png +0 -0
  18. examples/depth_normal/depth/0536.png +0 -0
  19. examples/depth_normal/depth/0561.png +0 -0
  20. examples/depth_normal/depth/0565.png +0 -0
  21. examples/depth_normal/depth/0590.png +0 -0
  22. examples/depth_normal/depth/0618.png +0 -0
  23. examples/depth_normal/depth/0716.png +0 -0
  24. examples/depth_normal/depth/0724.png +0 -0
  25. examples/depth_normal/depth/0758.png +0 -0
  26. examples/depth_normal/depth/0759.png +0 -0
  27. examples/depth_normal/depth/0767.png +0 -0
  28. examples/depth_normal/depth/0840.png +0 -0
  29. examples/depth_normal/depth/0849.png +0 -0
  30. examples/depth_normal/depth/0857.png +0 -0
  31. examples/depth_normal/depth/0870.png +0 -0
  32. examples/depth_normal/depth/0905.png +0 -0
  33. examples/depth_normal/depth/0993.png +0 -0
  34. examples/depth_normal/depth/1038.png +0 -0
  35. examples/depth_normal/depth/1074.png +0 -0
  36. examples/depth_normal/depth/1099.png +0 -0
  37. examples/depth_normal/depth/1101.png +0 -0
  38. examples/depth_normal/depth/1146.png +0 -0
  39. examples/depth_normal/depth/1148.png +0 -0
  40. examples/depth_normal/depth/1165.png +0 -0
  41. examples/depth_normal/depth/1173.png +0 -0
  42. examples/depth_normal/depth/1193.png +0 -0
  43. examples/depth_normal/depth/1225.png +0 -0
  44. examples/depth_normal/depth/1257.png +0 -0
  45. examples/depth_normal/depth/1291.png +0 -0
  46. examples/depth_normal/depth/1294.png +0 -0
  47. examples/depth_normal/depth/1346.png +0 -0
  48. examples/depth_normal/depth/1389.png +0 -0
  49. examples/depth_normal/depth/1398.png +0 -0
  50. examples/depth_normal/depth/1407.png +0 -0
app.py CHANGED
@@ -25,7 +25,7 @@ import plotly.graph_objects as go
 from data.fintune_dataset import pc_norm
 from functools import partial
 import glob
-
+import torchvision.transforms.functional as F
 
 T_random_resized_crop = transforms.Compose([
     transforms.RandomResizedCrop(size=(224, 224), scale=(0.9, 1.0), ratio=(0.75, 1.3333), interpolation=3,
@@ -33,6 +33,23 @@ T_random_resized_crop = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
 
+class PairRandomResizedCrop(transforms.RandomResizedCrop):
+    def forward(self, imgs):
+        i, j, h, w = self.get_params(imgs[0], self.scale, self.ratio)
+        return [F.resized_crop(img, i, j, h, w, self.size, self.interpolation, antialias=self.antialias) for img in imgs]
+
+class PairToTensor(transforms.ToTensor):
+    def __call__(self, pics):
+        return [F.to_tensor(pic) for pic in pics]
+
+class PairNormalize(transforms.Normalize):
+    def forward(self, tensors):
+        return [F.normalize(tensor, self.mean, self.std, self.inplace) for tensor in tensors]
+
+transform_pairimg_train = transforms.Compose([
+    PairRandomResizedCrop(size=(224, 224), scale=(0.99, 1.0), ratio=(0.75, 1.3333), interpolation=3, antialias=None),  # 3 is bicubic
+    PairToTensor(),
+    PairNormalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
 
 def load_audio(audio_path):
     fbank = make_audio_features(audio_path, mel_bins=128)
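Note on the new pair transforms above: each `Pair*` subclass samples its random parameters once (via `get_params` on the first image) and applies the identical crop, tensor conversion, and normalization to every image in the list, so an RGB frame and its depth or normal map stay pixel-aligned. A minimal sketch of that invariant, assuming the definitions above are in scope (the stand-in images are hypothetical):

# Sketch (not part of the commit): both images come out the same shape
# because one set of crop parameters is reused for the whole list.
import torch
from PIL import Image

rgb = Image.new('RGB', (640, 480))    # stand-in RGB frame
depth = Image.new('RGB', (640, 480))  # stand-in depth map
out_rgb, out_depth = transform_pairimg_train([rgb, depth])
assert out_rgb.shape == out_depth.shape == torch.Size([3, 224, 224])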
@@ -55,6 +72,17 @@ def load_fmri(fmri_path):
     data = torch.tensor(data[None])
     return data
 
+def load_rgbx(image_path, x_image_path):
+    image = Image.open(image_path).convert('RGB')
+    x_image = Image.open(x_image_path).convert('RGB')
+    x_image = x_image.resize(image.size[-2:])
+
+    image, x_image = transform_pairimg_train([image, x_image])
+
+    # [2, 3, H, W]
+    image = torch.stack([image, x_image], dim=0)
+    return image
+
 def model_worker(
     rank: int, args: argparse.Namespace, barrier: mp.Barrier,
     request_queue: mp.Queue, response_queue: Optional[mp.Queue] = None,
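`load_rgbx` resizes the auxiliary map to the RGB image's size (a PIL `image.size` is already a `(width, height)` pair, so the `[-2:]` slice is effectively a no-op), runs the pair transform, and stacks the result into a single `[2, 3, H, W]` tensor with the RGB view at index 0. A usage sketch with hypothetical example paths:

# Sketch (hypothetical paths): the stacked pair feeds the model as one input.
pair = load_rgbx('examples/depth_normal/rgb/0036.png',
                 'examples/depth_normal/depth/0036.png')
print(pair.shape)  # torch.Size([2, 3, 224, 224])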
@@ -107,7 +135,7 @@ def model_worker(
     barrier.wait()
 
     while True:
-        img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get()
+        img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get()
         if 'image' in modality and img_path is not None:
             image = Image.open(img_path).convert('RGB')
             inputs = T_random_resized_crop(image)
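The request tuple grows from 10 to 14 positional fields here, and its ordering must stay in sync across three places: the `queue.put` call, this `request_queue.get()` unpacking, and the Gradio `inputs` lists further down. A hedged sketch of one way to make that coupling explicit (not what the commit does):

# Sketch of an alternative (not in the commit): a NamedTuple pickles cleanly
# across mp.Queue and names every field, so ordering bugs surface early.
from typing import NamedTuple, Optional

class GenerationRequest(NamedTuple):
    img_path: Optional[str]
    audio_path: Optional[str]
    video_path: Optional[str]
    point_path: Optional[str]
    fmri_path: Optional[str]
    depth_path: Optional[str]
    depth_rgb_path: Optional[str]
    normal_path: Optional[str]
    normal_rgb_path: Optional[str]
    chatbot: list
    max_gen_len: int
    temperature: float
    top_p: float
    modality: str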
@@ -119,6 +147,10 @@
             inputs = load_point(point_path)
         elif 'fmri' in modality and fmri_path is not None:
             inputs = load_fmri(fmri_path)
+        elif 'rgbd' in modality and depth_path is not None and depth_rgb_path is not None:
+            inputs = load_rgbx(depth_rgb_path, depth_path)
+        elif 'rgbn' in modality and normal_path is not None and normal_rgb_path is not None:
+            inputs = load_rgbx(normal_rgb_path, normal_path)
         else:
             inputs = None
 
@@ -184,9 +216,9 @@ def gradio_worker(
     def show_user_input(msg, chatbot):
         return "", chatbot + [[msg, None]]
 
-    def stream_model_output(img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality):
+    def stream_model_output(img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality):
         for queue in request_queues:
-            queue.put((img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality))
+            queue.put((img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality))
         while True:
             content_piece = response_queue.get()
             chatbot[-1][1] = content_piece["text"]
@@ -293,10 +325,25 @@ def gradio_worker(
                         examples_per_page=3,
                     )
                 with gr.Tab('Depth Map') as depth_tab:
-                    gr.Markdown('Coming soon🤗')
+                    depth_path = gr.Image(label='Depth Map', type='filepath')
+                    depth_rgb_path = gr.Image(label='RGB Image', type='filepath')
+                    gr.Examples(
+                        examples=[
+                            [rgb_image.replace('rgb', 'depth'), rgb_image]
+                            for rgb_image in glob.glob("examples/depth_normal/rgb/*.png")[:9]
+                        ],
+                        inputs=[depth_path, depth_rgb_path]
+                    )
                 with gr.Tab('Normal Map') as normal_tab:
-                    gr.Markdown('Coming soon🤗')
-
+                    normal_path = gr.Image(label='Normal Map', type='filepath')
+                    normal_rgb_path = gr.Image(label='RGB Image', type='filepath')
+                    gr.Examples(
+                        examples=[
+                            [rgb_image.replace('rgb', 'normal'), rgb_image]
+                            for rgb_image in glob.glob("examples/depth_normal/rgb/*.png")[-9:]
+                        ],
+                        inputs=[normal_path, normal_rgb_path]
+                    )
             with gr.Column(scale=2):
                 chatbot = gr.Chatbot(elem_id="chatbot")
                 msg = gr.Textbox()
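The `gr.Examples` entries above pair each RGB file with its counterpart purely by substituting `'rgb'` with `'depth'` (or `'normal'`) in the path, so the example folders must mirror each other's file names, which the files added in this commit follow. A quick sketch of the mapping (hypothetical file name):

# Sketch: filename-based pairing; note that str.replace would also rewrite any
# other 'rgb' substring in the path, so the convention relies on clean names.
rgb_image = "examples/depth_normal/rgb/0036.png"
depth_image = rgb_image.replace('rgb', 'depth')
print(depth_image)  # examples/depth_normal/depth/0036.png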
@@ -304,7 +351,7 @@
             with gr.Row():
                 submit_button = gr.Button("Submit", variant="primary")
                 undo_button = gr.Button("Undo")
-                clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path, point_path, fmri_path, point_vis])
+                clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, point_vis])
             with gr.Row():
                 max_gen_len = gr.Slider(
                     minimum=1, maximum=args.model_max_seq_len // 2,
@@ -325,16 +372,18 @@
         audio_tab.select(partial(change_modality, 'audio'), [], [modality])
         point_tab.select(partial(change_modality, 'point'), [], [modality])
         fmri_tab.select(partial(change_modality, 'fmri'), [], [modality])
+        depth_tab.select(partial(change_modality, 'rgbd'), [], [modality])
+        normal_tab.select(partial(change_modality, 'rgbn'), [], [modality])
 
         msg.submit(
             show_user_input, [msg, chatbot], [msg, chatbot],
         ).then(
-            stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
+            stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
         )
         submit_button.click(
             show_user_input, [msg, chatbot], [msg, chatbot],
         ).then(
-            stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
+            stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
         )
         undo_button.click(undo, chatbot, chatbot)
         # img_path.change(clear, [], [chatbot, msg])
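The new tab handlers reuse one `change_modality` callback through `functools.partial`, freezing the modality string per tab; `change_modality` itself is outside this diff, so the sketch below assumes it simply returns the string for the hidden `modality` state:

# Sketch (change_modality's body is not shown in this diff; assumed here):
from functools import partial

def change_modality(modality):
    return modality  # written into the hidden `modality` component

on_depth_select = partial(change_modality, 'rgbd')
assert on_depth_select() == 'rgbd'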
 
examples/depth_normal/depth/0036.png ADDED
examples/depth_normal/depth/0125.png ADDED
examples/depth_normal/depth/0166.png ADDED
examples/depth_normal/depth/0168.png ADDED
examples/depth_normal/depth/0211.png ADDED
examples/depth_normal/depth/0278.png ADDED
examples/depth_normal/depth/0282.png ADDED
examples/depth_normal/depth/0331.png ADDED
examples/depth_normal/depth/0384.png ADDED
examples/depth_normal/depth/0432.png ADDED
examples/depth_normal/depth/0444.png ADDED
examples/depth_normal/depth/0475.png ADDED
examples/depth_normal/depth/0476.png ADDED
examples/depth_normal/depth/0517.png ADDED
examples/depth_normal/depth/0523.png ADDED
examples/depth_normal/depth/0524.png ADDED
examples/depth_normal/depth/0536.png ADDED
examples/depth_normal/depth/0561.png ADDED
examples/depth_normal/depth/0565.png ADDED
examples/depth_normal/depth/0590.png ADDED
examples/depth_normal/depth/0618.png ADDED
examples/depth_normal/depth/0716.png ADDED
examples/depth_normal/depth/0724.png ADDED
examples/depth_normal/depth/0758.png ADDED
examples/depth_normal/depth/0759.png ADDED
examples/depth_normal/depth/0767.png ADDED
examples/depth_normal/depth/0840.png ADDED
examples/depth_normal/depth/0849.png ADDED
examples/depth_normal/depth/0857.png ADDED
examples/depth_normal/depth/0870.png ADDED
examples/depth_normal/depth/0905.png ADDED
examples/depth_normal/depth/0993.png ADDED
examples/depth_normal/depth/1038.png ADDED
examples/depth_normal/depth/1074.png ADDED
examples/depth_normal/depth/1099.png ADDED
examples/depth_normal/depth/1101.png ADDED
examples/depth_normal/depth/1146.png ADDED
examples/depth_normal/depth/1148.png ADDED
examples/depth_normal/depth/1165.png ADDED
examples/depth_normal/depth/1173.png ADDED
examples/depth_normal/depth/1193.png ADDED
examples/depth_normal/depth/1225.png ADDED
examples/depth_normal/depth/1257.png ADDED
examples/depth_normal/depth/1291.png ADDED
examples/depth_normal/depth/1294.png ADDED
examples/depth_normal/depth/1346.png ADDED
examples/depth_normal/depth/1389.png ADDED
examples/depth_normal/depth/1398.png ADDED
examples/depth_normal/depth/1407.png ADDED