Commit: Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.

Files changed:
- app.py +59 -10
- examples/depth_normal/depth/0036.png +0 -0
- examples/depth_normal/depth/0125.png +0 -0
- examples/depth_normal/depth/0166.png +0 -0
- examples/depth_normal/depth/0168.png +0 -0
- examples/depth_normal/depth/0211.png +0 -0
- examples/depth_normal/depth/0278.png +0 -0
- examples/depth_normal/depth/0282.png +0 -0
- examples/depth_normal/depth/0331.png +0 -0
- examples/depth_normal/depth/0384.png +0 -0
- examples/depth_normal/depth/0432.png +0 -0
- examples/depth_normal/depth/0444.png +0 -0
- examples/depth_normal/depth/0475.png +0 -0
- examples/depth_normal/depth/0476.png +0 -0
- examples/depth_normal/depth/0517.png +0 -0
- examples/depth_normal/depth/0523.png +0 -0
- examples/depth_normal/depth/0524.png +0 -0
- examples/depth_normal/depth/0536.png +0 -0
- examples/depth_normal/depth/0561.png +0 -0
- examples/depth_normal/depth/0565.png +0 -0
- examples/depth_normal/depth/0590.png +0 -0
- examples/depth_normal/depth/0618.png +0 -0
- examples/depth_normal/depth/0716.png +0 -0
- examples/depth_normal/depth/0724.png +0 -0
- examples/depth_normal/depth/0758.png +0 -0
- examples/depth_normal/depth/0759.png +0 -0
- examples/depth_normal/depth/0767.png +0 -0
- examples/depth_normal/depth/0840.png +0 -0
- examples/depth_normal/depth/0849.png +0 -0
- examples/depth_normal/depth/0857.png +0 -0
- examples/depth_normal/depth/0870.png +0 -0
- examples/depth_normal/depth/0905.png +0 -0
- examples/depth_normal/depth/0993.png +0 -0
- examples/depth_normal/depth/1038.png +0 -0
- examples/depth_normal/depth/1074.png +0 -0
- examples/depth_normal/depth/1099.png +0 -0
- examples/depth_normal/depth/1101.png +0 -0
- examples/depth_normal/depth/1146.png +0 -0
- examples/depth_normal/depth/1148.png +0 -0
- examples/depth_normal/depth/1165.png +0 -0
- examples/depth_normal/depth/1173.png +0 -0
- examples/depth_normal/depth/1193.png +0 -0
- examples/depth_normal/depth/1225.png +0 -0
- examples/depth_normal/depth/1257.png +0 -0
- examples/depth_normal/depth/1291.png +0 -0
- examples/depth_normal/depth/1294.png +0 -0
- examples/depth_normal/depth/1346.png +0 -0
- examples/depth_normal/depth/1389.png +0 -0
- examples/depth_normal/depth/1398.png +0 -0
- examples/depth_normal/depth/1407.png +0 -0
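In brief: the app.py diff below adds RGB-D ('rgbd') and RGB-Normal ('rgbn') input support to the demo. It introduces paired transforms (PairRandomResizedCrop, PairToTensor, PairNormalize) that apply one identical augmentation to an RGB frame and its depth or normal map, a load_rgbx helper that stacks the pair into a [2, 3, H, W] tensor, new 'Depth Map' and 'Normal Map' tabs with example galleries, and the four new file-path inputs threaded through the request queue, the streaming callback, and the clear button. The PNG files listed above are the depth-map examples added alongside.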
app.py
CHANGED
@@ -25,7 +25,7 @@ import plotly.graph_objects as go
 from data.fintune_dataset import pc_norm
 from functools import partial
 import glob
-
+import torchvision.transforms.functional as F
 
 T_random_resized_crop = transforms.Compose([
     transforms.RandomResizedCrop(size=(224, 224), scale=(0.9, 1.0), ratio=(0.75, 1.3333), interpolation=3,
@@ -33,6 +33,23 @@ T_random_resized_crop = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
 
+class PairRandomResizedCrop(transforms.RandomResizedCrop):
+    def forward(self, imgs):
+        i, j, h, w = self.get_params(imgs[0], self.scale, self.ratio)
+        return [F.resized_crop(img, i, j, h, w, self.size, self.interpolation, antialias=self.antialias) for img in imgs]
+
+class PairToTensor(transforms.ToTensor):
+    def __call__(self, pics):
+        return [F.to_tensor(pic) for pic in pics]
+
+class PairNormalize(transforms.Normalize):
+    def forward(self, tensors):
+        return [F.normalize(tensor, self.mean, self.std, self.inplace) for tensor in tensors]
+
+transform_pairimg_train = transforms.Compose([
+    PairRandomResizedCrop(size=(224, 224), scale=(0.99, 1.0), ratio=(0.75, 1.3333), interpolation=3, antialias=None),  # 3 is bicubic
+    PairToTensor(),
+    PairNormalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
 
 def load_audio(audio_path):
     fbank = make_audio_features(audio_path, mel_bins=128)
@@ -55,6 +72,17 @@ def load_fmri(fmri_path):
     data = torch.tensor(data[None])
     return data
 
+def load_rgbx(image_path, x_image_path):
+    image = Image.open(image_path).convert('RGB')
+    x_image = Image.open(x_image_path).convert('RGB')
+    x_image = x_image.resize(image.size[-2:])
+
+    image, x_image = transform_pairimg_train([image, x_image])
+
+    # [2, 3, H, W]
+    image = torch.stack([image, x_image], dim=0)
+    return image
+
 def model_worker(
     rank: int, args: argparse.Namespace, barrier: mp.Barrier,
     request_queue: mp.Queue, response_queue: Optional[mp.Queue] = None,
@@ -107,7 +135,7 @@ def model_worker(
     barrier.wait()
 
     while True:
-        img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get()
+        img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get()
         if 'image' in modality and img_path is not None:
             image = Image.open(img_path).convert('RGB')
             inputs = T_random_resized_crop(image)
@@ -119,6 +147,10 @@
             inputs = load_point(point_path)
         elif 'fmri' in modality and fmri_path is not None:
            inputs = load_fmri(fmri_path)
+        elif 'rgbd' in modality and depth_path is not None and depth_rgb_path is not None:
+            inputs = load_rgbx(depth_rgb_path, depth_path)
+        elif 'rgbn' in modality and normal_path is not None and normal_rgb_path is not None:
+            inputs = load_rgbx(normal_rgb_path, normal_path)
         else:
             inputs = None
 
@@ -184,9 +216,9 @@ def gradio_worker(
     def show_user_input(msg, chatbot):
         return "", chatbot + [[msg, None]]
 
-    def stream_model_output(img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality):
+    def stream_model_output(img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality):
         for queue in request_queues:
-            queue.put((img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality))
+            queue.put((img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality))
         while True:
             content_piece = response_queue.get()
             chatbot[-1][1] = content_piece["text"]
@@ -293,10 +325,25 @@
                     examples_per_page=3,
                 )
                 with gr.Tab('Depth Map') as depth_tab:
-                    gr.
+                    depth_path = gr.Image(label='Depth Map', type='filepath')
+                    depth_rgb_path = gr.Image(label='RGB Image', type='filepath')
+                    gr.Examples(
+                        examples=[
+                            [rgb_image.replace('rgb', 'depth'), rgb_image]
+                            for rgb_image in glob.glob("examples/depth_normal/rgb/*.png")[:9]
+                        ],
+                        inputs=[depth_path, depth_rgb_path]
+                    )
                 with gr.Tab('Normal Map') as normal_tab:
-                    gr.
-
+                    normal_path = gr.Image(label='Normal Map', type='filepath')
+                    normal_rgb_path = gr.Image(label='RGB Image', type='filepath')
+                    gr.Examples(
+                        examples=[
+                            [rgb_image.replace('rgb', 'normal'), rgb_image]
+                            for rgb_image in glob.glob("examples/depth_normal/rgb/*.png")[-9:]
+                        ],
+                        inputs=[normal_path, normal_rgb_path]
+                    )
             with gr.Column(scale=2):
                 chatbot = gr.Chatbot(elem_id="chatbot")
                 msg = gr.Textbox()
@@ -304,7 +351,7 @@
                 with gr.Row():
                     submit_button = gr.Button("Submit", variant="primary")
                     undo_button = gr.Button("Undo")
-                    clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path, point_path, fmri_path, point_vis])
+                    clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, point_vis])
                 with gr.Row():
                     max_gen_len = gr.Slider(
                         minimum=1, maximum=args.model_max_seq_len // 2,
@@ -325,16 +372,18 @@
     audio_tab.select(partial(change_modality, 'audio'), [], [modality])
     point_tab.select(partial(change_modality, 'point'), [], [modality])
     fmri_tab.select(partial(change_modality, 'fmri'), [], [modality])
+    depth_tab.select(partial(change_modality, 'rgbd'), [], [modality])
+    normal_tab.select(partial(change_modality, 'rgbn'), [], [modality])
 
     msg.submit(
         show_user_input, [msg, chatbot], [msg, chatbot],
     ).then(
-        stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
+        stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
     )
     submit_button.click(
         show_user_input, [msg, chatbot], [msg, chatbot],
     ).then(
-        stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
+        stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
     )
     undo_button.click(undo, chatbot, chatbot)
     # img_path.change(clear, [], [chatbot, msg])
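The core idea in the new transforms is to sample the random crop parameters once and apply them to every image in the pair, so the RGB frame and its depth or normal map stay pixel-aligned through augmentation. Below is a self-contained sketch of that pipeline mirroring the classes in the diff; it assumes a recent torchvision (one where RandomResizedCrop carries an antialias attribute), and the demo images are synthetic stand-ins rather than files from this repo.

```python
# Sketch of the paired-augmentation pipeline from the diff above.
# Assumption: torchvision new enough that RandomResizedCrop has `antialias`.
import torch
from PIL import Image
import torchvision.transforms as transforms
import torchvision.transforms.functional as F


class PairRandomResizedCrop(transforms.RandomResizedCrop):
    def forward(self, imgs):
        # Sample the crop once (from the first image) and apply it to every
        # image in the list, keeping RGB and depth/normal pixel-aligned.
        i, j, h, w = self.get_params(imgs[0], self.scale, self.ratio)
        return [F.resized_crop(img, i, j, h, w, self.size,
                               self.interpolation, antialias=self.antialias)
                for img in imgs]


class PairToTensor(transforms.ToTensor):
    def __call__(self, pics):
        return [F.to_tensor(pic) for pic in pics]


class PairNormalize(transforms.Normalize):
    def forward(self, tensors):
        return [F.normalize(t, self.mean, self.std, self.inplace) for t in tensors]


transform_pairimg_train = transforms.Compose([
    PairRandomResizedCrop(size=(224, 224), scale=(0.99, 1.0),
                          ratio=(0.75, 1.3333), interpolation=3,  # 3 = bicubic
                          antialias=None),
    PairToTensor(),
    PairNormalize(mean=[0.48145466, 0.4578275, 0.40821073],
                  std=[0.26862954, 0.26130258, 0.27577711])])


def load_rgbx(image, x_image):
    # Same flow as the diff's load_rgbx, but taking PIL images directly.
    x_image = x_image.resize(image.size)
    image, x_image = transform_pairimg_train([image, x_image])
    return torch.stack([image, x_image], dim=0)  # [2, 3, 224, 224]


rgb = Image.new('RGB', (320, 240), (120, 80, 40))      # stand-in RGB frame
depth = Image.new('RGB', (352, 264), (200, 200, 200))  # stand-in depth map
inputs = load_rgbx(rgb, depth)
print(inputs.shape)  # torch.Size([2, 3, 224, 224])
```

One small observation on the original load_rgbx: PIL's Image.size is already a (width, height) 2-tuple, so the size[-2:] slice in the diff is a harmless no-op and the resize simply matches the auxiliary map to the RGB frame's size. In model_worker, this loader is reached when the active tab has set the modality to 'rgbd' or 'rgbn', with the RGB path passed first and the depth or normal path second.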
examples/depth_normal/depth/0036.png ADDED
examples/depth_normal/depth/0125.png ADDED
examples/depth_normal/depth/0166.png ADDED
examples/depth_normal/depth/0168.png ADDED
examples/depth_normal/depth/0211.png ADDED
examples/depth_normal/depth/0278.png ADDED
examples/depth_normal/depth/0282.png ADDED
examples/depth_normal/depth/0331.png ADDED
examples/depth_normal/depth/0384.png ADDED
examples/depth_normal/depth/0432.png ADDED
examples/depth_normal/depth/0444.png ADDED
examples/depth_normal/depth/0475.png ADDED
examples/depth_normal/depth/0476.png ADDED
examples/depth_normal/depth/0517.png ADDED
examples/depth_normal/depth/0523.png ADDED
examples/depth_normal/depth/0524.png ADDED
examples/depth_normal/depth/0536.png ADDED
examples/depth_normal/depth/0561.png ADDED
examples/depth_normal/depth/0565.png ADDED
examples/depth_normal/depth/0590.png ADDED
examples/depth_normal/depth/0618.png ADDED
examples/depth_normal/depth/0716.png ADDED
examples/depth_normal/depth/0724.png ADDED
examples/depth_normal/depth/0758.png ADDED
examples/depth_normal/depth/0759.png ADDED
examples/depth_normal/depth/0767.png ADDED
examples/depth_normal/depth/0840.png ADDED
examples/depth_normal/depth/0849.png ADDED
examples/depth_normal/depth/0857.png ADDED
examples/depth_normal/depth/0870.png ADDED
examples/depth_normal/depth/0905.png ADDED
examples/depth_normal/depth/0993.png ADDED
examples/depth_normal/depth/1038.png ADDED
examples/depth_normal/depth/1074.png ADDED
examples/depth_normal/depth/1099.png ADDED
examples/depth_normal/depth/1101.png ADDED
examples/depth_normal/depth/1146.png ADDED
examples/depth_normal/depth/1148.png ADDED
examples/depth_normal/depth/1165.png ADDED
examples/depth_normal/depth/1173.png ADDED
examples/depth_normal/depth/1193.png ADDED
examples/depth_normal/depth/1225.png ADDED
examples/depth_normal/depth/1257.png ADDED
examples/depth_normal/depth/1291.png ADDED
examples/depth_normal/depth/1294.png ADDED
examples/depth_normal/depth/1346.png ADDED
examples/depth_normal/depth/1389.png ADDED
examples/depth_normal/depth/1398.png ADDED
examples/depth_normal/depth/1407.png ADDED