yerang committed
Commit 35ff6ed
• 1 Parent(s): d0abe82

Update app.py

Files changed (1)
  1. app.py +281 -264
app.py CHANGED
@@ -22,7 +22,8 @@ from elevenlabs_utils import ElevenLabsPipeline
from setup_environment import initialize_environment
from src.utils.video import extract_audio
#from flux_dev import create_flux_tab
- from diffusers import FluxPipeline

# import gdown
# folder_url = f"https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib"
@@ -30,73 +31,73 @@ from diffusers import FluxPipeline



- #========================= # FLUX model loading setup
- flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
- flux_pipe.enable_sequential_cpu_offload()
- flux_pipe.vae.enable_slicing()
- flux_pipe.vae.enable_tiling()
- flux_pipe.to(torch.float16)
-
-
- @spaces.GPU(duration=120)
- def generate_image(prompt, guidance_scale, width, height):
-     # Function that generates the image
-     output_image = flux_pipe(
-         prompt=prompt,
-         guidance_scale=guidance_scale,
-         height=height,
-         width=width,
-         num_inference_steps=4,
-         max_sequence_length=256,
-     ).images[0]
-
-     # Create the results folder
-     result_folder = "/tmp/flux/"
-     os.makedirs(result_folder, exist_ok=True)
-
-     # Build the file name
-     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-     #filename = f"{prompt.replace(' ', '_')}_{timestamp}.png"
-     filename = f"{'_'.join(prompt.split()[:3])}_{timestamp}.png"
-     output_path = os.path.join(result_folder, filename)
-
-     # # Save the image
-     # output_image.save(output_path)
-
-     return output_image, output_path  # Return both outputs
-
- def flux_tab(): #image_input): # takes image_input as an argument
-     with gr.Tab("FLUX Image Generation"):
-         with gr.Row():
-             with gr.Column():
-                 # User input controls
-                 prompt = gr.Textbox(label="Prompt", value="A cat holding a sign that says hello world")
-                 guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=20.0, value=3.5, step=0.1)
-                 width = gr.Slider(label="Width", minimum=256, maximum=2048, value=512, step=64)
-                 height = gr.Slider(label="Height", minimum=256, maximum=2048, value=512, step=64)
-
-             with gr.Column():
-                 # Output image and download button
-                 output_image = gr.Image(type="pil", label="Output")
-                 download_button = gr.File(label="Download")
-                 generate_button = gr.Button("Generate Image")
-                 #use_in_text2lipsync_button = gr.Button("Use this image in Text2Lipsync")  # add a new button
-
-         # Define the click event
-         generate_button.click(
-             fn=generate_image,
-             inputs=[prompt, guidance_scale, width, height],
-             outputs=[output_image, download_button]
-         )

-         # # Define the new button's click event
-         # use_in_text2lipsync_button.click(
-         #     fn=lambda img: img,  # a simple lambda that passes the image through unchanged
-         #     inputs=[output_image],  # use the generated image as input
-         #     outputs=[image_input]  # update image_input in the Text to LipSync tab
-         # )

- #========================= # FLUX model loading setup

initialize_environment()

@@ -124,235 +125,251 @@ from stf_utils import STFPipeline



- audio_path="assets/examples/driving/test_aud.mp3"
#audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")

-
- @spaces.GPU(duration=120)
- def gpu_wrapped_stf_pipeline_execute(audio_path):
-     return stf_pipeline.execute(audio_path)


- ###### testing ######


- stf_pipeline = STFPipeline()
- driving_video_path=gr.Video()

# set tyro theme
tyro.extras.set_accent_color("bright_cyan")
args = tyro.cli(ArgumentConfig)

- with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     with gr.Row():
-         audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
-         stf_button = gr.Button("stf test", variant="primary")
-         stf_button.click(
-             fn=gpu_wrapped_stf_pipeline_execute,
-             inputs=[
-                 audio_path_component
-             ],
-             outputs=[driving_video_path]
-         )
-     with gr.Row():
-         driving_video_path.render()

-     with gr.Row():
-         flux_tab()  # pass image_input to flux_tab


- # def partial_fields(target_class, kwargs):
- #     return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})

- # # set tyro theme
- # tyro.extras.set_accent_color("bright_cyan")
- # args = tyro.cli(ArgumentConfig)

- # # specify configs for inference
- # inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attributes of args to initialize InferenceConfig
- # crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attributes of args to initialize CropConfig

- # gradio_pipeline = GradioPipeline(
- #     inference_cfg=inference_cfg,
- #     crop_cfg=crop_cfg,
- #     args=args
- # )

- # # additional definitions
- # elevenlabs_pipeline = ElevenLabsPipeline()

- # @spaces.GPU(duration=200)
- # def gpu_wrapped_elevenlabs_pipeline_generate_voice(text, voice):
- #     return elevenlabs_pipeline.generate_voice(text, voice)


-

- # @spaces.GPU(duration=240)
- # def gpu_wrapped_execute_video(*args, **kwargs):
- #     return gradio_pipeline.execute_video(*args, **kwargs)
-
- # @spaces.GPU(duration=240)
- # def gpu_wrapped_execute_image(*args, **kwargs):
- #     return gradio_pipeline.execute_image(*args, **kwargs)
-
- # def is_square_video(video_path):
- #     video = cv2.VideoCapture(video_path)
-
- #     width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
- #     height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
- #     video.release()
- #     if width != height:
- #         raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
-
- #     return gr.update(visible=True)
-
- # # assets
- # title_md = "assets/gradio_title.md"
- # example_portrait_dir = "assets/examples/source"
- # example_video_dir = "assets/examples/driving"
- # data_examples = [
- #     [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- # ]
- # #################### interface logic ####################
-
- # # Define components first
- # eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
- # lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
- # retargeting_input_image = gr.Image(type="filepath")
- # output_image = gr.Image(type="numpy")
- # output_image_paste_back = gr.Image(type="numpy")
- # output_video = gr.Video()
- # output_video_concat = gr.Video()

- # with gr.Blocks(theme=gr.themes.Soft()) as demo:
- #     #gr.HTML(load_description(title_md))
-
- #     with gr.Tabs():
- #         with gr.Tab("Text to LipSync"):
- #             gr.Markdown("# Text to LipSync")
- #             with gr.Row():
- #                 with gr.Column():
- #                     script_txt = gr.Text()
- #                 with gr.Column():
- #                     audio_gen_button = gr.Button("Audio generation", variant="primary")
- #             with gr.Row():
- #                 output_audio_path = gr.Audio(label="Generated audio", type="filepath")

- #             gr.Markdown(load_description("assets/gradio_description_upload.md"))
- #             with gr.Row():
- #                 with gr.Accordion(open=True, label="Source Portrait"):
- #                     image_input = gr.Image(type="filepath")
- #                     gr.Examples(
- #                         examples=[
- #                             [osp.join(example_portrait_dir, "s9.jpg")],
- #                             [osp.join(example_portrait_dir, "s6.jpg")],
- #                             [osp.join(example_portrait_dir, "s10.jpg")],
- #                             [osp.join(example_portrait_dir, "s5.jpg")],
- #                             [osp.join(example_portrait_dir, "s7.jpg")],
- #                             [osp.join(example_portrait_dir, "s12.jpg")],
- #                             [osp.join(example_portrait_dir, "s22.jpg")],
- #                         ],
- #                         inputs=[image_input],
- #                         cache_examples=False,
- #                     )
- #                 with gr.Accordion(open=True, label="Driving Video"):
- #                     video_input = gr.Video()
- #                     gr.Examples(
- #                         examples=[
- #                             [osp.join(example_video_dir, "d0.mp4")],
- #                             [osp.join(example_video_dir, "d18.mp4")],
- #                             [osp.join(example_video_dir, "d19.mp4")],
- #                             [osp.join(example_video_dir, "d14_trim.mp4")],
- #                             [osp.join(example_video_dir, "d6_trim.mp4")],
- #                         ],
- #                         inputs=[video_input],
- #                         cache_examples=False,
- #                     )
- #             with gr.Row():
- #                 with gr.Accordion(open=False, label="Animation Instructions and Options"):
- #                     gr.Markdown(load_description("assets/gradio_description_animation.md"))
- #                     with gr.Row():
- #                         flag_relative_input = gr.Checkbox(value=True, label="relative motion")
- #                         flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
- #                         flag_remap_input = gr.Checkbox(value=True, label="paste-back")
- #             gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
- #             with gr.Row():
- #                 with gr.Column():
- #                     process_button_animation = gr.Button("🚀 Animate", variant="primary")
- #                 with gr.Column():
- #                     process_button_reset = gr.ClearButton([image_input, video_input, output_video, output_video_concat], value="🧹 Clear")
- #             with gr.Row():
- #                 with gr.Column():
- #                     with gr.Accordion(open=True, label="The animated video in the original image space"):
- #                         output_video.render()
- #                 with gr.Column():
- #                     with gr.Accordion(open=True, label="The animated video"):
- #                         output_video_concat.render()
- #             with gr.Row():
- #                 # Examples
- #                 gr.Markdown("## You could also choose the examples below by one click ⬇️")
- #             with gr.Row():
- #                 gr.Examples(
- #                     examples=data_examples,
- #                     fn=gpu_wrapped_execute_video,
- #                     inputs=[
- #                         image_input,
- #                         video_input,
- #                         flag_relative_input,
- #                         flag_do_crop_input,
- #                         flag_remap_input
- #                     ],
- #                     outputs=[output_image, output_image_paste_back],
- #                     examples_per_page=6,
- #                     cache_examples=False,
- #                 )

- #             process_button_animation.click(
- #                 fn=gpu_wrapped_execute_video,
- #                 inputs=[
- #                     image_input,
- #                     video_input,
- #                     flag_relative_input,
- #                     flag_do_crop_input,
- #                     flag_remap_input
- #                 ],
- #                 outputs=[output_video, output_video_concat],
- #                 show_progress=True
- #             )
- #             audio_gen_button.click(
- #                 fn=gpu_wrapped_elevenlabs_pipeline_generate_voice,
- #                 inputs=[
- #                     script_txt
- #                 ],
- #                 outputs=[output_audio_path],
- #                 show_progress=True
- #             )


- #             # image_input.change(
- #             #     fn=gradio_pipeline.prepare_retargeting,
- #             #     inputs=image_input,
- #             #     outputs=[eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image]
- #             # )
- #             video_input.upload(
- #                 fn=is_square_video,
- #                 inputs=video_input,
- #                 outputs=video_input
- #             )

- #         # Third tab: the Flux dev tab
- #         with gr.Tab("FLUX Dev"):
- #             flux_demo = create_flux_tab(image_input)  # create the Flux dev tab

demo.launch(
    server_port=args.server_port,
 
from setup_environment import initialize_environment
from src.utils.video import extract_audio
#from flux_dev import create_flux_tab
+ from flux_schnell import create_flux_tab
+ # from diffusers import FluxPipeline

# import gdown
# folder_url = f"https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib"



+ # #========================= # FLUX model loading setup
+ # flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
+ # flux_pipe.enable_sequential_cpu_offload()
+ # flux_pipe.vae.enable_slicing()
+ # flux_pipe.vae.enable_tiling()
+ # flux_pipe.to(torch.float16)
+
+
+ # @spaces.GPU(duration=120)
+ # def generate_image(prompt, guidance_scale, width, height):
+ #     # Function that generates the image
+ #     output_image = flux_pipe(
+ #         prompt=prompt,
+ #         guidance_scale=guidance_scale,
+ #         height=height,
+ #         width=width,
+ #         num_inference_steps=4,
+ #         max_sequence_length=256,
+ #     ).images[0]
+
+ #     # Create the results folder
+ #     result_folder = "/tmp/flux/"
+ #     os.makedirs(result_folder, exist_ok=True)
+
+ #     # Build the file name
+ #     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+ #     #filename = f"{prompt.replace(' ', '_')}_{timestamp}.png"
+ #     filename = f"{'_'.join(prompt.split()[:3])}_{timestamp}.png"
+ #     output_path = os.path.join(result_folder, filename)
+
+ #     # # Save the image
+ #     # output_image.save(output_path)
+
+ #     return output_image, output_path  # Return both outputs
+
+ # def flux_tab(): #image_input): # takes image_input as an argument
+ #     with gr.Tab("FLUX Image Generation"):
+ #         with gr.Row():
+ #             with gr.Column():
+ #                 # User input controls
+ #                 prompt = gr.Textbox(label="Prompt", value="A cat holding a sign that says hello world")
+ #                 guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=20.0, value=3.5, step=0.1)
+ #                 width = gr.Slider(label="Width", minimum=256, maximum=2048, value=512, step=64)
+ #                 height = gr.Slider(label="Height", minimum=256, maximum=2048, value=512, step=64)
+
+ #             with gr.Column():
+ #                 # Output image and download button
+ #                 output_image = gr.Image(type="pil", label="Output")
+ #                 download_button = gr.File(label="Download")
+ #                 generate_button = gr.Button("Generate Image")
+ #                 #use_in_text2lipsync_button = gr.Button("Use this image in Text2Lipsync")  # add a new button
+
+ #         # Define the click event
+ #         generate_button.click(
+ #             fn=generate_image,
+ #             inputs=[prompt, guidance_scale, width, height],
+ #             outputs=[output_image, download_button]
+ #         )

+ # # # Define the new button's click event
+ # # use_in_text2lipsync_button.click(
+ # #     fn=lambda img: img,  # a simple lambda that passes the image through unchanged
+ # #     inputs=[output_image],  # use the generated image as input
+ # #     outputs=[image_input]  # update image_input in the Text to LipSync tab
+ # # )

+ # #========================= # FLUX model loading setup

initialize_environment()



+ # audio_path="assets/examples/driving/test_aud.mp3"
#audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")

+ # @spaces.GPU(duration=120)
+ # def gpu_wrapped_stf_pipeline_execute(audio_path):
+ #     return stf_pipeline.execute(audio_path)


+ # ###### testing ######


+ # stf_pipeline = STFPipeline()
+ # driving_video_path=gr.Video()
+
+ # # set tyro theme
+ # tyro.extras.set_accent_color("bright_cyan")
+ # args = tyro.cli(ArgumentConfig)
+
+ # with gr.Blocks(theme=gr.themes.Soft()) as demo:
+ #     with gr.Row():
+ #         audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
+ #         stf_button = gr.Button("stf test", variant="primary")
+ #         stf_button.click(
+ #             fn=gpu_wrapped_stf_pipeline_execute,
+ #             inputs=[
+ #                 audio_path_component
+ #             ],
+ #             outputs=[driving_video_path]
+ #         )
+ #     with gr.Row():
+ #         driving_video_path.render()
+
+ #     with gr.Row():
+ #         create_flux_tab()  # pass image_input to flux_tab
+
+ # ###### testing ######
+
+
+ def partial_fields(target_class, kwargs):
+     return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})

# set tyro theme
tyro.extras.set_accent_color("bright_cyan")
args = tyro.cli(ArgumentConfig)

+ # specify configs for inference
+ inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attributes of args to initialize InferenceConfig
+ crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attributes of args to initialize CropConfig

+ gradio_pipeline = GradioPipeline(
+     inference_cfg=inference_cfg,
+     crop_cfg=crop_cfg,
+     args=args
+ )

+ # additional definitions
+ elevenlabs_pipeline = ElevenLabsPipeline()
+ stf_pipeline = STFPipeline()
+ driving_video_path=gr.Video()

+ @spaces.GPU(duration=120)
+ def gpu_wrapped_stf_pipeline_execute(audio_path):
+     return stf_pipeline.execute(audio_path)


+ @spaces.GPU(duration=200)
+ def gpu_wrapped_elevenlabs_pipeline_generate_voice(text, voice):
+     return elevenlabs_pipeline.generate_voice(text, voice)



+ @spaces.GPU(duration=240)
+ def gpu_wrapped_execute_video(*args, **kwargs):
+     return gradio_pipeline.execute_video(*args, **kwargs)

+ @spaces.GPU(duration=240)
+ def gpu_wrapped_execute_image(*args, **kwargs):
+     return gradio_pipeline.execute_image(*args, **kwargs)

+ def is_square_video(video_path):
+     video = cv2.VideoCapture(video_path)

+     width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+     height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

+     video.release()
+     if width != height:
+         raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")

+     return gr.update(visible=True)

+ def txt_to_driving_video(text):
+     audio_path = gpu_wrapped_elevenlabs_pipeline_generate_voice(text)
+     driving_video_path = gpu_wrapped_stf_pipeline_execute(audio_path)
+     return driving_video_path
+
+ # assets
+ title_md = "assets/gradio_title.md"
+ example_portrait_dir = "assets/examples/source"
+ example_video_dir = "assets/examples/driving"
+ data_examples = [
+     [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+ ]
+ #################### interface logic ####################
+
+ # Define components first
+ eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
+ lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
+ retargeting_input_image = gr.Image(type="filepath")
+ output_image = gr.Image(type="numpy")
+ output_image_paste_back = gr.Image(type="numpy")
+ output_video = gr.Video()
+ output_video_concat = gr.Video()
+
+ video_input = gr.Video()
+
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     #gr.HTML(load_description(title_md))
+
+     with gr.Tabs():
+         with gr.Tab("Text to LipSync"):
+             gr.Markdown("# Text to LipSync")
+             with gr.Row():
+                 with gr.Column():
+                     script_txt = gr.Text()
+                 with gr.Column():
+                     txt2video_gen_button = gr.Button("txt2video generation", variant="primary")
+
+             # with gr.Column():
+             #     audio_gen_button = gr.Button("Audio generation", variant="primary")
+             # with gr.Row():
+             #     video_input = gr.Audio(label="Generated video", type="filepath")

+             gr.Markdown(load_description("assets/gradio_description_upload.md"))
+             with gr.Row():
+                 with gr.Accordion(open=True, label="Source Portrait"):
+                     image_input = gr.Image(type="filepath")
+                     gr.Examples(
+                         examples=[
+                             [osp.join(example_portrait_dir, "s9.jpg")],
+                             [osp.join(example_portrait_dir, "s6.jpg")],
+                             [osp.join(example_portrait_dir, "s10.jpg")],
+                             [osp.join(example_portrait_dir, "s5.jpg")],
+                             [osp.join(example_portrait_dir, "s7.jpg")],
+                             [osp.join(example_portrait_dir, "s12.jpg")],
+                             [osp.join(example_portrait_dir, "s22.jpg")],
+                         ],
+                         inputs=[image_input],
+                         cache_examples=False,
+                     )
+                 with gr.Accordion(open=True, label="Driving Video"):
+                     #video_input = gr.Video()
+                     gr.Examples(
+                         examples=[
+                             [osp.join(example_video_dir, "d0.mp4")],
+                             [osp.join(example_video_dir, "d18.mp4")],
+                             [osp.join(example_video_dir, "d19.mp4")],
+                             [osp.join(example_video_dir, "d14_trim.mp4")],
+                             [osp.join(example_video_dir, "d6_trim.mp4")],
+                         ],
+                         inputs=[video_input],
+                         cache_examples=False,
+                     )
+             with gr.Row():
+                 with gr.Accordion(open=False, label="Animation Instructions and Options"):
+                     gr.Markdown(load_description("assets/gradio_description_animation.md"))
+                     with gr.Row():
+                         flag_relative_input = gr.Checkbox(value=True, label="relative motion")
+                         flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
+                         flag_remap_input = gr.Checkbox(value=True, label="paste-back")
+             gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
+             with gr.Row():
+                 with gr.Column():
+                     process_button_animation = gr.Button("🚀 Animate", variant="primary")
+                 with gr.Column():
+                     process_button_reset = gr.ClearButton([image_input, video_input, output_video, output_video_concat], value="🧹 Clear")
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Accordion(open=True, label="The animated video in the original image space"):
+                         output_video.render()
+                 with gr.Column():
+                     with gr.Accordion(open=True, label="The animated video"):
+                         output_video_concat.render()
+             with gr.Row():
+                 # Examples
+                 gr.Markdown("## You could also choose the examples below by one click ⬇️")
+             with gr.Row():
+                 gr.Examples(
+                     examples=data_examples,
+                     fn=gpu_wrapped_execute_video,
+                     inputs=[
+                         image_input,
+                         video_input,
+                         flag_relative_input,
+                         flag_do_crop_input,
+                         flag_remap_input
+                     ],
+                     outputs=[output_image, output_image_paste_back],
+                     examples_per_page=6,
+                     cache_examples=False,
+                 )

+             process_button_animation.click(
+                 fn=gpu_wrapped_execute_video,
+                 inputs=[
+                     image_input,
+                     video_input,
+                     flag_relative_input,
+                     flag_do_crop_input,
+                     flag_remap_input
+                 ],
+                 outputs=[output_video, output_video_concat],
+                 show_progress=True
+             )
+             txt2video_gen_button.click(
+                 fn=txt_to_driving_video,
+                 inputs=[
+                     script_txt
+                 ],
+                 outputs=[video_input],
+                 show_progress=True
+             )



+             # image_input.change(
+             #     fn=gradio_pipeline.prepare_retargeting,
+             #     inputs=image_input,
+             #     outputs=[eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image]
+             # )
+             video_input.upload(
+                 fn=is_square_video,
+                 inputs=video_input,
+                 outputs=video_input
+             )

+         # Third tab: the Flux dev tab
+         with gr.Tab("FLUX Image"):
+             flux_demo = create_flux_tab(image_input)  # create the Flux dev tab

demo.launch(
    server_port=args.server_port,
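
Note on the partial_fields helper this commit re-enables: it builds each config object from the tyro CLI namespace by keeping only the keys that the target class declares as class attributes and dropping everything else. A minimal, self-contained sketch of the pattern (DummyConfig and its fields are hypothetical stand-ins for the repo's InferenceConfig/CropConfig):

from dataclasses import dataclass

def partial_fields(target_class, kwargs):
    # Keep only keys that exist as class attributes on target_class.
    # For a dataclass this means fields *with* defaults; fields without
    # defaults are not class attributes, so hasattr filters those keys out.
    return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})

@dataclass
class DummyConfig:                  # hypothetical stand-in
    flag_do_crop: bool = True
    server_port: int = 8890

cli_args = {"flag_do_crop": False, "server_port": 7860, "unrelated_key": 42}
cfg = partial_fields(DummyConfig, cli_args)
print(cfg)  # DummyConfig(flag_do_crop=False, server_port=7860)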