yerang committed on
Commit
59bd2c9
1 Parent(s): 57d5c6c

Update app.py

Files changed (1)
  1. app.py +226 -293
app.py CHANGED
@@ -22,7 +22,7 @@ from elevenlabs_utils import ElevenLabsPipeline
  from setup_environment import initialize_environment
  from src.utils.video import extract_audio
  #from flux_dev import create_flux_tab
- # from flux_schnell import create_flux_tab
  # from diffusers import FluxPipeline
 
  # import gdown
@@ -31,73 +31,7 @@ from src.utils.video import extract_audio
 
 
 
- # #========================= # FLUX model loading setup
- # flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
- # flux_pipe.enable_sequential_cpu_offload()
- # flux_pipe.vae.enable_slicing()
- # flux_pipe.vae.enable_tiling()
- # flux_pipe.to(torch.float16)
 
-
- # @spaces.GPU(duration=120)
- # def generate_image(prompt, guidance_scale, width, height):
- #     # function that generates the image
- #     output_image = flux_pipe(
- #         prompt=prompt,
- #         guidance_scale=guidance_scale,
- #         height=height,
- #         width=width,
- #         num_inference_steps=4,
- #         max_sequence_length=256,
- #     ).images[0]
-
- #     # create the results folder
- #     result_folder = "/tmp/flux/"
- #     os.makedirs(result_folder, exist_ok=True)
-
- #     # build the file name
- #     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
- #     #filename = f"{prompt.replace(' ', '_')}_{timestamp}.png"
- #     filename = f"{'_'.join(prompt.split()[:3])}_{timestamp}.png"
- #     output_path = os.path.join(result_folder, filename)
-
- #     # # save the image
- #     # output_image.save(output_path)
-
- #     return output_image, output_path  # return two outputs
-
- # def flux_tab(): #image_input):  # takes image_input as an argument
- #     with gr.Tab("FLUX Image Generation"):
- #         with gr.Row():
- #             with gr.Column():
- #                 # user input settings
- #                 prompt = gr.Textbox(label="Prompt", value="A cat holding a sign that says hello world")
- #                 guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=20.0, value=3.5, step=0.1)
- #                 width = gr.Slider(label="Width", minimum=256, maximum=2048, value=512, step=64)
- #                 height = gr.Slider(label="Height", minimum=256, maximum=2048, value=512, step=64)
-
- #             with gr.Column():
- #                 # output image and download button
- #                 output_image = gr.Image(type="pil", label="Output")
- #                 download_button = gr.File(label="Download")
- #                 generate_button = gr.Button("Generate Image")
- #                 #use_in_text2lipsync_button = gr.Button("Use this image in Text2Lipsync")  # add a new button
-
- #         # define the click event
- #         generate_button.click(
- #             fn=generate_image,
- #             inputs=[prompt, guidance_scale, width, height],
- #             outputs=[output_image, download_button]
- #         )
-
- #         # # define the click event for the new button
- #         # use_in_text2lipsync_button.click(
- #         #     fn=lambda img: img,  # pass the image through unchanged with a simple lambda
- #         #     inputs=[output_image],  # use the generated image as input
- #         #     outputs=[image_input]  # update image_input in the Text to LipSync tab
- #         # )
-
- # #========================= # FLUX model loading setup
 
  initialize_environment()
 
@@ -128,272 +62,271 @@ from stf_utils import STFPipeline
  # audio_path="assets/examples/driving/test_aud.mp3"
  #audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
 
- @spaces.GPU(duration=120)
- def gpu_wrapped_stf_pipeline_execute(audio_path):
-     return stf_pipeline.execute(audio_path)
 
 
- ###### under testing ######
 
 
- stf_pipeline = STFPipeline()
- driving_video_path=gr.Video()
 
- # set tyro theme
- tyro.extras.set_accent_color("bright_cyan")
- args = tyro.cli(ArgumentConfig)
 
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     with gr.Row():
-         audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
-         stf_button = gr.Button("stf test", variant="primary")
-     stf_button.click(
-         fn=gpu_wrapped_stf_pipeline_execute,
-         inputs=[
-             audio_path_component
-         ],
-         outputs=[driving_video_path]
-     )
-     with gr.Row():
-         driving_video_path.render()
 
-     # with gr.Row():
-     #     create_flux_tab()  # passes image_input to flux_tab
 
- ###### under testing ######
 
 
- # def partial_fields(target_class, kwargs):
- #     return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})
 
- # # set tyro theme
- # tyro.extras.set_accent_color("bright_cyan")
- # args = tyro.cli(ArgumentConfig)
 
- # # specify configs for inference
- # inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attributes of args to initialize InferenceConfig
- # crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attributes of args to initialize CropConfig
 
- # gradio_pipeline = GradioPipeline(
- #     inference_cfg=inference_cfg,
- #     crop_cfg=crop_cfg,
- #     args=args
- # )
 
- # # additional definitions
- # elevenlabs_pipeline = ElevenLabsPipeline()
- # stf_pipeline = STFPipeline()
 
 
- # @spaces.GPU() #duration=240)
- # def gpu_wrapped_stf_pipeline_execute(audio_path):
- #     return stf_pipeline.execute(audio_path)
 
 
- # @spaces.GPU()
- # def gpu_wrapped_elevenlabs_pipeline_generate_voice(text, voice):
- #     return elevenlabs_pipeline.generate_voice(text, voice)
 
 
 
- # @spaces.GPU()
- # def gpu_wrapped_execute_video(*args, **kwargs):
- #     return gradio_pipeline.execute_video(*args, **kwargs)
 
- # @spaces.GPU()
- # def gpu_wrapped_execute_image(*args, **kwargs):
- #     return gradio_pipeline.execute_image(*args, **kwargs)
 
- # def is_square_video(video_path):
- #     video = cv2.VideoCapture(video_path)
 
- #     width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
- #     height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
- #     video.release()
- #     if width != height:
- #         raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
 
- #     return gr.update(visible=True)
 
- # def txt_to_driving_video(text):
- #     audio_path = gpu_wrapped_elevenlabs_pipeline_generate_voice(text)
- #     driving_video_path = gpu_wrapped_stf_pipeline_execute(audio_path)
- #     return driving_video_path
 
- # # assets
- # title_md = "assets/gradio_title.md"
- # example_portrait_dir = "assets/examples/source"
- # example_portrait_dir_custom = "assets/examples/source"
- # example_video_dir = "assets/examples/driving"
- # data_examples = [
- #     [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
- #     [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- # ]
- # #################### interface logic ####################
 
- # # Define components first
- # eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
- # lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
- # retargeting_input_image = gr.Image(type="filepath")
- # output_image = gr.Image(type="numpy")
- # output_image_paste_back = gr.Image(type="numpy")
- # output_video = gr.Video()
- # output_video_concat = gr.Video()
 
- # # video_input = gr.Video()
- # driving_video_path=gr.Video()
 
 
- # with gr.Blocks(theme=gr.themes.Soft()) as demo:
- #     #gr.HTML(load_description(title_md))
-
- #     with gr.Tabs():
- #         with gr.Tab("Text to LipSync"):
- #             gr.Markdown("# Text to LipSync")
- #             with gr.Row():
- #                 with gr.Column():
- #                     script_txt = gr.Text()
- #                 # with gr.Column():
- #                 #     txt2video_gen_button = gr.Button("txt2video generation", variant="primary")
-
- #                 with gr.Column():
- #                     audio_gen_button = gr.Button("Audio generation", variant="primary")
- #             with gr.Row():
- #                 output_audio = gr.Audio(label="Generated audio", type="filepath")
- #             with gr.Row():
- #                 video_gen_button = gr.Button("Audio to Video generation", variant="primary")
 
 
 
- #             gr.Markdown(load_description("assets/gradio_description_upload.md"))
- #             with gr.Row():
- #                 with gr.Accordion(open=True, label="Source Portrait"):
- #                     image_input = gr.Image(type="filepath")
- #                     gr.Examples(
- #                         examples=[
- #                             [osp.join(example_portrait_dir, "01.webp")],
- #                             [osp.join(example_portrait_dir, "02.webp")],
- #                             [osp.join(example_portrait_dir, "03.jpg")],
- #                             [osp.join(example_portrait_dir, "04.jpg")],
- #                             [osp.join(example_portrait_dir, "05.jpg")],
- #                             [osp.join(example_portrait_dir, "06.jpg")],
- #                             [osp.join(example_portrait_dir, "07.jpg")],
- #                             [osp.join(example_portrait_dir, "08.jpg")],
- #                         ],
- #                         inputs=[image_input],
- #                         cache_examples=False,
- #                     )
- #                 with gr.Accordion(open=True, label="Driving Video"):
- #                     video_input = gr.Video()
- #                     gr.Examples(
- #                         examples=[
- #                             [osp.join(example_video_dir, "d0.mp4")],
- #                             [osp.join(example_video_dir, "d18.mp4")],
- #                             [osp.join(example_video_dir, "d19.mp4")],
- #                             [osp.join(example_video_dir, "d14_trim.mp4")],
- #                             [osp.join(example_video_dir, "d6_trim.mp4")],
- #                         ],
- #                         inputs=[video_input],
- #                         cache_examples=False,
- #                     )
- #             with gr.Row():
- #                 with gr.Accordion(open=False, label="Animation Instructions and Options"):
- #                     gr.Markdown(load_description("assets/gradio_description_animation.md"))
- #                     with gr.Row():
- #                         flag_relative_input = gr.Checkbox(value=True, label="relative motion")
- #                         flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
- #                         flag_remap_input = gr.Checkbox(value=True, label="paste-back")
- #             gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
- #             with gr.Row():
- #                 with gr.Column():
- #                     process_button_animation = gr.Button("🚀 Animate", variant="primary")
- #                 with gr.Column():
- #                     process_button_reset = gr.ClearButton([image_input, video_input, output_video, output_video_concat], value="🧹 Clear")
- #             with gr.Row():
- #                 with gr.Column():
- #                     with gr.Accordion(open=True, label="The animated video in the original image space"):
- #                         output_video.render()
- #                 with gr.Column():
- #                     with gr.Accordion(open=True, label="The animated video"):
- #                         output_video_concat.render()
- #             with gr.Row():
- #                 # Examples
- #                 gr.Markdown("## You could also choose the examples below by one click ⬇️")
- #             with gr.Row():
- #                 gr.Examples(
- #                     examples=data_examples,
- #                     fn=gpu_wrapped_execute_video,
- #                     inputs=[
- #                         image_input,
- #                         video_input,
- #                         flag_relative_input,
- #                         flag_do_crop_input,
- #                         flag_remap_input
- #                     ],
- #                     outputs=[output_image, output_image_paste_back],
- #                     examples_per_page=6,
- #                     cache_examples=False,
- #                 )
 
- #             process_button_animation.click(
- #                 fn=gpu_wrapped_execute_video,
- #                 inputs=[
- #                     image_input,
- #                     video_input,
- #                     flag_relative_input,
- #                     flag_do_crop_input,
- #                     flag_remap_input
- #                 ],
- #                 outputs=[output_video, output_video_concat],
- #                 show_progress=True
- #             )
- #             # txt2video_gen_button.click(
- #             #     fn=txt_to_driving_video,
- #             #     inputs=[
- #             #         script_txt
- #             #     ],
- #             #     outputs=[video_input],
- #             #     show_progress=True
- #             # )
- #             audio_gen_button.click(
- #                 fn=gpu_wrapped_elevenlabs_pipeline_generate_voice,
- #                 inputs=[
- #                     script_txt
- #                 ],
- #                 outputs=[output_audio],
- #                 show_progress=True
- #             )
-
- #             video_gen_button.click(
- #                 fn=gpu_wrapped_stf_pipeline_execute,
- #                 inputs=[
- #                     output_audio
- #                 ],
- #                 outputs=[video_input],
- #                 show_progress=True
- #             )
 
 
 
- #             # image_input.change(
- #             #     fn=gradio_pipeline.prepare_retargeting,
- #             #     inputs=image_input,
- #             #     outputs=[eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image]
- #             # )
- #             video_input.upload(
- #                 fn=is_square_video,
- #                 inputs=video_input,
- #                 outputs=video_input
- #             )
 
- #         # Third tab: Flux development tab
- #         with gr.Tab("FLUX Image"):
- #             flux_demo = create_flux_tab(image_input)  # create the Flux development tab
 
  demo.launch(
      server_port=args.server_port,
 
@@ -22,7 +22,7 @@ from elevenlabs_utils import ElevenLabsPipeline
  from setup_environment import initialize_environment
  from src.utils.video import extract_audio
  #from flux_dev import create_flux_tab
+ from flux_schnell import create_flux_tab
  # from diffusers import FluxPipeline
 
  # import gdown
 
@@ -31,73 +31,7 @@ from src.utils.video import extract_audio
 
 
 
 
 
  initialize_environment()
 
 
@@ -128,272 +62,271 @@ from stf_utils import STFPipeline
  # audio_path="assets/examples/driving/test_aud.mp3"
  #audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
 
+ # @spaces.GPU(duration=120)
+ # def gpu_wrapped_stf_pipeline_execute(audio_path):
+ #     return stf_pipeline.execute(audio_path)
 
 
+ # ###### under testing ######
 
 
+ # stf_pipeline = STFPipeline()
+ # driving_video_path=gr.Video()
 
+ # # set tyro theme
+ # tyro.extras.set_accent_color("bright_cyan")
+ # args = tyro.cli(ArgumentConfig)
 
+ # with gr.Blocks(theme=gr.themes.Soft()) as demo:
+ #     with gr.Row():
+ #         audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
+ #         stf_button = gr.Button("stf test", variant="primary")
+ #     stf_button.click(
+ #         fn=gpu_wrapped_stf_pipeline_execute,
+ #         inputs=[
+ #             audio_path_component
+ #         ],
+ #         outputs=[driving_video_path]
+ #     )
+ #     with gr.Row():
+ #         driving_video_path.render()
 
+ #     # with gr.Row():
+ #     #     create_flux_tab()  # passes image_input to flux_tab
 
+ # ###### under testing ######
 
 
+ def partial_fields(target_class, kwargs):
+     return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})
 
+ # set tyro theme
+ tyro.extras.set_accent_color("bright_cyan")
+ args = tyro.cli(ArgumentConfig)
 
+ # specify configs for inference
+ inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attributes of args to initialize InferenceConfig
+ crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attributes of args to initialize CropConfig
 
+ gradio_pipeline = GradioPipeline(
+     inference_cfg=inference_cfg,
+     crop_cfg=crop_cfg,
+     args=args
+ )
 
+ # additional definitions
+ elevenlabs_pipeline = ElevenLabsPipeline()
+ stf_pipeline = STFPipeline()
 
 
+ @spaces.GPU() #duration=240)
+ def gpu_wrapped_stf_pipeline_execute(audio_path):
+     return stf_pipeline.execute(audio_path)
 
 
+ @spaces.GPU()
+ def gpu_wrapped_elevenlabs_pipeline_generate_voice(text, voice):
+     return elevenlabs_pipeline.generate_voice(text, voice)
 
 
 
+ @spaces.GPU()
+ def gpu_wrapped_execute_video(*args, **kwargs):
+     return gradio_pipeline.execute_video(*args, **kwargs)
 
+ @spaces.GPU()
+ def gpu_wrapped_execute_image(*args, **kwargs):
+     return gradio_pipeline.execute_image(*args, **kwargs)
 
+ def is_square_video(video_path):
+     video = cv2.VideoCapture(video_path)
 
+     width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+     height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
+     video.release()
+     if width != height:
+         raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
 
+     return gr.update(visible=True)
 
 
 
+ # assets
+ title_md = "assets/gradio_title.md"
+ example_portrait_dir = "assets/examples/source"
+ example_portrait_dir_custom = "assets/examples/source"
+ example_video_dir = "assets/examples/driving"
+ data_examples = [
+     [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
+     [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+ ]
+ #################### interface logic ####################
 
+ # Define components first
+ eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
+ lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
+ retargeting_input_image = gr.Image(type="filepath")
+ output_image = gr.Image(type="numpy")
+ output_image_paste_back = gr.Image(type="numpy")
+ output_video = gr.Video()
+ output_video_concat = gr.Video()
 
+ # video_input = gr.Video()
+ driving_video_path=gr.Video()
 
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     #gr.HTML(load_description(title_md))
+
+     with gr.Tabs():
+         with gr.Tab("Text to LipSync"):
+             gr.Markdown("# Text to LipSync")
+             with gr.Row():
+                 with gr.Column():
+                     script_txt = gr.Text()
+                 # with gr.Column():
+                 #     txt2video_gen_button = gr.Button("txt2video generation", variant="primary")
+
+                 with gr.Column():
+                     audio_gen_button = gr.Button("Audio generation", variant="primary")
+             with gr.Row():
+                 output_audio = gr.Audio(label="Generated audio", type="filepath")
+             with gr.Row():
+                 video_gen_button = gr.Button("Audio to Video generation", variant="primary")
+             with gr.Row():
+                 a2v_output = gr.Video()
 
 
 
+             gr.Markdown(load_description("assets/gradio_description_upload.md"))
+             with gr.Row():
+                 with gr.Accordion(open=True, label="Source Portrait"):
+                     image_input = gr.Image(type="filepath")
+                     gr.Examples(
+                         examples=[
+                             [osp.join(example_portrait_dir, "01.webp")],
+                             [osp.join(example_portrait_dir, "02.webp")],
+                             [osp.join(example_portrait_dir, "03.jpg")],
+                             [osp.join(example_portrait_dir, "04.jpg")],
+                             [osp.join(example_portrait_dir, "05.jpg")],
+                             [osp.join(example_portrait_dir, "06.jpg")],
+                             [osp.join(example_portrait_dir, "07.jpg")],
+                             [osp.join(example_portrait_dir, "08.jpg")],
+                         ],
+                         inputs=[image_input],
+                         cache_examples=False,
+                     )
+                 with gr.Accordion(open=True, label="Driving Video"):
+                     video_input = gr.Video()
+                     gr.Examples(
+                         examples=[
+                             [osp.join(example_video_dir, "d0.mp4")],
+                             [osp.join(example_video_dir, "d18.mp4")],
+                             [osp.join(example_video_dir, "d19.mp4")],
+                             [osp.join(example_video_dir, "d14_trim.mp4")],
+                             [osp.join(example_video_dir, "d6_trim.mp4")],
+                         ],
+                         inputs=[video_input],
+                         cache_examples=False,
+                     )
+             with gr.Row():
+                 with gr.Accordion(open=False, label="Animation Instructions and Options"):
+                     gr.Markdown(load_description("assets/gradio_description_animation.md"))
+                     with gr.Row():
+                         flag_relative_input = gr.Checkbox(value=True, label="relative motion")
+                         flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
+                         flag_remap_input = gr.Checkbox(value=True, label="paste-back")
+             gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
+             with gr.Row():
+                 with gr.Column():
+                     process_button_animation = gr.Button("🚀 Animate", variant="primary")
+                 with gr.Column():
+                     process_button_reset = gr.ClearButton([image_input, video_input, output_video, output_video_concat], value="🧹 Clear")
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Accordion(open=True, label="The animated video in the original image space"):
+                         output_video.render()
+                 with gr.Column():
+                     with gr.Accordion(open=True, label="The animated video"):
+                         output_video_concat.render()
+             with gr.Row():
+                 # Examples
+                 gr.Markdown("## You could also choose the examples below by one click ⬇️")
+             with gr.Row():
+                 gr.Examples(
+                     examples=data_examples,
+                     fn=gpu_wrapped_execute_video,
+                     inputs=[
+                         image_input,
+                         video_input,
+                         flag_relative_input,
+                         flag_do_crop_input,
+                         flag_remap_input
+                     ],
+                     outputs=[output_image, output_image_paste_back],
+                     examples_per_page=6,
+                     cache_examples=False,
+                 )
 
+             process_button_animation.click(
+                 fn=gpu_wrapped_execute_video,
+                 inputs=[
+                     image_input,
+                     video_input,
+                     flag_relative_input,
+                     flag_do_crop_input,
+                     flag_remap_input
+                 ],
+                 outputs=[output_video, output_video_concat],
+                 show_progress=True
+             )
+             # txt2video_gen_button.click(
+             #     fn=txt_to_driving_video,
+             #     inputs=[
+             #         script_txt
+             #     ],
+             #     outputs=[video_input],
+             #     show_progress=True
+             # )
+             audio_gen_button.click(
+                 fn=gpu_wrapped_elevenlabs_pipeline_generate_voice,
+                 inputs=[
+                     script_txt
+                 ],
+                 outputs=[output_audio],
+                 show_progress=True
+             )
+
+             video_gen_button.click(
+                 fn=gpu_wrapped_stf_pipeline_execute,
+                 inputs=[
+                     output_audio
+                 ],
+                 outputs=[a2v_output],
+                 show_progress=True
+             )
 
 
 
+             # image_input.change(
+             #     fn=gradio_pipeline.prepare_retargeting,
+             #     inputs=image_input,
+             #     outputs=[eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image]
+             # )
+             video_input.upload(
+                 fn=is_square_video,
+                 inputs=video_input,
+                 outputs=video_input
+             )
 
+         # Third tab: Flux development tab
+         with gr.Tab("FLUX Image"):
+             flux_demo = create_flux_tab(image_input)  # create the Flux development tab
 
  demo.launch(
      server_port=args.server_port,