saicharan1234 committed on
Commit
8eca9ee
1 Parent(s): 69dd2a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -2,16 +2,34 @@ import os
2
  import shutil
3
  from huggingface_hub import snapshot_download
4
  import gradio as gr
5
- os.chdir(os.path.dirname(os.path.abspath(__file__)))
6
- from scripts.inference import inference_process
 
7
  import argparse
8
  import uuid
9
 
 
 
 
10
  is_shared_ui = True if "fudan-generative-ai/hallo" in os.environ['SPACE_ID'] else False
11
 
12
  if not is_shared_ui:
13
  hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def run_inference(source_image, driving_audio, pose_weight, face_weight, lip_weight, face_expand_ratio, progress=gr.Progress(track_tqdm=True)):
16
  if is_shared_ui:
17
  raise gr.Error("This Space only works in duplicated instances")
@@ -33,23 +51,26 @@ def run_inference(source_image, driving_audio, pose_weight, face_weight, lip_wei
33
  inference_process(args)
34
  return f'output-{unique_id}.mp4'
35
 
36
- with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8' ) as demo:
37
  gr.Markdown(
38
  """
39
- # Talking Head Generation
40
  Upload a face image and driving audio, and adjust the weights to generate a talking head video.
 
 
 
 
 
 
41
  """
42
  )
43
 
44
  with gr.Row():
45
  with gr.Column():
46
- avatar_face = gr.Image(type="filepath", label="Face", elem_id="face-input")
47
- driving_audio = gr.Audio(type="filepath", label="Driving Audio", elem_id="audio-input")
48
-
49
-
50
 
51
  with gr.Column():
52
- output_video = gr.Video(label="Your Talking Head", elem_id="output-video")
53
  with gr.Accordion("Advanced Settings", open=False):
54
  pose_weight = gr.Slider(minimum=0.0, value=1.5, label="Pose Weight")
55
  face_weight = gr.Slider(minimum=0.0, value=1.0, label="Face Weight")
@@ -57,6 +78,7 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8' ) as demo:
57
  face_expand_ratio = gr.Slider(minimum=0.0, value=1.2, label="Face Expand Ratio")
58
 
59
  generate = gr.Button("Generate", elem_id="generate-button")
 
60
 
61
  generate.click(
62
  fn=run_inference,
 
2
  import shutil
3
  from huggingface_hub import snapshot_download
4
  import gradio as gr
5
+ import numpy as np
6
+ from PIL import Image
7
+ import soundfile as sf
8
  import argparse
9
  import uuid
10
 
11
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
12
+ from scripts.inference import inference_process
13
+
14
# The public demo Space ("fudan-generative-ai/hallo") only shows the UI;
# inference is refused there (see the is_shared_ui check in run_inference).
# SPACE_ID is read with a default so the script can also start where the
# variable is not set, instead of failing with KeyError at import time.
is_shared_ui = "fudan-generative-ai/hallo" in os.environ.get('SPACE_ID', '')

if not is_shared_ui:
    # Duplicated/private instances fetch the model snapshot into
    # ./pretrained_models.
    hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
18
 
19
def check_image_square(image_path):
    """Validate that the uploaded face image is square.

    Args:
        image_path: Filesystem path of the uploaded image. May be ``None``
            (or empty) when there is no image to validate, e.g. after the
            input has been cleared.

    Returns:
        ``image_path`` unchanged when the image is square, or when there is
        nothing to validate.

    Raises:
        gr.Error: If the image's width and height differ.
    """
    # Nothing uploaded: pass the empty value through instead of crashing
    # inside Image.open.
    if not image_path:
        return image_path

    # Use the context manager so the underlying file handle is released as
    # soon as the dimensions have been read.
    with Image.open(image_path) as image:
        width, height = image.size

    if width != height:
        raise gr.Error("The uploaded image is not square. Please upload a square image.")
    return image_path
24
+
25
def convert_audio_to_wav(audio_path):
    """Return a WAV version of the given audio file, converting if needed.

    Args:
        audio_path: Filesystem path of the uploaded audio. May be ``None``
            (or empty) when there is no audio to convert.

    Returns:
        ``audio_path`` itself when it is empty or already has a ``.wav``
        extension (compared case-insensitively); otherwise the path of a
        newly written WAV file placed next to the source, with the source's
        extension replaced by ``.wav``.
    """
    # Nothing uploaded: pass the empty value through unchanged.
    if not audio_path:
        return audio_path

    # splitext only looks at the final path component, so a dot in a
    # directory name cannot truncate the output path.
    stem, extension = os.path.splitext(audio_path)
    if extension.lower() == '.wav':
        return audio_path

    audio_data, samplerate = sf.read(audio_path)
    wav_path = stem + '.wav'
    sf.write(wav_path, audio_data, samplerate)
    return wav_path
32
+
33
  def run_inference(source_image, driving_audio, pose_weight, face_weight, lip_weight, face_expand_ratio, progress=gr.Progress(track_tqdm=True)):
34
  if is_shared_ui:
35
  raise gr.Error("This Space only works in duplicated instances")
 
51
  inference_process(args)
52
  return f'output-{unique_id}.mp4'
53
 
54
+ with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8') as demo:
55
  gr.Markdown(
56
  """
57
+ # Talking Head Generation :🗣️📢
58
  Upload a face image and driving audio, and adjust the weights to generate a talking head video.
59
+
60
+ > **Note:**
61
+ > - The face should be the main focus, making up 50%-70% of the image.
62
+ > - The face should be facing forward, with a rotation angle of less than 30° (no side profiles).
63
+ > - To make it work, duplicate the Space and run it on your own profile using a private GPU.
64
+ > - An L4 costs US$0.80/h.
65
  """
66
  )
67
 
68
  with gr.Row():
69
  with gr.Column():
70
+ avatar_face = gr.Image(type="filepath", label="Face", elem_id="face-input").change(check_image_square, avatar_face)
71
+ driving_audio = gr.Audio(type="filepath", label="Driving Audio", elem_id="audio-input").change(convert_audio_to_wav, driving_audio)
 
 
72
 
73
  with gr.Column():
 
74
  with gr.Accordion("Advanced Settings", open=False):
75
  pose_weight = gr.Slider(minimum=0.0, value=1.5, label="Pose Weight")
76
  face_weight = gr.Slider(minimum=0.0, value=1.0, label="Face Weight")
 
78
  face_expand_ratio = gr.Slider(minimum=0.0, value=1.2, label="Face Expand Ratio")
79
 
80
  generate = gr.Button("Generate", elem_id="generate-button")
81
+ output_video = gr.Video(label="Your Talking Head", elem_id="output-video")
82
 
83
  generate.click(
84
  fn=run_inference,