John6666 committed
Commit 2642d96
1 Parent(s): 4cfe29b

Update src/gradio_demo.py

Files changed (1)
  1. src/gradio_demo.py +170 -169
src/gradio_demo.py CHANGED
@@ -1,170 +1,171 @@
-import torch, uuid
-import os, sys, shutil, platform
-from src.facerender.pirender_animate import AnimateFromCoeff_PIRender
-from src.utils.preprocess import CropAndExtract
-from src.test_audio2coeff import Audio2Coeff
-from src.facerender.animate import AnimateFromCoeff
-from src.generate_batch import get_data
-from src.generate_facerender_batch import get_facerender_data
-
-from src.utils.init_path import init_path
-
-from pydub import AudioSegment
-
-
-def mp3_to_wav(mp3_filename, wav_filename, frame_rate):
-    mp3_file = AudioSegment.from_file(file=mp3_filename)
-    mp3_file.set_frame_rate(frame_rate).export(wav_filename, format="wav")
-
-
-class SadTalker():
-
-    def __init__(self, checkpoint_path='checkpoints', config_path='src/config', lazy_load=False):
-
-        if torch.cuda.is_available():
-            device = "cuda"
-        elif platform.system() == 'Darwin':  # macos
-            device = "mps"
-        else:
-            device = "cpu"
-
-        self.device = device
-
-        os.environ['TORCH_HOME'] = checkpoint_path
-
-        self.checkpoint_path = checkpoint_path
-        self.config_path = config_path
-
-
-    def test(self, source_image, driven_audio, preprocess='crop',
-             still_mode=False, use_enhancer=False, batch_size=1, size=256,
-             pose_style=0,
-             facerender='facevid2vid',
-             exp_scale=1.0,
-             use_ref_video=False,
-             ref_video=None,
-             ref_info=None,
-             use_idle_mode=False,
-             length_of_audio=0, use_blink=True,
-             result_dir='./results/'):
-
-        self.sadtalker_paths = init_path(self.checkpoint_path, self.config_path, size, False, preprocess)
-        print(self.sadtalker_paths)
-
-        self.audio_to_coeff = Audio2Coeff(self.sadtalker_paths, self.device)
-        self.preprocess_model = CropAndExtract(self.sadtalker_paths, self.device)
-
-        if facerender == 'facevid2vid' and self.device != 'mps':
-            self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device)
-        elif facerender == 'pirender' or self.device == 'mps':
-            self.animate_from_coeff = AnimateFromCoeff_PIRender(self.sadtalker_paths, self.device)
-            facerender = 'pirender'
-        else:
-            raise RuntimeError('Unknown model: {}'.format(facerender))
-
-
-        time_tag = str(uuid.uuid4())
-        save_dir = os.path.join(result_dir, time_tag)
-        os.makedirs(save_dir, exist_ok=True)
-
-        input_dir = os.path.join(save_dir, 'input')
-        os.makedirs(input_dir, exist_ok=True)
-
-        print(source_image)
-        pic_path = os.path.join(input_dir, os.path.basename(source_image))
-        shutil.move(source_image, input_dir)
-
-        if driven_audio is not None and os.path.isfile(driven_audio):
-            audio_path = os.path.join(input_dir, os.path.basename(driven_audio))
-
-            #### mp3 to wav
-            if '.mp3' in audio_path:
-                mp3_to_wav(driven_audio, audio_path.replace('.mp3', '.wav'), 16000)
-                audio_path = audio_path.replace('.mp3', '.wav')
-            else:
-                shutil.move(driven_audio, input_dir)
-
-        elif use_idle_mode:
-            audio_path = os.path.join(input_dir, 'idlemode_' + str(length_of_audio) + '.wav')  ## generate audio from this new audio_path
-            from pydub import AudioSegment
-            one_sec_segment = AudioSegment.silent(duration=1000 * length_of_audio)  # duration in milliseconds
-            one_sec_segment.export(audio_path, format="wav")
-        else:
-            print(use_ref_video, ref_info)
-            assert use_ref_video == True and ref_info == 'all'
-
-        if use_ref_video and ref_info == 'all':  # full ref mode
-            ref_video_videoname = os.path.basename(ref_video)
-            audio_path = os.path.join(save_dir, ref_video_videoname + '.wav')
-            print('new audiopath:', audio_path)
-            # if ref_video contains audio, set the audio from ref_video.
-            cmd = r"ffmpeg -y -hide_banner -loglevel error -i %s %s" % (ref_video, audio_path)
-            os.system(cmd)
-
-        os.makedirs(save_dir, exist_ok=True)
-
-        # crop image and extract 3dmm from image
-        first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
-        os.makedirs(first_frame_dir, exist_ok=True)
-        first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(pic_path, first_frame_dir, preprocess, True, size)
-
-        if first_coeff_path is None:
-            raise AttributeError("No face is detected")
-
-        if use_ref_video:
-            print('using ref video for generation')
-            ref_video_videoname = os.path.splitext(os.path.split(ref_video)[-1])[0]
-            ref_video_frame_dir = os.path.join(save_dir, ref_video_videoname)
-            os.makedirs(ref_video_frame_dir, exist_ok=True)
-            print('3DMM Extraction for the reference video providing pose')
-            ref_video_coeff_path, _, _ = self.preprocess_model.generate(ref_video, ref_video_frame_dir, preprocess, source_image_flag=False)
-        else:
-            ref_video_coeff_path = None
-
-        if use_ref_video:
-            if ref_info == 'pose':
-                ref_pose_coeff_path = ref_video_coeff_path
-                ref_eyeblink_coeff_path = None
-            elif ref_info == 'blink':
-                ref_pose_coeff_path = None
-                ref_eyeblink_coeff_path = ref_video_coeff_path
-            elif ref_info == 'pose+blink':
-                ref_pose_coeff_path = ref_video_coeff_path
-                ref_eyeblink_coeff_path = ref_video_coeff_path
-            elif ref_info == 'all':
-                ref_pose_coeff_path = None
-                ref_eyeblink_coeff_path = None
-            else:
-                raise ValueError('error in ref_info')
-        else:
-            ref_pose_coeff_path = None
-            ref_eyeblink_coeff_path = None
-
-        # audio2coeff
-        if use_ref_video and ref_info == 'all':
-            coeff_path = ref_video_coeff_path  # self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)
-        else:
-            batch = get_data(first_coeff_path, audio_path, self.device, ref_eyeblink_coeff_path=ref_eyeblink_coeff_path, still=still_mode,
-                             idlemode=use_idle_mode, length_of_audio=length_of_audio, use_blink=use_blink)  # longer audio?
-            coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)
-
-        # coeff2video
-        data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode,
-                                   preprocess=preprocess, size=size, expression_scale=exp_scale, facemodel=facerender)
-        return_path = self.animate_from_coeff.generate(data, save_dir, pic_path, crop_info, enhancer='gfpgan' if use_enhancer else None, preprocess=preprocess, img_size=size)
-        video_name = data['video_name']
-        print(f'The generated video is named {video_name} in {save_dir}')
-
-        del self.preprocess_model
-        del self.audio_to_coeff
-        del self.animate_from_coeff
-
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-
-        import gc; gc.collect()
-
-        return return_path
-
+import spaces
+import torch, uuid
+import os, sys, shutil, platform
+from src.facerender.pirender_animate import AnimateFromCoeff_PIRender
+from src.utils.preprocess import CropAndExtract
+from src.test_audio2coeff import Audio2Coeff
+from src.facerender.animate import AnimateFromCoeff
+from src.generate_batch import get_data
+from src.generate_facerender_batch import get_facerender_data
+
+from src.utils.init_path import init_path
+
+from pydub import AudioSegment
+
+
+def mp3_to_wav(mp3_filename, wav_filename, frame_rate):
+    mp3_file = AudioSegment.from_file(file=mp3_filename)
+    mp3_file.set_frame_rate(frame_rate).export(wav_filename, format="wav")
+
+
+class SadTalker():
+
+    def __init__(self, checkpoint_path='checkpoints', config_path='src/config', lazy_load=False):
+
+        if torch.cuda.is_available():
+            device = "cuda"
+        elif platform.system() == 'Darwin':  # macos
+            device = "mps"
+        else:
+            device = "cpu"
+
+        self.device = device
+
+        os.environ['TORCH_HOME'] = checkpoint_path
+
+        self.checkpoint_path = checkpoint_path
+        self.config_path = config_path
+
+    @spaces.GPU
+    def test(self, source_image, driven_audio, preprocess='crop',
+             still_mode=False, use_enhancer=False, batch_size=1, size=256,
+             pose_style=0,
+             facerender='facevid2vid',
+             exp_scale=1.0,
+             use_ref_video=False,
+             ref_video=None,
+             ref_info=None,
+             use_idle_mode=False,
+             length_of_audio=0, use_blink=True,
+             result_dir='./results/'):
+
+        self.sadtalker_paths = init_path(self.checkpoint_path, self.config_path, size, False, preprocess)
+        print(self.sadtalker_paths)
+
+        self.audio_to_coeff = Audio2Coeff(self.sadtalker_paths, self.device)
+        self.preprocess_model = CropAndExtract(self.sadtalker_paths, self.device)
+
+        if facerender == 'facevid2vid' and self.device != 'mps':
+            self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device)
+        elif facerender == 'pirender' or self.device == 'mps':
+            self.animate_from_coeff = AnimateFromCoeff_PIRender(self.sadtalker_paths, self.device)
+            facerender = 'pirender'
+        else:
+            raise RuntimeError('Unknown model: {}'.format(facerender))
+
+
+        time_tag = str(uuid.uuid4())
+        save_dir = os.path.join(result_dir, time_tag)
+        os.makedirs(save_dir, exist_ok=True)
+
+        input_dir = os.path.join(save_dir, 'input')
+        os.makedirs(input_dir, exist_ok=True)
+
+        print(source_image)
+        pic_path = os.path.join(input_dir, os.path.basename(source_image))
+        shutil.move(source_image, input_dir)
+
+        if driven_audio is not None and os.path.isfile(driven_audio):
+            audio_path = os.path.join(input_dir, os.path.basename(driven_audio))
+
+            #### mp3 to wav
+            if '.mp3' in audio_path:
+                mp3_to_wav(driven_audio, audio_path.replace('.mp3', '.wav'), 16000)
+                audio_path = audio_path.replace('.mp3', '.wav')
+            else:
+                shutil.move(driven_audio, input_dir)
+
+        elif use_idle_mode:
+            audio_path = os.path.join(input_dir, 'idlemode_' + str(length_of_audio) + '.wav')  ## generate audio from this new audio_path
+            from pydub import AudioSegment
+            one_sec_segment = AudioSegment.silent(duration=1000 * length_of_audio)  # duration in milliseconds
+            one_sec_segment.export(audio_path, format="wav")
+        else:
+            print(use_ref_video, ref_info)
+            assert use_ref_video == True and ref_info == 'all'
+
+        if use_ref_video and ref_info == 'all':  # full ref mode
+            ref_video_videoname = os.path.basename(ref_video)
+            audio_path = os.path.join(save_dir, ref_video_videoname + '.wav')
+            print('new audiopath:', audio_path)
+            # if ref_video contains audio, set the audio from ref_video.
+            cmd = r"ffmpeg -y -hide_banner -loglevel error -i %s %s" % (ref_video, audio_path)
+            os.system(cmd)
+
+        os.makedirs(save_dir, exist_ok=True)
+
+        # crop image and extract 3dmm from image
+        first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
+        os.makedirs(first_frame_dir, exist_ok=True)
+        first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(pic_path, first_frame_dir, preprocess, True, size)
+
+        if first_coeff_path is None:
+            raise AttributeError("No face is detected")
+
+        if use_ref_video:
+            print('using ref video for generation')
+            ref_video_videoname = os.path.splitext(os.path.split(ref_video)[-1])[0]
+            ref_video_frame_dir = os.path.join(save_dir, ref_video_videoname)
+            os.makedirs(ref_video_frame_dir, exist_ok=True)
+            print('3DMM Extraction for the reference video providing pose')
+            ref_video_coeff_path, _, _ = self.preprocess_model.generate(ref_video, ref_video_frame_dir, preprocess, source_image_flag=False)
+        else:
+            ref_video_coeff_path = None
+
+        if use_ref_video:
+            if ref_info == 'pose':
+                ref_pose_coeff_path = ref_video_coeff_path
+                ref_eyeblink_coeff_path = None
+            elif ref_info == 'blink':
+                ref_pose_coeff_path = None
+                ref_eyeblink_coeff_path = ref_video_coeff_path
+            elif ref_info == 'pose+blink':
+                ref_pose_coeff_path = ref_video_coeff_path
+                ref_eyeblink_coeff_path = ref_video_coeff_path
+            elif ref_info == 'all':
+                ref_pose_coeff_path = None
+                ref_eyeblink_coeff_path = None
+            else:
+                raise ValueError('error in ref_info')
+        else:
+            ref_pose_coeff_path = None
+            ref_eyeblink_coeff_path = None
+
+        # audio2coeff
+        if use_ref_video and ref_info == 'all':
+            coeff_path = ref_video_coeff_path  # self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)
+        else:
+            batch = get_data(first_coeff_path, audio_path, self.device, ref_eyeblink_coeff_path=ref_eyeblink_coeff_path, still=still_mode,
+                             idlemode=use_idle_mode, length_of_audio=length_of_audio, use_blink=use_blink)  # longer audio?
+            coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)

+        # coeff2video
+        data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode,
+                                   preprocess=preprocess, size=size, expression_scale=exp_scale, facemodel=facerender)
+        return_path = self.animate_from_coeff.generate(data, save_dir, pic_path, crop_info, enhancer='gfpgan' if use_enhancer else None, preprocess=preprocess, img_size=size)
+        video_name = data['video_name']
+        print(f'The generated video is named {video_name} in {save_dir}')
+
+        del self.preprocess_model
+        del self.audio_to_coeff
+        del self.animate_from_coeff
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            torch.cuda.synchronize()
+
+        import gc; gc.collect()
+
+        return return_path
+
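
Note on the change: the only functional difference in this commit is the new `import spaces` and the `@spaces.GPU` decorator on `SadTalker.test`. `spaces` is Hugging Face's ZeroGPU helper package; on ZeroGPU hardware the decorator requests a GPU just before the decorated call runs and releases it when the call returns. A minimal sketch of how the decorated method might be wired into a Gradio app (the Blocks layout and component names below are illustrative assumptions, not part of this commit):

import gradio as gr
from src.gradio_demo import SadTalker

# Instantiate once at startup; the heavy models are constructed inside test().
sad_talker = SadTalker(checkpoint_path='checkpoints', config_path='src/config')

with gr.Blocks() as demo:
    # Hypothetical UI: one image, one audio clip, one output video.
    source_image = gr.Image(type="filepath", label="Source image")
    driven_audio = gr.Audio(type="filepath", label="Driving audio")
    generate = gr.Button("Generate")
    result_video = gr.Video(label="Generated video")
    # Because SadTalker.test is wrapped with @spaces.GPU, a ZeroGPU Space
    # attaches a GPU only for the duration of this click handler.
    generate.click(fn=sad_talker.test,
                   inputs=[source_image, driven_audio],
                   outputs=[result_video])

demo.launch()

If a run is expected to exceed the default allocation window, the decorator also accepts a duration argument, e.g. `@spaces.GPU(duration=120)`.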