fixed req
This view is limited to 50 files because it contains too many changes.
- .gitignore +153 -0
- app.py +26 -8
- modules/__pycache__/sadtalker_test.cpython-38.pyc +0 -0
- modules/__pycache__/text2speech.cpython-38.pyc +0 -0
- modules/sadtalker_test.py +3 -3
- src/__pycache__/generate_batch.cpython-38.pyc +0 -0
- src/__pycache__/generate_facerender_batch.cpython-38.pyc +0 -0
- src/__pycache__/test_audio2coeff.cpython-38.pyc +0 -0
- src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc +0 -0
- src/audio2exp_models/__pycache__/networks.cpython-38.pyc +0 -0
- src/audio2exp_models/audio2exp.py +15 -5
- src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/cvae.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/networks.cpython-38.pyc +0 -0
- src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc +0 -0
- src/audio2pose_models/audio2pose.py +1 -0
- src/audio2pose_models/audio_encoder.py +2 -2
- src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc +0 -0
- src/face3d/extract_kp_videos.py +1 -1
- src/face3d/models/__pycache__/__init__.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/base_model.cpython-38.pyc +0 -0
- src/face3d/models/__pycache__/networks.cpython-38.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc +0 -0
- src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc +0 -0
- src/face3d/util/__pycache__/__init__.cpython-38.pyc +0 -0
- src/face3d/util/__pycache__/load_mats.cpython-38.pyc +0 -0
- src/face3d/util/__pycache__/preprocess.cpython-38.pyc +0 -0
- src/facerender/__pycache__/animate.cpython-38.pyc +0 -0
- src/facerender/animate.py +10 -1
- src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc +0 -0
- src/facerender/modules/__pycache__/generator.cpython-38.pyc +0 -0
- src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc +0 -0
- src/facerender/modules/__pycache__/make_animation.cpython-38.pyc +0 -0
- src/facerender/modules/__pycache__/mapping.cpython-38.pyc +0 -0
- src/facerender/modules/__pycache__/util.cpython-38.pyc +0 -0
- src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc +0 -0
- src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc +0 -0
- src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc +0 -0
- src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc +0 -0
- src/generate_batch.py +4 -25
- src/gradio_demo.py +113 -0
- src/test_audio2coeff.py +1 -1
- src/utils/__pycache__/audio.cpython-38.pyc +0 -0
- src/utils/__pycache__/croper.cpython-38.pyc +0 -0
- src/utils/__pycache__/face_enhancer.cpython-38.pyc +0 -0
- src/utils/__pycache__/hparams.cpython-38.pyc +0 -0
- src/utils/__pycache__/preprocess.cpython-38.pyc +0 -0
.gitignore
ADDED
@@ -0,0 +1,153 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
app.py
CHANGED
@@ -27,15 +27,15 @@ def sadtalker_demo(result_dir='./tmp/'):
                     <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> \
                     <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")

-        with gr.Row()
+        with gr.Row():
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_source_image"):
                     with gr.TabItem('Upload image'):
                         with gr.Row():
-                            source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256
+                            source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256)

                 with gr.Tabs(elem_id="sadtalker_driven_audio"):
-                    with gr.TabItem('Upload audio(wav only currently)'):
+                    with gr.TabItem('Upload audio(wav/mp3 only currently)'):
                         with gr.Column(variant='panel'):
                             driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")

@@ -43,12 +43,13 @@ def sadtalker_demo(result_dir='./tmp/'):
                 with gr.Tabs(elem_id="sadtalker_checkbox"):
                     with gr.TabItem('Settings'):
                         with gr.Column(variant='panel'):
-                            is_still_mode = gr.Checkbox(label="
-
+                            is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True)
+                            is_resize_mode = gr.Checkbox(label="Resize Mode (⚠️ Resize mode need manually crop the image firstly, can handle larger image crop)").style(container=True)
+                            is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True)
                             submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')

                 with gr.Tabs(elem_id="sadtalker_genearted"):
-                    gen_video = gr.Video(label="Generated video", format="mp4").style(
+                    gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
                     gen_text = gr.Textbox(visible=False)

         with gr.Row():
@@ -57,7 +58,22 @@ def sadtalker_demo(result_dir='./tmp/'):
                     'examples/source_image/art_10.png',
                     'examples/driven_audio/deyu.wav',
                     True,
+                    False,
                     False
+                    ],
+                    [
+                    'examples/source_image/art_1.png',
+                    'examples/driven_audio/fayu.wav',
+                    True,
+                    True,
+                    False
+                    ],
+                    [
+                    'examples/source_image/art_9.png',
+                    'examples/driven_audio/itosinger1.wav',
+                    True,
+                    False,
+                    True
                     ]
                     ]
         gr.Examples(examples=examples,
@@ -65,7 +81,8 @@ def sadtalker_demo(result_dir='./tmp/'):
                     source_image,
                     driven_audio,
                     is_still_mode,
-
+                    is_resize_mode,
+                    is_enhance_mode,
                     gr.Textbox(value=result_dir, visible=False)],
                     outputs=[gen_video, gen_text],
                     fn=sad_talker.test,
@@ -76,7 +93,8 @@ def sadtalker_demo(result_dir='./tmp/'):
                     inputs=[source_image,
                             driven_audio,
                             is_still_mode,
-
+                            is_resize_mode,
+                            is_enhance_mode,
                             gr.Textbox(value=result_dir, visible=False)],
                     outputs=[gen_video, gen_text]
                     )
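Note: the three settings checkboxes are forwarded positionally, so their order in the inputs list has to match the test() signature. A minimal, self-contained sketch of the wiring (hedged: it uses a stand-in function instead of the real sad_talker.test and omits the .style() calls used in this Space's Gradio 3.x UI):

    import gradio as gr

    def fake_test(image_path, audio_path, still, resize, enhance, result_dir):
        # stand-in for SadTalker.test(...); returns (video, text) like the real method
        return None, f"would run on {image_path} (still={still}, resize={resize}, enhance={enhance})"

    with gr.Blocks() as demo:
        source_image = gr.Image(label="Source image", type="filepath")
        driven_audio = gr.Audio(label="Input audio", type="filepath")
        is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)")
        is_resize_mode = gr.Checkbox(label="Resize Mode")
        is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality)")
        gen_video = gr.Video(label="Generated video", format="mp4")
        gen_text = gr.Textbox(visible=False)
        submit = gr.Button('Generate')
        # checkbox order mirrors the (still_mode, resize_mode, use_enhancer) parameters
        submit.click(fn=fake_test,
                     inputs=[source_image, driven_audio, is_still_mode, is_resize_mode, is_enhance_mode,
                             gr.Textbox(value='./tmp/', visible=False)],
                     outputs=[gen_video, gen_text])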
modules/__pycache__/sadtalker_test.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/sadtalker_test.cpython-38.pyc and b/modules/__pycache__/sadtalker_test.cpython-38.pyc differ
modules/__pycache__/text2speech.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/text2speech.cpython-38.pyc and b/modules/__pycache__/text2speech.cpython-38.pyc differ
modules/sadtalker_test.py
CHANGED
@@ -60,7 +60,7 @@ class SadTalker():
                                             facerender_yaml_path, device)
         self.device = device

-    def test(self, source_image, driven_audio, still_mode, use_enhancer, result_dir='./'):
+    def test(self, source_image, driven_audio, still_mode, resize_mode, use_enhancer, result_dir='./'):

         time_tag = str(uuid.uuid4()) # strftime("%Y_%m_%d_%H.%M.%S")
         save_dir = os.path.join(result_dir, time_tag)
@@ -91,7 +91,7 @@ class SadTalker():
         #crop image and extract 3dmm from image
         first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
         os.makedirs(first_frame_dir, exist_ok=True)
-        first_coeff_path, crop_pic_path = self.preprocess_model.generate(pic_path, first_frame_dir)
+        first_coeff_path, crop_pic_path, original_size = self.preprocess_model.generate(pic_path, first_frame_dir, crop_or_resize= 'crop' if resize_mode == 'crop' else 'resize')
         if first_coeff_path is None:
             raise AttributeError("No face is detected")

@@ -101,7 +101,7 @@ class SadTalker():
         #coeff2video
         batch_size = 4
         data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode)
-        self.animate_from_coeff.generate(data, save_dir, enhancer='gfpgan' if use_enhancer else None)
+        self.animate_from_coeff.generate(data, save_dir, enhancer='gfpgan' if use_enhancer else None, original_size=original_size)
         video_name = data['video_name']
         print(f'The generated video is named {video_name} in {save_dir}')
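In short, test() now takes a resize_mode flag between still_mode and use_enhancer. A hedged call sketch (it assumes a SadTalker instance named sad_talker has already been constructed; its __init__ is outside this hunk, and the two return value names are only illustrative):

    # still_mode=True, resize_mode=True, use_enhancer=False
    video_path, msg = sad_talker.test('examples/source_image/art_1.png',
                                      'examples/driven_audio/fayu.wav',
                                      True, True, False,
                                      result_dir='./tmp/')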
src/__pycache__/generate_batch.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/generate_batch.cpython-38.pyc and b/src/__pycache__/generate_batch.cpython-38.pyc differ
src/__pycache__/generate_facerender_batch.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/generate_facerender_batch.cpython-38.pyc and b/src/__pycache__/generate_facerender_batch.cpython-38.pyc differ
src/__pycache__/test_audio2coeff.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/test_audio2coeff.cpython-38.pyc and b/src/__pycache__/test_audio2coeff.cpython-38.pyc differ
src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc
CHANGED
Binary files a/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc and b/src/audio2exp_models/__pycache__/audio2exp.cpython-38.pyc differ
src/audio2exp_models/__pycache__/networks.cpython-38.pyc
CHANGED
Binary files a/src/audio2exp_models/__pycache__/networks.cpython-38.pyc and b/src/audio2exp_models/__pycache__/networks.cpython-38.pyc differ
src/audio2exp_models/audio2exp.py
CHANGED
@@ -1,3 +1,4 @@
+from tqdm import tqdm
 import torch
 from torch import nn

@@ -15,15 +16,24 @@ class Audio2Exp(nn.Module):
         bs = mel_input.shape[0]
         T = mel_input.shape[1]

-
-        ratio = batch['ratio_gt']                        #bs T
-
-
+        exp_coeff_pred = []
+
+        for i in tqdm(range(0, T, 10),'audio2exp:'): # every 10 frames
+
+            current_mel_input = mel_input[:,i:i+10]
+
+            ref = batch['ref'][:, :, :64].repeat((1,current_mel_input.shape[1],1))           #bs T 64
+            ratio = batch['ratio_gt'][:, i:i+10]                               #bs T
+
+            audiox = current_mel_input.view(-1, 1, 80, 16)                  # bs*T 1 80 16
+
+            curr_exp_coeff_pred  = self.netG(audiox, ref, ratio)         # bs T 64
+
+            exp_coeff_pred += [curr_exp_coeff_pred]

         # BS x T x 64
         results_dict = {
-            'exp_coeff_pred': exp_coeff_pred
+            'exp_coeff_pred': torch.cat(exp_coeff_pred, axis=1)
             }
         return results_dict
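The rewritten forward pass no longer feeds the whole mel sequence to the expression network at once; it walks over the input in 10-frame windows and concatenates the per-window predictions along the time axis, which keeps peak memory bounded for long audio. A minimal, self-contained sketch of that chunk-and-concatenate pattern (the placeholder net below stands in for self.netG):

    import torch

    def chunked_predict(net, mel_input, chunk=10):
        # run `net` on `chunk`-frame slices of mel_input (bs, T, ...) and stitch
        # the per-window outputs back together along the time axis
        outputs = []
        T = mel_input.shape[1]
        for i in range(0, T, chunk):
            outputs.append(net(mel_input[:, i:i + chunk]))
        return torch.cat(outputs, dim=1)

    # toy check: an identity "network" keeps the sequence length intact
    assert chunked_predict(lambda x: x, torch.zeros(2, 37, 80, 16)).shape[1] == 37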
src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio2pose.cpython-38.pyc differ
src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc and b/src/audio2pose_models/__pycache__/audio_encoder.cpython-38.pyc differ
src/audio2pose_models/__pycache__/cvae.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc and b/src/audio2pose_models/__pycache__/cvae.cpython-38.pyc differ
src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc and b/src/audio2pose_models/__pycache__/discriminator.cpython-38.pyc differ
src/audio2pose_models/__pycache__/networks.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/networks.cpython-38.pyc and b/src/audio2pose_models/__pycache__/networks.cpython-38.pyc differ
src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc
CHANGED
Binary files a/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc and b/src/audio2pose_models/__pycache__/res_unet.cpython-38.pyc differ
src/audio2pose_models/audio2pose.py
CHANGED
@@ -76,6 +76,7 @@ class Audio2Pose(nn.Module):
                 batch['audio_emb'] = audio_emb
                 batch = self.netG.test(batch)
                 pose_motion_pred_list.append(batch['pose_motion_pred'])  #list of bs seq_len 6
+
                 if re != 0:
                     z = torch.randn(bs, self.latent_dim).to(ref.device)
                     batch['z'] = z
src/audio2pose_models/audio_encoder.py
CHANGED
@@ -19,7 +19,7 @@ class Conv2d(nn.Module):
         return self.act(out)

 class AudioEncoder(nn.Module):
-    def __init__(self, wav2lip_checkpoint
+    def __init__(self, wav2lip_checkpoint):
         super(AudioEncoder, self).__init__()

         self.audio_encoder = nn.Sequential(
@@ -42,7 +42,7 @@ class AudioEncoder(nn.Module):
             Conv2d(512, 512, kernel_size=1, stride=1, padding=0),)

         #### load the pre-trained audio_encoder\
-        wav2lip_state_dict = torch.load(wav2lip_checkpoint
+        wav2lip_state_dict = torch.load(wav2lip_checkpoint)['state_dict']
         state_dict = self.audio_encoder.state_dict()

         for k,v in wav2lip_state_dict.items():
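The fix closes the truncated __init__ signature and reads the Wav2Lip weights from the checkpoint's 'state_dict' entry before copying the matching tensors into the freshly built audio encoder. A generic sketch of that load-a-submodule pattern (the prefix and path handling below are illustrative, not taken from this repository):

    import torch

    def load_submodule_weights(module, checkpoint_path, prefix='audio_encoder.'):
        # checkpoints saved as {'state_dict': {...}} keep the weights under that key
        full_state = torch.load(checkpoint_path, map_location='cpu')['state_dict']
        own_state = module.state_dict()
        for name, tensor in full_state.items():
            if name.startswith(prefix):              # keep only the relevant sub-module
                own_state[name[len(prefix):]] = tensor
        module.load_state_dict(own_state)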
src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc
CHANGED
Binary files a/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc and b/src/face3d/__pycache__/extract_kp_videos.cpython-38.pyc differ
src/face3d/extract_kp_videos.py
CHANGED
@@ -71,7 +71,7 @@ def read_video(filename):
 def run(data):
     filename, opt, device = data
     os.environ['CUDA_VISIBLE_DEVICES'] = device
-    kp_extractor = KeypointExtractor(
+    kp_extractor = KeypointExtractor()
     images = read_video(filename)
     name = filename.split('/')[-2:]
     os.makedirs(os.path.join(opt.output_dir, name[-2]), exist_ok=True)
src/face3d/models/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/__pycache__/__init__.cpython-38.pyc differ
src/face3d/models/__pycache__/base_model.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/__pycache__/base_model.cpython-38.pyc and b/src/face3d/models/__pycache__/base_model.cpython-38.pyc differ
src/face3d/models/__pycache__/networks.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/__pycache__/networks.cpython-38.pyc and b/src/face3d/models/__pycache__/networks.cpython-38.pyc differ
src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/__init__.cpython-38.pyc differ
src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/iresnet.cpython-38.pyc differ
src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc
CHANGED
Binary files a/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc and b/src/face3d/models/arcface_torch/backbones/__pycache__/mobilefacenet.cpython-38.pyc differ
src/face3d/util/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/src/face3d/util/__pycache__/__init__.cpython-38.pyc and b/src/face3d/util/__pycache__/__init__.cpython-38.pyc differ
src/face3d/util/__pycache__/load_mats.cpython-38.pyc
CHANGED
Binary files a/src/face3d/util/__pycache__/load_mats.cpython-38.pyc and b/src/face3d/util/__pycache__/load_mats.cpython-38.pyc differ
src/face3d/util/__pycache__/preprocess.cpython-38.pyc
CHANGED
Binary files a/src/face3d/util/__pycache__/preprocess.cpython-38.pyc and b/src/face3d/util/__pycache__/preprocess.cpython-38.pyc differ
src/facerender/__pycache__/animate.cpython-38.pyc
CHANGED
Binary files a/src/facerender/__pycache__/animate.cpython-38.pyc and b/src/facerender/__pycache__/animate.cpython-38.pyc differ
src/facerender/animate.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import cv2
 import yaml
 import numpy as np
 import warnings
@@ -106,7 +107,7 @@ class AnimateFromCoeff():

         return checkpoint['epoch']

-    def generate(self, x, video_save_dir, enhancer=None):
+    def generate(self, x, video_save_dir, enhancer=None, original_size=None):

         source_image=x['source_image'].type(torch.FloatTensor)
         source_semantics=x['source_semantics'].type(torch.FloatTensor)
@@ -137,6 +138,10 @@ class AnimateFromCoeff():
             video.append(image)
         result = img_as_ubyte(video)

+        ### the generated video is 256x256, so we keep the aspect ratio,
+        if original_size:
+            result = [ cv2.resize(result_i,(256, int(256.0 * original_size[1]/original_size[0]) )) for result_i in result ]
+
         video_name = x['video_name'] + '.mp4'
         path = os.path.join(video_save_dir, 'temp_'+video_name)
         imageio.mimsave(path, result, fps=float(25))
@@ -146,6 +151,10 @@ class AnimateFromCoeff():
             av_path_enhancer = os.path.join(video_save_dir, video_name_enhancer)
             enhanced_path = os.path.join(video_save_dir, 'temp_'+video_name_enhancer)
             enhanced_images = face_enhancer(result, method=enhancer)
+
+            if original_size:
+                enhanced_images = [ cv2.resize(result_i,(256, int(256.0 * original_size[1]/original_size[0]) )) for result_i in enhanced_images ]
+
             imageio.mimsave(enhanced_path, enhanced_images, fps=float(25))

         av_path = os.path.join(video_save_dir, video_name)
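Because the face renderer always emits 256x256 frames, the new original_size argument is used to rescale every frame back toward the source image's aspect ratio before the video is written. cv2.resize takes a (width, height) tuple, so the width stays at 256 and the height follows the original height/width ratio; the small standalone illustration below assumes original_size is (width, height), as the expression in the diff suggests:

    import cv2
    import numpy as np

    def restore_aspect_ratio(frames, original_size):
        # original_size assumed to be (width, height) of the uncropped source image
        w, h = original_size
        target = (256, int(256.0 * h / w))           # cv2.resize wants (width, height)
        return [cv2.resize(frame, target) for frame in frames]

    # e.g. a 512x768 (w x h) portrait maps 256x256 frames to 256x384
    frames = [np.zeros((256, 256, 3), dtype=np.uint8)]
    print(restore_aspect_ratio(frames, (512, 768))[0].shape)   # (384, 256, 3)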
src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc
CHANGED
Binary files a/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc and b/src/facerender/modules/__pycache__/dense_motion.cpython-38.pyc differ
src/facerender/modules/__pycache__/generator.cpython-38.pyc
CHANGED
Binary files a/src/facerender/modules/__pycache__/generator.cpython-38.pyc and b/src/facerender/modules/__pycache__/generator.cpython-38.pyc differ
src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc
CHANGED
Binary files a/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc and b/src/facerender/modules/__pycache__/keypoint_detector.cpython-38.pyc differ
src/facerender/modules/__pycache__/make_animation.cpython-38.pyc
CHANGED
Binary files a/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc and b/src/facerender/modules/__pycache__/make_animation.cpython-38.pyc differ
src/facerender/modules/__pycache__/mapping.cpython-38.pyc
CHANGED
Binary files a/src/facerender/modules/__pycache__/mapping.cpython-38.pyc and b/src/facerender/modules/__pycache__/mapping.cpython-38.pyc differ
src/facerender/modules/__pycache__/util.cpython-38.pyc
CHANGED
Binary files a/src/facerender/modules/__pycache__/util.cpython-38.pyc and b/src/facerender/modules/__pycache__/util.cpython-38.pyc differ
src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc
CHANGED
Binary files a/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/__init__.cpython-38.pyc differ
src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc
CHANGED
Binary files a/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/batchnorm.cpython-38.pyc differ
src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc
CHANGED
Binary files a/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/comm.cpython-38.pyc differ
src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc
CHANGED
Binary files a/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc and b/src/facerender/sync_batchnorm/__pycache__/replicate.cpython-38.pyc differ
src/generate_batch.py
CHANGED
@@ -1,18 +1,11 @@
 import os
+
+from tqdm import tqdm
 import torch
 import numpy as np
 import random
 import scipy.io as scio
 import src.utils.audio as audio
-import subprocess, platform
-
-from pydub import AudioSegment
-
-def mp3_to_wav(mp3_filename,wav_filename,frame_rate):
-    mp3_file = AudioSegment.from_mp3(file=mp3_filename)
-    mp3_file.set_frame_rate(frame_rate).export(wav_filename,format="wav")
-
-

 def crop_pad_audio(wav, audio_length):
     if len(wav) > audio_length:
@@ -33,7 +26,6 @@ def generate_blink_seq(num_frames):
     ratio = np.zeros((num_frames,1))
     frame_id = 0
     while frame_id in range(num_frames):
-        #start = random.choice(range(60,70))
         start = 80
         if frame_id+start+9<=num_frames - 1:
             ratio[frame_id+start:frame_id+start+9, 0] = [0.5,0.6,0.7,0.9,1, 0.9, 0.7,0.6,0.5]
@@ -48,7 +40,6 @@ def generate_blink_seq_randomly(num_frames):
         return ratio
     frame_id = 0
     while frame_id in range(num_frames):
-        #start = random.choice(range(60,70))
         start = random.choice(range(min(10,num_frames), min(int(num_frames/2), 70)))
         if frame_id+start+5<=num_frames - 1:
             ratio[frame_id+start:frame_id+start+5, 0] = [0.5, 0.9, 1.0, 0.9, 0.5]
@@ -60,8 +51,6 @@ def generate_blink_seq_randomly(num_frames):
 def get_data(first_coeff_path, audio_path, device):

     syncnet_mel_step_size = 16
-    syncnet_T = 5
-    MAX_FRAME = 32
     fps = 25

     pic_name = os.path.splitext(os.path.split(first_coeff_path)[-1])[0]
@@ -71,23 +60,14 @@ def get_data(first_coeff_path, audio_path, device):
     source_semantics_dict = scio.loadmat(source_semantics_path)
     ref_coeff = source_semantics_dict['coeff_3dmm'][:1,:70]         #1 70

-
-    if '.mp3' in audio_path:
-        print(audio_path)
-        mp3_to_wav(audio_path, audio_path.replace('.mp3','.wav'), 16000)
-        new_audio = audio_path.replace('.mp3','.wav')
-    else:
-        new_audio = audio_path
-
-    wav = audio.load_wav(new_audio, 16000)
-
+    wav = audio.load_wav(audio_path, 16000)
     wav_length, num_frames = parse_audio_length(len(wav), 16000, 25)
     wav = crop_pad_audio(wav, wav_length)
     orig_mel = audio.melspectrogram(wav).T
     spec = orig_mel.copy()         # nframes 80
     indiv_mels = []

-    for i in range(num_frames):
+    for i in tqdm(range(num_frames), 'mel:'):
         start_frame_num = i-2
         start_idx = int(80. * (start_frame_num / float(fps)))
         end_idx = start_idx + syncnet_mel_step_size
@@ -97,7 +77,6 @@ def get_data(first_coeff_path, audio_path, device):
         indiv_mels.append(m.T)
     indiv_mels = np.asarray(indiv_mels)         # T 80 16
     ratio = generate_blink_seq_randomly(num_frames)      # T
-

     indiv_mels = torch.FloatTensor(indiv_mels).unsqueeze(1).unsqueeze(0) # bs T 1 80 16
     ratio = torch.FloatTensor(ratio).unsqueeze(0)                       # bs T
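get_data no longer converts mp3 to wav itself (that now happens in src/gradio_demo.py before the call); it just loads the wav and, for each video frame at 25 fps, slices a 16-step mel window that starts two frames back. A small numeric illustration of the indexing used in the loop (80 mel steps per second, matching the constant in the code):

    fps = 25
    syncnet_mel_step_size = 16

    def mel_window(frame_idx):
        start_frame_num = frame_idx - 2                          # look back two video frames
        start_idx = int(80.0 * (start_frame_num / float(fps)))   # 80 mel steps per second
        return start_idx, start_idx + syncnet_mel_step_size

    print(mel_window(0))    # (-6, 10); negative starts are handled by code outside this hunk
    print(mel_window(10))   # (25, 41)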
src/gradio_demo.py
ADDED
@@ -0,0 +1,113 @@
+import torch, uuid
+from time import gmtime, strftime
+import os, sys, shutil
+from src.utils.preprocess import CropAndExtract
+from src.test_audio2coeff import Audio2Coeff
+from src.facerender.animate import AnimateFromCoeff
+from src.generate_batch import get_data
+from src.generate_facerender_batch import get_facerender_data
+from src.utils.text2speech import text2speech
+
+from pydub import AudioSegment
+
+def mp3_to_wav(mp3_filename,wav_filename,frame_rate):
+    mp3_file = AudioSegment.from_file(file=mp3_filename)
+    mp3_file.set_frame_rate(frame_rate).export(wav_filename,format="wav")
+
+
+class SadTalker():
+
+    def __init__(self, checkpoint_path='checkpoints', config_path='src/config'):
+
+        if torch.cuda.is_available() :
+            device = "cuda"
+        else:
+            device = "cpu"
+
+        os.environ['TORCH_HOME']= checkpoint_path
+
+        path_of_lm_croper = os.path.join( checkpoint_path, 'shape_predictor_68_face_landmarks.dat')
+        path_of_net_recon_model = os.path.join( checkpoint_path, 'epoch_20.pth')
+        dir_of_BFM_fitting = os.path.join( checkpoint_path, 'BFM_Fitting')
+        wav2lip_checkpoint = os.path.join( checkpoint_path, 'wav2lip.pth')
+
+        audio2pose_checkpoint = os.path.join( checkpoint_path, 'auido2pose_00140-model.pth')
+        audio2pose_yaml_path = os.path.join( config_path, 'auido2pose.yaml')
+
+        audio2exp_checkpoint = os.path.join( checkpoint_path, 'auido2exp_00300-model.pth')
+        audio2exp_yaml_path = os.path.join( config_path, 'auido2exp.yaml')
+
+        free_view_checkpoint = os.path.join( checkpoint_path, 'facevid2vid_00189-model.pth.tar')
+        mapping_checkpoint = os.path.join( checkpoint_path, 'mapping_00229-model.pth.tar')
+        facerender_yaml_path = os.path.join( config_path, 'facerender.yaml')
+
+        #init model
+        print(path_of_lm_croper)
+        self.preprocess_model = CropAndExtract(path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device)
+
+        print(audio2pose_checkpoint)
+        self.audio_to_coeff = Audio2Coeff(audio2pose_checkpoint, audio2pose_yaml_path,
+                                            audio2exp_checkpoint, audio2exp_yaml_path, wav2lip_checkpoint, device)
+        print(free_view_checkpoint)
+        self.animate_from_coeff = AnimateFromCoeff(free_view_checkpoint, mapping_checkpoint,
+                                            facerender_yaml_path, device)
+        self.device = device
+
+    def test(self, source_image, driven_audio, still_mode, use_enhancer, result_dir='./'):
+
+        time_tag = str(uuid.uuid4())
+        save_dir = os.path.join(result_dir, time_tag)
+        os.makedirs(save_dir, exist_ok=True)
+
+        input_dir = os.path.join(save_dir, 'input')
+        os.makedirs(input_dir, exist_ok=True)
+
+        print(source_image)
+        pic_path = os.path.join(input_dir, os.path.basename(source_image))
+        shutil.move(source_image, input_dir)
+
+        if os.path.isfile(driven_audio):
+            audio_path = os.path.join(input_dir, os.path.basename(driven_audio))
+
+            #### mp3 to wav
+            if '.mp3' in audio_path:
+                mp3_to_wav(driven_audio, audio_path.replace('.mp3', '.wav'), 16000)
+                audio_path = audio_path.replace('.mp3', '.wav')
+            else:
+                shutil.move(driven_audio, input_dir)
+        else:
+            text2speech
+
+
+        os.makedirs(save_dir, exist_ok=True)
+        pose_style = 0
+        #crop image and extract 3dmm from image
+        first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
+        os.makedirs(first_frame_dir, exist_ok=True)
+        first_coeff_path, crop_pic_path, original_size = self.preprocess_model.generate(pic_path, first_frame_dir)
+
+        if first_coeff_path is None:
+            raise AttributeError("No face is detected")
+
+        #audio2ceoff
+        batch = get_data(first_coeff_path, audio_path, self.device)  # longer audio?
+        coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style)
+        #coeff2video
+        batch_size = 4
+        data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, still_mode=still_mode)
+        self.animate_from_coeff.generate(data, save_dir, enhancer='gfpgan' if use_enhancer else None, original_size=original_size)
+        video_name = data['video_name']
+        print(f'The generated video is named {video_name} in {save_dir}')
+
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+        import gc; gc.collect()
+
+        if use_enhancer:
+            return os.path.join(save_dir, video_name+'_enhanced.mp4'), os.path.join(save_dir, video_name+'_enhanced.mp4')
+
+        else:
+            return os.path.join(save_dir, video_name+'.mp4'), os.path.join(save_dir, video_name+'.mp4')
+
+
+
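src/gradio_demo.py wraps the whole pipeline (preprocess, audio-to-coefficients, face rendering) behind a single SadTalker class so a UI only needs one call. A hedged usage sketch, assuming the checkpoints/ directory already contains the files listed in __init__ (the example paths come from this Space's examples/ folder):

    from src.gradio_demo import SadTalker

    sad_talker = SadTalker(checkpoint_path='checkpoints', config_path='src/config')

    # still_mode=True, use_enhancer=False; an .mp3 input is converted to .wav via pydub first
    video_path, _ = sad_talker.test('examples/source_image/art_10.png',
                                    'examples/driven_audio/deyu.wav',
                                    True, False,
                                    result_dir='./tmp/')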
src/test_audio2coeff.py
CHANGED
@@ -81,7 +81,7 @@ class Audio2Coeff():

         savemat(os.path.join(coeff_save_dir, '%s##%s.mat'%(batch['pic_name'], batch['audio_name'])),
                 {'coeff_3dmm': coeffs_pred_numpy})
-
+
         return os.path.join(coeff_save_dir, '%s##%s.mat'%(batch['pic_name'], batch['audio_name']))

src/utils/__pycache__/audio.cpython-38.pyc
CHANGED
Binary files a/src/utils/__pycache__/audio.cpython-38.pyc and b/src/utils/__pycache__/audio.cpython-38.pyc differ
src/utils/__pycache__/croper.cpython-38.pyc
CHANGED
Binary files a/src/utils/__pycache__/croper.cpython-38.pyc and b/src/utils/__pycache__/croper.cpython-38.pyc differ
src/utils/__pycache__/face_enhancer.cpython-38.pyc
CHANGED
Binary files a/src/utils/__pycache__/face_enhancer.cpython-38.pyc and b/src/utils/__pycache__/face_enhancer.cpython-38.pyc differ
src/utils/__pycache__/hparams.cpython-38.pyc
CHANGED
Binary files a/src/utils/__pycache__/hparams.cpython-38.pyc and b/src/utils/__pycache__/hparams.cpython-38.pyc differ
src/utils/__pycache__/preprocess.cpython-38.pyc
CHANGED
Binary files a/src/utils/__pycache__/preprocess.cpython-38.pyc and b/src/utils/__pycache__/preprocess.cpython-38.pyc differ