lithiumice committed on
Commit
f68fadb
1 Parent(s): a31c0b9
app.py CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
4
  from src.gradio_demo import SadTalker
5
  from src.utils.text2speech import TTSTalker
6
  from huggingface_hub import snapshot_download
 
7
 
8
  def get_source_image(image):
9
  return image
@@ -18,6 +19,7 @@ def sadtalker_demo():
18
 
19
  sad_talker = SadTalker(lazy_load=True)
20
  tts_talker = TTSTalker()
 
21
 
22
  with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
23
  gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
@@ -38,16 +40,40 @@ def sadtalker_demo():
38
  with gr.Row():
39
  source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
40
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
42
- with gr.TabItem('Upload or Generating from TTS'):
43
  with gr.Column(variant='panel'):
44
- driven_audio = gr.Audio(label="Input audio(.wav/.mp3)", source="upload", type="filepath")
45
 
46
  with gr.Column(variant='panel'):
47
- input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
48
  tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
49
- tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
 
50
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  with gr.Column(variant='panel'):
53
  with gr.Tabs(elem_id="sadtalker_checkbox"):
 
4
  from src.gradio_demo import SadTalker
5
  from src.utils.text2speech import TTSTalker
6
  from huggingface_hub import snapshot_download
7
+ from src.utils.text2speech import TTSTalkerPlayHT
8
 
9
  def get_source_image(image):
10
  return image
 
19
 
20
  sad_talker = SadTalker(lazy_load=True)
21
  tts_talker = TTSTalker()
22
+ tts_talker_ht = TTSTalkerPlayHT()
23
 
24
  with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
25
  gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
 
40
  with gr.Row():
41
  source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
42
 
43
+ # with gr.Tabs(elem_id="sadtalker_driven_audio"):
44
+ # with gr.TabItem('Upload or Generating from TTS'):
45
+ # with gr.Column(variant='panel'):
46
+ # driven_audio = gr.Audio(label="Input audio(.wav/.mp3)", source="upload", type="filepath")
47
+
48
+ # with gr.Column(variant='panel'):
49
+ # input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
50
+ # tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
51
+ # tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
52
+
53
+
54
+ # ht TTS
55
  with gr.Tabs(elem_id="sadtalker_driven_audio"):
56
+ with gr.TabItem('Play.ht: Upload OR TTS'):
57
  with gr.Column(variant='panel'):
58
+ driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
59
 
60
  with gr.Column(variant='panel'):
61
+ input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
62
  tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
63
+ tts.click(fn=tts_talker_ht.test, inputs=[input_text], outputs=[driven_audio])
64
+
65
 
66
+ # origin TTS
67
+ with gr.Tabs(elem_id="sadtalker_driven_audio"):
68
+ with gr.TabItem('Origin: Upload OR TTS'):
69
+ with gr.Column(variant='panel'):
70
+ driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
71
+
72
+ with gr.Column(variant='panel'):
73
+ input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
74
+ tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
75
+ tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
76
+
77
 
78
  with gr.Column(variant='panel'):
79
  with gr.Tabs(elem_id="sadtalker_checkbox"):
checkpoints/BFM_Fitting/01_MorphableModel.mat DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2
 
 
checkpoints/BFM_Fitting/BFM09_model_info.mat DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b
 
 
checkpoints/BFM_Fitting/BFM_exp_idx.mat DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/1146e4e9c3bef303a497383aa7974c014fe945c7
 
 
checkpoints/BFM_Fitting/BFM_front_idx.mat DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b9d7b0953dd1dc5b1e28144610485409ac321f9b
 
 
checkpoints/BFM_Fitting/Exp_Pca.bin DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726
 
 
checkpoints/BFM_Fitting/facemodel_info.mat DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/3e516ec7297fa3248098f49ecea10579f4831c0a
 
 
checkpoints/BFM_Fitting/select_vertex_id.mat DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/5b8b220093d93b133acc94ffed159f31a74854cd
 
 
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/a0e23588302bc71fc899eef53ff06df5f4df4c1d
 
 
checkpoints/BFM_Fitting/std_exp.txt DELETED
@@ -1 +0,0 @@
1
- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/767b8de4ea1ca78b6f22b98ff2dee4fa345500bb
 
 
checkpoints/auido2exp_00300-model.pth DELETED
@@ -1 +0,0 @@
1
- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4
 
 
checkpoints/auido2pose_00140-model.pth DELETED
@@ -1 +0,0 @@
1
- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce
 
 
checkpoints/epoch_20.pth DELETED
@@ -1 +0,0 @@
1
- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b
 
 
checkpoints/facevid2vid_00189-model.pth.tar DELETED
@@ -1 +0,0 @@
1
- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d
 
 
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip DELETED
@@ -1 +0,0 @@
1
- ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46
 
 
checkpoints/hub/checkpoints/s3fd-619a316812.pth DELETED
@@ -1 +0,0 @@
1
- ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543
 
 
checkpoints/mapping_00229-model.pth.tar DELETED
@@ -1 +0,0 @@
1
- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1
 
 
checkpoints/shape_predictor_68_face_landmarks.dat DELETED
@@ -1 +0,0 @@
1
- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
 
 
checkpoints/wav2lip.pth DELETED
@@ -1 +0,0 @@
1
- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37
 
 
req.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ torchaudio
4
+ numpy
5
+ face_alignment
6
+ imageio
7
+ imageio-ffmpeg
8
+ librosa
9
+ numba
10
+ resampy
11
+ pydub
12
+ scipy
13
+ kornia
14
+ tqdm
15
+ yacs
16
+ pyyaml
17
+ joblib
18
+ scikit-image
19
+ basicsr
20
+ facexlib
21
+ dlib-bin
22
+ gfpgan
23
+ TTS
run.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ python app.py
src/gradio_demo.py CHANGED
@@ -127,8 +127,9 @@ class SadTalker():
127
  del self.audio_to_coeff
128
  del self.animate_from_coeff
129
 
130
- torch.cuda.empty_cache()
131
- torch.cuda.synchronize()
 
132
  import gc; gc.collect()
133
 
134
  return return_path
 
127
  del self.audio_to_coeff
128
  del self.animate_from_coeff
129
 
130
+ if torch.cuda.is_available() :
131
+ torch.cuda.empty_cache()
132
+ torch.cuda.synchronize()
133
  import gc; gc.collect()
134
 
135
  return return_path
src/utils/text2speech.py CHANGED
@@ -18,4 +18,117 @@ class TTSTalker():
18
 
19
  self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name)
20
 
21
- return tempf.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name)
20
 
21
+ return tempf.name
22
+
23
+ import urllib.request
24
+ import tempfile
25
+ import requests
26
+ import json
27
+ import time
28
+
29
+
30
class TTSTalkerPlayHT():
    """Text-to-speech helper that synthesizes audio through the Play.ht v1 REST API.

    The workflow implemented by :meth:`test` is:

    1. POST the text to ``/convert`` and read the returned ``transcriptionId``.
    2. Poll ``/articleStatus`` until the job reports ``converted`` and exposes
       an ``audioUrl`` (or until ``POLL_TIMEOUT`` seconds have elapsed).
    3. Stream the audio into a temporary ``.mp3`` file and return its path.
    """

    DEFAULT_VOICE = "en-US-MichelleNeural"
    REQUEST_TIMEOUT = 30  # seconds allowed for each individual HTTP call
    POLL_TIMEOUT = 10     # total seconds to wait for the conversion to finish
    POLL_INTERVAL = 0.5   # seconds to sleep between two status polls

    def __init__(self) -> None:
        # Local import so this class does not depend on the module's import list.
        import os

        self.url = "https://play.ht/api/v1"
        # NOTE(review): the fallback credentials below were committed in plain
        # text. They are kept only so existing deployments keep working; they
        # should be rotated and supplied via PLAYHT_API_KEY / PLAYHT_USER_ID.
        self.headers = {
            'Authorization': os.environ.get(
                'PLAYHT_API_KEY', 'f35fc9d7ce0549a88f6cdc15ec860b6e'),
            'X-User-ID': os.environ.get(
                'PLAYHT_USER_ID', '96tPb0H2cXbobV9u8iLVGyJPUPc2'),
            'Content-Type': 'application/json'
        }

    def test(self, text, language='en', **kwargs):
        """Convert ``text`` to speech and return the path of the audio file.

        Args:
            text: Text to synthesize. Empty or whitespace-only text is rejected
                locally without contacting the API.
            language: Accepted for signature parity with ``TTSTalker.test``;
                not used by this implementation.
            **kwargs: Optional ``voice`` overrides ``DEFAULT_VOICE``; any other
                keyword is ignored.

        Returns:
            Path of a temporary ``.mp3`` file, or ``None`` when the text is
            empty, the API answers with an error status, the response lacks a
            ``transcriptionId``, or the conversion does not finish within
            ``POLL_TIMEOUT`` seconds.

        Raises:
            requests.RequestException: On connection errors or when a single
                HTTP call exceeds ``REQUEST_TIMEOUT``.
        """
        if not text or not text.strip():
            print('empty text, nothing to synthesize')
            return None

        payload = json.dumps({
            "title": "Testing public api convertion",
            "voice": kwargs.get('voice', self.DEFAULT_VOICE),
            "content": [text],
        })
        response = requests.post(
            self.url + '/convert',
            headers=self.headers,
            data=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Treat every HTTP error like the 404 case: report it and give up.
        if not response.ok:
            print(response.status_code)
            return None

        # On success the transcriptionId is returned immediately.
        transcription_id = json.loads(response.text).get('transcriptionId')
        if not transcription_id:
            print(response.text)
            return None

        deadline = time.monotonic() + self.POLL_TIMEOUT
        while time.monotonic() < deadline:
            response = requests.get(
                self.url + '/articleStatus',
                params={'transcriptionId': transcription_id},
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            if not response.ok:
                print(response.text)
                print(response.status_code)
                return None

            # articleStatus does not necessarily carry an audioUrl right away,
            # so a missing URL is handled as "not ready yet" rather than a crash.
            data = json.loads(response.text)
            audio_url = data.get('audioUrl')
            if not data.get('converted') or not audio_url:
                time.sleep(self.POLL_INTERVAL)
                continue

            # Conversion finished: fetch the audio into a temporary file.
            return self._download(audio_url)

        print('timed out waiting for the audio conversion')
        return None

    def _download(self, url, chunk_size=1024):
        """Stream ``url`` into a new temporary ``.mp3`` file and return its path.

        Returns ``None`` (without creating a file) when the server answers with
        an error status.
        """
        headers = {'user-agent': 'Wget/1.16 (linux-gnu)'}
        # Context managers close both the HTTP connection and the file handle.
        with requests.get(url, stream=True, headers=headers,
                          timeout=self.REQUEST_TIMEOUT) as r:
            if not r.ok:
                print(r.status_code)
                return None
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tempf:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if chunk:
                        tempf.write(chunk)
        return tempf.name
134
+