SadTalker

Build error

App Files Files Community

lithiumice committed on Apr 15, 2023

Commit

f68fadb

1 Parent(s): a31c0b9

add ht

Browse files

Files changed (23) hide show

app.py +30 -4
checkpoints/BFM_Fitting/01_MorphableModel.mat +0 -1
checkpoints/BFM_Fitting/BFM09_model_info.mat +0 -1
checkpoints/BFM_Fitting/BFM_exp_idx.mat +0 -1
checkpoints/BFM_Fitting/BFM_front_idx.mat +0 -1
checkpoints/BFM_Fitting/Exp_Pca.bin +0 -1
checkpoints/BFM_Fitting/facemodel_info.mat +0 -1
checkpoints/BFM_Fitting/select_vertex_id.mat +0 -1
checkpoints/BFM_Fitting/similarity_Lm3D_all.mat +0 -1
checkpoints/BFM_Fitting/std_exp.txt +0 -1
checkpoints/auido2exp_00300-model.pth +0 -1
checkpoints/auido2pose_00140-model.pth +0 -1
checkpoints/epoch_20.pth +0 -1
checkpoints/facevid2vid_00189-model.pth.tar +0 -1
checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip +0 -1
checkpoints/hub/checkpoints/s3fd-619a316812.pth +0 -1
checkpoints/mapping_00229-model.pth.tar +0 -1
checkpoints/shape_predictor_68_face_landmarks.dat +0 -1
checkpoints/wav2lip.pth +0 -1
req.txt +23 -0
run.sh +1 -0
src/gradio_demo.py +3 -2
src/utils/text2speech.py +114 -1

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import gradio as gr
 from src.gradio_demo import SadTalker
 from src.utils.text2speech import TTSTalker
 from huggingface_hub import snapshot_download
 def get_source_image(image):
         return image
@@ -18,6 +19,7 @@ def sadtalker_demo():
     sad_talker = SadTalker(lazy_load=True)
     tts_talker = TTSTalker()
     with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
         gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
@@ -38,16 +40,40 @@ def sadtalker_demo():
                         with gr.Row():
                             source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
                 with gr.Tabs(elem_id="sadtalker_driven_audio"):
-                    with gr.TabItem('Upload or Generating from TTS'):
                         with gr.Column(variant='panel'):
-                            driven_audio = gr.Audio(label="Input audio(.wav/.mp3)", source="upload", type="filepath")
                         with gr.Column(variant='panel'):
-                            input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
                             tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
-                            tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_checkbox"):

 from src.gradio_demo import SadTalker
 from src.utils.text2speech import TTSTalker
 from huggingface_hub import snapshot_download
+from src.utils.text2speech import TTSTalkerPlayHT
 def get_source_image(image):
         return image
     sad_talker = SadTalker(lazy_load=True)
     tts_talker = TTSTalker()
+    tts_talker_ht = TTSTalkerPlayHT()
     with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
         gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
                         with gr.Row():
                             source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
+                # with gr.Tabs(elem_id="sadtalker_driven_audio"):
+                #     with gr.TabItem('Upload or Generating from TTS'):
+                #         with gr.Column(variant='panel'):
+                #             driven_audio = gr.Audio(label="Input audio(.wav/.mp3)", source="upload", type="filepath")
+                #         with gr.Column(variant='panel'):
+                #             input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
+                #             tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
+                #             tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
+                # ht TTS
                 with gr.Tabs(elem_id="sadtalker_driven_audio"):
+                    with gr.TabItem('Play.ht: Upload OR TTS'):
                         with gr.Column(variant='panel'):
+                            driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
                         with gr.Column(variant='panel'):
+                            input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
                             tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
+                            tts.click(fn=tts_talker_ht.test, inputs=[input_text], outputs=[driven_audio])
+                # origin TTS
+                with gr.Tabs(elem_id="sadtalker_driven_audio"):
+                    with gr.TabItem('Origin: Upload OR TTS'):
+                        with gr.Column(variant='panel'):
+                            driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
+                        with gr.Column(variant='panel'):
+                            input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
+                            tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
+                            tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_checkbox"):

checkpoints/BFM_Fitting/01_MorphableModel.mat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/37b1f0742db356a3b1568a8365a06f5b0fe0ab687ac1c3068c803666cbd4d8e2

checkpoints/BFM_Fitting/BFM09_model_info.mat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/db8d00544f0b0182f1b8430a3bb87662b3ff674eb33c84e6f52dbe2971adb81b

checkpoints/BFM_Fitting/BFM_exp_idx.mat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/1146e4e9c3bef303a497383aa7974c014fe945c7

checkpoints/BFM_Fitting/BFM_front_idx.mat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b9d7b0953dd1dc5b1e28144610485409ac321f9b

checkpoints/BFM_Fitting/Exp_Pca.bin DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/e7f31380e6cbdaf2aeec698db220bac4f221946e4d551d88c092d47ec49b1726

checkpoints/BFM_Fitting/facemodel_info.mat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/3e516ec7297fa3248098f49ecea10579f4831c0a

checkpoints/BFM_Fitting/select_vertex_id.mat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/5b8b220093d93b133acc94ffed159f31a74854cd

checkpoints/BFM_Fitting/similarity_Lm3D_all.mat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/a0e23588302bc71fc899eef53ff06df5f4df4c1d

checkpoints/BFM_Fitting/std_exp.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/767b8de4ea1ca78b6f22b98ff2dee4fa345500bb

checkpoints/auido2exp_00300-model.pth DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b7608f0e6b477e50e03ca569ac5b04a841b9217f89d502862fc78fda4e46dec4

checkpoints/auido2pose_00140-model.pth DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/4fba6701852dc57efbed25b1e4276e4ff752941860d69fc4429f08a02326ebce

checkpoints/epoch_20.pth DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/6d17a6b23457b521801baae583cb6a58f7238fe6721fc3d65d76407460e9149b

checkpoints/facevid2vid_00189-model.pth.tar DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbad01d46f0510276dc4521322dde6824a873a4222cd0740c85762e7067ea71d

checkpoints/hub/checkpoints/2DFAN4-cd938726ad.zip DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/cd938726adb1f15f361263cce2db9cb820c42585fa8796ec72ce19107f369a46

checkpoints/hub/checkpoints/s3fd-619a316812.pth DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/619a31681264d3f7f7fc7a16a42cbbe8b23f31a256f75a366e5a1bcd59b33543

checkpoints/mapping_00229-model.pth.tar DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/62a1e06006cc963220f6477438518ed86e9788226c62ae382ddc42fbcefb83f1

checkpoints/shape_predictor_68_face_landmarks.dat DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f

checkpoints/wav2lip.pth DELETED Viewed

	@@ -1 +0,0 @@
1	- ../../../../root/.cache/huggingface/hub/models--vinthony--SadTalker/blobs/b78b681b68ad9fe6c6fb1debc6ff43ad05834a8af8a62ffc4167b7b34ef63c37

req.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+torch
+torchvision
+torchaudio
+numpy
+face_alignment
+imageio
+imageio-ffmpeg
+librosa
+numba
+resampy
+pydub
+scipy
+kornia
+tqdm
+yacs
+pyyaml
+joblib
+scikit-image
+basicsr
+facexlib
+dlib-bin
+gfpgan
+TTS

run.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ python app.py

src/gradio_demo.py CHANGED Viewed

@@ -127,8 +127,9 @@ class SadTalker():
             del self.audio_to_coeff
             del self.animate_from_coeff
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
         import gc; gc.collect()
         return return_path

             del self.audio_to_coeff
             del self.animate_from_coeff
+        if torch.cuda.is_available() :
+            torch.cuda.empty_cache()
+            torch.cuda.synchronize()
         import gc; gc.collect()
         return return_path

src/utils/text2speech.py CHANGED Viewed

@@ -18,4 +18,117 @@ class TTSTalker():
         self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name)
-        return tempf.name

         self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=tempf.name)
+        return tempf.name
+import urllib.request
+import tempfile
+import requests
+import json
+import time
+class TTSTalkerPlayHT():
+    def __init__(self) -> None:
+        if 0:
+            from easydict import EasyDict
+            self = EasyDict()
+            text = 'hello world'
+        self.url = "https://play.ht/api/v1"
+        self.headers = {
+            'Authorization': 'f35fc9d7ce0549a88f6cdc15ec860b6e',
+            'X-User-ID': '96tPb0H2cXbobV9u8iLVGyJPUPc2',
+            'Content-Type': 'application/json'
+        }
+    def test(self, text, language='en', **kwargs):
+        payload = json.dumps({
+            "title": "Testing public api convertion",
+            "voice": "en-US-MichelleNeural",
+            "content": [text],
+        })
+        get_url = self.url+f'/convert'
+        response = requests.request(
+            "POST",
+            get_url,
+            headers=self.headers,
+            data=payload)
+        if response.status_code == 404:
+            print('404')
+            return
+        # transcriptionId 如果成功是马上返回的
+        data = json.loads(response.text)
+        transcriptionId = data['transcriptionId']
+        s_time = time.time()
+        while time.time() - s_time < 10:
+            if 0:
+                get_url = self.url+f'/articleStatus?transcriptionId={transcriptionId}'
+                response = requests.get(
+                    get_url,
+                    headers=self.headers,
+                )
+            else:
+                get_url = self.url+f'/articleStatus'
+                response = requests.get(
+                    get_url,
+                    params={
+                        'transcriptionId': transcriptionId
+                    },
+                    headers=self.headers,
+                )
+            if response.status_code == 404:
+                print(response.text)
+                print('404')
+                return
+            # articleStatus返回的不一定马上就有audioUrl
+            data = json.loads(response.text)
+            converted = data['converted']
+            if converted != True:
+                time.sleep(0.5)
+                continue
+        # articleStatus 表示转换完成
+        audioUrl = data['audioUrl']
+        tempf  = tempfile.NamedTemporaryFile(
+                delete = False,
+                suffix = ('.'+'mp3'),
+            )
+        def download_dropbox_url(url, filepath, chunk_size=1024):
+            import requests
+            headers = {'user-agent': 'Wget/1.16 (linux-gnu)'}
+            r = requests.get(url, stream=True, headers=headers)
+            with open(filepath, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=chunk_size):
+                    if chunk:
+                        f.write(chunk)
+            return filepath
+        download_dropbox_url(audioUrl, tempf.name)
+        # urllib.request.urlretrieve(audioUrl, tempf.name)
+        # response = requests.get(audioUrl)
+        # with open(tempf.name, "wb") as f:
+        #     f.write(response.content)
+        # import subprocess
+        # cmd = f'wget -O {tempf.name} {audioUrl}'
+        # # ['wget', audioUrl, '-O', tempf.name]
+        # subprocess.call(cmd)
+        return tempf.name