EC2 Default User
committed on
Commit
•
7df64f6
1
Parent(s):
03cce66
Adding Text to Video Message
Browse files- .gitattributes +1 -0
- .gitignore +3 -0
- .ipynb_checkpoints/app-checkpoint.py +0 -20
- README.md +2 -1
- SE_checkpoint.pth.tar +3 -0
- app.py +63 -17
- best_model.pth.tar +3 -0
- best_model_latest.pth.tar +3 -0
- errormessage.wav +0 -0
- installation.py +45 -0
- scripts/install.sh +18 -0
- scripts/install_ffmpeg.sh +7 -0
- scripts/install_git-lfs.sh +4 -0
- speakers.json +0 -0
- utils/__init__.py +0 -0
- utils/default_models.py +56 -0
- utils/modules.py +242 -0
- utils/voice.py +120 -0
.gitattributes
CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints
|
2 |
+
*/.ipynb_checkpoints/*
|
3 |
+
.ipynb_checkpoints*
|
.ipynb_checkpoints/app-checkpoint.py
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import sys
|
3 |
-
import gradio as gr
|
4 |
-
|
5 |
-
os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
|
6 |
-
os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
|
7 |
-
os.system('mv ./Wav2Lip/* .')
|
8 |
-
|
9 |
-
title = "Text2Lip"
|
10 |
-
description = "Wav2Lip With Text"
|
11 |
-
|
12 |
-
|
13 |
-
def inference(face, audio):
|
14 |
-
os.system("python inference.py --checkpoint_path ./wav2lip.pth --face {} --audio {}".format(face, audio))
|
15 |
-
|
16 |
-
return "./results/result_voice.mp4"
|
17 |
-
|
18 |
-
|
19 |
-
iface = gr.Interface(inference, inputs=[gr.inputs.Video(type="mp4", source="upload", label="Talking Face Video (in mp4 format)", optional=False), gr.inputs.Audio(source="upload", type="filepath", label="Audio", optional=False)], outputs=["video"], title=title, description=description, article=article, examples=[["./examples/w2l_test_f1.mp4", "./examples/w2l_test_a1.wav"]], enable_queue=True)
|
20 |
-
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -3,8 +3,9 @@ title: Text2Lip
|
|
3 |
emoji: 👀
|
4 |
colorFrom: pink
|
5 |
colorTo: indigo
|
|
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
3 |
emoji: 👀
|
4 |
colorFrom: pink
|
5 |
colorTo: indigo
|
6 |
+
python_version: 3.7.13
|
7 |
sdk: gradio
|
8 |
+
sdk_version: 3.0.4
|
9 |
app_file: app.py
|
10 |
pinned: false
|
11 |
---
|
SE_checkpoint.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f96efb20cbeeefd81fd8336d7f0155bf8902f82f9474e58ccb19d9e12345172
|
3 |
+
size 44610930
|
app.py
CHANGED
@@ -1,20 +1,66 @@
|
|
|
|
1 |
import os
|
2 |
import sys
|
3 |
-
|
4 |
-
|
5 |
-
os.system('
|
6 |
-
os.system('
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Gradio front-end for the "Video Message" demo: the user supplies a text, a
# YouTube URL and a trim interval; the pipeline clones the voice heard in the
# clip and lip-syncs the text back onto the video (see utils/modules.py).
import gradio as gr
import os
import sys
#Installation of libraries
EC2_INSTANCE = False
# On EC2, provision the conda env / ffmpeg / git-lfs first (scripts/install.sh).
if EC2_INSTANCE : os.system('cd scripts && sh install.sh')
# Clones Wav2Lip + Coqui-TTS and pip-installs their dependencies at startup.
os.system('python installation.py')
TTS_PATH = "TTS/"
# add libraries into environment
sys.path.append(TTS_PATH) # set this if TTS is not installed globally
VOICE_PATH = "utils/"
# add libraries into environment
sys.path.append(VOICE_PATH) # set this if modules and voice are not installed globally
from utils.modules import *
from utils.voice import *
#Definition Web App in Gradio
text_to_say=gr.inputs.Textbox(label='What would you like the voice to say? (max. 2000 characters per request)')
url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")
initial_time = gr.inputs.Textbox(label='Initial time of trim? (format: hh:mm:ss)')
final_time= gr.inputs.Textbox(label='Final time to trim? (format: hh:mm:ss)')
# video_generator (utils/modules.py) runs the whole pipeline and returns the
# path of the generated mp4 (or a canned "try again" video on invalid input).
demo = gr.Interface(fn = video_generator,
                    inputs = [text_to_say,url,initial_time,final_time],
                    outputs = 'video',
                    verbose = True,
                    title = 'Video Speech Generator from Youtube Videos',
                    description = 'A simple application that replaces the original speech of the video by your text. Wait one minute to process.',
                    article =
                    '''<div>
                    <p style="text-align: center">
                    All you need to do is to paste the Youtube link and
                    set the initial time and final time of the real speach.
                    (The limit of the trim is 5 minutes and not larger than video length)
                    hit submit, then wait for compiling.
                    After that click on Play/Pause for listing to the video.
                    The video is saved in an mp4 format.
                    For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
                    </p>
                    </div>''',

                    examples = [['I am clonning your voice, Charles!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=xw5dvItD5zY",
                                "00:00:01","00:00:10"],
                                ['I am clonning your voice, Jim Carrey!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=uIaY0l5qV0c",
                                "00:00:29", "00:01:05"],
                                ['I am clonning your voice, Mark Zuckerberg!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=AYjDIFrY9rc",
                                "00:00:11", "00:00:44"],
                                ['I am clonning your voice, Ronald Reagan!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=iuoRDY9c5SQ",
                                "00:01:03", "00:01:22"],
                                ['I am clonning your voice, Elon Musk!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=IZ8JQ_1gytg",
                                "00:00:10", "00:00:43"],
                                ['I am clonning your voice, Hitler!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=F08wrLyH5cs",
                                "00:00:15", "00:00:40"],
                                ['I am clonning your voice, Alexandria!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=Eht6oIkzkew",
                                "00:00:02", "00:00:30"],
                                ['I am clonning your voice, Deborah!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=qbq4_Swj0Gg",
                                "00:00:03", "00:0:44"],
                                ]
                    )
demo.launch()
|
best_model.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:017bfd8907c80bb5857d65d0223f0e4e4b9d699ef52e2a853d9cc7eb7e308cf0
|
3 |
+
size 379957289
|
best_model_latest.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:017bfd8907c80bb5857d65d0223f0e4e4b9d699ef52e2a853d9cc7eb7e308cf0
|
3 |
+
size 379957289
|
errormessage.wav
ADDED
Binary file (889 kB). View file
|
|
installation.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Bootstrap script: clones Wav2Lip and Coqui-TTS and installs every runtime
# dependency. Invoked once at startup by app.py ("python installation.py").
# Import the os module
import os
from utils.default_models import ensure_default_models
from pathlib import Path
# When running inside a SageMaker notebook every shell call must run inside
# the dedicated conda env, hence the activation prefix below.
Sagemaker = False
if Sagemaker :
    env='source activate python3 && conda activate VideoMessage &&'
else:
    env=''
## Step 1. Setup of the dependencies
is_first_time = True

#Install dependency
# Download pretrained model

# Get the current working directory
parent_dir = os.getcwd()
print(parent_dir)
if is_first_time:
    # Directory
    directory = "sample_data"
    # Path
    path = os.path.join(parent_dir, directory)
    print(path)
    try:
        os.mkdir(path)
        print("Directory '% s' created" % directory)
    except Exception:
        # os.mkdir raises FileExistsError when the folder is already there
        print("Directory '% s'was already created" % directory)
if is_first_time:
    os.system('git clone https://github.com/Rudrabha/Wav2Lip')
    os.system('cd Wav2Lip &&{} pip install -r requirements.txt'.format(env))
    ## Load the models one by one.
    print("Preparing the models of Wav2Lip")
    # Downloads wav2lip_gan.pth and s3fd.pth into the cloned repo
    # (see utils/default_models.py).
    ensure_default_models(Path("Wav2Lip"))
    os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
    os.system('{} pip install -q -e TTS/'.format(env))
    os.system('{} pip install -q torchaudio==0.9.0'.format(env))
    os.system('{} pip install -q youtube-dl'.format(env))
    os.system('{} pip install ffmpeg-python'.format(env))
    os.system('{} pip install gradio==3.0.4'.format(env))
    os.system('{} pip install pytube==12.1.0'.format(env))
    os.system('{} pip install torchaudio==0.9.0 TTS'.format(env))
    os.system('{} pip install opencv-contrib-python-headless==4.1.2.30'.format(env))
print("Installation repositories DONE!!")
|
scripts/install.sh
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
# One-shot EC2/SageMaker setup: creates the "VideoMessage" conda env, registers
# it as a Jupyter kernel, then installs git-lfs and a static ffmpeg build.
source activate python3
# check prerequisites
command -v conda >/dev/null 2>&1 || { echo >&2 "conda not found. Please refer to the README and install Miniconda."; exit 1; }
command -v git >/dev/null 2>&1 || { echo >&2 "git not found. Please refer to the README and install Git."; exit 1; }
# Conda environment name
CONDA_ENV_NAME=VideoMessage
# make `conda activate` usable from this non-interactive shell
source $(conda info --base)/etc/profile.d/conda.sh
conda create -y -n $CONDA_ENV_NAME python=3.7.13
conda activate $CONDA_ENV_NAME
# expose the env as a Jupyter kernel
conda install -y ipykernel
python -m ipykernel install --user --name VideoMessage --display-name "Python 3 (VideoMessage)"
sh install_git-lfs.sh
sh install_ffmpeg.sh
|
15 |
+
|
16 |
+
|
17 |
+
|
18 |
+
|
scripts/install_ffmpeg.sh
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Install a static ffmpeg build system-wide and symlink ffmpeg/ffprobe into
# /usr/bin (target distro uses yum, which ships no ffmpeg package).
cd /usr/local/bin
sudo mkdir ffmpeg && cd ffmpeg
sudo wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz
sudo tar -xf ffmpeg-release-amd64-static.tar.xz
# NOTE(review): the symlinks hard-code the 5.1.1 directory name; they break
# when johnvansickle.com ships a newer release — confirm on re-provisioning.
sudo ln -s /usr/local/bin/ffmpeg/ffmpeg-5.1.1-amd64-static/ffmpeg /usr/bin/ffmpeg
sudo ln -s /usr/local/bin/ffmpeg/ffmpeg-5.1.1-amd64-static/ffprobe /usr/bin/ffprobe
|
scripts/install_git-lfs.sh
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Install git-lfs from the packagecloud repo (RPM-based distros) and enable it
# for the current user; needed to pull the *.pth.tar model checkpoints.
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
sudo yum install git-lfs -y
git lfs install
|
speakers.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
utils/__init__.py
ADDED
File without changes
|
utils/default_models.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib.request
from pathlib import Path
from threading import Thread
from urllib.error import HTTPError

from tqdm import tqdm
#/Wav2Lip/checkpoints/wav2lip_gan.pth
#/Wav2Lip/face_detection/detection/sfd/s3fd.pth
# model name -> (download URL, expected size in bytes, destination
# subdirectory relative to the Wav2Lip checkout). The expected size is used by
# ensure_default_models to detect truncated/partial downloads.
default_models = {
    "wav2lip_gan": ("https://drive.google.com/u/0/uc?id=1V8hobVlZJdp8dzI8qWaAlbhCrXdBiUET&export=download&confirm=t", 435801865,'checkpoints'),
    "s3fd": ("https://drive.google.com/u/0/uc?id=1Y-mgxW8iq1pXUQicU_8ClNB85eQ1lk0o&export=download", 89843225,'face_detection/detection/sfd'),

}
|
14 |
+
|
15 |
+
|
16 |
+
class DownloadProgressBar(tqdm):
    # tqdm subclass whose update_to method matches the signature of
    # urllib.request.urlretrieve's reporthook callback.
    def update_to(self, b=1, bsize=1, tsize=None):
        """Advance the bar to block *b* of size *bsize*; *tsize* is the total size if known."""
        if tsize is not None:
            self.total = tsize
        # urlretrieve reports cumulative block counts, so convert to a delta
        self.update(b * bsize - self.n)
|
21 |
+
|
22 |
+
|
23 |
+
def download(url: str, target: Path, bar_pos=0):
    """Download *url* to *target*, showing a tqdm progress bar at row *bar_pos*.

    Returns None either way; an HTTPError is swallowed on purpose — the caller
    (ensure_default_models) verifies the file size afterwards.
    """
    # Ensure the directory exists
    target.parent.mkdir(exist_ok=True, parents=True)

    desc = f"Downloading {target.name}"
    with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc=desc, position=bar_pos, leave=False) as t:
        try:
            urllib.request.urlretrieve(url, filename=target, reporthook=t.update_to)
        except HTTPError:
            return
|
33 |
+
|
34 |
+
|
35 |
+
def ensure_default_models(models_dir: Path):
|
36 |
+
# Define download tasks
|
37 |
+
jobs = []
|
38 |
+
for model_name, (url, size,path_tobe) in default_models.items():
|
39 |
+
target_path = models_dir / path_tobe / f"{model_name}.pth"
|
40 |
+
print(target_path)
|
41 |
+
if target_path.exists():
|
42 |
+
if target_path.stat().st_size != size:
|
43 |
+
print(f"File {target_path} is not of expected size, redownloading...")
|
44 |
+
else:
|
45 |
+
continue
|
46 |
+
|
47 |
+
thread = Thread(target=download, args=(url, target_path, len(jobs)))
|
48 |
+
thread.start()
|
49 |
+
jobs.append((thread, target_path, size))
|
50 |
+
|
51 |
+
# Run and join threads
|
52 |
+
for thread, target_path, size in jobs:
|
53 |
+
thread.join()
|
54 |
+
|
55 |
+
assert target_path.exists() and target_path.stat().st_size == size, \
|
56 |
+
f"Download for {target_path.name} failed. You may download models manually instead.\n" \
|
utils/modules.py
ADDED
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Modules for the Video Messsage Generator From Youtube
|
2 |
+
|
3 |
+
from IPython.display import HTML, Audio
|
4 |
+
from base64 import b64decode
|
5 |
+
import numpy as np
|
6 |
+
from scipy.io.wavfile import read as wav_read
|
7 |
+
import io
|
8 |
+
import ffmpeg
|
9 |
+
from pytube import YouTube
|
10 |
+
import random
|
11 |
+
from subprocess import call
|
12 |
+
import os
|
13 |
+
from datetime import datetime
|
14 |
+
|
15 |
+
|
16 |
+
def time_between(t1, t2):
    """Return the difference t2 - t1 as str(timedelta).

    Both arguments are 'HH:MM:SS' strings; when t1 > t2 the result carries
    timedelta's negative form (e.g. '-1 day, 23:59:51').
    """
    fmt = '%H:%M:%S'
    start, end = (datetime.strptime(stamp, fmt) for stamp in (t1, t2))
    return str(end - start)
|
22 |
+
|
23 |
+
def download_video(url):
    """Download a YouTube video with pytube and return the local file path.

    Picks the first progressive (audio+video muxed) mp4 stream; the random
    suffix in the filename avoids clobbering a previous download.
    """
    print("Downloading...")
    local_file = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
        .download(filename="youtube{}.mp4".format(random.randint(0, 10000)))
    )
    print("Downloaded")
    return local_file
|
34 |
+
# download(output_path=destination, filename="name.mp4")
|
35 |
+
|
36 |
+
|
37 |
+
def download_youtube(url):
    """Download a YouTube video with youtube-dl and return its fixed filename.

    Slow fallback path — download_video (pytube) is the preferred route.
    Extracts the video id from the URL's v= query parameter (KeyError when
    the URL carries none) and always writes to ./youtube.mp4.
    """
    #Select a Youtube Video
    #find youtube video id
    from urllib import parse as urlparse
    url_data = urlparse.urlparse(url)
    query = urlparse.parse_qs(url_data.query)
    YOUTUBE_ID = query["v"][0]
    url_download = "https://www.youtube.com/watch?v={}".format(YOUTUBE_ID)
    # BUG FIX: the original interpolated a module-global `env` that is never
    # defined in this module, so every call died with NameError. The conda
    # activation prefix belongs to installation.py only; drop it here.
    # download the youtube with the given ID
    os.system("youtube-dl -f mp4 --output youtube.mp4 '{}'".format(url_download))
    return "youtube.mp4"
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
def cleanup():
|
52 |
+
import pathlib
|
53 |
+
import glob
|
54 |
+
types = ('*.mp4','*.mp3', '*.wav') # the tuple of file types
|
55 |
+
#Finding mp4 and wave files
|
56 |
+
junks = []
|
57 |
+
for files in types:
|
58 |
+
junks.extend(glob.glob(files))
|
59 |
+
try:
|
60 |
+
# Deleting those files
|
61 |
+
for junk in junks:
|
62 |
+
print("Deleting",junk)
|
63 |
+
# Setting the path for the file to delete
|
64 |
+
file = pathlib.Path(junk)
|
65 |
+
# Calling the unlink method on the path
|
66 |
+
file.unlink()
|
67 |
+
except Exception:
|
68 |
+
print("I cannot delete the file because it is being used by another process")
|
69 |
+
|
70 |
+
|
71 |
+
def clean_data():
    """Run cleanup() inside ./sample_data, always restoring the original cwd.

    Any failure (missing directory, cleanup error) is reported to the console;
    the function never raises and the working directory is restored in the
    finally block regardless of outcome.
    """
    # importing all necessary libraries
    import sys, os
    # initial directory
    home_dir = os.getcwd()
    # directory holding the trimmed video/audio intermediates
    fd = 'sample_data/'
    # Join various path components
    path_to_clean = os.path.join(home_dir, fd)
    print("Path to clean:", path_to_clean)
    try:
        os.chdir(path_to_clean)
        print("Inside to clean", os.getcwd())
        cleanup()
    # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
    # still propagate; the broken backslash string continuation (which baked
    # source indentation into the message) is also repaired.
    except Exception:
        print("Something wrong with specified directory. Exception- ", sys.exc_info())
    # handling with finally
    finally:
        print("Restoring the path")
        os.chdir(home_dir)
        print("Current directory is-", os.getcwd())
|
95 |
+
|
96 |
+
def youtube_trim(url,start,end):
    """Download a YouTube clip and cut it to [start, end].

    start/end are 'HH:MM:SS' strings. Writes the fixed paths
    sample_data/input_video.mp4 and sample_data/input_audio.mp3 and returns
    (trimmed_video, trimmed_audio). Assumes ffmpeg is on PATH.
    """
    #cancel previous youtube
    cleanup()
    #download youtube
    #download_youtube(url) # with youtube-dl (slow)
    input_videos=download_video(url)
    # Get the current working directory
    parent_dir = os.getcwd()
    # Trim the video (start, end) seconds
    start = start
    end = end
    #Note: the trimmed video must have face on all frames
    #interval = end - start
    # duration of the cut as an 'H:MM:SS' string, fed to ffmpeg -t below
    interval = time_between(start, end)
    #trimmed_video= parent_dir+'/sample_data/input_vid{}.mp4'.format(random.randint(0, 10000))
    #trimmed_audio= parent_dir+'/sample_data/input_audio{}.mp3'.format(random.randint(0, 10000))
    trimmed_video= parent_dir+'/sample_data/input_video.mp4'
    trimmed_audio= parent_dir+'/sample_data/input_audio.mp3'
    #delete trimmed if already exits
    clean_data()
    #call(["rm","-f",trimmed_audio])
    #call(["rm","-f",trimmed_video])

    #!rm -f {trimmed_video}
    # cut the video
    call(["ffmpeg","-y","-i",input_videos,"-ss", start,"-t",interval,"-async","1",trimmed_video])
    #!ffmpeg -y -i youtube.mp4 -ss {start} -t {interval} -async 1 {trimmed_video}
    # cut the audio (extract the audio track of the trimmed video)
    call(["ffmpeg","-i",trimmed_video, "-q:a", "0", "-map","a",trimmed_audio])
    #Preview trimmed video
    #clear_output()
    print("Trimmed Video+Audio")
    return trimmed_video, trimmed_audio
|
129 |
+
|
130 |
+
def create_video(Text, Voicetoclone):
    """Clone the voice, then run Wav2Lip to lip-sync *Text* onto the trimmed video.

    Text         -- the sentence to synthesize (handed to greet()).
    Voicetoclone -- path of the reference audio clip.
    Returns the basename of the generated mp4 (written one level above the
    Wav2Lip checkout); inference reads the fixed intermediates
    sample_data/input_video.mp4 and out/clonned_audio.wav.
    """
    out_audio = greet(Text, Voicetoclone)
    current_dir = os.getcwd()
    clonned_audio = os.path.join(current_dir, out_audio)

    # Wav2Lip padding / scaling knobs (kept from the original notebook cell)
    pad_top = 0
    pad_bottom = 10
    pad_left = 0
    pad_right = 0
    rescaleFactor = 1
    nosmooth = False

    # random suffix so successive runs don't clobber each other
    out_name = "result_voice_{}.mp4".format(random.randint(0, 10000))
    out_file = "../" + out_name

    # BUG FIX: the original prefixed both commands with a module-global `env`
    # that is never defined in this module (NameError at call time). The conda
    # activation prefix belongs to installation.py only; drop it here.
    cmd = ('cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth '
           '--face "../sample_data/input_video.mp4" --audio "../out/clonned_audio.wav" '
           '--outfile {} --pads {} {} {} {} --resize_factor {}').format(
        out_file, pad_top, pad_bottom, pad_left, pad_right, rescaleFactor)
    if nosmooth:
        cmd += ' --nosmooth'
    os.system(cmd)

    #clear_output()
    print("Creation of Video done")
    return out_name
|
155 |
+
|
156 |
+
|
157 |
+
def time_format_check(input1):
    """Return True when *input1* is NOT a valid 'HH:MM:SS' string.

    The return value answers "is this wrong?", so False means the format
    parsed cleanly. A diagnostic line is printed either way.
    """
    try:
        datetime.strptime(input1, "%H:%M:%S")
    except ValueError:
        print("The time {} has not valid format hh:mm:ss".format(input1))
        return True
    print("The time format is valid", input1)
    return False
|
168 |
+
|
169 |
+
|
170 |
+
def to_seconds(datetime_obj):
    """Convert an 'HH:MM:SS' string to seconds since midnight, as a float."""
    from datetime import datetime
    parsed = datetime.strptime(datetime_obj, "%H:%M:%S")
    midnight = datetime(1900, 1, 1)  # strptime's default date component
    return (parsed - midnight).total_seconds()
|
177 |
+
|
178 |
+
|
179 |
+
def validate_youtube(url):
    """Check the URL with pytube and enforce the 10-minute length cap.

    Returns (is_wrong, video_length_seconds): (True, 0) for an invalid URL,
    (True, length) for videos over 600 s, else (False, length).
    """
    #This creates a youtube objet
    try:
        yt = YouTube(url)
    except Exception:
        print("Hi there URL seems invalid")
        return True, 0
    #This will return the length of the video in sec as an int
    video_length = yt.length
    if video_length > 600:
        print("Your video is larger than 10 minutes")
        return True, video_length
    else:
        print("Your video is less than 10 minutes")
        return False, video_length
|
194 |
+
|
195 |
+
|
196 |
+
def video_generator(text_to_say, url, initial_time, final_time):
    """End-to-end pipeline wired to the Gradio UI.

    Validates the URL and the trim interval, trims the clip, clones its voice,
    and lip-syncs *text_to_say* onto it. Returns the absolute path of the
    generated mp4, or a canned "try again" video on invalid input.
    """
    print('Checking the url', url)
    check1, video_length = validate_youtube(url)
    if check1 is True: return "./demo/tryagain2.mp4"
    check2 = validate_time(initial_time, final_time, video_length)
    if check2 is True: return "./demo/tryagain0.mp4"
    trimmed_video, trimmed_audio = youtube_trim(url, initial_time, final_time)
    voicetoclone = trimmed_audio
    print(voicetoclone)
    outvideo = create_video(text_to_say, voicetoclone)
    #Preview output video
    print("Final Video Preview")
    # BUG FIX: `parent_dir` was referenced but never defined in this module
    # (NameError on every successful run); resolve it from the current working
    # directory, matching what youtube_trim does internally.
    parent_dir = os.getcwd()
    final_video = parent_dir + '/' + outvideo
    print("DONE")
    #showVideo(final_video)
    return final_video
|
212 |
+
|
213 |
+
|
214 |
+
def validate_time(initial_time,final_time,video_length):
    """Return True ("is wrong") when the trim interval is unusable.

    Rejects: malformed timestamps, final <= initial, trims longer than
    5 minutes, and trims longer than the video itself (*video_length* in
    seconds). Returns False when the interval is acceptable.
    """
    is_wrong1=time_format_check(initial_time)
    is_wrong2=time_format_check(final_time)
    #print(is_wrong1,is_wrong2)
    if is_wrong1 is False and is_wrong2 is False:
        delta=time_between(initial_time,final_time)
        # str(timedelta) for a positive sub-24h delta is at most 'H:MM:SS'
        # (7-8 chars); a negative delta renders as '-1 day, HH:MM:SS', which
        # is longer — so length > 8 signals final < initial.
        if len(str(delta)) > 8:
            print("Final Time is Smaller than Initial Time: t1>t2")
            is_wrong = True
            return is_wrong
        else:
            print("OK")
            is_wrong=False
            if int(to_seconds(delta)) > 300 :
                print("The trim is larger than 5 minutes")
                is_wrong = True
                return is_wrong

            elif int(to_seconds(delta)) > video_length :
                print("The trim is larger than video lenght")
                is_wrong = True
                return is_wrong
            else:
                return is_wrong

    else:
        print("Your time format is invalid")
        is_wrong = True
        return is_wrong
|
utils/voice.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# utils/voice.py — module-level setup: loads the multilingual Coqui-TTS (VITS)
# voice-cloning model plus its speaker encoder once at import time, then
# exposes compute_spec() and greet() to the rest of the app.
import gradio as gr
import os
import sys
TTS_PATH = "TTS/"
# add libraries into environment
sys.path.append(TTS_PATH) # set this if TTS is not installed globally
import os
import string
import time
import argparse
import json
import numpy as np
import IPython
from IPython.display import Audio
import torch
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
try:
    from TTS.utils.audio import AudioProcessor
except:
    # NOTE(review): both branches import the same name — the fallback is a no-op
    from TTS.utils.audio import AudioProcessor
from TTS.tts.models import setup_model
from TTS.config import load_config
from TTS.tts.models.vits import *
OUT_PATH = 'out/'
# create output path
os.makedirs(OUT_PATH, exist_ok=True)
import os
# Get the current working directory
parent_dir = os.getcwd()
print(parent_dir)
# model vars — checkpoints and configs are expected in the repository root
MODEL_PATH = parent_dir+'/best_model.pth.tar'
CONFIG_PATH = parent_dir+'/config.json'
TTS_LANGUAGES = parent_dir+"/language_ids.json"
TTS_SPEAKERS = parent_dir+"/speakers.json"
USE_CUDA = torch.cuda.is_available()
# load the config
C = load_config(CONFIG_PATH)
# load the audio processor
ap = AudioProcessor(**C.audio)
speaker_embedding = None
C.model_args['d_vector_file'] = TTS_SPEAKERS
C.model_args['use_speaker_encoder_as_loss'] = False
model = setup_model(C)
model.language_manager.set_language_ids_from_file(TTS_LANGUAGES)
# print(model.language_manager.num_languages, model.embedded_language_dim)
# print(model.emb_l)
# load the checkpoint on CPU; moved to GPU further down only when available
cp = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
# remove speaker encoder
model_weights = cp['model'].copy()
for key in list(model_weights.keys()):
    if "speaker_encoder" in key:
        del model_weights[key]
model.load_state_dict(model_weights)
model.eval()
if USE_CUDA:
    model = model.cuda()
# synthesize voice
use_griffin_lim = False
# pydub / ffmpeg-normalize are used by greet() below
os.system('pip install -q pydub ffmpeg-normalize')
CONFIG_SE_PATH = "config_se.json"
CHECKPOINT_SE_PATH = "SE_checkpoint.pth.tar"
from TTS.tts.utils.speakers import SpeakerManager
from pydub import AudioSegment
import librosa
# speaker encoder used to compute the reference d-vector for voice cloning
SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encoder_config_path=CONFIG_SE_PATH, use_cuda=USE_CUDA)
|
68 |
+
def compute_spec(ref_file):
    """Load *ref_file* and return its spectrogram as a torch tensor.

    The clip is resampled to the audio processor's sample rate on load;
    unsqueeze(0) prepends a batch dimension of 1.
    """
    y, sr = librosa.load(ref_file, sr=ap.sample_rate)
    spec = ap.spectrogram(y)
    spec = torch.FloatTensor(spec).unsqueeze(0)
    return spec
|
73 |
+
|
74 |
+
def greet(Text, Voicetoclone):
    """Synthesize *Text* in the voice of the reference clip *Voicetoclone*.

    Text         -- sentence to speak (max 2000 characters).
    Voicetoclone -- path of the reference audio clip.
    Returns the path of the generated wav (out/clonned_audio.wav).
    Raises SystemExit when the size/length guard trips.
    """
    text = "%s" % (Text)
    reference_files = "%s" % (Voicetoclone)
    print("path url")
    print(Voicetoclone)
    sample = str(Voicetoclone)
    # crude size guard — NOTE(review): this measures the *path string*, not
    # the file on disk; os.path.getsize(sample) was probably intended.
    size = len(reference_files) * sys.getsizeof(reference_files)
    size2 = size / 1000000
    if (size2 > 0.012) or len(text) > 2000:
        message = "File is greater than 30mb or Text inserted is longer than 2000 characters. Please re-try with smaller sizes."
        print(message)
        raise SystemExit("File is greater than 30mb. Please re-try or Text inserted is longer than 2000 characters. Please re-try with smaller sizes.")
    else:
        import shlex
        # BUG FIX: the original command embedded the literal shell token
        # `$sample`, which the shell expands to an undefined (empty) variable,
        # so the reference clip was never loudness-normalized/resampled.
        # Interpolate — and quote — the Python variable instead.
        quoted_sample = shlex.quote(sample)
        os.system('ffmpeg-normalize {0} -nt rms -t=-27 -o {0} -ar 16000 -f'.format(quoted_sample))
        # d-vector of the reference speaker, fed to the VITS synthesizer
        reference_emb = SE_speaker_manager.compute_d_vector_from_clip(reference_files)
        model.length_scale = 1  # scaler for the duration predictor. The larger it is, the slower the speech.
        model.inference_noise_scale = 0.3  # defines the noise variance applied to the random z vector at inference.
        model.inference_noise_scale_dp = 0.3  # defines the noise variance applied to the duration predictor z vector at inference.
        language_id = 0  # fixed language index

        print(" > text: {}".format(text))
        wav, alignment, _, _ = synthesis(
            model,
            text,
            C,
            "cuda" in str(next(model.parameters()).device),
            ap,
            speaker_id=None,
            d_vector=reference_emb,
            style_wav=None,
            language_id=language_id,
            enable_eos_bos_chars=C.enable_eos_bos_chars,
            use_griffin_lim=True,
            do_trim_silence=False,
        ).values()
        print("Generated Audio")
        IPython.display.display(Audio(wav, rate=ap.sample_rate))
        #file_name = text.replace(" ", "_")
        #file_name = file_name.translate(str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'
        file_name = 'clonned_audio.wav'
        out_path = os.path.join(OUT_PATH, file_name)
        print(" > Saving output to {}".format(out_path))
        ap.save_wav(wav, out_path)
        return out_path
|
120 |
+
|