EC2 Default User
committed on
Commit
•
7df64f6
1
Parent(s):
03cce66
Adding Text to Video Message
Browse files- .gitattributes +1 -0
- .gitignore +3 -0
- .ipynb_checkpoints/app-checkpoint.py +0 -20
- README.md +2 -1
- SE_checkpoint.pth.tar +3 -0
- app.py +63 -17
- best_model.pth.tar +3 -0
- best_model_latest.pth.tar +3 -0
- errormessage.wav +0 -0
- installation.py +45 -0
- scripts/install.sh +18 -0
- scripts/install_ffmpeg.sh +7 -0
- scripts/install_git-lfs.sh +4 -0
- speakers.json +0 -0
- utils/__init__.py +0 -0
- utils/default_models.py +56 -0
- utils/modules.py +242 -0
- utils/voice.py +120 -0
.gitattributes
CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints
|
2 |
+
*/.ipynb_checkpoints/*
|
3 |
+
.ipynb_checkpoints*
|
.ipynb_checkpoints/app-checkpoint.py
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import sys
|
3 |
-
import gradio as gr
|
4 |
-
|
5 |
-
os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
|
6 |
-
os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
|
7 |
-
os.system('mv ./Wav2Lip/* .')
|
8 |
-
|
9 |
-
title = "Text2Lip"
|
10 |
-
description = "Wav2Lip With Text"
|
11 |
-
|
12 |
-
|
13 |
-
def inference(face, audio):
|
14 |
-
os.system("python inference.py --checkpoint_path ./wav2lip.pth --face {} --audio {}".format(face, audio))
|
15 |
-
|
16 |
-
return "./results/result_voice.mp4"
|
17 |
-
|
18 |
-
|
19 |
-
iface = gr.Interface(inference, inputs=[gr.inputs.Video(type="mp4", source="upload", label="Talking Face Video (in mp4 format)", optional=False), gr.inputs.Audio(source="upload", type="filepath", label="Audio", optional=False)], outputs=["video"], title=title, description=description, article=article, examples=[["./examples/w2l_test_f1.mp4", "./examples/w2l_test_a1.wav"]], enable_queue=True)
|
20 |
-
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -3,8 +3,9 @@ title: Text2Lip
|
|
3 |
emoji: 👀
|
4 |
colorFrom: pink
|
5 |
colorTo: indigo
|
|
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
3 |
emoji: 👀
|
4 |
colorFrom: pink
|
5 |
colorTo: indigo
|
6 |
+
python_version: 3.7.13
|
7 |
sdk: gradio
|
8 |
+
sdk_version: 3.0.4
|
9 |
app_file: app.py
|
10 |
pinned: false
|
11 |
---
|
SE_checkpoint.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f96efb20cbeeefd81fd8336d7f0155bf8902f82f9474e58ccb19d9e12345172
|
3 |
+
size 44610930
|
app.py
CHANGED
@@ -1,20 +1,66 @@
|
|
|
|
1 |
import os
|
2 |
import sys
|
3 |
-
|
4 |
-
|
5 |
-
os.system('
|
6 |
-
os.system('
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Gradio front-end for the "Video Message" demo: the user supplies a text, a
# YouTube URL and a trim interval; the pipeline clones the voice heard in the
# clip and lip-syncs the text back onto the video (see utils/modules.py).
import gradio as gr
import os
import sys
#Installation of libraries
EC2_INSTANCE = False
# On EC2, provision the conda env / ffmpeg / git-lfs first (scripts/install.sh).
if EC2_INSTANCE : os.system('cd scripts && sh install.sh')
# Clones Wav2Lip + Coqui-TTS and pip-installs their dependencies at startup.
os.system('python installation.py')
TTS_PATH = "TTS/"
# add libraries into environment
sys.path.append(TTS_PATH) # set this if TTS is not installed globally
VOICE_PATH = "utils/"
# add libraries into environment
sys.path.append(VOICE_PATH) # set this if modules and voice are not installed globally
from utils.modules import *
from utils.voice import *
#Definition Web App in Gradio
text_to_say=gr.inputs.Textbox(label='What would you like the voice to say? (max. 2000 characters per request)')
url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")
initial_time = gr.inputs.Textbox(label='Initial time of trim? (format: hh:mm:ss)')
final_time= gr.inputs.Textbox(label='Final time to trim? (format: hh:mm:ss)')
# video_generator (utils/modules.py) runs the whole pipeline and returns the
# path of the generated mp4 (or a canned "try again" video on invalid input).
demo = gr.Interface(fn = video_generator,
                    inputs = [text_to_say,url,initial_time,final_time],
                    outputs = 'video',
                    verbose = True,
                    title = 'Video Speech Generator from Youtube Videos',
                    description = 'A simple application that replaces the original speech of the video by your text. Wait one minute to process.',
                    article =
                    '''<div>
                    <p style="text-align: center">
                    All you need to do is to paste the Youtube link and
                    set the initial time and final time of the real speach.
                    (The limit of the trim is 5 minutes and not larger than video length)
                    hit submit, then wait for compiling.
                    After that click on Play/Pause for listing to the video.
                    The video is saved in an mp4 format.
                    For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
                    </p>
                    </div>''',

                    examples = [['I am clonning your voice, Charles!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=xw5dvItD5zY",
                                "00:00:01","00:00:10"],
                                ['I am clonning your voice, Jim Carrey!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=uIaY0l5qV0c",
                                "00:00:29", "00:01:05"],
                                ['I am clonning your voice, Mark Zuckerberg!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=AYjDIFrY9rc",
                                "00:00:11", "00:00:44"],
                                ['I am clonning your voice, Ronald Reagan!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=iuoRDY9c5SQ",
                                "00:01:03", "00:01:22"],
                                ['I am clonning your voice, Elon Musk!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=IZ8JQ_1gytg",
                                "00:00:10", "00:00:43"],
                                ['I am clonning your voice, Hitler!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=F08wrLyH5cs",
                                "00:00:15", "00:00:40"],
                                ['I am clonning your voice, Alexandria!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=Eht6oIkzkew",
                                "00:00:02", "00:00:30"],
                                ['I am clonning your voice, Deborah!. Machine intelligence is the last invention that humanity will ever need to make.',
                                "https://www.youtube.com/watch?v=qbq4_Swj0Gg",
                                "00:00:03", "00:0:44"],
                                ]
                    )
demo.launch()
|
best_model.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:017bfd8907c80bb5857d65d0223f0e4e4b9d699ef52e2a853d9cc7eb7e308cf0
|
3 |
+
size 379957289
|
best_model_latest.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:017bfd8907c80bb5857d65d0223f0e4e4b9d699ef52e2a853d9cc7eb7e308cf0
|
3 |
+
size 379957289
|
errormessage.wav
ADDED
Binary file (889 kB). View file
|
|
installation.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Bootstrap script: clones Wav2Lip and Coqui-TTS and installs every runtime
# dependency. Invoked once at startup by app.py ("python installation.py").
# Import the os module
import os
from utils.default_models import ensure_default_models
from pathlib import Path
# When running inside a SageMaker notebook every shell call must run inside
# the dedicated conda env, hence the activation prefix below.
Sagemaker = False
if Sagemaker :
    env='source activate python3 && conda activate VideoMessage &&'
else:
    env=''
## Step 1. Setup of the dependencies
is_first_time = True

#Install dependency
# Download pretrained model

# Get the current working directory
parent_dir = os.getcwd()
print(parent_dir)
if is_first_time:
    # Directory
    directory = "sample_data"
    # Path
    path = os.path.join(parent_dir, directory)
    print(path)
    try:
        os.mkdir(path)
        print("Directory '% s' created" % directory)
    except Exception:
        # os.mkdir raises FileExistsError when the folder is already there
        print("Directory '% s'was already created" % directory)
if is_first_time:
    os.system('git clone https://github.com/Rudrabha/Wav2Lip')
    os.system('cd Wav2Lip &&{} pip install -r requirements.txt'.format(env))
    ## Load the models one by one.
    print("Preparing the models of Wav2Lip")
    # Downloads wav2lip_gan.pth and s3fd.pth into the cloned repo
    # (see utils/default_models.py).
    ensure_default_models(Path("Wav2Lip"))
    os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
    os.system('{} pip install -q -e TTS/'.format(env))
    os.system('{} pip install -q torchaudio==0.9.0'.format(env))
    os.system('{} pip install -q youtube-dl'.format(env))
    os.system('{} pip install ffmpeg-python'.format(env))
    os.system('{} pip install gradio==3.0.4'.format(env))
    os.system('{} pip install pytube==12.1.0'.format(env))
    os.system('{} pip install torchaudio==0.9.0 TTS'.format(env))
    os.system('{} pip install opencv-contrib-python-headless==4.1.2.30'.format(env))
print("Installation repositories DONE!!")
|
scripts/install.sh
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
# One-shot EC2/SageMaker setup: creates the "VideoMessage" conda env, registers
# it as a Jupyter kernel, then installs git-lfs and a static ffmpeg build.
source activate python3
# check prerequisites
command -v conda >/dev/null 2>&1 || { echo >&2 "conda not found. Please refer to the README and install Miniconda."; exit 1; }
command -v git >/dev/null 2>&1 || { echo >&2 "git not found. Please refer to the README and install Git."; exit 1; }
# Conda environment name
CONDA_ENV_NAME=VideoMessage
# make `conda activate` usable from this non-interactive shell
source $(conda info --base)/etc/profile.d/conda.sh
conda create -y -n $CONDA_ENV_NAME python=3.7.13
conda activate $CONDA_ENV_NAME
# expose the env as a Jupyter kernel
conda install -y ipykernel
python -m ipykernel install --user --name VideoMessage --display-name "Python 3 (VideoMessage)"
sh install_git-lfs.sh
sh install_ffmpeg.sh
|
15 |
+
|
16 |
+
|
17 |
+
|
18 |
+
|
scripts/install_ffmpeg.sh
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Install a static ffmpeg build system-wide and symlink ffmpeg/ffprobe into
# /usr/bin (target distro uses yum, which ships no ffmpeg package).
cd /usr/local/bin
sudo mkdir ffmpeg && cd ffmpeg
sudo wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz
sudo tar -xf ffmpeg-release-amd64-static.tar.xz
# NOTE(review): the symlinks hard-code the 5.1.1 directory name; they break
# when johnvansickle.com ships a newer release — confirm on re-provisioning.
sudo ln -s /usr/local/bin/ffmpeg/ffmpeg-5.1.1-amd64-static/ffmpeg /usr/bin/ffmpeg
sudo ln -s /usr/local/bin/ffmpeg/ffmpeg-5.1.1-amd64-static/ffprobe /usr/bin/ffprobe
|
scripts/install_git-lfs.sh
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Install git-lfs from the packagecloud repo (RPM-based distros) and enable it
# for the current user; needed to pull the *.pth.tar model checkpoints.
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
sudo yum install git-lfs -y
git lfs install
|
speakers.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
utils/__init__.py
ADDED
File without changes
|
utils/default_models.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib.request
from pathlib import Path
from threading import Thread
from urllib.error import HTTPError

from tqdm import tqdm
#/Wav2Lip/checkpoints/wav2lip_gan.pth
#/Wav2Lip/face_detection/detection/sfd/s3fd.pth
# model name -> (download URL, expected size in bytes, destination
# subdirectory relative to the Wav2Lip checkout). The expected size is used by
# ensure_default_models to detect truncated/partial downloads.
default_models = {
    "wav2lip_gan": ("https://drive.google.com/u/0/uc?id=1V8hobVlZJdp8dzI8qWaAlbhCrXdBiUET&export=download&confirm=t", 435801865,'checkpoints'),
    "s3fd": ("https://drive.google.com/u/0/uc?id=1Y-mgxW8iq1pXUQicU_8ClNB85eQ1lk0o&export=download", 89843225,'face_detection/detection/sfd'),

}
|
14 |
+
|
15 |
+
|
16 |
+
class DownloadProgressBar(tqdm):
    # tqdm subclass whose update_to method matches the signature of
    # urllib.request.urlretrieve's reporthook callback.
    def update_to(self, b=1, bsize=1, tsize=None):
        """Advance the bar to block *b* of size *bsize*; *tsize* is the total size if known."""
        if tsize is not None:
            self.total = tsize
        # urlretrieve reports cumulative block counts, so convert to a delta
        self.update(b * bsize - self.n)
|
21 |
+
|
22 |
+
|
23 |
+
def download(url: str, target: Path, bar_pos=0):
    """Download *url* to *target*, showing a tqdm progress bar at row *bar_pos*.

    Returns None either way; an HTTPError is swallowed on purpose — the caller
    (ensure_default_models) verifies the file size afterwards.
    """
    # Ensure the directory exists
    target.parent.mkdir(exist_ok=True, parents=True)

    desc = f"Downloading {target.name}"
    with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc=desc, position=bar_pos, leave=False) as t:
        try:
            urllib.request.urlretrieve(url, filename=target, reporthook=t.update_to)
        except HTTPError:
            return
|
33 |
+
|
34 |
+
|
35 |
+
def ensure_default_models(models_dir: Path):
|
36 |
+
# Define download tasks
|
37 |
+
jobs = []
|
38 |
+
for model_name, (url, size,path_tobe) in default_models.items():
|
39 |
+
target_path = models_dir / path_tobe / f"{model_name}.pth"
|
40 |
+
print(target_path)
|
41 |
+
if target_path.exists():
|
42 |
+
if target_path.stat().st_size != size:
|
43 |
+
print(f"File {target_path} is not of expected size, redownloading...")
|
44 |
+
else:
|
45 |
+
continue
|
46 |
+
|
47 |
+
thread = Thread(target=download, args=(url, target_path, len(jobs)))
|
48 |
+
thread.start()
|
49 |
+
jobs.append((thread, target_path, size))
|
50 |
+
|
51 |
+
# Run and join threads
|
52 |
+
for thread, target_path, size in jobs:
|
53 |
+
thread.join()
|
54 |
+
|
55 |
+
assert target_path.exists() and target_path.stat().st_size == size, \
|
56 |
+
f"Download for {target_path.name} failed. You may download models manually instead.\n" \
|
utils/modules.py
ADDED
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Modules for the Video Messsage Generator From Youtube
|
2 |
+
|
3 |
+
from IPython.display import HTML, Audio
|
4 |
+
from base64 import b64decode
|
5 |
+
import numpy as np
|
6 |
+
from scipy.io.wavfile import read as wav_read
|
7 |
+
import io
|
8 |
+
import ffmpeg
|
9 |
+
from pytube import YouTube
|
10 |
+
import random
|
11 |
+
from subprocess import call
|
12 |
+
import os
|
13 |
+
from datetime import datetime
|
14 |
+
|
15 |
+
|
16 |
+
def time_between(t1, t2):
    """Return the difference t2 - t1 as str(timedelta).

    Both arguments are 'HH:MM:SS' strings; when t1 > t2 the result carries
    timedelta's negative form (e.g. '-1 day, 23:59:51').
    """
    fmt = '%H:%M:%S'
    start, end = (datetime.strptime(stamp, fmt) for stamp in (t1, t2))
    return str(end - start)
|
22 |
+
|
23 |
+
def download_video(url):
    """Download a YouTube video with pytube and return the local file path.

    Picks the first progressive (audio+video muxed) mp4 stream; the random
    suffix in the filename avoids clobbering a previous download.
    """
    print("Downloading...")
    local_file = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
        .download(filename="youtube{}.mp4".format(random.randint(0, 10000)))
    )
    print("Downloaded")
    return local_file
|
34 |
+
# download(output_path=destination, filename="name.mp4")
|
35 |
+
|
36 |
+
|
37 |
+
def download_youtube(url):
    """Download a YouTube video with youtube-dl and return its fixed filename.

    Slow fallback path — download_video (pytube) is the preferred route.
    Extracts the video id from the URL's v= query parameter (KeyError when
    the URL carries none) and always writes to ./youtube.mp4.
    """
    #Select a Youtube Video
    #find youtube video id
    from urllib import parse as urlparse
    url_data = urlparse.urlparse(url)
    query = urlparse.parse_qs(url_data.query)
    YOUTUBE_ID = query["v"][0]
    url_download = "https://www.youtube.com/watch?v={}".format(YOUTUBE_ID)
    # BUG FIX: the original interpolated a module-global `env` that is never
    # defined in this module, so every call died with NameError. The conda
    # activation prefix belongs to installation.py only; drop it here.
    # download the youtube with the given ID
    os.system("youtube-dl -f mp4 --output youtube.mp4 '{}'".format(url_download))
    return "youtube.mp4"
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
def cleanup():
|
52 |
+
import pathlib
|
53 |
+
import glob
|
54 |
+
types = ('*.mp4','*.mp3', '*.wav') # the tuple of file types
|
55 |
+
#Finding mp4 and wave files
|
56 |
+
junks = []
|
57 |
+
for files in types:
|
58 |
+
junks.extend(glob.glob(files))
|
59 |
+
try:
|
60 |
+
# Deleting those files
|
61 |
+
for junk in junks:
|
62 |
+
print("Deleting",junk)
|
63 |
+
# Setting the path for the file to delete
|
64 |
+
file = pathlib.Path(junk)
|
65 |
+
# Calling the unlink method on the path
|
66 |
+
file.unlink()
|
67 |
+
except Exception:
|
68 |
+
print("I cannot delete the file because it is being used by another process")
|
69 |
+
|
70 |
+
|
71 |
+
def clean_data():
    """Run cleanup() inside ./sample_data, always restoring the original cwd.

    Any failure (missing directory, cleanup error) is reported to the console;
    the function never raises and the working directory is restored in the
    finally block regardless of outcome.
    """
    # importing all necessary libraries
    import sys, os
    # initial directory
    home_dir = os.getcwd()
    # directory holding the trimmed video/audio intermediates
    fd = 'sample_data/'
    # Join various path components
    path_to_clean = os.path.join(home_dir, fd)
    print("Path to clean:", path_to_clean)
    try:
        os.chdir(path_to_clean)
        print("Inside to clean", os.getcwd())
        cleanup()
    # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
    # still propagate; the broken backslash string continuation (which baked
    # source indentation into the message) is also repaired.
    except Exception:
        print("Something wrong with specified directory. Exception- ", sys.exc_info())
    # handling with finally
    finally:
        print("Restoring the path")
        os.chdir(home_dir)
        print("Current directory is-", os.getcwd())
|
95 |
+
|
96 |
+
def youtube_trim(url,start,end):
    """Download a YouTube clip and cut it to [start, end].

    start/end are 'HH:MM:SS' strings. Writes the fixed paths
    sample_data/input_video.mp4 and sample_data/input_audio.mp3 and returns
    (trimmed_video, trimmed_audio). Assumes ffmpeg is on PATH.
    """
    #cancel previous youtube
    cleanup()
    #download youtube
    #download_youtube(url) # with youtube-dl (slow)
    input_videos=download_video(url)
    # Get the current working directory
    parent_dir = os.getcwd()
    # Trim the video (start, end) seconds
    start = start
    end = end
    #Note: the trimmed video must have face on all frames
    #interval = end - start
    # duration of the cut as an 'H:MM:SS' string, fed to ffmpeg -t below
    interval = time_between(start, end)
    #trimmed_video= parent_dir+'/sample_data/input_vid{}.mp4'.format(random.randint(0, 10000))
    #trimmed_audio= parent_dir+'/sample_data/input_audio{}.mp3'.format(random.randint(0, 10000))
    trimmed_video= parent_dir+'/sample_data/input_video.mp4'
    trimmed_audio= parent_dir+'/sample_data/input_audio.mp3'
    #delete trimmed if already exits
    clean_data()
    #call(["rm","-f",trimmed_audio])
    #call(["rm","-f",trimmed_video])

    #!rm -f {trimmed_video}
    # cut the video
    call(["ffmpeg","-y","-i",input_videos,"-ss", start,"-t",interval,"-async","1",trimmed_video])
    #!ffmpeg -y -i youtube.mp4 -ss {start} -t {interval} -async 1 {trimmed_video}
    # cut the audio (extract the audio track of the trimmed video)
    call(["ffmpeg","-i",trimmed_video, "-q:a", "0", "-map","a",trimmed_audio])
    #Preview trimmed video
    #clear_output()
    print("Trimmed Video+Audio")
    return trimmed_video, trimmed_audio
|
129 |
+
|
130 |
+
def create_video(Text, Voicetoclone):
    """Clone the voice, then run Wav2Lip to lip-sync *Text* onto the trimmed video.

    Text         -- the sentence to synthesize (handed to greet()).
    Voicetoclone -- path of the reference audio clip.
    Returns the basename of the generated mp4 (written one level above the
    Wav2Lip checkout); inference reads the fixed intermediates
    sample_data/input_video.mp4 and out/clonned_audio.wav.
    """
    out_audio = greet(Text, Voicetoclone)
    current_dir = os.getcwd()
    clonned_audio = os.path.join(current_dir, out_audio)

    # Wav2Lip padding / scaling knobs (kept from the original notebook cell)
    pad_top = 0
    pad_bottom = 10
    pad_left = 0
    pad_right = 0
    rescaleFactor = 1
    nosmooth = False

    # random suffix so successive runs don't clobber each other
    out_name = "result_voice_{}.mp4".format(random.randint(0, 10000))
    out_file = "../" + out_name

    # BUG FIX: the original prefixed both commands with a module-global `env`
    # that is never defined in this module (NameError at call time). The conda
    # activation prefix belongs to installation.py only; drop it here.
    cmd = ('cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth '
           '--face "../sample_data/input_video.mp4" --audio "../out/clonned_audio.wav" '
           '--outfile {} --pads {} {} {} {} --resize_factor {}').format(
        out_file, pad_top, pad_bottom, pad_left, pad_right, rescaleFactor)
    if nosmooth:
        cmd += ' --nosmooth'
    os.system(cmd)

    #clear_output()
    print("Creation of Video done")
    return out_name
|
155 |
+
|
156 |
+
|
157 |
+
def time_format_check(input1):
    """Return True when *input1* is NOT a valid 'HH:MM:SS' string.

    The return value answers "is this wrong?", so False means the format
    parsed cleanly. A diagnostic line is printed either way.
    """
    try:
        datetime.strptime(input1, "%H:%M:%S")
    except ValueError:
        print("The time {} has not valid format hh:mm:ss".format(input1))
        return True
    print("The time format is valid", input1)
    return False
|
168 |
+
|
169 |
+
|
170 |
+
def to_seconds(datetime_obj):
    """Convert an 'HH:MM:SS' string to seconds since midnight, as a float."""
    from datetime import datetime
    parsed = datetime.strptime(datetime_obj, "%H:%M:%S")
    midnight = datetime(1900, 1, 1)  # strptime's default date component
    return (parsed - midnight).total_seconds()
|
177 |
+
|
178 |
+
|
179 |
+
def validate_youtube(url):
    """Check the URL with pytube and enforce the 10-minute length cap.

    Returns (is_wrong, video_length_seconds): (True, 0) for an invalid URL,
    (True, length) for videos over 600 s, else (False, length).
    """
    #This creates a youtube objet
    try:
        yt = YouTube(url)
    except Exception:
        print("Hi there URL seems invalid")
        return True, 0
    #This will return the length of the video in sec as an int
    video_length = yt.length
    if video_length > 600:
        print("Your video is larger than 10 minutes")
        return True, video_length
    else:
        print("Your video is less than 10 minutes")
        return False, video_length
|
194 |
+
|
195 |
+
|
196 |
+
def video_generator(text_to_say, url, initial_time, final_time):
    """End-to-end pipeline wired to the Gradio UI.

    Validates the URL and the trim interval, trims the clip, clones its voice,
    and lip-syncs *text_to_say* onto it. Returns the absolute path of the
    generated mp4, or a canned "try again" video on invalid input.
    """
    print('Checking the url', url)
    check1, video_length = validate_youtube(url)
    if check1 is True: return "./demo/tryagain2.mp4"
    check2 = validate_time(initial_time, final_time, video_length)
    if check2 is True: return "./demo/tryagain0.mp4"
    trimmed_video, trimmed_audio = youtube_trim(url, initial_time, final_time)
    voicetoclone = trimmed_audio
    print(voicetoclone)
    outvideo = create_video(text_to_say, voicetoclone)
    #Preview output video
    print("Final Video Preview")
    # BUG FIX: `parent_dir` was referenced but never defined in this module
    # (NameError on every successful run); resolve it from the current working
    # directory, matching what youtube_trim does internally.
    parent_dir = os.getcwd()
    final_video = parent_dir + '/' + outvideo
    print("DONE")
    #showVideo(final_video)
    return final_video
|
212 |
+
|
213 |
+
|
214 |
+
def validate_time(initial_time,final_time,video_length):
    """Return True ("is wrong") when the trim interval is unusable.

    Rejects: malformed timestamps, final <= initial, trims longer than
    5 minutes, and trims longer than the video itself (*video_length* in
    seconds). Returns False when the interval is acceptable.
    """
    is_wrong1=time_format_check(initial_time)
    is_wrong2=time_format_check(final_time)
    #print(is_wrong1,is_wrong2)
    if is_wrong1 is False and is_wrong2 is False:
        delta=time_between(initial_time,final_time)
        # str(timedelta) for a positive sub-24h delta is at most 'H:MM:SS'
        # (7-8 chars); a negative delta renders as '-1 day, HH:MM:SS', which
        # is longer — so length > 8 signals final < initial.
        if len(str(delta)) > 8:
            print("Final Time is Smaller than Initial Time: t1>t2")
            is_wrong = True
            return is_wrong
        else:
            print("OK")
            is_wrong=False
            if int(to_seconds(delta)) > 300 :
                print("The trim is larger than 5 minutes")
                is_wrong = True
                return is_wrong

            elif int(to_seconds(delta)) > video_length :
                print("The trim is larger than video lenght")
                is_wrong = True
                return is_wrong
            else:
                return is_wrong

    else:
        print("Your time format is invalid")
        is_wrong = True
        return is_wrong
|
utils/voice.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# utils/voice.py — module-level setup: loads the multilingual Coqui-TTS (VITS)
# voice-cloning model plus its speaker encoder once at import time, then
# exposes compute_spec() and greet() to the rest of the app.
import gradio as gr
import os
import sys
TTS_PATH = "TTS/"
# add libraries into environment
sys.path.append(TTS_PATH) # set this if TTS is not installed globally
import os
import string
import time
import argparse
import json
import numpy as np
import IPython
from IPython.display import Audio
import torch
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
try:
    from TTS.utils.audio import AudioProcessor
except:
    # NOTE(review): both branches import the same name — the fallback is a no-op
    from TTS.utils.audio import AudioProcessor
from TTS.tts.models import setup_model
from TTS.config import load_config
from TTS.tts.models.vits import *
OUT_PATH = 'out/'
# create output path
os.makedirs(OUT_PATH, exist_ok=True)
import os
# Get the current working directory
parent_dir = os.getcwd()
print(parent_dir)
# model vars — checkpoints and configs are expected in the repository root
MODEL_PATH = parent_dir+'/best_model.pth.tar'
CONFIG_PATH = parent_dir+'/config.json'
TTS_LANGUAGES = parent_dir+"/language_ids.json"
TTS_SPEAKERS = parent_dir+"/speakers.json"
USE_CUDA = torch.cuda.is_available()
# load the config
C = load_config(CONFIG_PATH)
# load the audio processor
ap = AudioProcessor(**C.audio)
speaker_embedding = None
C.model_args['d_vector_file'] = TTS_SPEAKERS
C.model_args['use_speaker_encoder_as_loss'] = False
model = setup_model(C)
model.language_manager.set_language_ids_from_file(TTS_LANGUAGES)
# print(model.language_manager.num_languages, model.embedded_language_dim)
# print(model.emb_l)
# load the checkpoint on CPU; moved to GPU further down only when available
cp = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
# remove speaker encoder
model_weights = cp['model'].copy()
for key in list(model_weights.keys()):
    if "speaker_encoder" in key:
        del model_weights[key]
model.load_state_dict(model_weights)
model.eval()
if USE_CUDA:
    model = model.cuda()
# synthesize voice
use_griffin_lim = False
# pydub / ffmpeg-normalize are used by greet() below
os.system('pip install -q pydub ffmpeg-normalize')
CONFIG_SE_PATH = "config_se.json"
CHECKPOINT_SE_PATH = "SE_checkpoint.pth.tar"
from TTS.tts.utils.speakers import SpeakerManager
from pydub import AudioSegment
import librosa
# speaker encoder used to compute the reference d-vector for voice cloning
SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encoder_config_path=CONFIG_SE_PATH, use_cuda=USE_CUDA)
|
68 |
+
def compute_spec(ref_file):
    """Load *ref_file* and return its spectrogram as a torch tensor.

    The clip is resampled to the audio processor's sample rate on load;
    unsqueeze(0) prepends a batch dimension of 1.
    """
    y, sr = librosa.load(ref_file, sr=ap.sample_rate)
    spec = ap.spectrogram(y)
    spec = torch.FloatTensor(spec).unsqueeze(0)
    return spec
|
73 |
+
|
74 |
+
def greet(Text, Voicetoclone):
    """Synthesize *Text* in the voice of the reference clip *Voicetoclone*.

    Text         -- sentence to speak (max 2000 characters).
    Voicetoclone -- path of the reference audio clip.
    Returns the path of the generated wav (out/clonned_audio.wav).
    Raises SystemExit when the size/length guard trips.
    """
    text = "%s" % (Text)
    reference_files = "%s" % (Voicetoclone)
    print("path url")
    print(Voicetoclone)
    sample = str(Voicetoclone)
    # crude size guard — NOTE(review): this measures the *path string*, not
    # the file on disk; os.path.getsize(sample) was probably intended.
    size = len(reference_files) * sys.getsizeof(reference_files)
    size2 = size / 1000000
    if (size2 > 0.012) or len(text) > 2000:
        message = "File is greater than 30mb or Text inserted is longer than 2000 characters. Please re-try with smaller sizes."
        print(message)
        raise SystemExit("File is greater than 30mb. Please re-try or Text inserted is longer than 2000 characters. Please re-try with smaller sizes.")
    else:
        import shlex
        # BUG FIX: the original command embedded the literal shell token
        # `$sample`, which the shell expands to an undefined (empty) variable,
        # so the reference clip was never loudness-normalized/resampled.
        # Interpolate — and quote — the Python variable instead.
        quoted_sample = shlex.quote(sample)
        os.system('ffmpeg-normalize {0} -nt rms -t=-27 -o {0} -ar 16000 -f'.format(quoted_sample))
        # d-vector of the reference speaker, fed to the VITS synthesizer
        reference_emb = SE_speaker_manager.compute_d_vector_from_clip(reference_files)
        model.length_scale = 1  # scaler for the duration predictor. The larger it is, the slower the speech.
        model.inference_noise_scale = 0.3  # defines the noise variance applied to the random z vector at inference.
        model.inference_noise_scale_dp = 0.3  # defines the noise variance applied to the duration predictor z vector at inference.
        language_id = 0  # fixed language index

        print(" > text: {}".format(text))
        wav, alignment, _, _ = synthesis(
            model,
            text,
            C,
            "cuda" in str(next(model.parameters()).device),
            ap,
            speaker_id=None,
            d_vector=reference_emb,
            style_wav=None,
            language_id=language_id,
            enable_eos_bos_chars=C.enable_eos_bos_chars,
            use_griffin_lim=True,
            do_trim_silence=False,
        ).values()
        print("Generated Audio")
        IPython.display.display(Audio(wav, rate=ap.sample_rate))
        #file_name = text.replace(" ", "_")
        #file_name = file_name.translate(str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'
        file_name = 'clonned_audio.wav'
        out_path = os.path.join(OUT_PATH, file_name)
        print(" > Saving output to {}".format(out_path))
        ap.save_wav(wav, out_path)
        return out_path
|
120 |
+
|