# coding=utf8
# Youtube Video Translator
# Developed by Ruslan Magana Vsevolodovna
# https://ruslanmv.com/
# importing all necessary libraries
import os
import pathlib
import sys
from pprint import pprint

import gradio as gr
import speech_recognition as sr
from googletrans import Translator, constants
from gtts import gTTS
from moviepy.editor import *
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from utils import *


def download_video(url):
    """Download the first progressive mp4 stream of a YouTube video.

    Args:
        url: Address of the YouTube video.

    Returns:
        The value returned by the stream's ``download()`` call (the local
        file location of the downloaded video).

    Raises:
        RuntimeError: If the video offers no progressive mp4 stream.
    """
    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.
    if stream is None:
        raise RuntimeError("No progressive mp4 stream is available for this video")
    local_file = stream.download()
    print("Downloaded")
    return local_file


def validate_youtube(url):
    """Tell whether a video must be rejected.

    Note the inverted sense of the result: ``True`` means "do not process".

    Args:
        url: Address of the YouTube video.

    Returns:
        True when the URL cannot be opened as a YouTube video or the video is
        longer than 600 seconds, False otherwise.
    """
    try:
        # This creates a YouTube object.
        yt = YouTube(url)
        # Length of the video in seconds. Read inside the try because looking
        # up the metadata can fail as well as the constructor.
        video_length = yt.length
    except Exception:
        print("Hi there URL seems invalid")
        return True
    if video_length > 600:
        print("Your video is larger than 10 minutes")
        return True
    print("Your video is less than 10 minutes")
    return False


def validate_url(url):
    """Tell whether ``url`` fails generic URL validation.

    Args:
        url: Text to check.

    Returns:
        True when ``validators.url`` rejects the text, False when it accepts it.
    """
    import validators

    if not validators.url(url):
        print("Hi there URL seems invalid ")
        return True
    return False


def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` file in the current directory.

    Deletion is best effort: a file that cannot be removed is reported and
    skipped so the remaining files are still deleted.
    """
    # Imported locally (as in the original) so the wildcard imports at the top
    # of the file cannot shadow these module names.
    import glob
    import pathlib

    types = ('*.mp4', '*.wav')  # the tuple of file types
    # Finding mp4 and wave files
    junks = []
    for pattern in types:
        junks.extend(glob.glob(pattern))
    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError:
            print("I cannot delete the file because it is being used by another process")


def getSize(filename):
    """Return the size of ``filename`` in bytes."""
    return os.stat(filename).st_size


def _extract_video_id(url):
    """Extract the video id from a YouTube URL.

    Handles ``watch?v=ID`` (with or without further query parameters),
    ``youtu.be/ID`` and ``/embed/ID``, ``/shorts/ID``, ``/live/ID``, ``/v/ID``
    paths.

    Raises:
        ValueError: If no video id can be found in ``url``.
    """
    # Local import so the wildcard imports of this file cannot shadow it.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url.strip())
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids and query_ids[0]:
        return query_ids[0]
    host = (parsed.hostname or "").lower()
    segments = [segment for segment in parsed.path.split("/") if segment]
    if host.endswith("youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]
    raise ValueError("Cannot find a video id in URL: {!r}".format(url))


def generate_transcript(url, lang_api):
    """Fetch the transcript of a video and join it into one string.

    Args:
        url: Address of the YouTube video.
        lang_api: Language code requested from the transcript API.

    Returns:
        All transcript entries except ``[Music]`` markers, each followed by a
        single space.

    Raises:
        ValueError: If no video id can be found in ``url``.
        Exception: Whatever ``YouTubeTranscriptApi.get_transcript`` raises
            when no transcript can be fetched.
    """
    video_id = _extract_video_id(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_api])
    return "".join(
        entry["text"] + " " for entry in transcript if entry["text"] != '[Music]'
    )


# Set environment variables
home_dir = os.getcwd()
temp_dir=os.path.join(home_dir, "temp") #Create temp directory pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True) os.environ['home_dir'] = home_dir os.environ['temp_dir'] = temp_dir def video_to_translate(url,initial_language,final_language): print('Checking the url') check =validate_youtube(url) if check is True: return "./demo/tryagain2.mp4" #Internal definitions if initial_language == "English": lang_in='en-US' lang_api='en' elif initial_language == "Italian": lang_in='it-IT' lang_api='it' elif initial_language == "Spanish": lang_in='es-MX' lang_api='es' elif initial_language == "Russian": lang_in='ru-RU' lang_api='rus' elif initial_language == "German": lang_in='de-DE' lang_api='de' elif initial_language == "Japanese": lang_in='ja-JP' lang_api='ja' if final_language == "English": lang='en' elif final_language == "Italian": lang='it' elif final_language == "Spanish": lang='es' elif final_language == "Russian": lang='ru' elif final_language == "German": lang='de' elif final_language == "Japanese": lang='ja' # Initial directory home_dir= os.getenv('home_dir') print('Initial directory:',home_dir) # Cleaning previous files cleanup() file_obj=download_video(url) print(file_obj) # Insert Local Video File Path videoclip = VideoFileClip(file_obj) try: # Trying to get transcripts text = generate_transcript(url,lang_api) print("Transcript Found") except Exception: print("No Transcript Found") # Trying to recognize audio # Insert Local Audio File Path videoclip.audio.write_audiofile("audio.wav",codec='pcm_s16le') # initialize the recognizer r = sr.Recognizer() # open the file with sr.AudioFile("audio.wav") as source: # listen for the data (load audio to memory) audio_data = r.record(source) # recognize (convert from speech to text) print("Recognize from ",lang_in) #There is a limit of 10 MB on all single requests sent to the API using local file size_wav=getSize("audio.wav") if size_wav > 50000000: print("The wav is too large") 
audio_chunks=split_audio_wav("audio.wav") text="" for chunk in audio_chunks: print("Converting audio to text",chunk) try: text_chunk= r.recognize_google(audio_data, language = lang_in) except Exception: print("This video cannot be recognized") cleanup() return "./demo/tryagain.mp4" text=text+text_chunk+" " text=str(text) print(type(text)) else: try: text = r.recognize_google(audio_data, language = lang_in) except Exception: print("This video cannot be recognized") cleanup() return "./demo/tryagain.mp4" #print(text) print("Destination language ",lang) # init the Google API translator translator = Translator() try: translation = translator.translate(text, dest=lang) except Exception: print("This text cannot be translated") cleanup() return "./demo/tryagain.mp4" #translation.text trans=translation.text myobj = gTTS(text=trans, lang=lang, slow=False) myobj.save("audio.wav") # loading audio file audioclip = AudioFileClip("audio.wav") # adding audio to the video clip new_audioclip = CompositeAudioClip([audioclip]) videoclip.audio = new_audioclip new_video="video_translated_"+lang+".mp4" # Return back to main directory os.chdir(home_dir) print('Final directory',os.getcwd()) videoclip.write_videofile(new_video) videoclip.close() del file_obj return new_video initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"]) final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"]) url =gr.inputs.Textbox(label = "Enter the YouTube URL below:") gr.Interface(fn = video_to_translate, inputs = [url,initial_language,final_language], outputs = 'video', verbose = True, title = 'Video Youtube Translator', description = 'A simple application that translates Youtube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English and Japanese. Wait one minute to process.', article = '''
All you need to do is paste the YouTube link and hit submit, then wait for processing. After that, click Play/Pause to listen to the video. The video is saved in mp4 format. The video length limit is 10 minutes. For more information visit ruslanmv.com