Spaces:
Running
Running
import os | |
import numpy as np | |
import gradio as gr | |
import assemblyai as aai | |
from translate import Translator | |
import uuid | |
from elevenlabs import VoiceSettings | |
from elevenlabs.client import ElevenLabs | |
from pathlib import Path | |
from scipy.io.wavfile import write, read | |
import yt_dlp | |
# API credentials are taken from the process environment; each value is
# ``None`` when the corresponding variable is not set.
ELEVENLABS_API = os.getenv("ELEVENLABS_API")
ASSEMBLYAI_API = os.getenv("ASSEMBLYAI_API")
def voice_to_voice(audio_file):
    """Transcribe a recording, translate it, and voice every translation.

    Args:
        audio_file: Path of the audio file to process, as handed over by the
            Gradio audio input (``None`` / empty when nothing was provided).

    Returns:
        A tuple containing the generated audio file paths (``pathlib.Path``)
        followed by the translated texts, both in the order produced by
        ``translate_text``.

    Raises:
        gr.Error: If no audio was supplied, if the transcription service
            reports an error, or if the transcript contains no text.
    """
    # Fail early with a readable UI message instead of sending an empty
    # input to the transcription service.
    if not audio_file:
        raise gr.Error("Please record or upload an audio file first.")

    transcript = transcribe_audio(audio_file)
    if transcript.status == 'error':
        raise gr.Error(transcript.error)

    text = transcript.text
    # Without any transcribed text there is nothing to translate or synthesize.
    if not text:
        raise gr.Error("No speech could be transcribed from the audio.")

    list_translations = translate_text(text)
    generated_audio_paths = [
        Path(text_to_speech(translation)) for translation in list_translations
    ]

    # Audio paths first, then the texts.
    return tuple(generated_audio_paths + list_translations)
def transcribe_audio(audio_file):
    """Transcribe ``audio_file`` with AssemblyAI and return the transcript object.

    The AssemblyAI API key is (re)applied from ``ASSEMBLYAI_API`` on every call.
    """
    aai.settings.api_key = ASSEMBLYAI_API
    return aai.Transcriber().transcribe(audio_file)
def translate_text(text):
    """Translate English ``text`` into each supported target language.

    Returns:
        A list with one translation per language code, in the order
        ru, tr, sv, de, es, ja, id.
    """
    target_codes = ["ru", "tr", "sv", "de", "es", "ja", "id"]
    # A separate Translator is built per target language.
    return [
        Translator(from_lang="en", to_lang=code).translate(text)
        for code in target_codes
    ]
def text_to_speech(text):
    """Synthesize ``text`` with ElevenLabs and save the audio as an MP3 file.

    Returns:
        The name of the written file: a random UUID with an ``.mp3``
        extension, relative to the current working directory.
    """
    tts_client = ElevenLabs(api_key=ELEVENLABS_API)
    speech_settings = VoiceSettings(
        stability=0.5,
        similarity_boost=0.8,
        style=0.5,
        use_speaker_boost=True,
    )
    audio_stream = tts_client.text_to_speech.convert(
        # Placeholder value: presumably has to be replaced with a real voice ID.
        voice_id="<your-voice-id>",
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=speech_settings,
    )

    out_name = f"{uuid.uuid4()}.mp3"
    with open(out_name, "wb") as out_file:
        # Falsy (empty) chunks are skipped, everything else is written as-is.
        for piece in filter(None, audio_stream):
            out_file.write(piece)
    return out_name
def download_audio(url):
    """Download the audio behind ``url`` with yt-dlp and return the WAV path.

    The download is stored under ``ytdl/`` (named after the media title) and
    converted to WAV by yt-dlp's ``FFmpegExtractAudio`` post-processor.

    Args:
        url: Link to the media whose audio should be fetched.

    Returns:
        The path of the resulting ``.wav`` file as a string.

    Raises:
        gr.Error: If ``url`` is empty, the download fails, or the expected
            WAV file is missing afterwards.
    """
    if not url or not url.strip():
        raise gr.Error("Please paste a link first.")

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url.strip(), download=True)
            # prepare_filename() yields the pre-conversion name; swap its
            # extension for the one the post-processor produces.
            file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
    except yt_dlp.utils.DownloadError as err:
        # Surface download problems as a readable message in the UI.
        raise gr.Error(f"Could not download audio: {err}") from err

    # The file only needs to exist; decoding the whole WAV into memory just to
    # discard the samples (as was done previously) is unnecessary.
    if not Path(file_path).is_file():
        raise gr.Error("The downloaded audio could not be converted to WAV.")
    return file_path
# Build the Gradio UI: one audio input (recorded/uploaded or downloaded from a
# link) and, per target language, an audio player plus the translated text.
with gr.Blocks() as demo:
    gr.Markdown("## Audio Translator")
    gr.Markdown(
        """
        The API Key you need:
        [AssemblyAI API key](https://www.assemblyai.com/?utm_source=youtube&utm_medium=referral&utm_campaign=yt_mis_66)<br>
        [Elevenlabs API key](https://elevenlabs.io/)<br>
        Note: you need at least 30 minutes of a voice recording of yourself for the *Professional voice cloning. But there is also a simpler voice cloning option that only requires 30 seconds of voice recording. *Professional voice cloning is a paid feature.
        """
    )
    audio_input = gr.Audio(type="filepath", show_download_button=True)

    with gr.Accordion("Inputs by Link", open=False):
        with gr.Row():
            link = gr.Textbox(
                label="Link",
                placeholder="Paste the link here",
                interactive=True
            )
            download_button = gr.Button(
                "Download!",
                variant="primary"
            )
        # The downloaded file path is fed into the audio input component.
        download_button.click(download_audio, [link], [audio_input])

    submit = gr.Button("Submit", variant="primary")
    clear_button = gr.ClearButton(audio_input, "Clear")

    # Labels must follow the same order as the language codes used by
    # translate_text (ru, tr, sv, de, es, ja, id); otherwise each result is
    # displayed under the wrong language.
    languages = ["Russian", "Turkish", "Swedish", "German", "Spanish", "Japanese", "Indonesian"]
    audio_outputs = []
    text_outputs = []
    for lang in languages:
        with gr.Group():
            audio_outputs.append(gr.Audio(label=lang, interactive=False))
            text_outputs.append(gr.Markdown())

    # voice_to_voice returns every audio path first and every translated text
    # afterwards, so the output list has to be grouped the same way rather
    # than interleaved per language.
    output_components = audio_outputs + text_outputs

    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)

# The port is pinned to 7860. NOTE: with an explicit server_port Gradio does
# not fall back to another port, so launching fails if 7860 is already in use;
# pass server_port=None to let Gradio search for a free one.
demo.launch(server_port=7860)