Blane187 committed on
Commit
2f46bdb
1 Parent(s): 3aa1a13

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import gradio as gr
4
+ import assemblyai as aai
5
+ from translate import Translator
6
+ import uuid
7
+ from elevenlabs import VoiceSettings
8
+ from elevenlabs.client import ElevenLabs
9
+ from pathlib import Path
10
+
11
+
12
+ ELEVENLABS_API = os.environ.get("ELEVENLABS_API")
13
+
14
+ ASSEMBLYAI_API = os.environ.get("ASSEMBLYAI_API")
15
+
16
def voice_to_voice(audio_file):
    """Transcribe a recording, translate it, and synthesize each translation.

    The recording is transcribed with ``transcribe_audio``, the resulting text
    is translated by ``translate_text``, and every translation is turned into
    speech by ``text_to_speech``.

    Args:
        audio_file: Path of the recorded audio file, as delivered by the
            ``gr.Audio(type="filepath")`` input component.

    Returns:
        A flat tuple ``(audio_path, text, audio_path, text, ...)`` holding one
        ``Path``/string pair per translation. The values alternate because the
        output components of the UI are registered as Audio, Markdown,
        Audio, Markdown, ... and Gradio assigns return values positionally.

    Raises:
        gr.Error: If no recording was supplied or the transcription failed.
    """
    # Submitting without a recording yields no file path; fail with a
    # readable UI error instead of an opaque error from the transcriber.
    if not audio_file:
        raise gr.Error("Please record or upload an audio file first.")

    transcript = transcribe_audio(audio_file)
    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)

    outputs = []
    for translation in translate_text(transcript.text):
        # Keep each audio file next to its text so the pair lands in the
        # matching Audio/Markdown components.
        outputs.append(Path(text_to_speech(translation)))
        outputs.append(translation)

    return tuple(outputs)
32
+
33
def transcribe_audio(audio_file):
    """Transcribe an audio file with AssemblyAI.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The transcript object returned by ``aai.Transcriber().transcribe``;
        callers inspect its ``status``, ``error`` and ``text`` attributes.
    """
    # AssemblyAI has to be authenticated with its own key; the ElevenLabs key
    # is only valid for the text-to-speech service.
    aai.settings.api_key = ASSEMBLYAI_API
    return aai.Transcriber().transcribe(audio_file)
38
+
39
def translate_text(text):
    """Translate English ``text`` into each of the supported target languages.

    Args:
        text: The English source text.

    Returns:
        A list with one translated string per target language, in the order
        ru, tr, sv, de, es, ja, id.
    """
    target_codes = ("ru", "tr", "sv", "de", "es", "ja", "id")
    # A fresh Translator is created for every target language.
    return [
        Translator(from_lang="en", to_lang=code).translate(text)
        for code in target_codes
    ]
49
+
50
def text_to_speech(text, voice_id="<your-voice-id>"):
    """Synthesize ``text`` with ElevenLabs and save the audio as an MP3 file.

    Args:
        text: The text to be spoken.
        voice_id: Identifier of the ElevenLabs voice to use. The default is a
            placeholder and must be replaced with a real voice ID for the
            request to succeed.

    Returns:
        The name of the MP3 file that was written. The name is a random UUID
        and the path is relative, so the file is created in the current
        working directory.
    """
    client = ElevenLabs(api_key=ELEVENLABS_API)
    audio_chunks = client.text_to_speech.convert(
        voice_id=voice_id,
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=VoiceSettings(
            stability=0.5,
            similarity_boost=0.8,
            style=0.5,
            use_speaker_boost=True,
        ),
    )

    # A random file name keeps concurrent requests from overwriting each other.
    save_file_path = f"{uuid.uuid4()}.mp3"
    with open(save_file_path, "wb") as audio_file:
        for chunk in audio_chunks:
            # Skip empty chunks in the response stream.
            if chunk:
                audio_file.write(chunk)

    return save_file_path
73
+
74
# Gradio UI: one audio input, a submit/clear button pair, and for every target
# language an Audio player followed by a Markdown field for the translated text.
with gr.Blocks() as demo:
    gr.Markdown("## audio Translator")
    # Markdown links are written as [text](url); the reversed form does not
    # render as a link.
    gr.Markdown(
        "The API Key you need:\n"
        "[AssemblyAI API key](https://www.assemblyai.com/?utm_source=youtube&utm_medium=referral&utm_campaign=yt_mis_66)<br>\n"
        "[Elevenlabs API key](https://elevenlabs.io/)<br>\n"
        "Note: you need at least 30 minutes of a voice recording of yourself for the *Professional voice cloning. But there is also a simpler voice cloning option that only requires 30 seconds of voice recording. *Professional voice cloning is a paid feature.\n"
    )
    audio_input = gr.Audio(type="filepath", show_download_button=True)
    submit = gr.Button("Submit", variant="primary")
    # The button caption is passed by keyword.
    clear_button = gr.ClearButton(audio_input, value="Clear")

    output_components = []
    # The label order has to follow the language-code order used by
    # translate_text (ru, tr, sv, de, es, ja, id); otherwise each player is
    # labelled with the wrong language.
    languages = ["Russian", "Turkish", "Swedish", "German", "Spanish", "Japanese", "indonesian"]

    for lang in languages:
        with gr.Group():
            # Components are registered as Audio, Markdown pairs; the click
            # handler's return values are assigned to them positionally.
            output_components.append(gr.Audio(label=lang, interactive=False))
            output_components.append(gr.Markdown())

    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)

if __name__ == "__main__":
    demo.launch()