Spaces:

frogcho123
/

speech2speech

Runtime error

App Files Files Community

frogcho123 committed on Jun 6, 2023

Commit

d7dfa49

•

1 Parent(s): 5fbd86e

added app.py

Browse files

Files changed (1) hide show

app.py +60 -0

app.py CHANGED Viewed

	@@ -0,0 +1,60 @@

+import os
+import tempfile
+import gradio as gr
+import whisper
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from gtts import gTTS
+from IPython.display import Audio
+# Load the models and tokenizer
+whisper_model = whisper.load_model("base")
+tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
+model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
+def translate_audio(input_file, to_lang):
+    # Load the audio file
+    audio = whisper.load_audio(input_file)
+    audio = whisper.pad_or_trim(audio)
+    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+    # Detect language using Whisper
+    _, probs = whisper_model.detect_language(mel)
+    lang = max(probs, key=probs.get)
+    # Convert audio to text
+    options = whisper.DecodingOptions()
+    result = whisper.decode(whisper_model, mel, options)
+    text = result.text
+    # Translate the text
+    tokenizer.src_lang = lang
+    encoded_bg = tokenizer(text, return_tensors="pt")
+    generated_tokens = model.generate(**encoded_bg)
+    translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+    # Convert translated text to audio
+    tts = gTTS(text=translated_text, lang=to_lang)
+    temp_output_file = tempfile.NamedTemporaryFile(suffix=".mp3").name
+    tts.save(temp_output_file)
+    # Load audio data from file
+    audio_data = open(temp_output_file, "rb").read()
+    return Audio(audio_data)
+def translate_audio_interface(input_file, to_lang):
+    return translate_audio(input_file, to_lang)
+iface = gr.Interface(
+    fn=translate_audio_interface,
+    inputs=["file", "text"],
+    outputs="audio",
+    title="Audio Translation",
+    description="Upload an MP3 file and select the target language for translation.",
+    examples=[
+        ["audio_example.mp3", "en"],
+        ["speech_sample.mp3", "fr"],
+    ]
+)
+iface.launch(debug = True)