frogcho123 committed on
Commit
2bacaf7
1 Parent(s): e6cfad1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -45
app.py CHANGED
@@ -1,56 +1,56 @@
 
1
  import gradio as gr
2
  import whisper
 
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from gtts import gTTS
5
- from tempfile import NamedTemporaryFile
6
-
7
- # Define translation function
8
- def translate_audio(input_file, target_language):
9
- # Save uploaded audio file to a temporary file
10
- with NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
11
- temp_audio.write(input_file.read())
12
- temp_audio.seek(0)
13
- temp_audio_path = temp_audio.name
14
-
15
- # Auto to text (STT)
16
- model = whisper.Whisper("base")
17
- audio = whisper.load_audio(temp_audio_path)
 
 
 
 
 
 
 
 
 
18
  audio = whisper.pad_or_trim(audio)
19
- mel = whisper.log_mel_spectrogram(audio).to(model.device)
20
- _, probs = model.detect_language(mel)
21
  options = whisper.DecodingOptions()
22
- result = whisper.decode(model, mel, options)
23
  text = result.text
24
- lang = max(probs, key=probs.get)
25
 
26
- # Translate
27
- tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
28
- model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
29
- tokenizer.src_lang = lang
30
- tokenizer.tgt_lang = target_language
31
- encoded_bg = tokenizer(text, return_tensors="pt")
32
- generated_tokens = model.generate(**encoded_bg)
33
- translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
34
 
35
  # Text-to-audio (TTS)
36
- tts = gTTS(text=translated_text, lang=target_language)
37
- output_file = NamedTemporaryFile(suffix=".mp3", delete=False)
38
- output_file.close()
39
- tts.save(output_file.name)
40
- return output_file.name
41
-
42
- # Define Gradio interface
43
- inputs = [
44
- gr.inputs.File(label="Upload Audio File"),
45
- gr.inputs.Dropdown(choices=['en', 'es', 'fr', 'de', 'ru'], label="Target Language")
46
- ]
47
-
48
- outputs = [
49
- gr.outputs.File(label="Translated Audio")
50
- ]
51
-
52
- title = "Audio Translation"
53
- description = "Upload an audio file, translate the speech to a target language, and download the translated audio."
54
-
55
- gr.Interface(fn=translate_audio, inputs=inputs, outputs=outputs, title=title, description=description).launch()
56
 
 
1
+ import os
2
  import gradio as gr
3
  import whisper
4
+ import IPython
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
  from gtts import gTTS
7
+
8
# Speech-recognition model: Whisper "base", loaded once at startup so each
# request only pays for inference, not model loading.
asr_model = whisper.load_model("base")

# Translation model: SMaLL-100 (multilingual seq2seq) plus its tokenizer,
# likewise loaded a single time at import.
translation_tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
translation_model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
14
+
15
# Target languages offered in the UI, mapped to their ISO 639-1 codes
# (these codes are passed to both the translation model and gTTS).
available_languages = {
    'Russian': 'ru',
    'Spanish': 'es',
    'English': 'en',
    # Greek's ISO 639-1 / gTTS code is 'el', not 'gr' ('gr' is the country
    # code and is rejected by gTTS with a ValueError).
    'Greek': 'el'
}
22
+
23
# End-to-end pipeline: speech -> text -> translated text -> speech.
def translate_audio(audio_file, target_language):
    """Transcribe an uploaded audio file, translate the transcript, and
    return the path to an MP3 of the translated speech.

    Parameters
    ----------
    audio_file : str or file-like
        A filesystem path, or an object with a ``.name`` attribute (as
        gradio's file-based inputs provide).
    target_language : str
        A key of ``available_languages`` (e.g. ``'Spanish'``).

    Returns
    -------
    str
        Path to the generated MP3 file.

    Raises
    ------
    KeyError
        If ``target_language`` is not in ``available_languages``.
    """
    from tempfile import NamedTemporaryFile

    to_lang = available_languages[target_language]

    # Accept either a plain path or gradio's temp-file object.
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

    # Speech-to-text (ASR) with Whisper.
    audio = whisper.load_audio(audio_path)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(asr_model.device)
    _, probs = asr_model.detect_language(mel)  # language probabilities; currently unused
    options = whisper.DecodingOptions()
    result = whisper.decode(asr_model, mel, options)
    text = result.text

    # Translate the transcript. SMaLL-100 is conditioned on the *target*
    # language, which the tokenizer exposes as ``tgt_lang`` — the original
    # code assigned the target code to ``src_lang``, so the model never
    # received the requested output language.
    translation_tokenizer.tgt_lang = to_lang
    encoded = translation_tokenizer(text, return_tensors="pt")
    generated_tokens = translation_model.generate(**encoded)
    translated_text = translation_tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]

    # Text-to-speech (TTS). Write to a unique temp file instead of a fixed
    # filename so concurrent requests don't overwrite each other's output.
    tts = gTTS(text=translated_text, lang=to_lang)
    with NamedTemporaryFile(suffix=".mp3", delete=False) as out:
        output_path = out.name
    tts.save(output_path)
    return output_path
47
+
48
# Gradio interface wiring (legacy gr.inputs/gr.outputs API, matching the file).
# ``type="file"`` makes gradio hand the function a temp-file object whose
# ``.name`` attribute is the path the function reads; the default
# ``type="numpy"`` passes a (sample_rate, array) tuple, which has no ``.name``
# and would crash the handler.
audio_input = gr.inputs.Audio(source="upload", type="file", label="Upload audio file")
language_dropdown = gr.inputs.Dropdown(choices=list(available_languages.keys()), label="Select Target Language")
audio_output = gr.outputs.Audio(label="Translated audio file")

iface = gr.Interface(fn=translate_audio, inputs=[audio_input, language_dropdown], outputs=audio_output, title="Audio Translation Demo")
iface.launch()
55
+
 
 
 
 
 
 
 
56