frogcho123 committed on
Commit
b9553d2
1 Parent(s): bda48ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -11,9 +11,9 @@ whisper_model = whisper.load_model("base")
11
  tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
12
  model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
13
 
14
- def translate_speech(audio):
15
- audio = audio[0]
16
- audio = whisper.pad_or_trim(audio)
17
  mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
18
  _, probs = whisper_model.detect_language(mel)
19
  options = whisper.DecodingOptions(fp16=False)
@@ -21,26 +21,30 @@ def translate_speech(audio):
21
  text = result.text
22
 
23
  # Translate text
24
- tokenizer.src_lang = 'en' # Assuming the input is always in English
25
  encoded_text = tokenizer(text, return_tensors="pt")
26
  generated_tokens = model.generate(**encoded_text)
27
  translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
28
 
29
  # Text-to-speech (TTS)
30
- tts = gTTS(text=translated_text, lang='en') # Assuming the target language is English
31
  audio_path = "translated_audio.mp3"
32
  tts.save(audio_path)
33
 
34
  return audio_path
35
 
36
def translate_speech_interface(audio):
    """Gradio callback: translate recorded speech and return the MP3 bytes.

    Parameters
    ----------
    audio : payload from the Gradio microphone input component.

    Returns
    -------
    bytes : raw contents of the MP3 file produced by translate_speech().
    """
    translated_audio = translate_speech(audio)
    # Use a context manager so the file handle is closed deterministically;
    # the original `open(...).read()` leaked the handle until GC.
    with open(translated_audio, "rb") as audio_file:
        return audio_file.read()
41
 
42
  audio_recording = gr.inputs.Audio(source="microphone", type="numpy", label="Record your speech")
 
 
43
  output_audio = gr.outputs.Audio(type="numpy", label="Translated Audio")
44
 
45
- iface = gr.Interface(fn=translate_speech_interface, inputs=audio_recording, outputs=output_audio, title="Speech Translator")
46
  iface.launch()
 
 
 
11
  tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
12
  model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
13
 
14
+ def translate_speech(audio, target_lang):
15
+ audio = audio.astype("float32")
16
+ audio = whisper.pad_or_trim(audio, whisper_model.audio_config.sample_rate)
17
  mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
18
  _, probs = whisper_model.detect_language(mel)
19
  options = whisper.DecodingOptions(fp16=False)
 
21
  text = result.text
22
 
23
  # Translate text
24
+ tokenizer.src_lang = target_lang # Assuming the input is always in English
25
  encoded_text = tokenizer(text, return_tensors="pt")
26
  generated_tokens = model.generate(**encoded_text)
27
  translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
28
 
29
  # Text-to-speech (TTS)
30
+ tts = gTTS(text=translated_text, lang=target_lang)
31
  audio_path = "translated_audio.mp3"
32
  tts.save(audio_path)
33
 
34
  return audio_path
35
 
36
def translate_speech_interface(audio, target_lang):
    """Gradio callback: translate recorded speech and return the MP3 bytes.

    Parameters
    ----------
    audio : payload from the Gradio microphone input component.
    target_lang : language code chosen in the dropdown (e.g. "ru", "fr").

    Returns
    -------
    bytes : raw contents of the MP3 file produced by translate_speech().
    """
    translated_audio = translate_speech(audio, target_lang)
    # Use a context manager so the file handle is closed deterministically;
    # the original `open(...).read()` leaked the handle until GC.
    with open(translated_audio, "rb") as audio_file:
        return audio_file.read()
41
 
42
# ---- Gradio UI wiring ----

# Input widgets: microphone capture plus a target-language selector.
audio_recording = gr.inputs.Audio(source="microphone", type="numpy", label="Record your speech")
lang_choices = ["ru", "fr", "en", "de"]
lang_dropdown = gr.inputs.Dropdown(lang_choices, label="Select Language to Translate")

# Output widget carrying the synthesized translation audio.
output_audio = gr.outputs.Audio(type="numpy", label="Translated Audio")

# Assemble the app and start serving it.
iface = gr.Interface(
    fn=translate_speech_interface,
    inputs=[audio_recording, lang_dropdown],
    outputs=output_audio,
    title="Speech Translator",
)
iface.launch()
49
+
50
+