frogcho123 committed
Commit ec72da9
1 Parent(s): eee7326

Update app.py

Files changed (1):
1. app.py +9 -39
app.py CHANGED
@@ -1,45 +1,15 @@
-import os
 import gradio as gr
-import soundfile as sf
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from gtts import gTTS
+import whisper
 
-# Load the translation model
-translation_tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
-translation_model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
+model = whisper.load_model("base")
 
-# Available target languages
-available_languages = {
-    'Russian': 'ru',
-    'Spanish': 'es',
-    'English': 'en',
-    'Greek': 'gr'
-}
-
-# Function to translate the audio
-def translate_audio(audio_file, target_language):
-    to_lang = available_languages[target_language]
-
-    # Load audio
-    audio, sample_rate = sf.read(audio_file.name)
-
-    # Translate the text
-    translation_tokenizer.src_lang = to_lang
-    encoded_bg = translation_tokenizer(audio, return_tensors="pt", padding=True, truncation=True)
-    generated_tokens = translation_model.generate(**encoded_bg)
-    translated_audio = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
-
-    # Save translated audio
-    output_file = "translated_audio.wav"
-    sf.write(output_file, translated_audio, sample_rate)
-
-    return output_file
-
-# Gradio interface
-audio_input = gr.inputs.Audio(label="Upload audio file")
-language_dropdown = gr.inputs.Dropdown(choices=list(available_languages.keys()), label="Select Target Language")
-audio_output = gr.outputs.Audio(label="Translated audio file")
+def transcribe(audio):
+    audio = whisper.pad_or_trim(audio)
+    mel = whisper.log_mel_spectrogram(audio).to(model.device)
+    options = whisper.DecodingOptions()
+    result = whisper.decode(model, mel, options)
+    return result.text
 
-iface = gr.Interface(fn=translate_audio, inputs=[audio_input, language_dropdown], outputs=audio_output, title="Audio Translation Demo")
+iface = gr.Interface(fn=transcribe, inputs="audio", outputs="text")
 iface.launch()
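
Review note: the removed translate_audio could not have run as written. It fed the raw waveform returned by sf.read into a text tokenizer, assigned the target language to src_lang, passed the decoded strings to sf.write as if they were audio samples, and mapped Greek to 'gr' rather than the ISO 639-1 code 'el'. The replacement sidesteps all of that by transcribing with Whisper instead. One caveat remains in the new version: with inputs="audio", Gradio 3.x passes the function a (sample_rate, numpy_array) tuple by default, while whisper.pad_or_trim expects a bare 16 kHz float waveform. Below is a minimal sketch that closes this gap by requesting a file path from Gradio and decoding it with whisper.load_audio; it assumes the openai-whisper and gradio packages plus ffmpeg for decoding, and gr.Audio(type="filepath"), whisper.load_audio, and fp16=False are additions for illustration, not part of the commit:

import gradio as gr
import whisper

model = whisper.load_model("base")

def transcribe(audio_path):
    # Gradio hands us a path on disk; decode it to a 16 kHz mono float32 waveform.
    audio = whisper.load_audio(audio_path)
    # Pad or trim to the 30-second window Whisper's encoder expects.
    audio = whisper.pad_or_trim(audio)
    # Compute the log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # fp16=False keeps decoding in float32 so it also runs on CPU-only hardware.
    result = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))
    return result.text

iface = gr.Interface(fn=transcribe, inputs=gr.Audio(type="filepath"), outputs="text")
iface.launch()

The higher-level model.transcribe(audio_path) is an alternative to the pad/trim/decode steps; it also handles clips longer than 30 seconds by windowing internally.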