frogcho123 committed on
Commit
d7dfa49
1 Parent(s): 5fbd86e

added app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py CHANGED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import gradio as gr
4
+ import whisper
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
+ from gtts import gTTS
7
+ from IPython.display import Audio
8
+
9
# Instantiate the speech-recognition model and the translation
# tokenizer/model pair once, at import time, so every request reuses them.
_TRANSLATION_CHECKPOINT = "alirezamsh/small100"

whisper_model = whisper.load_model("base")
tokenizer = AutoTokenizer.from_pretrained(_TRANSLATION_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_TRANSLATION_CHECKPOINT)
13
+
14
def _source_path(input_file):
    """Return a filesystem path string for *input_file*.

    Accepts a path (``str`` / ``os.PathLike``) or a file-like object that
    exposes its path as ``.name`` (e.g. a ``tempfile`` wrapper).
    """
    if isinstance(input_file, (str, os.PathLike)):
        return os.fspath(input_file)
    # File-like upload objects carry their on-disk location in ``.name``.
    return getattr(input_file, "name", input_file)


def _translate_audio_to_file(input_file, to_lang):
    """Transcribe, translate and synthesize speech; return the MP3 path.

    The caller owns the returned temporary file and is responsible for
    removing it when it is no longer needed.
    """
    # Load the audio and fit it to Whisper's fixed-length input window
    # (pad_or_trim keeps only the leading window of longer recordings).
    audio = whisper.load_audio(_source_path(input_file))
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)

    # Detect the spoken language: pick the most probable language code.
    _, probs = whisper_model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # Convert audio to text.
    options = whisper.DecodingOptions()
    result = whisper.decode(whisper_model, mel, options)
    text = result.text

    # Translate the text.
    # NOTE(review): ``to_lang`` is only used for speech synthesis below; the
    # translation model is never told the target language. Confirm how the
    # loaded tokenizer expects the target to be specified and wire it in.
    tokenizer.src_lang = lang
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(**encoded)
    translated_text = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]

    # Convert translated text to audio. Reserve the temp path with
    # delete=False so the name stays ours after the handle is closed,
    # instead of relying on a discarded NamedTemporaryFile's stale name.
    tts = gTTS(text=translated_text, lang=to_lang)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        output_path = tmp.name
    tts.save(output_path)
    return output_path


def translate_audio(input_file, to_lang):
    """Translate spoken audio and return it as an ``IPython.display.Audio``.

    Args:
        input_file: Path to the source audio, or a file-like object whose
            ``.name`` is that path.
        to_lang: Language code passed to gTTS for the synthesized speech.

    Returns:
        An ``Audio`` object wrapping the synthesized MP3 bytes.
    """
    output_path = _translate_audio_to_file(input_file, to_lang)
    try:
        with open(output_path, "rb") as mp3_file:
            audio_data = mp3_file.read()
    finally:
        # The bytes are held in memory now; don't leave the temp file behind.
        os.remove(output_path)
    return Audio(audio_data)


def translate_audio_interface(input_file, to_lang):
    """Gradio callback: translate the upload and return an MP3 file path.

    A Gradio ``"audio"`` output cannot render an ``IPython.display.Audio``
    object, so the path of the synthesized file is returned instead. The
    file is intentionally left on disk so Gradio can read it.
    """
    return _translate_audio_to_file(input_file, to_lang)
47
+
48
# Example rows shown in the UI; each row supplies one value per input
# component, in order: the audio file, then the target language code.
_EXAMPLES = [
    ["audio_example.mp3", "en"],
    ["speech_sample.mp3", "fr"],
]

# Web UI: a file upload plus a text box in, a single audio player out.
iface = gr.Interface(
    fn=translate_audio_interface,
    inputs=["file", "text"],
    outputs="audio",
    title="Audio Translation",
    description="Upload an MP3 file and select the target language for translation.",
    examples=_EXAMPLES,
)

# Start the app with debug output enabled.
iface.launch(debug=True)