Spaces:
Sleeping
Sleeping
frogcho123
committed on
Commit
·
c89a3ea
1
Parent(s):
19f9d93
Update app.py
Browse files
app.py
CHANGED
@@ -4,20 +4,25 @@ import whisper
|
|
4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
from gtts import gTTS
|
6 |
|
7 |
-
def translate_voice(
|
|
|
|
|
|
|
|
|
|
|
8 |
# Load the model and switch to float32
|
9 |
model = whisper.load_model("base").float()
|
10 |
|
11 |
-
# Load the audio
|
12 |
-
audio = whisper.load_audio(
|
13 |
|
14 |
-
# Pad or trim the audio
|
15 |
audio = whisper.pad_or_trim(audio)
|
16 |
|
17 |
-
# Convert the audio to a log Mel spectrogram and move it to the same device as the model (CPU in your case)
|
18 |
mel = whisper.log_mel_spectrogram(audio).to(model.device).float() # convert to full-precision float32
|
19 |
|
20 |
-
# Proceed with your language detection and decoding
|
21 |
_, probs = model.detect_language(mel)
|
22 |
options = whisper.DecodingOptions()
|
23 |
result = whisper.decode(model, mel, options)
|
@@ -44,14 +49,14 @@ def translate_voice(file, target_lang):
|
|
44 |
iface = gr.Interface(
|
45 |
fn=translate_voice,
|
46 |
inputs=[
|
47 |
-
gr.
|
48 |
-
gr.
|
49 |
],
|
50 |
outputs=[
|
51 |
-
gr.
|
52 |
-
gr.
|
53 |
-
gr.
|
54 |
-
gr.
|
55 |
]
|
56 |
)
|
57 |
iface.launch()
|
|
|
4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
from gtts import gTTS
|
6 |
|
7 |
+
def translate_voice(file_obj, target_lang):
|
8 |
+
# Save the temporary file to disk
|
9 |
+
temp_file_path = "temp_audio_file.wav"
|
10 |
+
with open(temp_file_path, "wb") as out_file:
|
11 |
+
out_file.write(file_obj.read())
|
12 |
+
|
13 |
# Load the model and switch to float32
|
14 |
model = whisper.load_model("base").float()
|
15 |
|
16 |
+
# Load the audio
|
17 |
+
audio = whisper.load_audio(temp_file_path)
|
18 |
|
19 |
+
# Pad or trim the audio
|
20 |
audio = whisper.pad_or_trim(audio)
|
21 |
|
22 |
+
# Convert the audio to a log Mel spectrogram and move it to the same device as the model (CPU in your case)
|
23 |
mel = whisper.log_mel_spectrogram(audio).to(model.device).float() # convert to full-precision float32
|
24 |
|
25 |
+
# Proceed with your language detection and decoding
|
26 |
_, probs = model.detect_language(mel)
|
27 |
options = whisper.DecodingOptions()
|
28 |
result = whisper.decode(model, mel, options)
|
|
|
49 |
iface = gr.Interface(
|
50 |
fn=translate_voice,
|
51 |
inputs=[
|
52 |
+
gr.inputs.File(type="file", label="Your Audio"),
|
53 |
+
gr.inputs.Dropdown(choices=['en', 'ru', 'de', 'fr'], label="Target Language")
|
54 |
],
|
55 |
outputs=[
|
56 |
+
gr.outputs.Audio(type="file", label="Translated Audio"),
|
57 |
+
gr.outputs.Textbox(label="Original Text"),
|
58 |
+
gr.outputs.Textbox(label="Translated Text"),
|
59 |
+
gr.outputs.Textbox(label="Target Language"),
|
60 |
]
|
61 |
)
|
62 |
iface.launch()
|