# Scraped page header (not part of the program):
# DHEIVER's picture
# Update app.py
# 58e0199 verified
# raw
# history blame
# 6.93 kB
import gradio as gr
import torch
import torchaudio
import numpy as np
from transformers import AutoProcessor, SeamlessM4Tv2Model
from datetime import datetime
class SeamlessTranslator:
    """Translate text or speech into spoken audio with SeamlessM4T v2.

    The pretrained processor and model are loaded once in ``__init__``.
    ``translate_text`` and ``translate_audio`` are written as Gradio event
    handlers: each returns ``((sample_rate, waveform), status_message)`` and
    reports failures by raising ``gr.Error``.
    """

    # Sampling rate (Hz) that uploaded audio is resampled to before it is
    # handed to the processor.
    INPUT_SAMPLE_RATE = 16000

    def __init__(self):
        """Load the pretrained processor/model and build the language table."""
        self.model_name = "facebook/seamless-m4t-v2-large"
        print("Loading model...")
        self.processor = AutoProcessor.from_pretrained(self.model_name)
        self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
        # Rate returned alongside every generated waveform.
        self.sample_rate = self.model.config.sampling_rate
        # Dropdown label -> language code passed to the processor / model.
        self.languages = {
            "๐Ÿ‡บ๐Ÿ‡ธ English": "eng",
            "๐Ÿ‡ช๐Ÿ‡ธ Spanish": "spa",
            "๐Ÿ‡ซ๐Ÿ‡ท French": "fra",
            "๐Ÿ‡ฉ๐Ÿ‡ช German": "deu",
            "๐Ÿ‡ฎ๐Ÿ‡น Italian": "ita",
            "๐Ÿ‡ต๐Ÿ‡น Portuguese": "por",
            "๐Ÿ‡ท๐Ÿ‡บ Russian": "rus",
            "๐Ÿ‡จ๐Ÿ‡ณ Chinese": "cmn",
            "๐Ÿ‡ฏ๐Ÿ‡ต Japanese": "jpn",
            "๐Ÿ‡ฐ๐Ÿ‡ท Korean": "kor"
        }

    def _language_code(self, language):
        """Map a dropdown label to its language code or raise ``gr.Error``."""
        try:
            return self.languages[language]
        except (KeyError, TypeError):
            # TypeError covers unhashable values such as a list selection.
            raise gr.Error(f"โŒ Unsupported language: {language}") from None

    def _generate_speech(self, inputs, tgt_code):
        """Run the model on processor output and return the waveform array."""
        generated = self.model.generate(**inputs, tgt_lang=tgt_code)
        # The first element of the result is the waveform tensor.
        return generated[0].cpu().numpy().squeeze()

    def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
        """Translate ``text`` and synthesize it as speech in ``tgt_lang``.

        Args:
            text: Text to translate.
            src_lang: Key of ``self.languages`` naming the language of ``text``.
            tgt_lang: Key of ``self.languages`` naming the output language.
            progress: Gradio progress tracker.

        Returns:
            ``((sample_rate, waveform), status_message)``.

        Raises:
            gr.Error: If the text is empty, a language is unknown, or the
                processor/model call fails.
        """
        # Validate outside the ``try`` so these messages are not re-wrapped
        # as "Translation failed".
        if text is None or not str(text).strip():
            raise gr.Error("โŒ Please enter some text to translate")
        src_code = self._language_code(src_lang)
        tgt_code = self._language_code(tgt_lang)

        progress(0.3, desc="Processing input...")
        try:
            inputs = self.processor(text=text, src_lang=src_code, return_tensors="pt")
            progress(0.6, desc="Generating audio...")
            audio_array = self._generate_speech(inputs, tgt_code)
            progress(1.0, desc="Done!")
            return (self.sample_rate, audio_array), f"โœ… Translation completed: {src_lang} โ†’ {tgt_lang}"
        except Exception as e:
            # Keep the original exception as the cause for server-side logs.
            raise gr.Error(f"โŒ Translation failed: {e}") from e

    def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
        """Translate the speech in an audio file into speech in ``tgt_lang``.

        Args:
            audio_path: Path of the uploaded audio file, or ``None`` when
                nothing was uploaded.
            tgt_lang: Key of ``self.languages`` naming the output language.
            progress: Gradio progress tracker.

        Returns:
            ``((sample_rate, waveform), status_message)``.

        Raises:
            gr.Error: If no file was given, the language is unknown, or
                loading/translation fails.
        """
        if audio_path is None:
            raise gr.Error("โŒ Please upload an audio file")
        tgt_code = self._language_code(tgt_lang)

        progress(0.3, desc="Loading audio...")
        try:
            audio, orig_freq = torchaudio.load(audio_path)
            audio = torchaudio.functional.resample(
                audio, orig_freq=orig_freq, new_freq=self.INPUT_SAMPLE_RATE
            )
            progress(0.6, desc="Translating...")
            # Stating the rate lets the feature extractor reject a mismatch
            # instead of silently assuming one.
            inputs = self.processor(
                audios=audio,
                sampling_rate=self.INPUT_SAMPLE_RATE,
                return_tensors="pt",
            )
            audio_array = self._generate_speech(inputs, tgt_code)
            progress(1.0, desc="Done!")
            return (self.sample_rate, audio_array), "โœ… Audio translation completed"
        except Exception as e:
            # Keep the original exception as the cause for server-side logs.
            raise gr.Error(f"โŒ Translation failed: {e}") from e
# Custom stylesheet handed to gr.Blocks(css=css) in create_ui().
# The .main-header/.main-title/.main-subtitle and .footer rules style the HTML
# snippets emitted by create_ui(); .container and .status-box are defined here
# but are not referenced by any markup visible in this file.
css = """
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}
.main-header {
text-align: center;
margin-bottom: 2rem;
padding: 2rem;
background: linear-gradient(135deg, #1e40af, #3b82f6);
border-radius: 12px;
color: white;
}
.main-title {
font-size: 2.5rem;
font-weight: bold;
margin-bottom: 0.5rem;
}
.main-subtitle {
font-size: 1.2rem;
opacity: 0.9;
}
.container {
padding: 1.5rem;
border-radius: 12px;
background: white;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
margin-bottom: 1.5rem;
}
.status-box {
padding: 1rem;
border-radius: 8px;
background: #f0f9ff;
border-left: 4px solid #3b82f6;
margin-top: 1rem;
}
.footer {
text-align: center;
margin-top: 2rem;
padding: 1rem;
color: #666;
}
"""
def create_ui():
    """Assemble the two-tab Gradio interface and return the Blocks object.

    A ``SeamlessTranslator`` is created first; its ``translate_text`` and
    ``translate_audio`` methods are wired to the buttons of the text tab and
    the audio tab respectively.
    """
    translator = SeamlessTranslator()

    # Markup for the banner and footer, spelled out line by line.
    header_html = (
        "\n"
        '<div class="main-header">\n'
        '<div class="main-title">A.R.I.S. Translator</div>\n'
        '<div class="main-subtitle">Advanced Real-time Interpretation System</div>\n'
        "</div>\n"
    )
    footer_html = (
        "\n"
        '<div class="footer">\n'
        "Powered by Meta's SeamlessM4T model | Built with Gradio\n"
        "</div>\n"
    )

    def language_dropdown(label, default):
        # Call inside the layout context the dropdown should belong to.
        return gr.Dropdown(
            choices=list(translator.languages.keys()),
            value=default,
            label=label,
        )

    with gr.Blocks(css=css, title="A.R.I.S. Translator") as blocks:
        gr.HTML(header_html)

        with gr.Tabs():
            # --- Text in, speech out ---
            with gr.Tab("๐Ÿ”ค Text Translation"):
                with gr.Row():
                    with gr.Column():
                        source_text = gr.Textbox(
                            label="Text to Translate",
                            placeholder="Enter your text here...",
                            lines=5,
                        )
                        with gr.Row():
                            source_language = language_dropdown(
                                "Source Language", "๐Ÿ‡บ๐Ÿ‡ธ English"
                            )
                            target_language = language_dropdown(
                                "Target Language", "๐Ÿ‡ช๐Ÿ‡ธ Spanish"
                            )
                        text_button = gr.Button("๐Ÿ”„ Translate", variant="primary")
                        text_status = gr.Textbox(label="Status", interactive=False)
                    with gr.Column():
                        text_result_audio = gr.Audio(
                            label="Translation Output", type="numpy"
                        )

            # --- Speech in, speech out ---
            with gr.Tab("๐ŸŽค Audio Translation"):
                with gr.Row():
                    with gr.Column():
                        uploaded_audio = gr.Audio(
                            label="Upload Audio", type="filepath"
                        )
                        audio_target_language = language_dropdown(
                            "Target Language", "๐Ÿ‡บ๐Ÿ‡ธ English"
                        )
                        audio_button = gr.Button(
                            "๐Ÿ”„ Translate Audio", variant="primary"
                        )
                        audio_status = gr.Textbox(label="Status", interactive=False)
                    with gr.Column():
                        audio_result_audio = gr.Audio(
                            label="Translation Output", type="numpy"
                        )

        gr.HTML(footer_html)

        # Wire each button to its translator method.
        text_button.click(
            fn=translator.translate_text,
            inputs=[source_text, source_language, target_language],
            outputs=[text_result_audio, text_status],
        )
        audio_button.click(
            fn=translator.translate_audio,
            inputs=[uploaded_audio, audio_target_language],
            outputs=[audio_result_audio, audio_status],
        )

    return blocks
if __name__ == "__main__":
    # Build the interface, enable the request queue, then start the server.
    demo = create_ui()
    demo.queue().launch()