import gradio as gr
import torch
import torchaudio
import numpy as np
from transformers import AutoProcessor, SeamlessM4Tv2Model
from datetime import datetime


class SeamlessTranslator:
    """Thin wrapper around facebook/seamless-m4t-v2-large.

    Exposes text->speech and speech->speech translation for the fixed set of
    languages in ``self.languages`` (display label -> SeamlessM4T language code).
    The model is loaded eagerly at construction time.
    """

    # SeamlessM4T v2 consumes 16 kHz mono audio on the input side.
    INPUT_SAMPLE_RATE = 16000

    def __init__(self):
        self.model_name = "facebook/seamless-m4t-v2-large"
        print("Loading model...")
        self.processor = AutoProcessor.from_pretrained(self.model_name)
        self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
        # Output sample rate of the generated speech, as declared by the model config.
        self.sample_rate = self.model.config.sampling_rate
        self.languages = {
            "🇺🇸 English": "eng",
            "🇪🇸 Spanish": "spa",
            "🇫🇷 French": "fra",
            "🇩🇪 German": "deu",
            "🇮🇹 Italian": "ita",
            "🇵🇹 Portuguese": "por",
            "🇷🇺 Russian": "rus",
            "🇨🇳 Chinese": "cmn",
            "🇯🇵 Japanese": "jpn",
            "🇰🇷 Korean": "kor",
        }

    def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
        """Translate ``text`` from ``src_lang`` to ``tgt_lang`` and synthesize speech.

        Args:
            text: Input text to translate.
            src_lang / tgt_lang: Display labels; must be keys of ``self.languages``.
            progress: Gradio progress tracker (injected by the UI).

        Returns:
            ``((sample_rate, audio_array), status_message)`` — the tuple form the
            Gradio ``Audio(type="numpy")`` component expects.

        Raises:
            gr.Error: On empty input or any processing/generation failure.
        """
        # Guard against empty submissions before paying for a model call.
        if not text or not text.strip():
            raise gr.Error("❌ Please enter some text to translate")
        progress(0.3, desc="Processing input...")
        try:
            inputs = self.processor(
                text=text,
                src_lang=self.languages[src_lang],
                return_tensors="pt",
            )
            progress(0.6, desc="Generating audio...")
            # inference_mode: no autograd bookkeeping — identical output, less memory.
            with torch.inference_mode():
                audio_array = (
                    self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0]
                    .cpu()
                    .numpy()
                    .squeeze()
                )
            progress(1.0, desc="Done!")
            return (
                (self.sample_rate, audio_array),
                f"✅ Translation completed: {src_lang} → {tgt_lang}",
            )
        except Exception as e:
            raise gr.Error(f"❌ Translation failed: {str(e)}") from e

    def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
        """Translate the speech in ``audio_path`` into ``tgt_lang`` speech.

        Args:
            audio_path: Filesystem path to the uploaded audio (Gradio ``type="filepath"``).
            tgt_lang: Display label; must be a key of ``self.languages``.
            progress: Gradio progress tracker (injected by the UI).

        Returns:
            ``((sample_rate, audio_array), status_message)``.

        Raises:
            gr.Error: If no file was provided or processing/generation fails.
        """
        if audio_path is None:
            raise gr.Error("❌ Please upload an audio file")
        progress(0.3, desc="Loading audio...")
        try:
            audio, orig_freq = torchaudio.load(audio_path)
            # Downmix multi-channel recordings to mono — the model expects a
            # single channel; averaging is the standard lossless-enough downmix.
            if audio.shape[0] > 1:
                audio = audio.mean(dim=0, keepdim=True)
            audio = torchaudio.functional.resample(
                audio, orig_freq=orig_freq, new_freq=self.INPUT_SAMPLE_RATE
            )
            progress(0.6, desc="Translating...")
            # Tell the processor the rate we resampled to, so it does not have
            # to assume (and cannot mis-detect) the input sample rate.
            inputs = self.processor(
                audios=audio,
                sampling_rate=self.INPUT_SAMPLE_RATE,
                return_tensors="pt",
            )
            with torch.inference_mode():
                audio_array = (
                    self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0]
                    .cpu()
                    .numpy()
                    .squeeze()
                )
            progress(1.0, desc="Done!")
            return (self.sample_rate, audio_array), "✅ Audio translation completed"
        except Exception as e:
            raise gr.Error(f"❌ Translation failed: {str(e)}") from e


css = """
.gradio-container { max-width: 1200px !important; margin: auto !important; }
.main-header { text-align: center; margin-bottom: 2rem; padding: 2rem; background: linear-gradient(135deg, #1e40af, #3b82f6); border-radius: 12px; color: white; }
.main-title { font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem; }
.main-subtitle { font-size: 1.2rem; opacity: 0.9; }
.container { padding: 1.5rem; border-radius: 12px; background: white; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); margin-bottom: 1.5rem; }
.status-box { padding: 1rem; border-radius: 8px; background: #f0f9ff; border-left: 4px solid #3b82f6; margin-top: 1rem; }
.footer { text-align: center; margin-top: 2rem; padding: 1rem; color: #666; }
"""


def create_ui():
    """Build and return the Gradio Blocks UI wired to a SeamlessTranslator."""
    translator = SeamlessTranslator()

    with gr.Blocks(css=css, title="A.R.I.S. Translator") as demo:
        # Header banner; classes are defined in the module-level ``css`` string.
        gr.HTML(
            """
            <div class="main-header">
                <div class="main-title">A.R.I.S. Translator</div>
                <div class="main-subtitle">Advanced Real-time Interpretation System</div>
            </div>
            """
        )

        with gr.Tabs():
            # Text Translation Tab
            with gr.Tab("🔤 Text Translation"):
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Text to Translate",
                            placeholder="Enter your text here...",
                            lines=5,
                        )
                        with gr.Row():
                            src_lang = gr.Dropdown(
                                choices=list(translator.languages.keys()),
                                value="🇺🇸 English",
                                label="Source Language",
                            )
                            tgt_lang = gr.Dropdown(
                                choices=list(translator.languages.keys()),
                                value="🇪🇸 Spanish",
                                label="Target Language",
                            )
                        translate_btn = gr.Button("🔄 Translate", variant="primary")
                        status_text = gr.Textbox(
                            label="Status",
                            interactive=False,
                        )
                    with gr.Column():
                        audio_output = gr.Audio(
                            label="Translation Output",
                            type="numpy",
                        )

            # Audio Translation Tab
            with gr.Tab("🎤 Audio Translation"):
                with gr.Row():
                    with gr.Column():
                        audio_input = gr.Audio(
                            label="Upload Audio",
                            type="filepath",
                        )
                        tgt_lang_audio = gr.Dropdown(
                            choices=list(translator.languages.keys()),
                            value="🇺🇸 English",
                            label="Target Language",
                        )
                        translate_audio_btn = gr.Button(
                            "🔄 Translate Audio", variant="primary"
                        )
                        status_text_audio = gr.Textbox(
                            label="Status",
                            interactive=False,
                        )
                    with gr.Column():
                        audio_output_from_audio = gr.Audio(
                            label="Translation Output",
                            type="numpy",
                        )

        # Footer; styled by the ``.footer`` rule in ``css``.
        gr.HTML('<div class="footer"></div>')

        # Event handlers
        translate_btn.click(
            fn=translator.translate_text,
            inputs=[text_input, src_lang, tgt_lang],
            outputs=[audio_output, status_text],
        )
        translate_audio_btn.click(
            fn=translator.translate_audio,
            inputs=[audio_input, tgt_lang_audio],
            outputs=[audio_output_from_audio, status_text_audio],
        )

    return demo


if __name__ == "__main__":
    demo = create_ui()
    demo.queue()
    demo.launch()