"""Gradio demo: record speech, transcribe it with Whisper, translate it, and speak the result."""

import functools
import os
import tempfile

import gradio as gr
import numpy as np
import sentencepiece  # noqa: F401  # not referenced directly; presumably needed by the tokenizer -- confirm
import sounddevice as sd
import soundfile as sf
import whisper
from gtts import gTTS
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Sample rate assumed when the caller passes a bare sample array, and the rate
# used by record_audio().
_DEFAULT_SAMPLE_RATE = 16000

# Checkpoint used for text-to-text translation.
_TRANSLATION_CHECKPOINT = "alirezamsh/small100"

# Flag polled by record_audio(); flipped by toggle_record().
is_recording = False


@functools.lru_cache(maxsize=1)
def _load_whisper():
    """Load the Whisper "base" model once and reuse it for every request."""
    return whisper.load_model("base").float()


@functools.lru_cache(maxsize=1)
def _load_translator():
    """Load the translation tokenizer and model once; return them as a pair."""
    tokenizer = AutoTokenizer.from_pretrained(_TRANSLATION_CHECKPOINT)
    translator = AutoModelForSeq2SeqLM.from_pretrained(_TRANSLATION_CHECKPOINT)
    return tokenizer, translator


def _transcribe(samples, sample_rate):
    """Write the samples to a temporary WAV file and return Whisper's transcript."""
    # Only the file *name* is needed here; close the handle before soundfile
    # reopens the path so this also works on platforms that lock open files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        temp_filename = temp_audio.name
    try:
        sf.write(temp_filename, samples, sample_rate)
        waveform = whisper.load_audio(temp_filename)
    finally:
        # delete=False means nobody else removes the file for us.
        os.remove(temp_filename)

    model = _load_whisper()
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device).float()
    # No explicit detect_language() call: with DecodingOptions.language unset,
    # whisper.decode() performs language detection itself.
    options = whisper.DecodingOptions(fp16=False)
    return whisper.decode(model, mel, options).text


def translate_voice(audio, target_lang):
    """Transcribe spoken audio, translate it, and synthesize the translation.

    Args:
        audio: Either a ``(sample_rate, samples)`` tuple (the shape Gradio's
            numpy audio input delivers) or a bare array of samples, which is
            assumed to be sampled at 16 kHz.
        target_lang: Language code to translate into (e.g. ``"en"``, ``"fr"``).

    Returns:
        A tuple ``(mp3_path, original_text, translated_text, target_lang)``.

    Raises:
        ValueError: If no audio was supplied or no speech could be transcribed.
    """
    if audio is None:
        raise ValueError("No audio was provided.")

    if isinstance(audio, tuple):
        sample_rate, samples = audio
    else:
        sample_rate, samples = _DEFAULT_SAMPLE_RATE, audio

    text = _transcribe(samples, sample_rate)
    if not text.strip():
        # gTTS refuses empty text; fail early with a clearer message.
        raise ValueError("No speech could be transcribed from the audio.")

    tokenizer, translator = _load_translator()
    # NOTE(review): the target language is assigned to ``src_lang``. This looks
    # intentional for SMaLL-100 (target-language token on the encoder side),
    # but confirm against the model card, which uses its own tokenizer class.
    # The cached tokenizer is shared, so this assignment is not thread-safe.
    tokenizer.src_lang = target_lang
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = translator.generate(**encoded)
    translated_text = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]

    filename = "to_speech.mp3"
    gTTS(text=translated_text, lang=target_lang).save(filename)
    return filename, text, translated_text, target_lang


def toggle_record(button):
    """Set the module-level recording flag from the truthiness of ``button``.

    If ``button`` is an object that accepts a ``text`` attribute, its caption
    is updated as well; plain values such as ``True``/``False`` are accepted
    and only flip the flag.
    """
    global is_recording
    is_recording = bool(button)
    caption = "Stop Recording" if is_recording else "Start Recording"
    try:
        button.text = caption
    except AttributeError:
        # Plain bools/None cannot carry a caption; the flag above is what matters.
        pass


def record_audio():
    """Record one-second mono blocks at 16 kHz while ``is_recording`` is true.

    The loop only ends once something else (e.g. ``toggle_record`` called from
    another thread) clears the flag.

    Returns:
        A one-dimensional array with all recorded samples; empty when the flag
        was not set on entry.
    """
    fs = _DEFAULT_SAMPLE_RATE
    blocks = []
    while is_recording:
        block = sd.rec(fs, samplerate=fs, channels=1)
        sd.wait()  # sd.rec() returns immediately; wait until the buffer is filled
        blocks.append(block)
    if not blocks:
        return np.zeros(0, dtype=np.float32)
    return np.concatenate(blocks).flatten()


iface = gr.Interface(
    fn=translate_voice,
    inputs=[
        # Microphone capture delivered to translate_voice as (sample_rate, samples).
        gr.inputs.Audio(source="microphone", type="numpy", label="Speech Input"),
        gr.inputs.Dropdown(choices=['en', 'ru', 'de', 'fr'], label="Target Language"),
    ],
    outputs=[
        gr.outputs.Audio(type="filepath", label="Translated Audio"),
        gr.outputs.Textbox(label="Original Text"),
        gr.outputs.Textbox(label="Translated Text"),
        gr.outputs.Textbox(label="Target Language"),
    ],
)

iface.launch()