"""Gradio app: audio upload -> normalization -> vocal separation -> Whisper transcription.

Builds a tabbed Gradio UI around :func:`transcribe` and launches it.
"""

import torch
import gradio as gr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

from audio import (
    append_wav_files,
    compose_audio,
    cutaudio,
    mp3_to_wav,
    normalizeAudio,
    overlay_audios,
    separateVoiceInstrumental,
    stereo_to_mono,
    total_duration,
)
from helpers import guardar_en_archivo
from transcription import fast_transcription, speech_to_text


def transcribe(audiofile, model):
    """Normalize an uploaded audio file, isolate the vocals, and transcribe them.

    Args:
        audiofile: list of uploaded file objects (Gradio ``gr.File`` with
            ``file_count="multiple"``); only the first file is processed.
        model: Whisper model size — one of "base", "small", "medium", "large-v2".

    Returns:
        Tuple of (original path, normalized path, vocal-track path,
        instrumental-track path, transcript text, saved-transcript filename).
    """
    audio_path = audiofile[0].name

    # Normalize loudness/format before source separation.
    audio_normalized_path = normalizeAudio(audio_path, ".wav")

    # Split into instrumental and vocal stems; transcription only needs vocals.
    novocal_path, vocal_path = separateVoiceInstrumental(audio_normalized_path)
    novocal_path = mp3_to_wav(novocal_path, "novocal")
    vocal_path = mp3_to_wav(vocal_path, "vocal")

    # Transcribe the vocal stem in Spanish ("es").
    out = fast_transcription(vocal_path, model, "es")
    transcript = "\n".join(out)

    # Persist the transcript to a file and report its name back to the UI.
    nombre_archivo = guardar_en_archivo(out)

    return (
        audio_path,
        audio_normalized_path,
        vocal_path,
        novocal_path,
        transcript,
        nombre_archivo,
    )


transcribeI = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.File(label="Upload Files", file_count="multiple"),
        gr.Radio(
            ["base", "small", "medium", "large-v2"],
            label="Models",
            value="large-v2",
        ),
    ],
    outputs=[
        gr.Audio(type="filepath", label="original"),
        gr.Audio(type="filepath", label="normalized"),
        gr.Audio(type="filepath", label="vocal"),
        gr.Audio(type="filepath", label="no_vocal"),
        gr.TextArea(label="Transcription"),
        gr.File(label="Archivo generado"),
    ],
    theme="huggingface",
    title="Transcripción",  # fixed mojibake ("TranscripciĆ³n")
    # NOTE(review): the description mentions pasting a YouTube link, but the
    # input is a file upload — confirm the intended UX with the author.
    description=(
        "Sound extraction, processing, and dialogue transcription.\n"
        "Paste a link to a youtube video\n"
    ),
    allow_flagging="never",
    # examples=[[None, "COSER-4004-01-00_5m.wav", "large-v2"]]
)

demo = gr.Blocks()
with demo:
    gr.Markdown("# Dubbing")
    # BUG FIX: the original referenced an undefined name `diarizationI`,
    # which raised NameError at import time. The only interface defined in
    # this file is `transcribeI`; the tab label is corrected to match.
    gr.TabbedInterface([transcribeI], ["Transcripción"])

# demo.queue(concurrency_count=1).launch(enable_queue=True, auth=(os.environ['USER'], os.environ['PASSWORD']))
demo.launch(enable_queue=True)