"""Gradio demo: transcribe the audio track of a YouTube video.

The script downloads an audio-only stream with pytube, feeds the file to a
Hugging Face ``transformers`` pipeline and serves the result through a
simple text-in / text-out Gradio interface.
"""

import os
import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Loaded once at import time so every request reuses the same pipeline.
# NOTE(review): no ``chunk_length_s`` is passed here; confirm how clips that
# are longer than the model's input window are handled by the installed
# transformers version.
model = pipeline(model="renesteeman/whisper-base-dutch-25")


def get_audio(url: str, output_path: str = ".") -> str:
    """Download the first audio-only stream of a YouTube video.

    Args:
        url: Address of the YouTube video.
        output_path: Directory the file is written to. Defaults to the
            current directory, which is what the function always used before
            this parameter existed.

    Returns:
        Path of the downloaded file, renamed to carry an ``.mp3`` extension.

    Raises:
        ValueError: If ``url`` is empty/blank or the video exposes no
            audio-only stream.
    """
    if not url or not url.strip():
        raise ValueError("A YouTube URL is required.")

    stream = YouTube(url.strip()).streams.filter(only_audio=True).first()
    if stream is None:
        # ``first()`` yields None when the filter matches nothing; fail with
        # a clear message instead of an AttributeError on ``.download``.
        raise ValueError(f"No audio-only stream available for {url!r}.")

    downloaded = stream.download(output_path=output_path)
    base, _ext = os.path.splitext(downloaded)
    renamed = base + ".mp3"
    # Only the file name changes; the audio is not transcoded.
    # ``os.replace`` overwrites an existing destination on every platform,
    # whereas ``os.rename`` raises on Windows when the target already exists
    # (e.g. the same video requested twice).
    os.replace(downloaded, renamed)
    return renamed


def get_text(url: str) -> str:
    """Transcribe the audio of the YouTube video at ``url``.

    The audio is downloaded into a temporary directory that is removed once
    the transcription is done, so requests neither pile up files in the
    working directory nor overwrite each other's downloads.

    Args:
        url: Address of the YouTube video.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        ValueError: Propagated from ``get_audio`` for a blank URL or a video
            without an audio-only stream.
    """
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = get_audio(url, output_path=workdir)
        # The pipeline must finish reading the file before the directory is
        # cleaned up, hence the call stays inside the ``with`` block.
        return model(audio_path)["text"]


# For a quick local check without the UI, call ``get_text(<YouTube URL>)``.

# NOTE(review): the title/description say "small" while the model id above
# says "base" — confirm which one is correct before changing the UI text.
iface = gr.Interface(
    fn=get_text,
    inputs="text",
    outputs="text",
    title="Whisper Small Dutch",
    description="Realtime demo for Dutch speech recognition using a fine-tuned Whisper small model.",
)

iface.launch()