Spaces:
Running
Running
import gradio as gr | |
from transformers import pipeline | |
import numpy as np | |
import librosa | |
from punctuators.models import PunctCapSegModelONNX | |
# Hub identifiers of the two checkpoints used by this demo.
ASR_MODEL_ID = "Oysiyl/w2v-bert-2.0-dutch-colab-CV16.0"
PUNCT_CAP_MODEL_ID = "1-800-BAD-CODE/xlm-roberta_punctuation_fullstop_truecase"

# Both models are created once at module level, so every call to
# transcribe() reuses the same instances.
# Speech-to-text pipeline.
transcriber = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID)
# Model that post-processes the raw transcript (punctuation / true-casing /
# sentence segmentation, going by its class name and checkpoint id).
punct_cap_model = PunctCapSegModelONNX.from_pretrained(PUNCT_CAP_MODEL_ID)
def transcribe(audio):
    """Transcribe a Gradio audio input into punctuated, true-cased text.

    Args:
        audio: ``(sample_rate, samples)`` pair as produced by the ``gr.Audio``
            input, or ``None`` when nothing was submitted.

    Returns:
        The transcription after punctuation/capitalisation restoration, or an
        empty string when there is no audio or nothing was recognised.
    """
    # Gradio passes None when the user submits without providing audio.
    if audio is None:
        return ""

    sr, y = audio
    # np.array always copies, so the in-place scaling below never mutates
    # the caller's buffer.
    y = np.array(y, dtype=np.float32)
    if y.size == 0:
        return ""

    # Multi-channel input is expected as (samples, channels) per the Gradio
    # Audio docs; down-mix to mono so resampling runs along the time axis.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise, but skip completely silent input to avoid dividing by
    # zero and feeding NaNs to the model.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # The ASR pipeline is fed 16 kHz audio below, so resample anything else.
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)

    transcribed_text = transcriber({"sampling_rate": 16000, "raw": y})["text"]
    if not transcribed_text.strip():
        return ""

    # With apply_sbd=True the model is documented to return, for each input
    # text, a list of sentences; keep all of them instead of only the first.
    sentences = punct_cap_model.infer(texts=[transcribed_text], apply_sbd=True)[0]
    return " ".join(sentences)
# Input widget: audio can be uploaded from a file or recorded live.
audio_input = gr.Audio(sources=["upload", "microphone"])

# Web UI wiring: one audio input, one text output, handled by transcribe().
demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Automatic Speech Recognition for Dutch language demo",
    description="Click on the example below, upload audio from file or say something in microphone!",
    # Bundled sample clips shown under the interface; caching is enabled
    # for them via cache_examples.
    examples=[["examples/example1.wav"], ["examples/example2.wav"]],
    cache_examples=True,
)

# Start the app.
demo.launch()