# Spaces: Runtime error
from transformers import pipeline | |
from transformers import WhisperForConditionalGeneration, WhisperProcessor, WhisperFeatureExtractor | |
import gradio as gr | |
import librosa | |
# Model configuration and one-time loading of the artifacts used for prediction.
# All of this runs at import time.
MODEL_SPECS_ID = "dmatekenya/whisper-small_finetuned_sh_chich"  # source of MODEL and FEATURE_EXTRACTOR
MODEL_SPECS_BASE_ID = "openai/whisper-small"  # source of the processor
MODEL_SPECS_BASE_LAN_SW = "swahili"  # not referenced elsewhere in the visible code
MODEL_SPECS_BASE_LAN_SH = "shona"  # language passed to the processor below

# Not referenced elsewhere in the visible code; kept because it is a module-level name.
FEATURE_EXTRACTOR = WhisperFeatureExtractor.from_pretrained(MODEL_SPECS_ID)

# The processor comes from the base checkpoint, configured for transcription
# with MODEL_SPECS_BASE_LAN_SH as its language.
PROCESSOR_SH = WhisperProcessor.from_pretrained(
    MODEL_SPECS_BASE_ID,
    language=MODEL_SPECS_BASE_LAN_SH,
    task="transcribe",
)

MODEL = WhisperForConditionalGeneration.from_pretrained(MODEL_SPECS_ID)
def transcribe(audio_file):
    """Transcribe a single audio file with the module-level processor and model.

    Args:
        audio_file: Audio input handed straight to ``librosa.load``.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens skipped.
    """
    # Load at a 16 kHz sampling rate; the same rate is forwarded to the processor.
    waveform, sample_rate = librosa.load(audio_file, sr=16000)
    encoded = PROCESSOR_SH(waveform, return_tensors="pt", sampling_rate=sample_rate)
    token_ids = MODEL.generate(inputs=encoded.input_features)
    decoded = PROCESSOR_SH.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
def transcribe_audio(mic=None, file=None):
    """Transcribe whichever audio input was provided.

    Args:
        mic: Microphone recording, or ``None`` when nothing was recorded.
        file: Uploaded audio file, or ``None`` when nothing was uploaded.

    Returns:
        The transcription produced by ``transcribe``, or an explanatory
        message when neither input was given.
    """
    # The microphone recording takes precedence when both inputs are present.
    chosen = mic if mic is not None else file
    if chosen is None:
        return "You must either provide a mic recording or a file"
    return transcribe(audio_file=chosen)
# Static text shown in the demo page: heading, intro (with logo) and footer.
title = "Transcribe Chichewa Audio"

description = """
<img src="https://i.ibb.co/5nQdGSs/logo.png">
IN THIS DEMO, TEST THE FIRST AUTOMATED SPEECH RECOGNITION (ASR) MODEL FOR CHICHEWA BY TRANSCRIBING YOUR CHICHEWA VOICE NOTES.
FOR AUDIO FILES, PLEASE UPLOAD SHORT VOICE NOTES ONLY (NO LONGER THAN 30 SEC).
"""

# Adjacent literals are joined into one single-line string.
article = (
    "Read more about the [ChichewaSpeech2Text](https://dmatekenya.github.io/Chichewa-Speech2Text/README.html) project "
    "and make sure to sign-up for our first [voice note donation event](https://forms.gle/fHLESutofVvb2YFM9) on July 22. "
    "You stand a chance to win Airtel or TNM units if you choose to participate in the raffle after the event"
)
# Build the demo UI and start serving it.
# NOTE(review): `source=`, `optional=` and the 'grass' theme look like the older
# Gradio component API — confirm against the Gradio version this app is pinned to.

# Two audio inputs, both handed to transcribe_audio as file paths:
# first the microphone recording, then the uploaded file.
audio_inputs = [
    gr.Audio(source="microphone", type="filepath", optional=True),
    gr.Audio(source="upload", type="filepath", optional=True),
]

demo = gr.Interface(
    fn=transcribe_audio,
    theme='grass',
    title=title,
    description=description,
    article=article,
    inputs=audio_inputs,
    outputs="text",
)
demo.launch()