# Hugging Face Space (status when captured: build error)
import os

import gradio as gr
import librosa
from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel
# Hub checkpoint shared by the feature extractor, the tokenizer and the model.
_MODEL_ID = "facebook/wav2vec2-xls-r-300m-en-to-15"

# Read the Hub access token from the environment instead of committing it to
# source control. ``os.environ.get`` yields None when HF_TOKEN is unset, in
# which case no explicit token is passed to ``from_pretrained``.
# NOTE(review): the token that used to be embedded here has been exposed and
# should be revoked. The checkpoint appears to be public, so loading without a
# token should still work - confirm.
_HF_AUTH_TOKEN = os.environ.get("HF_TOKEN")

feature_extractor = AutoFeatureExtractor.from_pretrained(
    _MODEL_ID, use_auth_token=_HF_AUTH_TOKEN
)
# The original code explicitly requests the slow (pure-Python) tokenizer.
tokenizer = AutoTokenizer.from_pretrained(
    _MODEL_ID, use_auth_token=_HF_AUTH_TOKEN, use_fast=False
)
model = SpeechEncoderDecoderModel.from_pretrained(
    _MODEL_ID, use_auth_token=_HF_AUTH_TOKEN
)
def process_audio_file(file):
    """Load an audio file and convert it into input values for the model.

    Args:
        file: Path (or any other source accepted by ``librosa.load``) of the
            recording to process.

    Returns:
        The ``input_values`` tensor produced by the module-level
        ``feature_extractor`` (PyTorch, because ``return_tensors="pt"``).
    """
    target_sr = 16000
    # Resample while loading: ``librosa.load`` otherwise decodes at its own
    # default rate and a second resampling pass is needed. This also avoids
    # the positional ``librosa.resample(data, sr, 16000)`` call, whose rate
    # arguments are keyword-only in recent librosa releases.
    data, _ = librosa.load(file, sr=target_sr)
    # Passing the rate explicitly lets the feature extractor check it against
    # the rate it was configured with instead of assuming it silently.
    features = feature_extractor(
        data, sampling_rate=target_sr, return_tensors="pt"
    )
    return features.input_values
def transcribe(file):
    """Run the speech model on an audio file and return the decoded text.

    Args:
        file: Path to the recorded audio. May be ``None`` or empty when the
            UI submits without a recording.

    Returns:
        The first decoded sequence as a string, or ``""`` when there is no
        audio to process.
    """
    # Without a recording there is nothing to load; return an empty result
    # rather than failing inside the audio loader with an opaque error.
    if not file:
        return ""
    input_values = process_audio_file(file)
    generated_ids = model.generate(input_values)
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    # A single clip is processed per call, so only the first entry is returned.
    return decoded[0]
# Microphone capture, handed to the callback as a path on disk.
# NOTE(review): ``gr.inputs`` is the legacy component namespace; newer Gradio
# releases expose ``gr.Audio`` with different keyword names - confirm the
# pinned Gradio version before migrating.
_microphone_input = gr.inputs.Audio(source="microphone", type='filepath')

# Wire the recording to ``transcribe`` and show its return value as text.
iface = gr.Interface(fn=transcribe, inputs=_microphone_input, outputs="text")

# Start the web app.
iface.launch()