mms-1b-berber / app.py
TifinLab's picture
Update app.py
bff529d verified
raw
history blame contribute delete
No virus
1.63 kB
import gradio as gr
from transformers import Wav2Vec2ForCTC, AutoProcessor
import torch
import librosa
import json
# Table of ISO language codes read from a JSON file in the working directory.
# JSON text is UTF-8, so the encoding is pinned explicitly instead of relying
# on the platform's locale default.
with open('ISO_codes.json', 'r', encoding='utf-8') as file:
    iso_codes = json.load(file)
# Identifier passed to both `from_pretrained` calls below.
model_id = "TifinLab/mms-1b-berber"

# Both objects are created once at import time and shared by every call to
# the transcription function; the processor is loaded first, then the model.
processor, model = (
    AutoProcessor.from_pretrained(model_id),
    Wav2Vec2ForCTC.from_pretrained(model_id),
)
def transcribe(audio_file_mic=None, audio_file_upload=None):
    """Transcribe an audio recording using the "ber" adapter.

    The microphone recording takes precedence when both arguments are given.

    Args:
        audio_file_mic: Path of a recorded audio file, or a falsy value when
            no recording was made.
        audio_file_upload: Path of an uploaded audio file, or a falsy value
            when nothing was uploaded.

    Returns:
        The decoded transcription string, or a prompt asking for audio when
        neither argument is provided.
    """
    audio_file = audio_file_mic or audio_file_upload
    if not audio_file:
        return "Please upload an audio file or record one"

    target_sr = 16_000
    # Ask librosa for 16 kHz directly. Without `sr=` it first resamples to its
    # own default rate, which forced a second resampling pass on every call.
    speech, _ = librosa.load(audio_file, sr=target_sr)

    processor.tokenizer.set_target_lang("ber")
    # force_load=False skips re-reading the adapter weights when the model is
    # already set to this language, so only the first request pays the cost.
    model.load_adapter("ber", force_load=False)

    inputs = processor(speech, sampling_rate=target_sr, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy decoding: most likely token per frame for the single batch item.
    ids = torch.argmax(logits, dim=-1)[0]
    return processor.decode(ids)
# One row per example, one value per input component. The interface below
# declares a single audio input, so each row holds just the audio file path.
examples = [
    ["kab_1.mp3"],
    ["kab_2.mp3"],
]
description = ''

# Single audio input (delivered to `transcribe` as a file path) mapped to a
# single text output.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Enregistrez ou téléchargez votre réponse audio ici",
        ),
    ],
    outputs=gr.Textbox(label="Transcription"),
    examples=examples,
    description=description,
)
iface.launch()