import torch
from transformers import pipeline

# Identifier of the fine-tuned accent-classification model handed to the pipeline.
model_id = "ummonk/distilhubert-finetuned-accents"

# Built once at module load so every classification call reuses the same pipeline.
pipe = pipeline(task="audio-classification", model=model_id)

def classify_audio(filepath):
    """Classify an audio recording and return a score for each predicted label.

    Args:
        filepath: Audio input forwarded unchanged to the module-level ``pipe``.
            ``None`` means nothing was submitted (e.g. the user pressed
            submit without recording anything).

    Returns:
        A dict mapping each predicted label to its score. The dict is empty
        when ``filepath`` is ``None``.
    """
    # Without this guard a missing recording is forwarded to the pipeline,
    # which fails instead of producing an (empty) result for the UI.
    if filepath is None:
        return {}

    # Each prediction is a mapping with "label" and "score" entries.
    return {pred["label"]: pred["score"] for pred in pipe(filepath)}

import gradio as gr

# Instructions displayed with the demo; passed to gr.Interface as its description.
description = """
Record the following text and submit to obtain best guesses for your accent region:

> Please call Stella. Ask her to bring these things with her from the store: Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob. We also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.

Note that it's only designed for Anglosphere accents (North American, British, and Australian / New Zealand). The top listed guess matches the birthplace region only ~30% of the time, but the model is rather reliable at guessing whether the accent is North American or British.
"""

# Wire the classifier into a simple web UI: a microphone recording goes in as a
# file path, and the label -> score mapping returned by classify_audio comes out.
demo = gr.Interface(
    title="Accent Guesser",
    description=description,
    fn=classify_audio,
    inputs=gr.Audio(type="filepath", sources="microphone"),
    outputs=gr.Label(),
)

# NOTE(review): debug=True presumably keeps the process attached so errors are
# printed (useful in notebooks) — confirm against the Gradio launch() docs.
demo.launch(debug=True)