import gradio as gr

LAST_UPDATED = "Nov 25th 2024"

####################################
# Static leaderboard data
####################################
leaderboard_data = [
    {'name': 'StyleTTS 2', 'STOI': 0.998, 'PESQ': 3.921, 'WER': 0.162, 'UTMOS': 3.47},
    {'name': 'Matxa-TTS', 'STOI': 0.996, 'PESQ': 3.539, 'WER': 0.179, 'UTMOS': 3.50},
    {'name': 'Matxa-TTS-multiaccent', 'STOI': 0.996, 'PESQ': 3.415, 'WER': 0.242, 'UTMOS': 2.98},
    {'name': 'StableTTS', 'STOI': 0.997, 'PESQ': 3.643, 'WER': 0.164, 'UTMOS': 2.62},
]

# Text for the metrics tab
METRICS_TAB_TEXT = """
## Metrics

Models in the leaderboard are evaluated using several key metrics:

* **UTMOS** (UTokyo-SaruLab Mean Opinion Score),
* **WER** (Word Error Rate),
* **STOI** (Short-Time Objective Intelligibility),
* **PESQ** (Perceptual Evaluation of Speech Quality).

These metrics help evaluate both the accuracy and the quality of each model.

### UTMOS (UTokyo-SaruLab Mean Opinion Score)[[Paper](https://arxiv.org/abs/2204.02152)]

UTMOS is a MOS prediction system. **A higher UTMOS indicates better quality** of the generated voice.

### WER (Word Error Rate)

WER is a common metric for evaluating speech recognition systems. It measures the percentage of words in the generated transcript that differ from the reference (correct) transcript. **A lower WER indicates higher accuracy**.

Example:

| Reference  | the | cat | sat     | on | the | mat |
|------------|-----|-----|---------|----|-----|-----|
| Prediction | the | cat | **sit** | on | the |     |
| Label      | ✅  | ✅  | S       | ✅ | ✅  | D   |

The WER is calculated as follows:

```
WER = (S + I + D) / N = (1 + 0 + 1) / 6 = 0.333
```

### STOI (Short-Time Objective Intelligibility)[[Paper](https://ieeexplore.ieee.org/abstract/document/5495701?casa_token=PLtqLc8KNAgAAAAA:FOLuZ4dgMYsnGb1dQHgqVOouQzRJ3vA5yqj-sbwf8gs9Q-AIDCLkMZzAgzRrAogwwxULK9zsYeE)]

STOI measures the intelligibility of the synthesized speech signal compared to the original signal. **A higher STOI indicates better intelligibility**.

### PESQ (Perceptual Evaluation of Speech Quality)[[Paper](https://ieeexplore.ieee.org/abstract/document/941023?casa_token=jdtHy84_KhQAAAAA:qHN3WbT6cNdufj6OOn_fn0Je0RedMv-WJCmhQ_3CWy4nMTuDvFMF3KstAmKqLx5suQwdPgGByoY)]

PESQ is a perceptual metric that evaluates speech quality in a way similar to how a human listener would. **A higher PESQ indicates better voice quality**.

## Benchmark Datasets

Model performance is evaluated using [our test datasets](https://huggingface.co/spaces/rjzevallos/test_app/blob/main/bsc.txt). These datasets cover a variety of domains and acoustic conditions, ensuring a robust evaluation.
"""

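# A minimal, hypothetical sketch of the WER formula described in METRICS_TAB_TEXT
# (WER = (S + I + D) / N via word-level edit distance). It is illustration only:
# this helper is not called anywhere, and the static scores above were not
# produced by it.
def word_error_rate(reference: str, prediction: str) -> float:
    """Word Error Rate between a reference and a predicted transcript."""
    ref, hyp = reference.split(), prediction.split()
    # Edit-distance table over words: d[i][j] = errors aligning ref[:i] with hyp[:j]
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i  # all deletions
    for j in range(len(hyp) + 1):
        d[0][j] = j  # all insertions
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,          # deletion
                          d[i][j - 1] + 1,          # insertion
                          d[i - 1][j - 1] + cost)   # match / substitution
    return d[len(ref)][len(hyp)] / len(ref)

# Reproduces the example from the metrics text:
# word_error_rate("the cat sat on the mat", "the cat sit on the")  -> 0.333...
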
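# A rough sketch of how STOI and PESQ scores could be computed for a pair of
# waveforms, assuming the third-party `pystoi` and `pesq` packages (they are
# not dependencies of this app, hence the local imports). Both expect 1-D
# float arrays at the same sample rate; this is not how the scores above
# were generated.
def objective_scores(reference_wav, generated_wav, sample_rate=16000):
    """Return STOI and (wide-band) PESQ for a reference/generated waveform pair."""
    from pystoi import stoi  # assumed dependency: pip install pystoi
    from pesq import pesq    # assumed dependency: pip install pesq

    return {
        'STOI': stoi(reference_wav, generated_wav, sample_rate, extended=False),
        'PESQ': pesq(sample_rate, reference_wav, generated_wav, 'wb'),
    }
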
""" # Ordenar primero por PESQ (calidad del habla) y luego por UTMOS (calidad percibida) sorted_leaderboard = sorted(leaderboard_data, key=lambda x: (x['UTMOS']), reverse=True) # Asignar el rank basado en el orden por PESQ for rank, model in enumerate(sorted_leaderboard): model['rank'] = rank + 1 # rank es la posición en la lista (1-indexed) return [[model['rank'], model['name'], model['UTMOS'], model['WER'], model['STOI'], model['PESQ']] for model in sorted_leaderboard] #################################### # Interfaz con Gradio #################################### theme = gr.themes.Base( font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'], ) with gr.Blocks(theme=theme) as demo: gr.Markdown("# 🏆 Leaderboard\nVote to help the community determine the best Catalan TTS models.\n") with gr.Blocks(theme=theme) as demo: gr.Markdown("# 🏆 Leaderboard\nVote to help the community determine the best Catalan TTS models.\n") with gr.Tabs(elem_classes="tab-buttons") as tabs: with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0): leaderboard_table = gr.DataFrame( headers=["Rank", "Model", "UTMOS", "WER", "STOI", "PESQ"], datatype=["str", "str", "str", "str", "str", "str"], value=get_leaderboard() # Carga los datos iniciales de la tabla ) with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1): gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text") gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text") # Lanzar la aplicación demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)