#!/usr/bin/env python3
"""Streamlit leaderboard for models tagged with `robust-speech-event` on the Hugging Face Hub."""
import requests
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load
import pandas as pd
import streamlit as st

# Metrics that should not be displayed in the leaderboard tables.
METRICS_TO_NOT_DISPLAY = set(["ser"])
# Model ids whose README has no `language` metadata are collected here.
NO_LANGUAGE_MODELS = []


def get_model_ids():
    """Return the ids of all Hub models tagged with `robust-speech-event`."""
    api = HfApi()
    models = api.list_models(filter="robust-speech-event")
    model_ids = [x.modelId for x in models]
    return model_ids


def get_metadatas(model_ids):
    """Download each model's README.md and parse its YAML metadata."""
    metadatas = {}
    for model_id in model_ids:
        try:
            readme_path = hf_hub_download(model_id, filename="README.md")
            metadatas[model_id] = metadata_load(readme_path)
        except requests.exceptions.HTTPError:  # 404 README.md not found
            metadatas[model_id] = None
    return metadatas


def get_model_results_and_language_map(metadatas):
    """Extract per-dataset metric results and the model -> language(s) mapping."""
    all_model_results = {}  # model_id -> {dataset -> {metric -> value}}
    model_language_map = {}  # model_id -> [lang, ...]
    for model_id, metadata in metadatas.items():
        if metadata is None or "language" not in metadata:
            NO_LANGUAGE_MODELS.append(model_id)
            continue
        lang = metadata["language"]
        model_language_map[model_id] = lang if isinstance(lang, list) else [lang]
        if "model-index" not in metadata:
            all_model_results[model_id] = None
        else:
            result_dict = {}
            for result in metadata["model-index"][0]["results"]:
                if "dataset" not in result or "metrics" not in result:
                    continue
                dataset = result["dataset"]["type"]
                metrics = [x["type"] for x in result["metrics"]]
                values = [
                    x["value"] if "value" in x else None for x in result["metrics"]
                ]
                result_dict[dataset] = {k: v for k, v in zip(metrics, values)}
            all_model_results[model_id] = result_dict
    return all_model_results, model_language_map


def get_datasets_metrics_langs(all_model_results, model_language_map):
    """Collect the sets of all datasets, languages, and metrics present in the results."""
    # get all datasets
    all_datasets = set(
        sum([list(x.keys()) for x in all_model_results.values() if x is not None], [])
    )
    all_langs = set(sum(list(model_language_map.values()), []))
    # get all metrics
    all_metrics = []
    for metric_result in all_model_results.values():
        if metric_result is not None:
            all_metrics += sum([list(x.keys()) for x in metric_result.values()], [])
    all_metrics = set(all_metrics) - METRICS_TO_NOT_DISPLAY
    return all_datasets, all_langs, all_metrics


# get results table (one table for each dataset, metric)
def retrieve_dataframes(
    all_model_results, model_language_map, all_datasets, all_langs, all_metrics
):
    all_datasets_results = {}
    pandas_datasets = {}
    for dataset in all_datasets:
        all_datasets_results[dataset] = {}
        pandas_datasets[dataset] = {}
        for metric in all_metrics:
            all_datasets_results[dataset][metric] = {}
            pandas_datasets[dataset][metric] = {}
            for lang in all_langs:
                all_datasets_results[dataset][metric][lang] = {}
                results = {}
                for model_id, model_result in all_model_results.items():
                    is_relevant = (
                        lang in model_language_map[model_id]
                        and model_result is not None
                        and dataset in model_result
                        and metric in model_result[dataset]
                    )
                    if not is_relevant:
                        continue
                    result = model_result[dataset][metric]
                    if isinstance(result, str):
                        # strip a possible "%" sign before converting to float
                        result = "".join(result.split("%"))
                        try:
                            result = float(result)
                        except:  # noqa: E722
                            result = None
                    elif isinstance(result, float) and result < 1.0:
                        # assuming that WER is given in 0.13 format
                        result = 100 * result
                    elif isinstance(result, list):
                        if len(result) > 0:
                            result = result[0]
                        else:
                            result = None
                    results[model_id] = round(result, 2) if result is not None else None
                # sort ascending by value; models without a value go last
                results = dict(
                    sorted(
                        results.items(),
                        key=lambda item: (item[1] is None, item[1] or 0.0),
                    )
                )
                all_datasets_results[dataset][metric][lang] = [
                    f"{v} : {k}" for k, v in results.items()
                ]
            data = all_datasets_results[dataset][metric]
            data_frame = pd.DataFrame.from_dict(data, orient="index")
            data_frame.fillna("", inplace=True)
            data_frame = data_frame.sort_index()
            # shift columns to 1-based ranks
            data_frame.columns = data_frame.columns + 1
            pandas_datasets[dataset][metric] = data_frame
    return pandas_datasets


@st.cache(persist=True)
def main():
    # 0. Get model ids
    model_ids = get_model_ids()
    # 1. Retrieve metadatas
    metadatas = get_metadatas(model_ids)
    # 2. Parse to results
    all_model_results, model_language_map = get_model_results_and_language_map(
        metadatas
    )
    # 3. Get datasets and langs
    all_datasets, all_langs, all_metrics = get_datasets_metrics_langs(
        all_model_results, model_language_map
    )
    # 4. Get dataframes
    all_dataframes = retrieve_dataframes(
        all_model_results, model_language_map, all_datasets, all_langs, all_metrics
    )
    return all_dataframes, all_datasets, all_metrics


all_dataframes, all_datasets, all_metrics = main()

datasets_select = sorted(list(all_datasets))
metric_select = sorted(list(all_metrics))

dataset = st.selectbox(
    'Dataset',
    datasets_select,
    index=1,
)
metric = st.selectbox(
    'Metric',
    metric_select,
    index=1,
)

st.dataframe(all_dataframes[dataset][metric], width=600, height=1200)