#!/usr/bin/env python3
"""Aggregate evaluation results of Hub models tagged "robust-speech-event".

For every model returned by the Hub listing, the README metadata is loaded and
its "model-index" results are collected. The script then builds one pandas
DataFrame per (dataset, metric) pair, stored in ``pandas_datasets``: each row
is a language and each cell is a "model_id: value" string, ordered by
ascending value with models lacking a usable value placed last.
"""
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load
import pandas as pd

METRICS_TO_NOT_DISPLAY = {"ser"}
NO_LANGUAGE_MODELS = []


def _normalize_metric_value(value):
    """Return *value* rounded to two decimals, or None if it is unusable.

    Strings have any "%" sign removed before being parsed as a float;
    strings that still cannot be parsed yield None. Floats below 1.0 are
    assumed to be fractions (e.g. a WER reported as 0.13) and are scaled by
    100. Values parsed from strings are not rescaled. None stays None.
    """
    if value is None:
        return None
    if isinstance(value, str):
        try:
            value = float(value.replace("%", "").strip())
        except ValueError:
            return None
    elif isinstance(value, float) and value < 1.0:
        # assuming that WER is given in 0.13 format
        value = 100 * value
    return round(value, 2)


api = HfApi()
models = api.list_models(filter="robust-speech-event")
model_ids = [x.modelId for x in models]

# model_id -> metadata parsed from the model's README.md
metadatas = {}
for model_id in model_ids:
    readme_path = hf_hub_download(model_id, filename="README.md")
    metadatas[model_id] = metadata_load(readme_path)

# model_id
#   - dataset
#     - metric
all_model_results = {}
# model_id: list of languages
model_language_map = {}
for model_id, metadata in metadatas.items():
    # metadata_load may return None when the README has no metadata block;
    # treat that the same as a model card without a language entry.
    if not metadata or "language" not in metadata:
        NO_LANGUAGE_MODELS.append(model_id)
        continue

    lang = metadata["language"]
    model_language_map[model_id] = lang if isinstance(lang, list) else [lang]

    if "model-index" not in metadata:
        all_model_results[model_id] = None
        continue

    result_dict = {}
    for result in metadata["model-index"][0]["results"]:
        dataset = result["dataset"]["type"]
        # A metric entry without a "value" key is kept with value None.
        result_dict[dataset] = {m["type"]: m.get("value") for m in result["metrics"]}
    all_model_results[model_id] = result_dict

# get all datasets
all_datasets = set()
for model_result in all_model_results.values():
    if model_result is not None:
        all_datasets.update(model_result)

# get all languages
all_langs = set()
for langs in model_language_map.values():
    all_langs.update(langs)

# get all metrics
all_metrics = set()
for model_result in all_model_results.values():
    if model_result is not None:
        for metric_values in model_result.values():
            all_metrics.update(metric_values)
all_metrics -= METRICS_TO_NOT_DISPLAY

# get results table (one table for each dataset, metric)
all_datasets_results = {}
pandas_datasets = {}
for dataset in all_datasets:
    all_datasets_results[dataset] = {}
    pandas_datasets[dataset] = {}
    for metric in all_metrics:
        all_datasets_results[dataset][metric] = {}
        for lang in all_langs:
            results = {}
            for model_id, model_result in all_model_results.items():
                is_relevant = (
                    lang in model_language_map[model_id]
                    and model_result is not None
                    and dataset in model_result
                    and metric in model_result[dataset]
                )
                if not is_relevant:
                    continue
                results[model_id] = _normalize_metric_value(model_result[dataset][metric])

            # Ascending by value; entries without a value (None) go last.
            results = dict(sorted(results.items(), key=lambda item: (item[1] is None, item[1])))
            all_datasets_results[dataset][metric][lang] = [f"{k}: {v}" for k, v in results.items()]

        # orient="index": one row per language, one column per rank position.
        data = all_datasets_results[dataset][metric]
        data_frame = pd.DataFrame.from_dict(data, orient="index")
        data_frame.fillna("", inplace=True)
        pandas_datasets[dataset][metric] = data_frame