# fair-asr-leaderboard / parsing.py
# Last commit 86e679c (g8a9): enhance data processing and visualization: add
# support for common languages and improve handling of models with NaN values
import pandas as pd
from typing import List
from os.path import join as opj
import json
from config import dataset2info, model2info, LOCAL_RESULTS_DIR
import logging
logger = logging.getLogger(__name__)
def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], setup: str
):
    """Load the per-language gap of one model on one dataset.

    For every language in ``lang_ids`` this reads the JSON file
    ``results_<model_id>_<dataset_id>_devtest_<lang>_gender_<setup>.json``
    located under ``LOCAL_RESULTS_DIR/evaluation/<dataset_id>/``. The gap is
    the value stored under ``"<eval_metric>_diff_mean"``, where
    ``eval_metric`` is itself read from the same file.

    Args:
        model_id: Model identifier used in the result file name.
        dataset_id: Dataset identifier; also the name of the sub-directory.
        lang_ids: Language identifiers to look up.
        setup: Setup name, the last component of the result file name.

    Returns:
        A dict mapping each language id to its gap, or to ``None`` when the
        corresponding result file does not exist.

    Raises:
        KeyError: If a result file exists but lacks ``"eval_metric"`` or the
            derived ``"<eval_metric>_diff_mean"`` entry.
    """
    results_dir = opj(LOCAL_RESULTS_DIR, "evaluation", dataset_id)
    lang_gaps = {}
    for lang in lang_ids:
        result_path = opj(
            results_dir,
            f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
        )
        try:
            # Decode explicitly as UTF-8 so parsing does not depend on the
            # platform's locale encoding.
            with open(result_path, encoding="utf-8") as fp:
                data = json.load(fp)
        except FileNotFoundError:
            # A missing file is expected (not every model covers every
            # language); record the hole instead of failing.
            logger.debug(
                "We could not find the result file for <model,dataset,lang>: %s, %s, %s",
                model_id,
                dataset_id,
                lang,
            )
            lang_gaps[lang] = None
        else:
            lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
    return lang_gaps
def read_all_configs(setup: str):
    """Collect the per-language gaps of every model/dataset pair in one table.

    Iterates over all datasets in ``dataset2info`` and all models in
    ``model2info``, loads the gaps for the dataset's languages through
    ``load_language_results`` and flattens them into one row per
    (model, dataset, language).

    Args:
        setup: Setup name forwarded to ``load_language_results``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Model``, ``Dataset``,
        ``Language`` and ``Gap``. ``Gap`` is missing for languages whose
        result file was not found. The columns are present even when no row
        was produced.
    """
    all_datasets = dataset2info.keys()
    print("Parsing results datasets:", all_datasets)
    all_models = model2info.keys()
    print("Parsing results models:", all_models)

    rows = []
    for dataset_id in all_datasets:
        dataset_langs = dataset2info[dataset_id].langs
        for model_id in all_models:
            lang_gaps = load_language_results(
                model_id, dataset_id, dataset_langs, setup
            )
            rows.extend(
                {
                    "Model": model_id,
                    "Dataset": dataset_id,
                    "Language": lang,
                    "Gap": gap,
                }
                for lang, gap in lang_gaps.items()
            )

    # Explicit columns keep the schema stable when ``rows`` is empty; without
    # them an empty frame would have no columns at all.
    return pd.DataFrame(rows, columns=["Model", "Dataset", "Language", "Gap"])
def get_common_langs():
    """Return the languages that are supported by all models.

    Returns:
        A sorted list with the entries present in the ``langs`` of every
        model in ``model2info``; an empty list when ``model2info`` is empty.
    """
    lang_sets = [set(info.langs) for info in model2info.values()]
    if not lang_sets:
        # No models registered: nothing can be common (previously IndexError).
        return []
    # Sort so the result does not depend on set iteration order, which varies
    # between interpreter runs for strings.
    return sorted(set.intersection(*lang_sets))