import json
import logging
from os.path import join as opj
from typing import Any, Dict, List

import pandas as pd

from config import dataset2info, model2info, LOCAL_RESULTS_DIR

logger = logging.getLogger(__name__)


def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], setup: str
) -> Dict[str, Any]:
    """Load the per-language gap value for one model/dataset pair.

    For every language in ``lang_ids`` the file
    ``<LOCAL_RESULTS_DIR>/evaluation/<dataset_id>/results_<model_id>_<dataset_id>_devtest_<lang>_gender_<setup>.json``
    is read, and the value stored under ``"<eval_metric>_diff_mean"`` is
    collected, where ``eval_metric`` is itself read from the same file.

    Args:
        model_id: Model identifier used in the result file name.
        dataset_id: Dataset identifier; both a directory and a file-name part.
        lang_ids: Languages to look up.
        setup: Setup suffix used in the result file name.

    Returns:
        A dict mapping each language to its gap value, or to ``None`` when
        the corresponding result file does not exist.

    Raises:
        KeyError: If a result file lacks ``"eval_metric"`` or the derived
            ``"<eval_metric>_diff_mean"`` key.
        json.JSONDecodeError: If a result file is not valid JSON.
    """
    lang_gaps: Dict[str, Any] = {}
    for lang in lang_ids:
        result_path = opj(
            LOCAL_RESULTS_DIR,
            "evaluation",
            dataset_id,
            f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
        )
        # Only the file read is guarded: a missing file is an expected
        # situation, while a malformed file should surface as an error.
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(result_path, encoding="utf-8") as fp:
                data = json.load(fp)
        except FileNotFoundError:
            logger.debug(
                "We could not find the result file for : %s, %s, %s",
                model_id,
                dataset_id,
                lang,
            )
            lang_gaps[lang] = None
            continue
        lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
    return lang_gaps


def read_all_configs(setup: str) -> pd.DataFrame:
    """Collect the gap values of every model on every dataset.

    Iterates over all entries of ``dataset2info`` and ``model2info`` and, for
    each pair, loads the results for the languages listed in the dataset's
    ``langs`` attribute.

    Args:
        setup: Setup suffix forwarded to ``load_language_results``.

    Returns:
        A DataFrame with one row per (model, dataset, language) and the
        columns ``Model``, ``Dataset``, ``Language`` and ``Gap``. ``Gap`` is
        missing where no result file was found. The columns are present even
        when there are no rows at all.
    """
    all_datasets = dataset2info.keys()
    print("Parsing results datasets:", all_datasets)
    all_models = model2info.keys()
    print("Parsing results models:", all_models)

    rows = []
    for dataset_id in all_datasets:
        # The language list depends only on the dataset, so look it up once.
        dataset_langs = dataset2info[dataset_id].langs
        for model_id in all_models:
            lang_gaps = load_language_results(
                model_id, dataset_id, dataset_langs, setup
            )
            rows.extend(
                {
                    "Model": model_id,
                    "Dataset": dataset_id,
                    "Language": lang,
                    "Gap": gap,
                }
                for lang, gap in lang_gaps.items()
            )

    # Explicit columns keep the schema stable when ``rows`` is empty.
    return pd.DataFrame(rows, columns=["Model", "Dataset", "Language", "Gap"])


def get_common_langs() -> List[str]:
    """Return a list of langs that are supported by all models.

    Returns:
        The languages present in the ``langs`` attribute of every entry of
        ``model2info``, in no particular order. An empty list is returned
        when ``model2info`` has no entries.
    """
    lang_sets = [set(model2info[model_id].langs) for model_id in model2info.keys()]
    if not lang_sets:
        # No models registered: nothing can be common.
        return []
    return list(set.intersection(*lang_sets))