Spaces:
Sleeping
Sleeping
import pandas as pd | |
from typing import List | |
from os.path import join as opj | |
import json | |
from config import dataset2info, model2info, LOCAL_RESULTS_DIR | |
import logging | |
logger = logging.getLogger(__name__) | |
def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], setup: str
) -> dict:
    """Collect one model's per-language gap value on one dataset.

    For each language in ``lang_ids`` this reads the JSON file
    ``results_<model_id>_<dataset_id>_devtest_<lang>_gender_<setup>.json``
    located under ``<LOCAL_RESULTS_DIR>/evaluation/<dataset_id>/`` and
    extracts the entry stored under ``"<eval_metric>_diff_mean"``, where
    ``<eval_metric>`` is the value of the file's own ``"eval_metric"`` key.

    Args:
        model_id: Model identifier used in the result file name.
        dataset_id: Dataset identifier; names both the sub-directory and
            part of the file name.
        lang_ids: Languages to look up.
        setup: Setup identifier used as the file name suffix.

    Returns:
        A dict mapping every language in ``lang_ids`` to the extracted
        value, or to ``None`` when the result file does not exist.

    Raises:
        KeyError: If a result file exists but lacks ``"eval_metric"`` or
            the derived ``"<eval_metric>_diff_mean"`` key.
    """
    results_dir = opj(LOCAL_RESULTS_DIR, "evaluation", dataset_id)
    lang_gaps = {}
    for lang in lang_ids:
        result_path = opj(
            results_dir,
            f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
        )
        try:
            # JSON is UTF-8 by specification; do not rely on the locale's
            # default encoding when decoding the file.
            with open(result_path, encoding="utf-8") as fp:
                data = json.load(fp)
        except FileNotFoundError:
            # A missing file is expected (not every combination has been
            # evaluated), so record the absence instead of failing.
            logger.debug(
                "We could not find the result file for <model,dataset,lang>: %s, %s, %s",
                model_id,
                dataset_id,
                lang,
            )
            lang_gaps[lang] = None
        else:
            lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
    return lang_gaps
def read_all_configs(setup: str):
    """Build a long-format table of gap values for every dataset/model pair.

    Iterates over all keys of ``dataset2info`` and ``model2info``, loads the
    per-language values through ``load_language_results`` using each
    dataset's ``langs``, and emits one row per (model, dataset, language).

    Args:
        setup: Setup identifier forwarded to ``load_language_results``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Model``, ``Dataset``,
        ``Language`` and ``Gap``. ``Gap`` holds whatever
        ``load_language_results`` returned for that language (``None`` when
        the result file was missing).
    """
    all_datasets = dataset2info.keys()
    print("Parsing results datasets:", all_datasets)
    all_models = model2info.keys()
    print("Parsing results models:", all_models)

    rows = []
    for dataset_id in all_datasets:
        # The language list depends only on the dataset, not on the model.
        dataset_langs = dataset2info[dataset_id].langs
        for model_id in all_models:
            lang_gaps = load_language_results(
                model_id, dataset_id, dataset_langs, setup
            )
            rows.extend(
                {
                    "Model": model_id,
                    "Dataset": dataset_id,
                    "Language": lang,
                    "Gap": gap,
                }
                for lang, gap in lang_gaps.items()
            )

    # Passing the columns explicitly keeps the schema stable even when no
    # rows were collected; otherwise the frame would have no columns at all.
    return pd.DataFrame(rows, columns=["Model", "Dataset", "Language", "Gap"])
def get_common_langs():
    """Return a list of the languages that are supported by all models.

    The result is the intersection of the ``langs`` of every entry in
    ``model2info``. Languages are returned in the order in which they appear
    in the first model's ``langs`` (duplicates removed), so the output is
    stable across runs. An empty ``model2info`` yields an empty list.
    """
    model_ids = list(model2info.keys())
    if not model_ids:
        # No models configured: there is nothing to intersect.
        return []

    first_langs = list(model2info[model_ids[0]].langs)
    common_langs = set(first_langs)
    for model_id in model_ids[1:]:
        common_langs.intersection_update(model2info[model_id].langs)

    # Iterating a set gives an order that can change between interpreter
    # runs; follow the first model's ordering instead.
    return [lang for lang in dict.fromkeys(first_langs) if lang in common_langs]