import json
import logging
from os.path import join as opj
from typing import Any, Dict, List

import pandas as pd

from config import DatasetHelper, ModelHelper, LOCAL_RESULTS_DIR

logger = logging.getLogger(__name__)


def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], contrast_string: str
) -> Dict[str, Any]:
    """Load the per-language gap value for one model/dataset pair.

    For every language in ``lang_ids`` the JSON file
    ``<LOCAL_RESULTS_DIR>/evaluation/<dataset_id>/results_<model_id>_<dataset_id>_devtest_<lang>_gender_<contrast_string>.json``
    is read, and the value stored under ``"<eval_metric>_diff_mean"`` is
    extracted, where ``<eval_metric>`` is the file's own ``"eval_metric"``
    entry.

    Args:
        model_id: Model identifier as it appears in the result file name.
        dataset_id: Dataset identifier; used both as the sub-directory name
            and inside the result file name.
        lang_ids: Languages to look up.
        contrast_string: Contrast suffix used in the result file name.

    Returns:
        A dict mapping each language to the extracted value, or to ``None``
        when the result file for that language does not exist.

    Raises:
        KeyError: If a result file exists but lacks the ``"eval_metric"``
            key or the derived ``"<eval_metric>_diff_mean"`` key.
    """
    results_dir = opj(LOCAL_RESULTS_DIR, "evaluation", dataset_id)
    lang_gaps: Dict[str, Any] = {}

    for lang in lang_ids:
        result_path = opj(
            results_dir,
            f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{contrast_string}.json",
        )
        # Keep the try body limited to the file read so that only a missing
        # file is treated as "no result"; malformed content still surfaces.
        try:
            # JSON is read as UTF-8 explicitly rather than relying on the
            # platform's default text encoding.
            with open(result_path, encoding="utf-8") as fp:
                data = json.load(fp)
        except FileNotFoundError:
            # Lazy %-style arguments: the message is only formatted when
            # DEBUG logging is actually enabled.
            logger.debug(
                "We could not find the result file for : %s, %s, %s",
                model_id,
                dataset_id,
                lang,
            )
            lang_gaps[lang] = None
        else:
            lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]

    return lang_gaps


def read_all_configs(contrast_type: str) -> pd.DataFrame:
    """Collect the per-language gaps of every model on every dataset.

    Iterates over ``DatasetHelper().dataset_configs`` and
    ``ModelHelper().sanitized_model_ids``, loads the gaps for each pair with
    :func:`load_language_results`, and flattens them into one row per
    (model, dataset, language).

    Args:
        contrast_type: Key into each dataset config's ``group_contrasts``
            mapping; the selected entry must provide ``"majority_group"``
            and ``"minority_group"``.

    Returns:
        A DataFrame with the columns ``Model``, ``Dataset``, ``Language``,
        ``Type`` and ``Gap``. ``Gap`` is ``None`` where no result file was
        found. The columns are present even when there are no rows.

    Raises:
        KeyError: If ``contrast_type`` is not present in a dataset config's
            ``group_contrasts``.
    """
    columns = ["Model", "Dataset", "Language", "Type", "Gap"]
    dataset_h = DatasetHelper()
    model_h = ModelHelper()

    rows = []
    for dataset_config in dataset_h.dataset_configs:
        # Everything below depends only on the dataset, so compute it once
        # here instead of once per model (or once per row).
        contrast_info = dataset_config.group_contrasts[contrast_type]
        contrast_string = (
            f"{contrast_info['majority_group']}_{contrast_info['minority_group']}"
        )
        dataset_id = dataset_config.sanitized_id()
        speaking_type = dataset_config.speaking_condition.capitalize()

        for model_id in model_h.sanitized_model_ids:
            lang_gaps = load_language_results(
                model_id,
                dataset_id,
                dataset_config.langs,
                contrast_string,
            )
            rows.extend(
                {
                    "Model": model_id,
                    "Dataset": dataset_id,
                    "Language": lang,
                    "Type": speaking_type,
                    "Gap": gap,
                }
                for lang, gap in lang_gaps.items()
            )

    # Passing the columns explicitly keeps the frame's schema stable even
    # when no rows were collected.
    return pd.DataFrame(rows, columns=columns)