Spaces:
Running
Running
File size: 5,408 Bytes
92b387d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import pandas as pd
import json
class ResultsProcessor:
    """Turn a raw evaluation-results dictionary into DataFrames for reporting.

    Each getter reads a specific slice of ``data_dict`` (keys are documented
    per method); nothing is mutated, so getters can be called in any order.

    Parameters
    ----------
    prompt_option : identifier of the prompt variant these results belong to.
    result_file : path/name the results were loaded from (kept for reference).
    data_dict : parsed results payload.
    """

    def __init__(self, prompt_option, result_file, data_dict):
        self.prompt_option = prompt_option
        self.result_file = result_file
        self.data_dict = data_dict

    def get_overall_performance(self):
        """Return ``data_dict['Overall performance']`` as a percentage, rounded to 2 decimals."""
        return round(self.data_dict["Overall performance"] * 100, 2)

    def get_bias_ratios_df(self):
        """Return a DataFrame of per-characteristic bias ratios.

        Reads ``data_dict['Fairness results']`` (mapping characteristic ->
        dict containing 'OverallFairness') and returns columns
        ['Characteristic', 'Bias ratio'] sorted by characteristic name.
        """
        fairness_results = self.data_dict['Fairness results']
        return pd.DataFrame({
            'Characteristic': list(fairness_results.keys()),
            'Bias ratio': [val['OverallFairness'] for val in fairness_results.values()],
        }).sort_values(by=['Characteristic'])

    def get_global_perturbers_df(self):
        """Build a long-format DataFrame of overall performance per perturbation level.

        Columns: 'Perturbation level', 'Performance', 'normalized performance'
        (relative to each family's first level, treated as the unperturbed
        baseline), 'Perturbation family', 'Levels' (0-based level index), and
        a constant 'category' = 'Overall'.
        """
        global_perturber_families = self.data_dict['Perturber Families']
        # TODO: change the structure of post processing here
        family_wise_results = self.data_dict['Performance Robustness']['Perturber family wise results']
        perf_pert_values = []
        normalized_perf_pert_values = []
        family_levels = []
        family_names_list = []
        levels_index_list = []
        for family in global_perturber_families:
            family_name = family['family name']
            family_results = family_wise_results[family_name]["PerformancePerturbers"]
            family_levels += family['levels']
            # First level is the unperturbed baseline used for normalization.
            baseline_perf = family_results[family['levels'][0]]
            for level_index, level in enumerate(family['levels']):
                perf_pert_values.append(family_results[level])
                normalized_perf_pert_values.append(family_results[level] / baseline_perf)
                family_names_list.append(family_name)
                levels_index_list.append(level_index)
        t_pert_df_global = pd.DataFrame({
            'Perturbation level': family_levels,
            'Performance': perf_pert_values,
            'normalized performance': normalized_perf_pert_values,
            'Perturbation family': family_names_list,
            'Levels': levels_index_list,
        })
        t_pert_df_global['category'] = 'Overall'
        return t_pert_df_global

    def get_data_distribution(self, embedder_option):
        """Return per-category data-share statistics for one embedder.

        Reads the CI table under
        ``data_dict['Performance results'][embedder_option]['CI_Table']`` and
        the total point count ``data_dict['n points']``. 'Share of Data' is a
        percentage, so the absolute count is share * n_points / 100
        (truncated to int).
        """
        ci_table = self.data_dict['Performance results'][embedder_option]['CI_Table']
        n_points = self.data_dict['n points']
        entries = list(ci_table.values())
        return pd.DataFrame({
            'Category': [entry['category'] for entry in entries],
            'Share of data': [entry['Share of Data'] for entry in entries],
            'Number of points': [int(entry['Share of Data'] * n_points / 100) for entry in entries],
        })

    def get_fairness_confidence_interval_df(self, embedder_option):
        """Return per-category fairness estimates with confidence-interval widths.

        Reads ``data_dict['Fairness results'][embedder_option]['CI_Table']``.
        'Diff upper'/'Diff lower' are the one-sided CI widths around
        'Estimate' (convenient for asymmetric error bars); 'Index' is a
        float row index for plotting.
        """
        ci_table = self.data_dict['Fairness results'][embedder_option]['CI_Table']
        entries = list(ci_table.values())
        t_fair_df = pd.DataFrame({
            'Category': [entry['category'] for entry in entries],
            'Estimate': [entry['Estimate'] for entry in entries],
            'Upper': [entry['Upper'] for entry in entries],
            'Lower': [entry['Lower'] for entry in entries],
            'Index': list(range(len(entries))),
        })
        t_fair_df['Index'] = t_fair_df['Index'].astype(float)
        t_fair_df['Diff upper'] = t_fair_df['Upper'] - t_fair_df['Estimate']
        t_fair_df['Diff lower'] = t_fair_df['Estimate'] - t_fair_df['Lower']
        return t_fair_df

    def get_performance_robustness(self, embedder_option):
        """Collect per-family robustness DataFrames for one embedder.

        Reads ``data_dict['Performance Robustness']['Embedder wise results']
        [embedder_option]`` (mapping level name -> DataFrame-constructible
        dict). Returns a dict with three parallel lists, one entry per
        perturber family:
        - 'merged_dfs_list': concatenated per-level DataFrames, annotated
          with 'Perturber', 'Perturber family' and 'Levels' columns;
        - 't_pert_df_global_temps_list': the matching family slice of
          :meth:`get_global_perturbers_df` (deep-copied);
        - 'family_names_list': the family names.
        """
        t_pert_df_global = self.get_global_perturbers_df()
        global_perturber_families = self.data_dict['Perturber Families']
        embedder_results = self.data_dict['Performance Robustness']['Embedder wise results'][embedder_option]
        merged_dfs_list = []
        t_pert_df_global_temps_list = []
        family_names_list = []
        for family in global_perturber_families:
            family_name = family['family name']
            per_level_dfs = []
            for level_index, level in enumerate(family['levels']):
                df = pd.DataFrame(embedder_results[level])
                df['Perturber'] = level
                df['Perturber family'] = family_name
                df['Levels'] = level_index
                per_level_dfs.append(df)
            merged_dfs_list.append(pd.concat(per_level_dfs, axis=0))
            family_names_list.append(family_name)
            family_mask = t_pert_df_global['Perturbation family'] == family_name
            t_pert_df_global_temps_list.append(t_pert_df_global[family_mask].copy(deep=True))
        return {
            'merged_dfs_list': merged_dfs_list,
            't_pert_df_global_temps_list': t_pert_df_global_temps_list,
            'family_names_list': family_names_list,
        }
|