Zekun Wu committed on
Commit f921051
1 Parent(s): ae16dbc
Files changed (1)
  1. util/evaluation.py +36 -37
util/evaluation.py CHANGED
@@ -8,7 +8,6 @@ from scipy.spatial.distance import jensenshannon
 from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
 from statsmodels.stats.multicomp import pairwise_tukeyhsd
 from scipy.stats import ttest_1samp
-from scikit_posthocs import posthoc_nemenyi

 # def bootstrap_t_test(data1, data2, num_bootstrap=1000):
 #     """Perform a bootstrapped t-test."""
@@ -49,33 +48,30 @@ from scikit_posthocs import posthoc_nemenyi
 #     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
 #     return observed_t_stat, p_value

-# def posthoc_friedman(data, variables, rank_suffix='_Rank'):
-#     """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
-#     ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
-#     num_subjects = ranked_data.shape[0]
-#     num_conditions = ranked_data.shape[1]
-#     comparisons = []
-#
-#     for i in range(num_conditions):
-#         for j in range(i + 1, num_conditions):
-#             diff = ranked_data[:, i] - ranked_data[:, j]
-#             abs_diff = np.abs(diff)
-#             avg_diff = np.mean(diff)
-#             se_diff = np.std(diff, ddof=1) / np.sqrt(num_subjects)
-#             z_value = avg_diff / se_diff
-#             p_value = 2 * (1 - stats.norm.cdf(np.abs(z_value)))
-#             comparisons.append({
-#                 "Group1": variables[i],
-#                 "Group2": variables[j],
-#                 "Z": z_value,
-#                 "p-value": p_value
-#             })
-#
-#     return comparisons
-def posthoc_friedman_nemenyi(data, variables, rank_suffix='_Rank'):
-    """Perform post-hoc Nemenyi test for the Friedman test."""
+def posthoc_friedman(data, variables, rank_suffix='_Rank'):
+    """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
     ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
-    return posthoc_nemenyi(ranked_data)
+    num_subjects = ranked_data.shape[0]
+    num_conditions = ranked_data.shape[1]
+    comparisons = []
+
+    for i in range(num_conditions):
+        for j in range(i + 1, num_conditions):
+            diff = ranked_data[:, i] - ranked_data[:, j]
+            abs_diff = np.abs(diff)
+            avg_diff = np.mean(diff)
+            se_diff = np.std(diff, ddof=1) / np.sqrt(num_subjects)
+            z_value = avg_diff / se_diff
+            p_value = 2 * (1 - stats.norm.cdf(np.abs(z_value)))
+            comparisons.append({
+                "Group1": variables[i],
+                "Group2": variables[j],
+                "Z": z_value,
+                "p-value": p_value
+            })
+
+    return comparisons
+
 def statistical_tests(data):
     """Perform various statistical tests to evaluate potential biases."""
     variables = ['Privilege', 'Protect', 'Neutral']
@@ -97,25 +93,28 @@ def statistical_tests(data):
     ]

     pairwise_results = {
-        'T-Test': {}
-    }
-
-    pairwise_results = {
-        'Wilcoxon Signed-Rank Test': {}
+        'Wilcoxon Test': {}
     }

     for (var1, var2) in pairs:
         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
         pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'

-        # Wilcoxon signed-rank test for pairwise comparisons
-        wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
-        pairwise_results['Wilcoxon Signed-Rank Test'][pair_name_score] = {"Statistic": wilcoxon_stat,
-                                                                          "p-value": wilcoxon_p}
+        # Wilcoxon Signed-Rank Test
+        if len(data) > 20:
+            wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
+        else:
+            wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
+        pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
+
+
+        # # Bootstrapped T-test for independent samples
+        # t_stat, t_p = bootstrap_t_test(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
+        # pairwise_results['T-Test'][pair_rank_score] = {"Statistic": t_stat, "p-value": t_p}

     # Friedman test
     friedman_stat, friedman_p = friedmanchisquare(*rank_data)
-    posthoc_results = posthoc_friedman_nemenyi(data, variables, rank_suffix)
+    posthoc_results = posthoc_friedman(data, variables, rank_suffix)

     results = {
         "Average Ranks": average_ranks.to_dict(),
 
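For reference, the new posthoc_friedman helper can be exercised on its own. Below is a minimal usage sketch on made-up ranks; the import path and the toy DataFrame are assumptions for illustration, while the '<variable>_Rank' column convention comes from statistical_tests in the diff above.

# Usage sketch (not part of the commit): toy ranks, hypothetical import path.
import pandas as pd

from util.evaluation import posthoc_friedman  # assumes the repo root is on PYTHONPATH

toy = pd.DataFrame({
    'Privilege_Rank': [1, 1, 2, 1, 1],  # illustrative rank values only
    'Protect_Rank':   [3, 2, 3, 3, 2],
    'Neutral_Rank':   [2, 3, 1, 2, 3],
})

# posthoc_friedman returns one dict per pair of conditions,
# with keys "Group1", "Group2", "Z" and "p-value".
for comparison in posthoc_friedman(toy, ['Privilege', 'Protect', 'Neutral']):
    print(comparison)

With three variables this prints three pairwise comparisons (Privilege vs Protect, Privilege vs Neutral, Protect vs Neutral).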
 
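The guarded pairwise Wilcoxon comparison added in statistical_tests can likewise be reproduced in isolation. The following standalone sketch uses deterministic toy ranks; the column names and the len(data) > 20 threshold mirror the diff, while the data themselves are invented.

# Standalone sketch of the guarded Wilcoxon signed-rank comparison (toy data).
import numpy as np
import pandas as pd
from scipy.stats import wilcoxon

data = pd.DataFrame({
    'Privilege_Rank': np.tile([1, 2, 3], 10),  # 30 illustrative rank values
    'Protect_Rank':   np.tile([2, 3, 1], 10),
})

if len(data) > 20:
    wilcoxon_stat, wilcoxon_p = wilcoxon(data['Privilege_Rank'], data['Protect_Rank'])
else:
    wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."

print({"Statistic": wilcoxon_stat, "p-value": wilcoxon_p})

The size guard presumably reflects the large-sample normal approximation behind the signed-rank test; below that threshold the commit records NaN and an explanatory message instead of a p-value.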