Zekun Wu committed
Commit f921051 · 1 Parent(s): ae16dbc
update

Files changed: util/evaluation.py (+36 -37)
util/evaluation.py
CHANGED
@@ -8,7 +8,6 @@ from scipy.spatial.distance import jensenshannon
 from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
 from statsmodels.stats.multicomp import pairwise_tukeyhsd
 from scipy.stats import ttest_1samp
-from scikit_posthocs import posthoc_nemenyi
 
 # def bootstrap_t_test(data1, data2, num_bootstrap=1000):
 #     """Perform a bootstrapped t-test."""
@@ -49,33 +48,30 @@ from scikit_posthocs import posthoc_nemenyi
 #     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
 #     return observed_t_stat, p_value
 
-
-
-# ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
-# num_subjects = ranked_data.shape[0]
-# num_conditions = ranked_data.shape[1]
-# comparisons = []
-#
-# for i in range(num_conditions):
-#     for j in range(i + 1, num_conditions):
-#         diff = ranked_data[:, i] - ranked_data[:, j]
-#         abs_diff = np.abs(diff)
-#         avg_diff = np.mean(diff)
-#         se_diff = np.std(diff, ddof=1) / np.sqrt(num_subjects)
-#         z_value = avg_diff / se_diff
-#         p_value = 2 * (1 - stats.norm.cdf(np.abs(z_value)))
-#         comparisons.append({
-#             "Group1": variables[i],
-#             "Group2": variables[j],
-#             "Z": z_value,
-#             "p-value": p_value
-#         })
-#
-# return comparisons
-def posthoc_friedman_nemenyi(data, variables, rank_suffix='_Rank'):
-    """Perform post-hoc Nemenyi test for the Friedman test."""
-    ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
-
+def posthoc_friedman(data, variables, rank_suffix='_Rank'):
+    """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
+    ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
+    num_subjects = ranked_data.shape[0]
+    num_conditions = ranked_data.shape[1]
+    comparisons = []
+
+    for i in range(num_conditions):
+        for j in range(i + 1, num_conditions):
+            diff = ranked_data[:, i] - ranked_data[:, j]
+            abs_diff = np.abs(diff)
+            avg_diff = np.mean(diff)
+            se_diff = np.std(diff, ddof=1) / np.sqrt(num_subjects)
+            z_value = avg_diff / se_diff
+            p_value = 2 * (1 - stats.norm.cdf(np.abs(z_value)))
+            comparisons.append({
+                "Group1": variables[i],
+                "Group2": variables[j],
+                "Z": z_value,
+                "p-value": p_value
+            })
+
+    return comparisons
+
 def statistical_tests(data):
     """Perform various statistical tests to evaluate potential biases."""
     variables = ['Privilege', 'Protect', 'Neutral']
@@ -97,25 +93,28 @@ def statistical_tests(data):
     ]
 
     pairwise_results = {
-        '
-    }
-
-    pairwise_results = {
-        'Wilcoxon Signed-Rank Test': {}
+        'Wilcoxon Test': {}
     }
 
     for (var1, var2) in pairs:
         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
         pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
 
-        # Wilcoxon
-
-
-
+        # Wilcoxon Signed-Rank Test
+        if len(data) > 20:
+            wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
+        else:
+            wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
+        pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
+
+
+        # # Bootstrapped T-test for independent samples
+        # t_stat, t_p = bootstrap_t_test(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
+        # pairwise_results['T-Test'][pair_rank_score] = {"Statistic": t_stat, "p-value": t_p}
 
     # Friedman test
    friedman_stat, friedman_p = friedmanchisquare(*rank_data)
-    posthoc_results =
+    posthoc_results = posthoc_friedman(data, variables, rank_suffix)
 
     results = {
         "Average Ranks": average_ranks.to_dict(),
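Note on the new helper (not part of the commit): posthoc_friedman compares each pair of ranked conditions with a z-statistic on the mean rank difference and a two-sided normal p-value. The sketch below is a minimal, hypothetical usage example; it assumes the repository layout makes util/evaluation.py importable as util.evaluation, that the module's top-level imports provide np and stats as the rest of the file expects, and that the DataFrame follows the '<variable>_Rank' column convention seen above (the toy column values are invented).

# Hypothetical usage sketch -- illustrative toy data, not from the repository.
import pandas as pd
from scipy.stats import friedmanchisquare
from util.evaluation import posthoc_friedman

variables = ["Privilege", "Protect", "Neutral"]
toy = pd.DataFrame({
    "Privilege_Rank": [1, 1, 2, 1, 3, 1],
    "Protect_Rank":   [3, 2, 3, 3, 2, 3],
    "Neutral_Rank":   [2, 3, 1, 2, 1, 2],
})

# Omnibus Friedman test over the rank columns, then the new pairwise follow-up.
stat, p = friedmanchisquare(*[toy[v + "_Rank"] for v in variables])
print(f"Friedman chi2={stat:.3f}, p={p:.3f}")
for row in posthoc_friedman(toy, variables):
    print(f'{row["Group1"]} vs {row["Group2"]}: Z={row["Z"]:.3f}, p={row["p-value"]:.3f}')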
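On the Wilcoxon branch added above: scipy.stats.wilcoxon is a paired (signed-rank) test, so the two rank columns must come from the same rows, and the len(data) > 20 guard is a normal-approximation rule of thumb rather than a scipy requirement; the small-sample branch stores a string message in the "p-value" slot, so downstream readers of pairwise_results should expect a non-numeric value there. A self-contained toy check of the same call shape (numbers invented, not from the repository):

# Illustrative only: paired Wilcoxon signed-rank test on two equal-length columns.
import numpy as np
from scipy.stats import wilcoxon

rng = np.random.default_rng(0)
a = rng.normal(loc=0.0, scale=1.0, size=25)
b = a + rng.normal(loc=0.3, scale=0.5, size=25)  # paired samples with a systematic shift

stat, p = wilcoxon(a, b)  # same two-column, paired call shape used in statistical_tests
print(f"W={stat:.1f}, p={p:.4f}")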