Upload mc_auroc.py
Browse files- mc_auroc.py +85 -4
mc_auroc.py
CHANGED
@@ -36,7 +36,7 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
|
|
36 |
features=[
|
37 |
datasets.Features(
|
38 |
{
|
39 |
-
"predictions":datasets.Sequence(datasets.Value("float")),
|
40 |
"references": datasets.Value("int8")
|
41 |
}
|
42 |
),
|
@@ -46,6 +46,45 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
|
|
46 |
],
|
47 |
)
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
def _compute(self, predictions: Sequence[Sequence[float]], references: Sequence[int]):
|
50 |
"""
|
51 |
Computes the average AUROC score for multi-class classification problems.
|
@@ -55,16 +94,58 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
|
|
55 |
n_classes = list(range(len(probabilities[0])))
|
56 |
fpr = dict()
|
57 |
tpr = dict()
|
|
|
58 |
roc_auc = dict()
|
|
|
|
|
59 |
for i in range(len(n_classes)):
|
60 |
-
fpr[i], tpr[i],
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
roc_auc[i] = auc(fpr[i], tpr[i])
|
63 |
|
64 |
# Compute average AUC
|
65 |
average_auc = numpy.mean(list(roc_auc.values()))
|
|
|
|
|
66 |
|
67 |
return {
|
68 |
"mc_auroc_score": average_auc,
|
69 |
-
"mc_auroc_ci":
|
70 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
features=[
|
37 |
datasets.Features(
|
38 |
{
|
39 |
+
"predictions": datasets.Sequence(datasets.Value("float")),
|
40 |
"references": datasets.Value("int8")
|
41 |
}
|
42 |
),
|
|
|
46 |
],
|
47 |
)
|
48 |
|
49 |
+
def _evaluate_statistics(self, variates, coverage):
    """Evaluates the equal-tailed interval bounds of a simulated distribution


    Parameters
    ----------

    variates : numpy.ndarray
        A 1-D array containing the simulated variates

    coverage : float
        A number, between 0 and 1 to indicate the desired coverage.  Typically,
        this number is set to 0.95 (95% coverage).


    Returns
    -------

    stats : (float, float)
        Lower and upper bounds of the equal-tailed interval that excludes
        ``(1 - coverage) / 2`` of the sorted variates on each side.


    Raises
    ------

    IndexError
        If ``variates`` is empty.

    """

    left_half = (1 - coverage) / 2  # size of excluded (half) area
    sorted_variates = numpy.sort(variates)
    n_variates = len(sorted_variates)

    # n.b.: we return the equally tailed range

    # 1-based rank of the score which would exclude the left_half (left)
    lower_rank = int(round(n_variates * left_half))

    # 1-based rank of the score which would exclude the right_half (right)
    upper_rank = int(round(n_variates * (1 - left_half)))

    # Convert ranks to 0-based positions and clamp them into the valid range.
    # Without the clamp a rank of 0 (small samples, or coverage close to 1)
    # becomes index -1, which silently wraps around to the *largest* variate.
    last_position = n_variates - 1
    lower_position = min(max(lower_rank - 1, 0), last_position)
    upper_position = min(max(upper_rank - 1, 0), last_position)

    lower = sorted_variates[lower_position]
    upper = sorted_variates[upper_position]

    return lower, upper
|
87 |
+
|
88 |
def _compute(self, predictions: Sequence[Sequence[float]], references: Sequence[int]):
|
89 |
"""
|
90 |
Computes the average AUROC score for multi-class classification problems.
|
|
|
94 |
n_classes = list(range(len(probabilities[0])))
|
95 |
fpr = dict()
|
96 |
tpr = dict()
|
97 |
+
thresholds = dict()
|
98 |
roc_auc = dict()
|
99 |
+
roc_auc_ci_low = dict()
|
100 |
+
roc_auc_ci_high = dict()
|
101 |
for i in range(len(n_classes)):
|
102 |
+
fpr[i], tpr[i], thresholds[i] = roc_curve(y_true=[1 if x == n_classes[i] else 0 for x in references],
|
103 |
+
y_score=[prob[i] for prob in probabilities])
|
104 |
+
|
105 |
+
confusion_matrices = self._get_CMs(i, probabilities, references, thresholds)
|
106 |
+
|
107 |
+
low_ci_tpr, high_ci_tpr = [0] * len(thresholds[i]), [0] * len(thresholds[i])
|
108 |
+
位 = 1.0
|
109 |
+
for k in range(len(thresholds[i])):
|
110 |
+
variates = numpy.random.beta(confusion_matrices[k]["TP"] + 位, confusion_matrices[k]["FN"] + 位, 1000000)
|
111 |
+
low_ci_tpr[k], high_ci_tpr[k] = self._evaluate_statistics(variates, 0.95)
|
112 |
+
|
113 |
+
roc_auc_ci_low[i] = auc(fpr[i], low_ci_tpr)
|
114 |
+
roc_auc_ci_high[i] = auc(fpr[i], high_ci_tpr)
|
115 |
+
|
116 |
+
|
117 |
+
|
118 |
roc_auc[i] = auc(fpr[i], tpr[i])
|
119 |
|
120 |
# Compute average AUC
|
121 |
average_auc = numpy.mean(list(roc_auc.values()))
|
122 |
+
average_auc_ci_low = numpy.mean(list(roc_auc_ci_low.values()))
|
123 |
+
average_auc_ci_high = numpy.mean(list(roc_auc_ci_high.values()))
|
124 |
|
125 |
return {
|
126 |
"mc_auroc_score": average_auc,
|
127 |
+
"mc_auroc_ci": (average_auc_ci_low, average_auc_ci_high)
|
128 |
}
|
129 |
+
|
130 |
+
def _get_CMs(self, i, probabilities, references, thresholds):
    """Builds one one-vs-rest confusion matrix per threshold for class ``i``


    Parameters
    ----------

    i : int
        Index of the class treated as the positive class

    probabilities : sequence of sequences
        Per-sample scores; ``probabilities[j][i]`` is read as the score of
        sample ``j`` for class ``i``

    references : sequence
        Per-sample labels; a sample counts as positive when its label equals
        ``i``

    thresholds : mapping or sequence
        Container indexed by ``i`` whose entry is the iterable of thresholds
        to evaluate


    Returns
    -------

    confusion_matrices : list of dict
        One dictionary per threshold, in threshold order, with the keys
        ``"TP"``, ``"FP"``, ``"TN"``, ``"FN"``, ``"threshold"`` and
        ``"class"``.

    """
    # Maps (score reaches the threshold, label is the positive class) to the
    # confusion-matrix cell that the sample falls into.
    cell_for = {
        (True, True): "TP",
        (True, False): "FP",
        (False, True): "FN",
        (False, False): "TN",
    }

    matrices = []
    for cutoff in thresholds[i]:
        tally = {"TP": 0, "FP": 0, "TN": 0, "FN": 0}
        for sample_idx, scores in enumerate(probabilities):
            flagged = bool(scores[i] >= cutoff)
            is_target = bool(references[sample_idx] == i)
            tally[cell_for[flagged, is_target]] += 1
        tally["threshold"] = cutoff
        tally["class"] = i
        matrices.append(tally)

    return matrices
|