mfajcik committed on
Commit
439a704
verified
1 Parent(s): 0c400ac

Upload mc_auroc.py

Browse files
Files changed (1) hide show
  1. mc_auroc.py +85 -4
mc_auroc.py CHANGED
@@ -36,7 +36,7 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
36
  features=[
37
  datasets.Features(
38
  {
39
- "predictions":datasets.Sequence(datasets.Value("float")),
40
  "references": datasets.Value("int8")
41
  }
42
  ),
@@ -46,6 +46,45 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
46
  ],
47
  )
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def _compute(self, predictions: Sequence[Sequence[float]], references: Sequence[int]):
50
  """
51
  Computes the average AUROC score for multi-class classification problems.
@@ -55,16 +94,58 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
55
  n_classes = list(range(len(probabilities[0])))
56
  fpr = dict()
57
  tpr = dict()
 
58
  roc_auc = dict()
 
 
59
  for i in range(len(n_classes)):
60
- fpr[i], tpr[i], _ = roc_curve(y_true=[1 if x == n_classes[i] else 0 for x in references],
61
- y_score=[prob[i] for prob in probabilities])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  roc_auc[i] = auc(fpr[i], tpr[i])
63
 
64
  # Compute average AUC
65
  average_auc = numpy.mean(list(roc_auc.values()))
 
 
66
 
67
  return {
68
  "mc_auroc_score": average_auc,
69
- "mc_auroc_ci": "Not implemented yet."
70
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  features=[
37
  datasets.Features(
38
  {
39
+ "predictions": datasets.Sequence(datasets.Value("float")),
40
  "references": datasets.Value("int8")
41
  }
42
  ),
 
46
  ],
47
  )
48
 
49
+ def _evaluate_statistics(self, variates, coverage):
50
+ """Evaluates the left and right margins for a given M-C distribution
51
+
52
+
53
+ Parameters
54
+ ----------
55
+
56
+ variates : numpy.ndarray
57
+ A 1-D array containing the simulated variates
58
+
59
+ coverage : float
60
+ A number, between 0 and 1 to indicate the desired coverage. Typically,
61
+ this number is set to 0.95 (95% coverage).
62
+
63
+
64
+ Returns
65
+ -------
66
+
67
+ stats : (float, float, float, float)
68
+ mean, mode and credible intervals for the input simulation
69
+
70
+ """
71
+
72
+ left_half = (1 - coverage) / 2 # size of excluded (half) area
73
+ sorted_variates = numpy.sort(variates)
74
+
75
+ # n.b.: we return the equally tailed range
76
+
77
+ # calculates position of score which would exclude the left_half (left)
78
+ lower_index = int(round(len(variates) * left_half))
79
+
80
+ # calculates position of score which would exclude the right_half (right)
81
+ upper_index = int(round(len(variates) * (1 - left_half)))
82
+
83
+ lower = sorted_variates[lower_index - 1]
84
+ upper = sorted_variates[upper_index - 1]
85
+
86
+ return lower, upper
87
+
88
  def _compute(self, predictions: Sequence[Sequence[float]], references: Sequence[int]):
89
  """
90
  Computes the average AUROC score for multi-class classification problems.
 
94
  n_classes = list(range(len(probabilities[0])))
95
  fpr = dict()
96
  tpr = dict()
97
+ thresholds = dict()
98
  roc_auc = dict()
99
+ roc_auc_ci_low = dict()
100
+ roc_auc_ci_high = dict()
101
  for i in range(len(n_classes)):
102
+ fpr[i], tpr[i], thresholds[i] = roc_curve(y_true=[1 if x == n_classes[i] else 0 for x in references],
103
+ y_score=[prob[i] for prob in probabilities])
104
+
105
+ confusion_matrices = self._get_CMs(i, probabilities, references, thresholds)
106
+
107
+ low_ci_tpr, high_ci_tpr = [0] * len(thresholds[i]), [0] * len(thresholds[i])
108
+ 位 = 1.0
109
+ for k in range(len(thresholds[i])):
110
+ variates = numpy.random.beta(confusion_matrices[k]["TP"] + 位, confusion_matrices[k]["FN"] + 位, 1000000)
111
+ low_ci_tpr[k], high_ci_tpr[k] = self._evaluate_statistics(variates, 0.95)
112
+
113
+ roc_auc_ci_low[i] = auc(fpr[i], low_ci_tpr)
114
+ roc_auc_ci_high[i] = auc(fpr[i], high_ci_tpr)
115
+
116
+
117
+
118
  roc_auc[i] = auc(fpr[i], tpr[i])
119
 
120
  # Compute average AUC
121
  average_auc = numpy.mean(list(roc_auc.values()))
122
+ average_auc_ci_low = numpy.mean(list(roc_auc_ci_low.values()))
123
+ average_auc_ci_high = numpy.mean(list(roc_auc_ci_high.values()))
124
 
125
  return {
126
  "mc_auroc_score": average_auc,
127
+ "mc_auroc_ci": (average_auc_ci_low, average_auc_ci_high)
128
  }
129
+
130
+ def _get_CMs(self, i, probabilities, references, thresholds):
131
+ confusion_matrices = []
132
+ for threshold in thresholds[i]:
133
+ TP = 0
134
+ FP = 0
135
+ TN = 0
136
+ FN = 0
137
+ for j in range(len(probabilities)):
138
+ if probabilities[j][i] >= threshold:
139
+ if references[j] == i:
140
+ TP += 1
141
+ else:
142
+ FP += 1
143
+ else:
144
+ if references[j] == i:
145
+ FN += 1
146
+ else:
147
+ TN += 1
148
+ cm = {"TP": TP, "FP": FP, "TN": TN, "FN": FN, "threshold": threshold, "class": i}
149
+ confusion_matrices.append(cm)
150
+
151
+ return confusion_matrices