cyk
SAA+
32faf2b
raw
history blame
8.79 kB
import numpy as np
from skimage import measure
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
def calculate_max_f1(gt, scores):
    """Return the best F1 score over all thresholds and the matching threshold.

    Args:
        gt: Ground-truth labels, passed straight to ``precision_recall_curve``.
        scores: Predicted scores, passed straight to ``precision_recall_curve``.

    Returns:
        A ``(max_f1, threshold)`` tuple: the largest F1 found along the
        precision/recall curve and the threshold stored at the same index.
    """
    precision, recall, thresholds = precision_recall_curve(gt, scores)

    numerator = 2 * precision * recall
    denominator = precision + recall

    # Positions where precision + recall == 0 keep the pre-filled value 0
    # instead of producing a division-by-zero NaN.
    f1_scores = np.zeros_like(numerator)
    np.divide(numerator, denominator, out=f1_scores, where=denominator != 0)

    best = np.argmax(f1_scores)
    return f1_scores[best], thresholds[best]
def metric_cal(scores, gt_list, gt_mask_list, cal_pro=False):
    """Compute image-level, pixel-level and optional region-level metrics.

    Args:
        scores: Array-like of anomaly score maps whose first axis indexes the
            images; all remaining axes are treated as the pixels of one image.
        gt_list: Per-image ground-truth labels (cast to ``int``).
        gt_mask_list: Per-pixel ground-truth masks (cast to ``int``); must
            flatten to the same length as ``scores``.
        cal_pro: When true, also compute the region-level max-F1 via
            ``calculate_max_f1_region``; otherwise ``'r_f1'`` is reported as 0.

    Returns:
        A dict with keys ``'i_roc'``, ``'p_roc'``, ``'i_f1'``, ``'p_f1'`` and
        ``'r_f1'`` (all scaled to percent) plus ``'i_thresh'`` and
        ``'p_thresh'`` (the thresholds that achieve the max F1 values).
    """
    scores = np.asarray(scores)

    # Image-level score: the maximum pixel score of each image.
    img_scores = scores.reshape(scores.shape[0], -1).max(axis=1)
    gt_list = np.asarray(gt_list, dtype=int)
    img_roc_auc = roc_auc_score(gt_list, img_scores)
    img_f1, img_threshold = calculate_max_f1(gt_list, img_scores)

    # Pixel-level metrics: flatten once and reuse, these arrays can be large.
    flat_gt = np.asarray(gt_mask_list, dtype=int).ravel()
    flat_scores = scores.ravel()
    pxl_f1, pxl_threshold = calculate_max_f1(flat_gt, flat_scores)
    per_pixel_rocauc = roc_auc_score(flat_gt, flat_scores)

    # Region-level max-F1 is expensive, so it is only computed on request.
    if cal_pro:
        max_f1_region = calculate_max_f1_region(gt_mask_list, scores)
    else:
        max_f1_region = 0

    result_dict = {
        'i_roc': img_roc_auc * 100,
        'p_roc': per_pixel_rocauc * 100,
        'i_f1': img_f1 * 100,
        'i_thresh': img_threshold,
        'p_f1': pxl_f1 * 100,
        'p_thresh': pxl_threshold,
        'r_f1': max_f1_region * 100,
    }
    return result_dict
def rescale(x):
    """Min-max normalise ``x`` so its values span ``[0, 1]``.

    Args:
        x: Array-like of numeric values.

    Returns:
        A float array of the same shape. An empty input is returned as an
        empty float array, and an input whose values are all equal maps to
        all zeros (there is no range to stretch, and dividing by the zero
        range would otherwise yield NaN).
    """
    x = np.asarray(x)
    if x.size == 0:
        return x.astype(float)

    lowest = x.min()
    span = x.max() - lowest
    if span == 0:
        return np.zeros_like(x, dtype=float)
    return (x - lowest) / span
def cal_pro_metric(labeled_imgs, score_imgs, fpr_thresh=0.3, max_steps=200):
    """Compute the area under the per-region-overlap (PRO) vs. FPR curve.

    The score maps are binarised at ``max_steps`` evenly spaced thresholds
    going down from the maximum score. At each threshold the mean PRO over
    all connected ground-truth regions and the pixel-level false-positive
    rate are recorded; the curve is then integrated over the points whose
    FPR does not exceed ``fpr_thresh``.

    Args:
        labeled_imgs: Ground-truth masks, one 2-D mask per image; values
            greater than 0.45 are treated as anomalous.
        score_imgs: Anomaly score maps with the same shape as
            ``labeled_imgs``.
        fpr_thresh: Largest false-positive rate kept for the integration.
        max_steps: Number of thresholds to evaluate.

    Returns:
        The area under the mean-PRO curve, with the selected FPR values
        min-max rescaled by ``rescale`` before integration.
    """
    gt_masks = np.asarray(labeled_imgs) > 0.45
    score_imgs = np.asarray(score_imgs)

    max_th = score_imgs.max()
    min_th = score_imgs.min()
    delta = (max_th - min_th) / max_steps

    # The ground-truth regions do not depend on the threshold, so label them
    # once instead of once per threshold step.
    regions_per_image = []
    for gt_mask in gt_masks:
        label_map = measure.label(gt_mask, connectivity=2)
        regions = []
        for prop in measure.regionprops(label_map):
            x_min, y_min, x_max, y_max = prop.bbox
            # filled_image is the region's mask inside its bounding box.
            regions.append(
                (slice(x_min, x_max), slice(y_min, y_max), prop.filled_image, prop.area)
            )
        regions_per_image.append(regions)

    masks_neg = ~gt_masks
    negative_count = masks_neg.sum()

    pros_mean = []
    fprs = []
    for step in range(max_steps):
        thred = max_th - step * delta
        binary_score_maps = score_imgs > thred

        # PRO: for every ground-truth region, the share of its area that the
        # binarised prediction covers.
        pro = []
        for pred_mask, regions in zip(binary_score_maps, regions_per_image):
            for rows, cols, region_mask, area in regions:
                overlap = np.logical_and(pred_mask[rows, cols], region_mask)
                pro.append(overlap.astype(np.float32).sum() / area)
        pros_mean.append(np.mean(pro) if pro else np.nan)

        # Pixel-level false-positive rate at this threshold.
        fprs.append(np.logical_and(masks_neg, binary_score_maps).sum() / negative_count)

    pros_mean = np.array(pros_mean)
    fprs = np.array(fprs)

    # Keep only the operating points up to the requested FPR and stretch the
    # kept FPR values to [0, 1] so the area is comparable across fpr_thresh.
    idx = fprs <= fpr_thresh
    fprs_selected = rescale(fprs[idx])
    pros_mean_selected = pros_mean[idx]
    pro_auc_score = auc(fprs_selected, pros_mean_selected)
    return pro_auc_score
def calculate_max_f1_region(labeled_imgs, score_imgs, pro_thresh=0.6, max_steps=200):
    """Return the best region-level F1 over a sweep of score thresholds.

    The score maps are binarised at ``max_steps`` evenly spaced thresholds
    going down from the maximum score. At each threshold every connected
    predicted region is compared with every connected ground-truth region of
    the same image: the IoU of prediction and ground truth is measured inside
    the bounding box enclosing both regions, and the predicted region keeps
    its best IoU. A predicted region counts as a match when that IoU is at
    least ``pro_thresh``. Recall is matches / ground-truth regions, precision
    is matches / predicted regions.

    Args:
        labeled_imgs: Ground-truth masks, one 2-D mask per image; cast to
            ``bool``.
        score_imgs: Anomaly score maps with the same shape as
            ``labeled_imgs``.
        pro_thresh: Minimum IoU for a predicted region to count as a match.
        max_steps: Number of thresholds to evaluate.

    Returns:
        The maximum F1 over all thresholds. The recall and precision at that
        threshold are printed.
    """
    gt_masks = np.array(labeled_imgs).astype(bool)
    score_imgs = np.asarray(score_imgs)

    max_th = score_imgs.max()
    min_th = score_imgs.min()
    delta = (max_th - min_th) / max_steps

    # Ground-truth regions do not depend on the threshold: label each mask
    # once and keep only the bounding boxes, which is all the sweep needs.
    gt_bboxes = [
        [prop.bbox for prop in measure.regionprops(measure.label(gt_mask, connectivity=2))]
        for gt_mask in gt_masks
    ]
    gt_region_number = sum(len(bboxes) for bboxes in gt_bboxes)

    f1_list = []
    recall_list = []
    precision_list = []
    for step in range(max_steps):
        thred = max_th - step * delta
        binary_score_maps = score_imgs > thred

        pro = []  # best IoU of every predicted region
        predict_region_number = 0
        for pred_mask, gt_mask, bboxes in zip(binary_score_maps, gt_masks, gt_bboxes):
            score_map = measure.label(pred_mask, connectivity=2)
            score_props = measure.regionprops(score_map)
            predict_region_number += len(score_props)

            for score_prop in score_props:
                x_min_0, y_min_0, x_max_0, y_max_0 = score_prop.bbox
                # Starts at 0 so a prediction with no ground truth scores 0.
                best_iou = 0
                for x_min_1, y_min_1, x_max_1, y_max_1 in bboxes:
                    # Window enclosing both the predicted and the GT region.
                    rows = slice(min(x_min_0, x_min_1), max(x_max_0, x_max_1))
                    cols = slice(min(y_min_0, y_min_1), max(y_max_0, y_max_1))
                    cropped_pred_label = pred_mask[rows, cols]
                    cropped_gt_label = gt_mask[rows, cols]

                    intersection = np.logical_and(
                        cropped_pred_label, cropped_gt_label).astype(np.float32).sum()
                    union = np.logical_or(
                        cropped_pred_label, cropped_gt_label).astype(np.float32).sum()
                    best_iou = max(best_iou, intersection / union)
                pro.append(best_iou)

        pro = np.array(pro)
        if gt_region_number == 0 or predict_region_number == 0:
            print(f'gt_number: {gt_region_number}, pred_number: {predict_region_number}')
            recall = 0
            precision = 0
            f1 = 0
        else:
            matched = np.array(pro >= pro_thresh).astype(np.float32).sum()
            recall = matched / gt_region_number
            precision = matched / predict_region_number
            if recall == 0 or precision == 0:
                f1 = 0
            else:
                f1 = 2 * recall * precision / (recall + precision)

        f1_list.append(f1)
        recall_list.append(recall)
        precision_list.append(precision)

    f1_list = np.array(f1_list)
    max_f1 = f1_list.max()
    best_step = f1_list.argmax()
    cor_recall = recall_list[best_step]
    cor_precision = precision_list[best_step]
    print(f'cor recall: {cor_recall}, cor precision: {cor_precision}')
    return max_f1