mkthoma committed
Commit f742771 · Parent: f92e416

gradio utils update

Files changed (1)
  1. custom_library/gradio_utils.py +128 -0
custom_library/gradio_utils.py ADDED
@@ -0,0 +1,128 @@
+ from typing import List
+ import torch
+ import numpy as np
+ import cv2
+ import random
+ from pytorch_grad_cam.base_cam import BaseCAM
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+
+ # Bounding box predicted on image
+ def draw_predictions(image: np.ndarray, boxes: List[List], class_labels: List[str]) -> np.ndarray:
+
+     colors = [[random.randint(0, 255) for _ in range(3)] for name in class_labels]
+
+     im = np.array(image)
+     height, width, _ = im.shape
+     bbox_thick = int(0.6 * (height + width) / 600)
+
+     # Create a Rectangle patch
+     for box in boxes:
+         assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
+         class_pred = box[0]
+         conf = box[1]
+         box = box[2:]
+         upper_left_x = box[0] - box[2] / 2
+         upper_left_y = box[1] - box[3] / 2
+
+         x1 = int(upper_left_x * width)
+         y1 = int(upper_left_y * height)
+
+         x2 = x1 + int(box[2] * width)
+         y2 = y1 + int(box[3] * height)
+
+         cv2.rectangle(
+             image,
+             (x1, y1), (x2, y2),
+             color=colors[int(class_pred)],
+             thickness=bbox_thick
+         )
+         text = f"{class_labels[int(class_pred)]}: {conf:.2f}"
+         t_size = cv2.getTextSize(text, 0, 0.7, thickness=bbox_thick // 2)[0]
+         c3 = (x1 + t_size[0], y1 - t_size[1] - 3)
+
+         cv2.rectangle(image, (x1, y1), c3, colors[int(class_pred)], -1)
+         cv2.putText(
+             image,
+             text,
+             (x1, y1 - 2),
+             cv2.FONT_HERSHEY_SIMPLEX,
+             0.7,
+             (0, 0, 0),
+             bbox_thick // 2,
+             lineType=cv2.LINE_AA,
+         )
+
+     return image
+
+ # GradCAM outputs
+ class YoloCAM(BaseCAM):
+     def __init__(self, model, target_layers, use_cuda=False,
+                  reshape_transform=None):
+         super(YoloCAM, self).__init__(model,
+                                       target_layers,
+                                       use_cuda,
+                                       reshape_transform,
+                                       uses_gradients=False)
+
+     def forward(self,
+                 input_tensor: torch.Tensor,
+                 scaled_anchors: torch.Tensor,
+                 targets: List[torch.nn.Module],
+                 eigen_smooth: bool = False) -> np.ndarray:
+
+         if self.cuda:
+             input_tensor = input_tensor.cuda()
+
+         if self.compute_input_gradient:
+             input_tensor = torch.autograd.Variable(input_tensor,
+                                                    requires_grad=True)
+
+         outputs = self.activations_and_grads(input_tensor)
+         if targets is None:
+             bboxes = [[] for _ in range(1)]
+             for i in range(3):
+                 batch_size, A, S, _, _ = outputs[i].shape
+                 anchor = scaled_anchors[i]
+                 boxes_scale_i = cells_to_bboxes(
+                     outputs[i], anchor, S=S, is_preds=True
+                 )
+                 for idx, (box) in enumerate(boxes_scale_i):
+                     bboxes[idx] += box
+
+             nms_boxes = non_max_suppression(
+                 bboxes[0], iou_threshold=0.5, threshold=0.4, box_format="midpoint",
+             )
+             # target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
+             target_categories = [box[0] for box in nms_boxes]
+             targets = [ClassifierOutputTarget(
+                 category) for category in target_categories]
+
+         if self.uses_gradients:
+             self.model.zero_grad()
+             loss = sum([target(output)
+                         for target, output in zip(targets, outputs)])
+             loss.backward(retain_graph=True)
+
+         # In most of the saliency attribution papers, the saliency is
+         # computed with a single target layer.
+         # Commonly it is the last convolutional layer.
+         # Here we support passing a list with multiple target layers.
+         # It will compute the saliency image for every image,
+         # and then aggregate them (with a default mean aggregation).
+         # This gives you more flexibility in case you just want to
+         # use all conv layers for example, all Batchnorm layers,
+         # or something else.
+         cam_per_layer = self.compute_cam_per_layer(input_tensor,
+                                                    targets,
+                                                    eigen_smooth)
+         return self.aggregate_multi_layers(cam_per_layer)
+
+     def get_cam_image(self,
+                       input_tensor,
+                       target_layer,
+                       target_category,
+                       activations,
+                       grads,
+                       eigen_smooth):
+         return get_2d_projection(activations)
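
Usage note (not part of this commit): the short sketch below exercises draw_predictions on a placeholder image. The class names, image size, and box values are illustrative assumptions only; draw_predictions expects midpoint-format boxes normalised to [0, 1], laid out as [class_pred, confidence, x_center, y_center, width, height].

import numpy as np
from custom_library.gradio_utils import draw_predictions

# Placeholder class names and a blank 640x640 test image (illustrative values only).
class_labels = ["person", "car", "dog"]
image = np.zeros((640, 640, 3), dtype=np.uint8)

# Boxes are [class_pred, confidence, x_center, y_center, width, height],
# with coordinates normalised to [0, 1] (midpoint format).
boxes = [
    [0, 0.92, 0.50, 0.50, 0.30, 0.40],
    [1, 0.75, 0.25, 0.70, 0.20, 0.15],
]
annotated = draw_predictions(image, boxes, class_labels)
print(annotated.shape)  # the input array, drawn on in place and returned

For YoloCAM, a plausible pattern (again an assumption, not code from this commit) is to build cam = YoloCAM(model=model, target_layers=[...], use_cuda=False) and call cam.forward(input_tensor, scaled_anchors, targets=None) directly, since this forward adds a scaled_anchors argument that the base class's __call__ does not pass through. Passing targets=None makes forward run cells_to_bboxes and non_max_suppression to pick target classes; those helpers are not imported in this file and must be available from the project's YOLO utilities for that code path to work.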