Spanicin committed
Commit 6c8c772 · verified · Parent: b5d59e5

Update videoretalking/third_part/GPEN/face_detect/data/data_augment.py

videoretalking/third_part/GPEN/face_detect/data/data_augment.py CHANGED
@@ -1,237 +1,237 @@
 import cv2
 import numpy as np
 import random
-from face_detect.utils.box_utils import matrix_iof
+from videoretalking.third_part.GPEN.face_detect.utils.box_utils import matrix_iof
 
 
 def _crop(image, boxes, labels, landm, img_dim):
     height, width, _ = image.shape
     pad_image_flag = True
 
     for _ in range(250):
         """
         if random.uniform(0, 1) <= 0.2:
             scale = 1.0
         else:
             scale = random.uniform(0.3, 1.0)
         """
         PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
         scale = random.choice(PRE_SCALES)
         short_side = min(width, height)
         w = int(scale * short_side)
         h = w
 
         if width == w:
             l = 0
         else:
             l = random.randrange(width - w)
         if height == h:
             t = 0
         else:
             t = random.randrange(height - h)
         roi = np.array((l, t, l + w, t + h))
 
         value = matrix_iof(boxes, roi[np.newaxis])
         flag = (value >= 1)
         if not flag.any():
             continue
 
         centers = (boxes[:, :2] + boxes[:, 2:]) / 2
         mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
         boxes_t = boxes[mask_a].copy()
         labels_t = labels[mask_a].copy()
         landms_t = landm[mask_a].copy()
         landms_t = landms_t.reshape([-1, 5, 2])
 
         if boxes_t.shape[0] == 0:
             continue
 
         image_t = image[roi[1]:roi[3], roi[0]:roi[2]]
 
         boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
         boxes_t[:, :2] -= roi[:2]
         boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
         boxes_t[:, 2:] -= roi[:2]
 
         # landm
         landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
         landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
         landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
         landms_t = landms_t.reshape([-1, 10])
 
 
         # make sure that the cropped image contains at least one face > 16 pixel at training image scale
         b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
         b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
         mask_b = np.minimum(b_w_t, b_h_t) > 0.0
         boxes_t = boxes_t[mask_b]
         labels_t = labels_t[mask_b]
         landms_t = landms_t[mask_b]
 
         if boxes_t.shape[0] == 0:
             continue
 
         pad_image_flag = False
 
         return image_t, boxes_t, labels_t, landms_t, pad_image_flag
     return image, boxes, labels, landm, pad_image_flag
 
 
 def _distort(image):
 
     def _convert(image, alpha=1, beta=0):
         tmp = image.astype(float) * alpha + beta
         tmp[tmp < 0] = 0
         tmp[tmp > 255] = 255
         image[:] = tmp
 
     image = image.copy()
 
     if random.randrange(2):
 
         #brightness distortion
         if random.randrange(2):
             _convert(image, beta=random.uniform(-32, 32))
 
         #contrast distortion
         if random.randrange(2):
             _convert(image, alpha=random.uniform(0.5, 1.5))
 
         image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 
         #saturation distortion
         if random.randrange(2):
             _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
 
         #hue distortion
         if random.randrange(2):
             tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
             tmp %= 180
             image[:, :, 0] = tmp
 
         image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
 
     else:
 
         #brightness distortion
         if random.randrange(2):
             _convert(image, beta=random.uniform(-32, 32))
 
         image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 
         #saturation distortion
         if random.randrange(2):
             _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
 
         #hue distortion
         if random.randrange(2):
             tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
             tmp %= 180
             image[:, :, 0] = tmp
 
         image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
 
         #contrast distortion
         if random.randrange(2):
             _convert(image, alpha=random.uniform(0.5, 1.5))
 
     return image
 
 
 def _expand(image, boxes, fill, p):
     if random.randrange(2):
         return image, boxes
 
     height, width, depth = image.shape
 
     scale = random.uniform(1, p)
     w = int(scale * width)
     h = int(scale * height)
 
     left = random.randint(0, w - width)
     top = random.randint(0, h - height)
 
     boxes_t = boxes.copy()
     boxes_t[:, :2] += (left, top)
     boxes_t[:, 2:] += (left, top)
     expand_image = np.empty(
         (h, w, depth),
         dtype=image.dtype)
     expand_image[:, :] = fill
     expand_image[top:top + height, left:left + width] = image
     image = expand_image
 
     return image, boxes_t
 
 
 def _mirror(image, boxes, landms):
     _, width, _ = image.shape
     if random.randrange(2):
         image = image[:, ::-1]
         boxes = boxes.copy()
         boxes[:, 0::2] = width - boxes[:, 2::-2]
 
         # landm
         landms = landms.copy()
         landms = landms.reshape([-1, 5, 2])
         landms[:, :, 0] = width - landms[:, :, 0]
         tmp = landms[:, 1, :].copy()
         landms[:, 1, :] = landms[:, 0, :]
         landms[:, 0, :] = tmp
         tmp1 = landms[:, 4, :].copy()
         landms[:, 4, :] = landms[:, 3, :]
         landms[:, 3, :] = tmp1
         landms = landms.reshape([-1, 10])
 
     return image, boxes, landms
 
 
 def _pad_to_square(image, rgb_mean, pad_image_flag):
     if not pad_image_flag:
         return image
     height, width, _ = image.shape
     long_side = max(width, height)
     image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
     image_t[:, :] = rgb_mean
     image_t[0:0 + height, 0:0 + width] = image
     return image_t
 
 
 def _resize_subtract_mean(image, insize, rgb_mean):
     interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
     interp_method = interp_methods[random.randrange(5)]
     image = cv2.resize(image, (insize, insize), interpolation=interp_method)
     image = image.astype(np.float32)
     image -= rgb_mean
     return image.transpose(2, 0, 1)
 
 
 class preproc(object):
 
     def __init__(self, img_dim, rgb_means):
         self.img_dim = img_dim
         self.rgb_means = rgb_means
 
     def __call__(self, image, targets):
         assert targets.shape[0] > 0, "this image does not have gt"
 
         boxes = targets[:, :4].copy()
         labels = targets[:, -1].copy()
         landm = targets[:, 4:-1].copy()
 
         image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)
         image_t = _distort(image_t)
         image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag)
         image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
         height, width, _ = image_t.shape
         image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
         boxes_t[:, 0::2] /= width
         boxes_t[:, 1::2] /= height
 
         landm_t[:, 0::2] /= width
         landm_t[:, 1::2] /= height
 
         labels_t = np.expand_dims(labels_t, 1)
         targets_t = np.hstack((boxes_t, landm_t, labels_t))
 
         return image_t, targets_t
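
For orientation, here is a minimal sketch of how this module's preproc pipeline might be exercised once the import path matches this commit (i.e. running from the repository root). The img_dim and rgb_means values below are the usual RetinaFace training defaults, assumed here rather than taken from this file, and the annotation row is dummy data following the layout __call__ expects: 4 box coordinates, 10 landmark coordinates, then a class label.

# Usage sketch (assumptions: repo-root working directory; img_dim/rgb_means
# are common RetinaFace training defaults, not values read from this file).
import numpy as np

from videoretalking.third_part.GPEN.face_detect.data.data_augment import preproc

img_dim = 640                 # assumed training input size
rgb_means = (104, 117, 123)   # assumed per-channel BGR means

# Dummy 300x400 BGR image plus one annotation row laid out as
# [x1, y1, x2, y2, 5 * (lx, ly), label].
image = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)
targets = np.array([[50, 60, 150, 180,            # face box
                     70, 90, 120, 90, 95, 120,    # left eye, right eye, nose
                     75, 150, 115, 150,           # mouth corners
                     1]], dtype=np.float32)       # class label

p = preproc(img_dim, rgb_means)
image_t, targets_t = p(image, targets)

print(image_t.shape)    # (3, 640, 640): CHW float32, mean-subtracted
print(targets_t.shape)  # (N, 15): normalized boxes + landmarks + label

Note that _expand is defined but never called from preproc.__call__, so callers who want that augmentation would need to invoke it separately.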