Update videoretalking/third_part/GPEN/face_detect/data/data_augment.py
Browse files
videoretalking/third_part/GPEN/face_detect/data/data_augment.py
CHANGED
@@ -1,237 +1,237 @@
|
|
1 |
-
import cv2
|
2 |
-
import numpy as np
|
3 |
-
import random
|
4 |
-
from face_detect.utils.box_utils import matrix_iof
|
5 |
-
|
6 |
-
|
7 |
-
def _crop(image, boxes, labels, landm, img_dim):
    """Randomly crop a square patch that still contains at least one face.

    Up to 250 candidate squares are sampled. The side length is a fixed
    fraction of the shorter image side and the position is uniform. A
    candidate is kept when at least one box scores ``>= 1`` in
    ``matrix_iof(boxes, roi)`` (presumably: the box lies fully inside the
    crop -- confirm against ``box_utils``) and at least one box centre falls
    strictly inside the crop.

    Args:
        image: ``(H, W, C)`` image array.
        boxes: ``(N, 4)`` array of ``x1, y1, x2, y2`` boxes in pixels.
        labels: length-``N`` array of per-box labels.
        landm: ``(N, 10)`` array, five ``(x, y)`` landmark pairs per box.
        img_dim: training input size; only used to rescale the box sizes
            checked by the final size filter.

    Returns:
        ``(image, boxes, labels, landms, pad_image_flag)``. On success the
        image is the crop, the annotations are clipped to the crop and
        expressed relative to its top-left corner, and the flag is ``False``.
        If no candidate is accepted the inputs are returned unchanged with
        the flag set to ``True``.
    """
    # Candidate crop sizes, as fractions of the shorter image side.
    pre_scales = (0.3, 0.45, 0.6, 0.8, 1.0)

    height, width, _ = image.shape
    short_side = min(width, height)

    for _ in range(250):
        scale = random.choice(pre_scales)
        w = int(scale * short_side)
        h = w

        # randrange(0) would raise, so a full-size side is pinned to offset 0.
        left = 0 if width == w else random.randrange(width - w)
        top = 0 if height == h else random.randrange(height - h)
        roi = np.array((left, top, left + w, top + h))

        value = matrix_iof(boxes, roi[np.newaxis])
        if not (value >= 1).any():
            continue

        # Keep only the boxes whose centre lies strictly inside the crop.
        centers = (boxes[:, :2] + boxes[:, 2:]) / 2
        mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
        boxes_t = boxes[mask_a].copy()
        labels_t = labels[mask_a].copy()
        landms_t = landm[mask_a].copy().reshape([-1, 5, 2])

        if boxes_t.shape[0] == 0:
            continue

        image_t = image[roi[1]:roi[3], roi[0]:roi[2]]

        # Clip boxes to the crop, then shift them into crop coordinates.
        boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
        boxes_t[:, :2] -= roi[:2]
        boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
        boxes_t[:, 2:] -= roi[:2]

        # Shift landmarks into crop coordinates and clamp them to the crop.
        landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
        landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
        landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
        landms_t = landms_t.reshape([-1, 10])

        # Box size at training scale. The threshold is 0.0, so this filter
        # only removes degenerate boxes; no 16-pixel minimum is enforced.
        b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
        b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
        mask_b = np.minimum(b_w_t, b_h_t) > 0.0
        boxes_t = boxes_t[mask_b]
        labels_t = labels_t[mask_b]
        landms_t = landms_t[mask_b]

        if boxes_t.shape[0] == 0:
            continue

        return image_t, boxes_t, labels_t, landms_t, False

    # No acceptable crop was found: hand back the inputs and ask for padding.
    return image, boxes, labels, landm, True
|
78 |
-
|
79 |
-
|
80 |
-
def _distort(image):
    """Apply random photometric jitter and return a new image.

    Brightness, contrast, saturation and hue are each perturbed with
    probability 1/2. A first coin flip decides whether contrast is applied
    before or after the saturation/hue step. The input array is not
    modified; the work is done on a copy.

    Args:
        image: image array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2HSV``. The ``% 180`` hue wrap suggests 8-bit BGR
            input -- TODO confirm against the caller.

    Returns:
        The distorted image.
    """

    def _convert(channel, alpha=1, beta=0):
        # In place: channel * alpha + beta, clamped to [0, 255].
        channel[:] = np.clip(channel.astype(float) * alpha + beta, 0, 255)

    def _random_brightness(img):
        if random.randrange(2):
            _convert(img, beta=random.uniform(-32, 32))

    def _random_contrast(img):
        if random.randrange(2):
            _convert(img, alpha=random.uniform(0.5, 1.5))

    def _random_saturation_hue(img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # saturation distortion
        if random.randrange(2):
            _convert(img[:, :, 1], alpha=random.uniform(0.5, 1.5))

        # hue distortion: shift, then wrap back into [0, 180)
        if random.randrange(2):
            hue = img[:, :, 0].astype(int) + random.randint(-18, 18)
            hue %= 180
            img[:, :, 0] = hue

        return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)

    image = image.copy()

    # The random draws happen in the same order as in the two original
    # branches, so seeded runs give the same result.
    contrast_first = random.randrange(2)

    _random_brightness(image)
    if contrast_first:
        _random_contrast(image)
    image = _random_saturation_hue(image)
    if not contrast_first:
        _random_contrast(image)

    return image
|
139 |
-
|
140 |
-
|
141 |
-
def _expand(image, boxes, fill, p):
    """Place the image at a random spot on a larger canvas half the time.

    Args:
        image: ``(H, W, C)`` image array.
        boxes: ``(N, 4)`` array of ``x1, y1, x2, y2`` boxes in pixels.
        fill: value (scalar or per-channel sequence) for the new border.
        p: upper bound of the canvas scale factor, drawn from ``[1, p]``.

    Returns:
        ``(image, boxes)``. With probability 1/2 the inputs are returned
        untouched (same objects). Otherwise a new canvas and a shifted copy
        of the boxes are returned.
    """
    if random.randrange(2):
        return image, boxes

    height, width, depth = image.shape

    scale = random.uniform(1, p)
    # With p < 1 the draw falls below 1 and the canvas would be smaller than
    # the image, making randint(0, negative) raise ValueError. Never shrink.
    w = max(width, int(scale * width))
    h = max(height, int(scale * height))

    left = random.randint(0, w - width)
    top = random.randint(0, h - height)

    # Shift both corners of every box by the paste offset.
    boxes_t = boxes.copy()
    boxes_t[:, :2] += (left, top)
    boxes_t[:, 2:] += (left, top)

    expand_image = np.empty((h, w, depth), dtype=image.dtype)
    expand_image[:, :] = fill
    expand_image[top:top + height, left:left + width] = image

    return expand_image, boxes_t
|
165 |
-
|
166 |
-
|
167 |
-
def _mirror(image, boxes, landms):
    """Flip the sample left-to-right with probability 1/2.

    Args:
        image: ``(H, W, C)`` image array.
        boxes: ``(N, 4)`` array of ``x1, y1, x2, y2`` boxes in pixels.
        landms: ``(N, 10)`` array, five ``(x, y)`` landmark pairs per box.

    Returns:
        ``(image, boxes, landms)``. When no flip is drawn the inputs are
        returned as-is. Otherwise the image is a reversed view, and the
        boxes and landmarks are mirrored copies.
    """
    _, width, _ = image.shape
    if not random.randrange(2):
        return image, boxes, landms

    flipped = image[:, ::-1]

    # Mirroring swaps the roles of x1 and x2: new x1 = W - x2, new x2 = W - x1.
    mirrored_boxes = boxes.copy()
    mirrored_boxes[:, 0::2] = width - mirrored_boxes[:, 2::-2]

    points = landms.copy().reshape([-1, 5, 2])
    points[:, :, 0] = width - points[:, :, 0]
    # Exchange landmark 0 with 1 and landmark 3 with 4 (presumably the
    # left/right eyes and mouth corners -- confirm against the annotations).
    points[:, [0, 1, 3, 4]] = points[:, [1, 0, 4, 3]]

    return flipped, mirrored_boxes, points.reshape([-1, 10])
|
187 |
-
|
188 |
-
|
189 |
-
def _pad_to_square(image, rgb_mean, pad_image_flag):
    """Pad the image to a square canvas, anchored at the top-left corner.

    Args:
        image: ``(H, W, C)`` image array.
        rgb_mean: fill value for the padded area (broadcast over 3 channels).
        pad_image_flag: when falsy the image is returned unchanged.

    Returns:
        The input image itself, or a new ``(S, S, 3)`` array with
        ``S = max(H, W)`` holding the image in its top-left corner.
    """
    if pad_image_flag:
        rows, cols, _ = image.shape
        side = max(rows, cols)
        canvas = np.empty((side, side, 3), dtype=image.dtype)
        canvas[...] = rgb_mean
        canvas[:rows, :cols] = image
        return canvas
    return image
|
198 |
-
|
199 |
-
|
200 |
-
def _resize_subtract_mean(image, insize, rgb_mean):
    """Resize to a square, subtract the mean and return channels-first data.

    Args:
        image: ``(H, W, C)`` image array.
        insize: side length of the square output.
        rgb_mean: value subtracted from the resized float image.

    Returns:
        ``float32`` array with axes reordered from ``(H, W, C)`` to
        ``(C, H, W)``. The interpolation method is picked at random from
        five OpenCV modes on every call.
    """
    candidates = (
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_NEAREST,
        cv2.INTER_LANCZOS4,
    )
    chosen = candidates[random.randrange(len(candidates))]
    resized = cv2.resize(image, (insize, insize), interpolation=chosen)
    centered = resized.astype(np.float32)
    # In-place subtraction keeps the float32 dtype.
    centered -= rgb_mean
    return centered.transpose(2, 0, 1)
|
207 |
-
|
208 |
-
|
209 |
-
class preproc(object):
    """Training-time augmentation pipeline for one face-detection sample.

    Calling an instance crops, distorts, pads, mirrors and resizes the image,
    and returns the annotations normalised by the pre-resize image size.
    """

    def __init__(self, img_dim, rgb_means):
        # Side length of the square network input.
        self.img_dim = img_dim
        # Value used for padding and subtracted from the resized image.
        self.rgb_means = rgb_means

    def __call__(self, image, targets):
        """Augment one sample.

        Args:
            image: ``(H, W, C)`` image array.
            targets: ``(N, K)`` array with ``N > 0``. Columns 0-3 are the
                box, the last column is the label, and the columns between
                are the landmarks (reshaped to five ``(x, y)`` pairs by the
                helpers, so ``K`` is expected to be 15).

        Returns:
            ``(image_t, targets_t)``: the processed image and an array of
            ``[box, landmarks, label]`` rows with coordinates divided by the
            image width and height measured just before resizing.
        """
        assert targets.shape[0] > 0, "this image does not have gt"

        # The in-place `/=` below cannot write float results into an integer
        # array (NumPy raises a casting error), so promote such targets first.
        if not np.issubdtype(targets.dtype, np.floating):
            targets = targets.astype(float)

        boxes = targets[:, :4].copy()
        labels = targets[:, -1].copy()
        landm = targets[:, 4:-1].copy()

        image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(
            image, boxes, labels, landm, self.img_dim)
        image_t = _distort(image_t)
        image_t = _pad_to_square(image_t, self.rgb_means, pad_image_flag)
        image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)

        # Record the size before resizing; the annotations are in this frame.
        height, width, _ = image_t.shape
        image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)

        # Even columns are x coordinates, odd columns are y coordinates.
        boxes_t[:, 0::2] /= width
        boxes_t[:, 1::2] /= height

        landm_t[:, 0::2] /= width
        landm_t[:, 1::2] /= height

        labels_t = np.expand_dims(labels_t, 1)
        targets_t = np.hstack((boxes_t, landm_t, labels_t))

        return image_t, targets_t
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
import random
|
4 |
+
from videoretalking.third_part.GPEN.face_detect.utils.box_utils import matrix_iof
|
5 |
+
|
6 |
+
|
7 |
+
def _crop(image, boxes, labels, landm, img_dim):
    """Randomly crop a square patch that still contains at least one face.

    Up to 250 candidate squares are sampled. The side length is a fixed
    fraction of the shorter image side and the position is uniform. A
    candidate is kept when at least one box scores ``>= 1`` in
    ``matrix_iof(boxes, roi)`` (presumably: the box lies fully inside the
    crop -- confirm against ``box_utils``) and at least one box centre falls
    strictly inside the crop.

    Args:
        image: ``(H, W, C)`` image array.
        boxes: ``(N, 4)`` array of ``x1, y1, x2, y2`` boxes in pixels.
        labels: length-``N`` array of per-box labels.
        landm: ``(N, 10)`` array, five ``(x, y)`` landmark pairs per box.
        img_dim: training input size; only used to rescale the box sizes
            checked by the final size filter.

    Returns:
        ``(image, boxes, labels, landms, pad_image_flag)``. On success the
        image is the crop, the annotations are clipped to the crop and
        expressed relative to its top-left corner, and the flag is ``False``.
        If no candidate is accepted the inputs are returned unchanged with
        the flag set to ``True``.
    """
    # Candidate crop sizes, as fractions of the shorter image side.
    pre_scales = (0.3, 0.45, 0.6, 0.8, 1.0)

    height, width, _ = image.shape
    short_side = min(width, height)

    for _ in range(250):
        scale = random.choice(pre_scales)
        w = int(scale * short_side)
        h = w

        # randrange(0) would raise, so a full-size side is pinned to offset 0.
        left = 0 if width == w else random.randrange(width - w)
        top = 0 if height == h else random.randrange(height - h)
        roi = np.array((left, top, left + w, top + h))

        value = matrix_iof(boxes, roi[np.newaxis])
        if not (value >= 1).any():
            continue

        # Keep only the boxes whose centre lies strictly inside the crop.
        centers = (boxes[:, :2] + boxes[:, 2:]) / 2
        mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
        boxes_t = boxes[mask_a].copy()
        labels_t = labels[mask_a].copy()
        landms_t = landm[mask_a].copy().reshape([-1, 5, 2])

        if boxes_t.shape[0] == 0:
            continue

        image_t = image[roi[1]:roi[3], roi[0]:roi[2]]

        # Clip boxes to the crop, then shift them into crop coordinates.
        boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
        boxes_t[:, :2] -= roi[:2]
        boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
        boxes_t[:, 2:] -= roi[:2]

        # Shift landmarks into crop coordinates and clamp them to the crop.
        landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
        landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
        landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
        landms_t = landms_t.reshape([-1, 10])

        # Box size at training scale. The threshold is 0.0, so this filter
        # only removes degenerate boxes; no 16-pixel minimum is enforced.
        b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
        b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
        mask_b = np.minimum(b_w_t, b_h_t) > 0.0
        boxes_t = boxes_t[mask_b]
        labels_t = labels_t[mask_b]
        landms_t = landms_t[mask_b]

        if boxes_t.shape[0] == 0:
            continue

        return image_t, boxes_t, labels_t, landms_t, False

    # No acceptable crop was found: hand back the inputs and ask for padding.
    return image, boxes, labels, landm, True
|
78 |
+
|
79 |
+
|
80 |
+
def _distort(image):
    """Apply random photometric jitter and return a new image.

    Brightness, contrast, saturation and hue are each perturbed with
    probability 1/2. A first coin flip decides whether contrast is applied
    before or after the saturation/hue step. The input array is not
    modified; the work is done on a copy.

    Args:
        image: image array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2HSV``. The ``% 180`` hue wrap suggests 8-bit BGR
            input -- TODO confirm against the caller.

    Returns:
        The distorted image.
    """

    def _convert(channel, alpha=1, beta=0):
        # In place: channel * alpha + beta, clamped to [0, 255].
        channel[:] = np.clip(channel.astype(float) * alpha + beta, 0, 255)

    def _random_brightness(img):
        if random.randrange(2):
            _convert(img, beta=random.uniform(-32, 32))

    def _random_contrast(img):
        if random.randrange(2):
            _convert(img, alpha=random.uniform(0.5, 1.5))

    def _random_saturation_hue(img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # saturation distortion
        if random.randrange(2):
            _convert(img[:, :, 1], alpha=random.uniform(0.5, 1.5))

        # hue distortion: shift, then wrap back into [0, 180)
        if random.randrange(2):
            hue = img[:, :, 0].astype(int) + random.randint(-18, 18)
            hue %= 180
            img[:, :, 0] = hue

        return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)

    image = image.copy()

    # The random draws happen in the same order as in the two original
    # branches, so seeded runs give the same result.
    contrast_first = random.randrange(2)

    _random_brightness(image)
    if contrast_first:
        _random_contrast(image)
    image = _random_saturation_hue(image)
    if not contrast_first:
        _random_contrast(image)

    return image
|
139 |
+
|
140 |
+
|
141 |
+
def _expand(image, boxes, fill, p):
    """Place the image at a random spot on a larger canvas half the time.

    Args:
        image: ``(H, W, C)`` image array.
        boxes: ``(N, 4)`` array of ``x1, y1, x2, y2`` boxes in pixels.
        fill: value (scalar or per-channel sequence) for the new border.
        p: upper bound of the canvas scale factor, drawn from ``[1, p]``.

    Returns:
        ``(image, boxes)``. With probability 1/2 the inputs are returned
        untouched (same objects). Otherwise a new canvas and a shifted copy
        of the boxes are returned.
    """
    if random.randrange(2):
        return image, boxes

    height, width, depth = image.shape

    scale = random.uniform(1, p)
    # With p < 1 the draw falls below 1 and the canvas would be smaller than
    # the image, making randint(0, negative) raise ValueError. Never shrink.
    w = max(width, int(scale * width))
    h = max(height, int(scale * height))

    left = random.randint(0, w - width)
    top = random.randint(0, h - height)

    # Shift both corners of every box by the paste offset.
    boxes_t = boxes.copy()
    boxes_t[:, :2] += (left, top)
    boxes_t[:, 2:] += (left, top)

    expand_image = np.empty((h, w, depth), dtype=image.dtype)
    expand_image[:, :] = fill
    expand_image[top:top + height, left:left + width] = image

    return expand_image, boxes_t
|
165 |
+
|
166 |
+
|
167 |
+
def _mirror(image, boxes, landms):
    """Flip the sample left-to-right with probability 1/2.

    Args:
        image: ``(H, W, C)`` image array.
        boxes: ``(N, 4)`` array of ``x1, y1, x2, y2`` boxes in pixels.
        landms: ``(N, 10)`` array, five ``(x, y)`` landmark pairs per box.

    Returns:
        ``(image, boxes, landms)``. When no flip is drawn the inputs are
        returned as-is. Otherwise the image is a reversed view, and the
        boxes and landmarks are mirrored copies.
    """
    _, width, _ = image.shape
    if not random.randrange(2):
        return image, boxes, landms

    flipped = image[:, ::-1]

    # Mirroring swaps the roles of x1 and x2: new x1 = W - x2, new x2 = W - x1.
    mirrored_boxes = boxes.copy()
    mirrored_boxes[:, 0::2] = width - mirrored_boxes[:, 2::-2]

    points = landms.copy().reshape([-1, 5, 2])
    points[:, :, 0] = width - points[:, :, 0]
    # Exchange landmark 0 with 1 and landmark 3 with 4 (presumably the
    # left/right eyes and mouth corners -- confirm against the annotations).
    points[:, [0, 1, 3, 4]] = points[:, [1, 0, 4, 3]]

    return flipped, mirrored_boxes, points.reshape([-1, 10])
|
187 |
+
|
188 |
+
|
189 |
+
def _pad_to_square(image, rgb_mean, pad_image_flag):
    """Pad the image to a square canvas, anchored at the top-left corner.

    Args:
        image: ``(H, W, C)`` image array.
        rgb_mean: fill value for the padded area (broadcast over 3 channels).
        pad_image_flag: when falsy the image is returned unchanged.

    Returns:
        The input image itself, or a new ``(S, S, 3)`` array with
        ``S = max(H, W)`` holding the image in its top-left corner.
    """
    if pad_image_flag:
        rows, cols, _ = image.shape
        side = max(rows, cols)
        canvas = np.empty((side, side, 3), dtype=image.dtype)
        canvas[...] = rgb_mean
        canvas[:rows, :cols] = image
        return canvas
    return image
|
198 |
+
|
199 |
+
|
200 |
+
def _resize_subtract_mean(image, insize, rgb_mean):
    """Resize to a square, subtract the mean and return channels-first data.

    Args:
        image: ``(H, W, C)`` image array.
        insize: side length of the square output.
        rgb_mean: value subtracted from the resized float image.

    Returns:
        ``float32`` array with axes reordered from ``(H, W, C)`` to
        ``(C, H, W)``. The interpolation method is picked at random from
        five OpenCV modes on every call.
    """
    candidates = (
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_NEAREST,
        cv2.INTER_LANCZOS4,
    )
    chosen = candidates[random.randrange(len(candidates))]
    resized = cv2.resize(image, (insize, insize), interpolation=chosen)
    centered = resized.astype(np.float32)
    # In-place subtraction keeps the float32 dtype.
    centered -= rgb_mean
    return centered.transpose(2, 0, 1)
|
207 |
+
|
208 |
+
|
209 |
+
class preproc(object):
    """Training-time augmentation pipeline for one face-detection sample.

    Calling an instance crops, distorts, pads, mirrors and resizes the image,
    and returns the annotations normalised by the pre-resize image size.
    """

    def __init__(self, img_dim, rgb_means):
        # Side length of the square network input.
        self.img_dim = img_dim
        # Value used for padding and subtracted from the resized image.
        self.rgb_means = rgb_means

    def __call__(self, image, targets):
        """Augment one sample.

        Args:
            image: ``(H, W, C)`` image array.
            targets: ``(N, K)`` array with ``N > 0``. Columns 0-3 are the
                box, the last column is the label, and the columns between
                are the landmarks (reshaped to five ``(x, y)`` pairs by the
                helpers, so ``K`` is expected to be 15).

        Returns:
            ``(image_t, targets_t)``: the processed image and an array of
            ``[box, landmarks, label]`` rows with coordinates divided by the
            image width and height measured just before resizing.
        """
        assert targets.shape[0] > 0, "this image does not have gt"

        # The in-place `/=` below cannot write float results into an integer
        # array (NumPy raises a casting error), so promote such targets first.
        if not np.issubdtype(targets.dtype, np.floating):
            targets = targets.astype(float)

        boxes = targets[:, :4].copy()
        labels = targets[:, -1].copy()
        landm = targets[:, 4:-1].copy()

        image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(
            image, boxes, labels, landm, self.img_dim)
        image_t = _distort(image_t)
        image_t = _pad_to_square(image_t, self.rgb_means, pad_image_flag)
        image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)

        # Record the size before resizing; the annotations are in this frame.
        height, width, _ = image_t.shape
        image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)

        # Even columns are x coordinates, odd columns are y coordinates.
        boxes_t[:, 0::2] /= width
        boxes_t[:, 1::2] /= height

        landm_t[:, 0::2] /= width
        landm_t[:, 1::2] /= height

        labels_t = np.expand_dims(labels_t, 1)
        targets_t = np.hstack((boxes_t, landm_t, labels_t))

        return image_t, targets_t
|