peterkros committed on
Commit
bb224ac
·
verified ·
1 Parent(s): 7de2af6

Create models/modnet.py

Browse files
Files changed (1) hide show
  1. src/models/modnet.py +255 -0
src/models/modnet.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from .backbones import SUPPORTED_BACKBONES
6
+
7
+
8
+ #------------------------------------------------------------------------------
9
+ # MODNet Basic Modules
10
+ #------------------------------------------------------------------------------
11
+
12
class IBNorm(nn.Module):
    """Normalization layer that mixes batch norm and instance norm.

    The leading ``int(in_channels / 2)`` channels are normalized by an affine
    ``nn.BatchNorm2d``; the remaining channels are normalized by a non-affine
    ``nn.InstanceNorm2d``. Both parts are concatenated back along dim 1 in
    their original order.
    """

    def __init__(self, in_channels):
        """
        Args:
            in_channels: channel count of the tensors given to ``forward``.
        """
        super().__init__()
        batch_part = int(in_channels / 2)
        self.bnorm_channels = batch_part
        # For an odd channel count the extra channel goes to instance norm.
        self.inorm_channels = in_channels - batch_part

        self.bnorm = nn.BatchNorm2d(batch_part, affine=True)
        self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)

    def forward(self, x):
        """Normalize ``x`` channel-group-wise and return the re-joined tensor."""
        split = self.bnorm_channels
        batch_half = x[:, :split, ...].contiguous()
        instance_half = x[:, split:, ...].contiguous()

        normalized = (self.bnorm(batch_half), self.inorm(instance_half))
        return torch.cat(normalized, 1)
30
+
31
+
32
class Conv2dIBNormRelu(nn.Module):
    """2-D convolution optionally followed by IBNorm and an in-place ReLU.

    The stages are stored in ``self.layers`` (an ``nn.Sequential``) in the
    order convolution, normalization, activation; disabled stages are simply
    omitted.
    """

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True,
                 with_ibn=True, with_relu=True):
        """
        Args:
            in_channels, out_channels, kernel_size, stride, padding, dilation,
                groups, bias: forwarded unchanged to ``nn.Conv2d``.
            with_ibn: append ``IBNorm(out_channels)`` after the convolution.
            with_relu: append ``nn.ReLU(inplace=True)`` as the last stage.
        """
        super().__init__()

        conv = nn.Conv2d(
            in_channels, out_channels, kernel_size,
            stride=stride, padding=padding, dilation=dilation,
            groups=groups, bias=bias,
        )

        # (enabled, factory) pairs; factories are only invoked when enabled,
        # so no unused module is ever constructed.
        optional_stages = (
            (with_ibn, lambda: IBNorm(out_channels)),
            (with_relu, lambda: nn.ReLU(inplace=True)),
        )
        stages = [conv]
        stages.extend(build() for enabled, build in optional_stages if enabled)

        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the configured stages to ``x`` in order."""
        return self.layers(x)
56
+
57
+
58
class SEBlock(nn.Module):
    """Squeeze-and-excitation block, see https://arxiv.org/pdf/1709.01507.pdf

    Each channel is globally average-pooled to a scalar, passed through a
    bias-free two-layer MLP ending in a sigmoid, and the resulting per-channel
    gate multiplies the input.
    """

    def __init__(self, in_channels, out_channels, reduction=1):
        """
        Args:
            in_channels: channel count of the input tensor.
            out_channels: width of the gate vector; ``forward`` reshapes the
                gate to the input's channel count.
            reduction: divisor applied to ``in_channels`` for the hidden width.
        """
        super().__init__()
        hidden = int(in_channels // reduction)

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, out_channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate (4-D input)."""
        batch, channels, _, _ = x.size()
        squeezed = self.pool(x).view(batch, channels)
        gate = self.fc(squeezed).view(batch, channels, 1, 1)

        return x * gate.expand_as(x)
78
+
79
+
80
+ #------------------------------------------------------------------------------
81
+ # MODNet Branches
82
+ #------------------------------------------------------------------------------
83
+
84
class LRBranch(nn.Module):
    """Low Resolution Branch of MODNet.

    Runs the backbone, re-weights its last feature map with an SE block,
    upsamples it twice (bilinear, x2 each) through two 5x5 conv blocks and,
    when not in inference mode, predicts a single-channel semantic map.
    """

    def __init__(self, backbone):
        """
        Args:
            backbone: encoder exposing ``enc_channels`` (read at indices 2, 3
                and 4 here) and producing a sequence of feature maps of which
                indices 0, 1 and 4 are consumed by ``forward``.
        """
        super(LRBranch, self).__init__()

        enc_channels = backbone.enc_channels

        self.backbone = backbone
        self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
        self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
        self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
        # Stride-2 head without norm/activation; sigmoid is applied in forward.
        self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)

    def forward(self, img, inference):
        """
        Args:
            img: input tensor handed to the backbone.
            inference: when truthy, the semantic head is skipped.

        Returns:
            ``(pred_semantic, lr8x, [enc2x, enc4x])`` where ``pred_semantic``
            is ``None`` in inference mode, ``lr8x`` is the output of
            ``conv_lr8x`` and ``enc2x``/``enc4x`` are backbone features 0 and 1.
        """
        # Call the module itself rather than ``.forward`` so that any hooks
        # registered on the backbone are executed (``nn.Module.__call__``).
        # NOTE(review): assumes the backbone is an ``nn.Module`` — confirm.
        enc_features = self.backbone(img)
        enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]

        enc32x = self.se_block(enc32x)
        lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
        lr16x = self.conv_lr16x(lr16x)
        lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
        lr8x = self.conv_lr8x(lr8x)

        pred_semantic = None
        if not inference:
            # The semantic map is only produced outside inference mode.
            lr = self.conv_lr(lr8x)
            pred_semantic = torch.sigmoid(lr)

        return pred_semantic, lr8x, [enc2x, enc4x]
115
+
116
+
117
class HRBranch(nn.Module):
    """High Resolution Branch of MODNet.

    Fuses two encoder feature maps, the low-resolution branch output and
    bilinearly downsampled copies of the input image, and (outside inference)
    predicts a single-channel detail map.
    """

    def __init__(self, hr_channels, enc_channels):
        """
        Args:
            hr_channels: base channel width used throughout this branch.
            enc_channels: encoder channel counts; indices 0 and 1 are read.
        """
        super().__init__()

        def conv3x3(cin, cout, stride=1):
            # 3x3 conv + IBNorm + ReLU with padding 1.
            return Conv2dIBNormRelu(cin, cout, 3, stride=stride, padding=1)

        def to_hr(cin):
            # 1x1 projection of an encoder feature map to ``hr_channels``.
            return Conv2dIBNormRelu(cin, hr_channels, 1, stride=1, padding=0)

        wide = 2 * hr_channels

        self.tohr_enc2x = to_hr(enc_channels[0])
        # "+ 3" accounts for the image channels concatenated in forward.
        self.conv_enc2x = conv3x3(hr_channels + 3, hr_channels, stride=2)

        self.tohr_enc4x = to_hr(enc_channels[1])
        self.conv_enc4x = conv3x3(wide, wide)

        self.conv_hr4x = nn.Sequential(
            conv3x3(3 * hr_channels + 3, wide),
            conv3x3(wide, wide),
            conv3x3(wide, hr_channels),
        )

        self.conv_hr2x = nn.Sequential(
            conv3x3(wide, wide),
            conv3x3(wide, hr_channels),
            conv3x3(hr_channels, hr_channels),
            conv3x3(hr_channels, hr_channels),
        )

        self.conv_hr = nn.Sequential(
            conv3x3(hr_channels + 3, hr_channels),
            Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, enc2x, enc4x, lr8x, inference):
        """
        Returns:
            ``(pred_detail, hr2x)``; ``pred_detail`` is ``None`` when
            ``inference`` is truthy, otherwise the sigmoid of ``conv_hr``.
        """
        def resize(tensor, factor):
            return F.interpolate(tensor, scale_factor=factor, mode='bilinear', align_corners=False)

        img_half = resize(img, 1/2)
        img_quarter = resize(img, 1/4)

        # The projected enc2x is reused further down for the hr2x fusion.
        enc2x_hr = self.tohr_enc2x(enc2x)
        fused4x = self.conv_enc2x(torch.cat((img_half, enc2x_hr), dim=1))

        enc4x_hr = self.tohr_enc4x(enc4x)
        fused4x = self.conv_enc4x(torch.cat((fused4x, enc4x_hr), dim=1))

        lr4x = resize(lr8x, 2)
        fused4x = self.conv_hr4x(torch.cat((fused4x, lr4x, img_quarter), dim=1))

        hr2x = self.conv_hr2x(torch.cat((resize(fused4x, 2), enc2x_hr), dim=1))

        if inference:
            return None, hr2x

        full_res = torch.cat((resize(hr2x, 2), img), dim=1)
        pred_detail = torch.sigmoid(self.conv_hr(full_res))
        return pred_detail, hr2x
171
+
172
+
173
class FusionBranch(nn.Module):
    """Fusion Branch of MODNet.

    Upsamples the low-resolution features, merges them with the
    high-resolution features and the input image, and predicts the final
    single-channel matte through a sigmoid.
    """

    def __init__(self, hr_channels, enc_channels):
        """
        Args:
            hr_channels: base channel width of the high-resolution features.
            enc_channels: encoder channel counts; index 2 is read.
        """
        super().__init__()
        half_width = int(hr_channels / 2)

        self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)

        self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
        self.conv_f = nn.Sequential(
            # "+ 3" accounts for the image channels concatenated in forward.
            Conv2dIBNormRelu(hr_channels + 3, half_width, 3, stride=1, padding=1),
            Conv2dIBNormRelu(half_width, 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, lr8x, hr2x):
        """Return the predicted matte for ``img`` given both branch outputs."""
        def upsample2x(tensor):
            return F.interpolate(tensor, scale_factor=2, mode='bilinear', align_corners=False)

        lr4x = self.conv_lr4x(upsample2x(lr8x))
        lr2x = upsample2x(lr4x)

        fused2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
        fused = torch.cat((upsample2x(fused2x), img), dim=1)

        return torch.sigmoid(self.conv_f(fused))
198
+
199
+
200
+ #------------------------------------------------------------------------------
201
+ # MODNet
202
+ #------------------------------------------------------------------------------
203
+
204
class MODNet(nn.Module):
    """Architecture of MODNet.

    Wires a backbone selected from ``SUPPORTED_BACKBONES`` into the
    low-resolution, high-resolution and fusion branches.
    """

    def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
        """
        Args:
            in_channels: passed to the backbone constructor.
            hr_channels: channel width handed to the HR and fusion branches.
            backbone_arch: key into ``SUPPORTED_BACKBONES``.
            backbone_pretrained: when truthy, ``load_pretrained_ckpt()`` is
                called on the backbone after weight initialization.
        """
        super().__init__()

        self.in_channels = in_channels
        self.hr_channels = hr_channels
        self.backbone_arch = backbone_arch
        self.backbone_pretrained = backbone_pretrained

        build_backbone = SUPPORTED_BACKBONES[self.backbone_arch]
        self.backbone = build_backbone(self.in_channels)

        self.lr_branch = LRBranch(self.backbone)
        self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
        self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)

        # Initialize every conv / norm layer, backbone included; pretrained
        # backbone weights (if requested) are loaded afterwards.
        norm_layers = (nn.BatchNorm2d, nn.InstanceNorm2d)
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                self._init_conv(module)
            elif isinstance(module, norm_layers):
                self._init_norm(module)

        if self.backbone_pretrained:
            self.backbone.load_pretrained_ckpt()

    def forward(self, img, inference):
        """Return ``(pred_semantic, pred_detail, pred_matte)`` for ``img``.

        ``inference`` is forwarded to the LR and HR branches.
        """
        pred_semantic, lr8x, skips = self.lr_branch(img, inference)
        enc2x, enc4x = skips
        pred_detail, hr2x = self.hr_branch(img, enc2x, enc4x, lr8x, inference)
        pred_matte = self.f_branch(img, lr8x, hr2x)

        return pred_semantic, pred_detail, pred_matte

    def freeze_norm(self):
        """Switch every BatchNorm2d / InstanceNorm2d submodule to eval mode."""
        norm_layers = (nn.BatchNorm2d, nn.InstanceNorm2d)
        for module in self.modules():
            if isinstance(module, norm_layers):
                module.eval()

    def _init_conv(self, conv):
        """Kaiming-uniform initialize ``conv.weight`` and zero its bias, if any."""
        nn.init.kaiming_uniform_(
            conv.weight, a=0, mode='fan_in', nonlinearity='relu')
        if conv.bias is None:
            return
        nn.init.constant_(conv.bias, 0)

    def _init_norm(self, norm):
        """Reset an affine norm layer to weight 1 / bias 0; skip non-affine ones."""
        if norm.weight is None:
            return
        nn.init.constant_(norm.weight, 1)
        nn.init.constant_(norm.bias, 0)