import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import VGG, constant_init, kaiming_init, normal_init, xavier_init
from mmcv.runner import load_checkpoint

from mmdet.utils import get_root_logger
from ..builder import BACKBONES


@BACKBONES.register_module()
class SSDVGG(VGG):
    """VGG Backbone network for single-shot-detection.

    Args:
        input_size (int): Width and height of the input, from {300, 512}.
        depth (int): Depth of the VGG, from {11, 13, 16, 19}.
        out_indices (Sequence[int]): Stages from which to output features.

    Example:
        >>> self = SSDVGG(input_size=300, depth=11)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 300, 300)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 1024, 19, 19)
        (1, 512, 10, 10)
        (1, 256, 5, 5)
        (1, 256, 3, 3)
        (1, 256, 1, 1)
    """
    extra_setting = {
        300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),
        512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128),
    }
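    # Reading of the tuples above (inferred from _make_extra_layers below,
    # not documented in the original source): each integer is an extra
    # conv's output channel count, kernel sizes alternate 1x1 / 3x3, and an
    # 'S' makes the conv that outputs the following channel count use
    # stride 2 with padding 1 (i.e. a downsampling layer).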

    def __init__(self,
                 input_size,
                 depth,
                 with_last_pool=False,
                 ceil_mode=True,
                 out_indices=(3, 4),
                 out_feature_indices=(22, 34),
                 l2_norm_scale=20.):
        # TODO: in_channels for mmcv.VGG
        super(SSDVGG, self).__init__(
            depth,
            with_last_pool=with_last_pool,
            ceil_mode=ceil_mode,
            out_indices=out_indices)
        assert input_size in (300, 512)
        self.input_size = input_size

        # Convert the VGG classifier head into conv layers, following the
        # SSD paper: pool5 becomes a stride-1 pool, and fc6/fc7 become a
        # dilated 3x3 conv and a 1x1 conv respectively.
        self.features.add_module(
            str(len(self.features)),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
        self.features.add_module(
            str(len(self.features)),
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))
        self.features.add_module(
            str(len(self.features)), nn.ReLU(inplace=True))
        self.features.add_module(
            str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1))
        self.features.add_module(
            str(len(self.features)), nn.ReLU(inplace=True))
        self.out_feature_indices = out_feature_indices

        self.inplanes = 1024
        self.extra = self._make_extra_layers(self.extra_setting[input_size])
        # L2-normalize the first output feature map (conv4_3 for VGG-16),
        # as in the SSD paper.
        self.l2_norm = L2Norm(
            self.features[out_feature_indices[0] - 1].out_channels,
            l2_norm_scale)

    def init_weights(self, pretrained=None):
        """Initialize the weights in backbone.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.features.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

        for m in self.extra.modules():
            if isinstance(m, nn.Conv2d):
                xavier_init(m, distribution='uniform')

        constant_init(self.l2_norm, self.l2_norm.scale)

    def forward(self, x):
        """Forward function."""
        outs = []
        for i, layer in enumerate(self.features):
            x = layer(x)
            if i in self.out_feature_indices:
                outs.append(x)
        # Every second extra layer's output (after its ReLU) is kept as an
        # additional SSD scale.
        for i, layer in enumerate(self.extra):
            x = F.relu(layer(x), inplace=True)
            if i % 2 == 1:
                outs.append(x)
        outs[0] = self.l2_norm(outs[0])
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def _make_extra_layers(self, outplanes):
        layers = []
        kernel_sizes = (1, 3)
        num_layers = 0
        outplane = None
        for i in range(len(outplanes)):
            if self.inplanes == 'S':
                self.inplanes = outplane
                continue
            k = kernel_sizes[num_layers % 2]
            if outplanes[i] == 'S':
                outplane = outplanes[i + 1]
                conv = nn.Conv2d(
                    self.inplanes, outplane, k, stride=2, padding=1)
            else:
                outplane = outplanes[i]
                conv = nn.Conv2d(
                    self.inplanes, outplane, k, stride=1, padding=0)
            layers.append(conv)
            self.inplanes = outplanes[i]
            num_layers += 1
        if self.input_size == 512:
            layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))

        return nn.Sequential(*layers)
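
    # A worked trace of _make_extra_layers (my reading, not documented in
    # the original source) for input_size=300, starting from
    # self.inplanes = 1024 and setting
    # (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256):
    #   i=0: Conv2d(1024, 256, 1)                         1x1, stride 1
    #   i=1: 'S' -> Conv2d(256, 512, 3, stride=2, padding=1)
    #   i=2: bookkeeping pass: inplanes 'S' is replaced by 512, no conv
    #   i=3: Conv2d(512, 128, 1)
    #   i=4: 'S' -> Conv2d(128, 256, 3, stride=2, padding=1)
    #   ...and so on, alternating 1x1 and 3x3 kernels.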


class L2Norm(nn.Module):

    def __init__(self, n_dims, scale=20., eps=1e-10):
        """L2 normalization layer.

        Args:
            n_dims (int): Number of dimensions to be normalized.
            scale (float, optional): Defaults to 20.
            eps (float, optional): Used to avoid division by zero.
                Defaults to 1e-10.
        """
        super(L2Norm, self).__init__()
        self.n_dims = n_dims
        self.weight = nn.Parameter(torch.Tensor(self.n_dims))
        self.eps = eps
        self.scale = scale

    def forward(self, x):
        """Forward function."""
        # The normalization runs in FP32 during FP16 training to avoid
        # overflow when summing squares, then casts back to the input dtype.
        x_float = x.float()
        norm = x_float.pow(2).sum(1, keepdim=True).sqrt() + self.eps
        return (self.weight[None, :, None, None].float().expand_as(x_float) *
                x_float / norm).type_as(x)
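

# A minimal smoke test (my addition, not part of the original module):
# build the backbone as in the class docstring example, randomly initialize
# it, and print the five SSD-300 feature-map shapes.
if __name__ == '__main__':
    model = SSDVGG(input_size=300, depth=11)
    model.init_weights(pretrained=None)
    model.eval()
    with torch.no_grad():
        level_outputs = model(torch.rand(1, 3, 300, 300))
    for level_out in level_outputs:
        print(tuple(level_out.shape))
    # Expected, per the docstring: (1, 1024, 19, 19), (1, 512, 10, 10),
    # (1, 256, 5, 5), (1, 256, 3, 3), (1, 256, 1, 1)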