Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) Meta Platforms, Inc. and affiliates. | |
# All rights reserved. | |
# | |
# This source code is licensed under the license found in the | |
# LICENSE file in the root directory of this source tree. | |
import math | |
import torch | |
import torch.nn as nn | |
from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule | |
from mmengine.model import BaseModule | |
from mmdet.registry import MODELS | |
from ..layers import CSPLayer | |
class YOLOXPAFPN(BaseModule): | |
"""Path Aggregation Network used in YOLOX. | |
Args: | |
in_channels (List[int]): Number of input channels per scale. | |
out_channels (int): Number of output channels (used at each scale) | |
num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 3 | |
use_depthwise (bool): Whether to depthwise separable convolution in | |
blocks. Default: False | |
upsample_cfg (dict): Config dict for interpolate layer. | |
Default: `dict(scale_factor=2, mode='nearest')` | |
conv_cfg (dict, optional): Config dict for convolution layer. | |
Default: None, which means using conv2d. | |
norm_cfg (dict): Config dict for normalization layer. | |
Default: dict(type='BN') | |
act_cfg (dict): Config dict for activation layer. | |
Default: dict(type='Swish') | |
init_cfg (dict or list[dict], optional): Initialization config dict. | |
Default: None. | |
""" | |
def __init__(self, | |
in_channels, | |
out_channels, | |
num_csp_blocks=3, | |
use_depthwise=False, | |
upsample_cfg=dict(scale_factor=2, mode='nearest'), | |
conv_cfg=None, | |
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), | |
act_cfg=dict(type='Swish'), | |
init_cfg=dict( | |
type='Kaiming', | |
layer='Conv2d', | |
a=math.sqrt(5), | |
distribution='uniform', | |
mode='fan_in', | |
nonlinearity='leaky_relu')): | |
super(YOLOXPAFPN, self).__init__(init_cfg) | |
self.in_channels = in_channels | |
self.out_channels = out_channels | |
conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule | |
# build top-down blocks | |
self.upsample = nn.Upsample(**upsample_cfg) | |
self.reduce_layers = nn.ModuleList() | |
self.top_down_blocks = nn.ModuleList() | |
for idx in range(len(in_channels) - 1, 0, -1): | |
self.reduce_layers.append( | |
ConvModule( | |
in_channels[idx], | |
in_channels[idx - 1], | |
1, | |
conv_cfg=conv_cfg, | |
norm_cfg=norm_cfg, | |
act_cfg=act_cfg)) | |
self.top_down_blocks.append( | |
CSPLayer( | |
in_channels[idx - 1] * 2, | |
in_channels[idx - 1], | |
num_blocks=num_csp_blocks, | |
add_identity=False, | |
use_depthwise=use_depthwise, | |
conv_cfg=conv_cfg, | |
norm_cfg=norm_cfg, | |
act_cfg=act_cfg)) | |
# build bottom-up blocks | |
self.downsamples = nn.ModuleList() | |
self.bottom_up_blocks = nn.ModuleList() | |
for idx in range(len(in_channels) - 1): | |
self.downsamples.append( | |
conv( | |
in_channels[idx], | |
in_channels[idx], | |
3, | |
stride=2, | |
padding=1, | |
conv_cfg=conv_cfg, | |
norm_cfg=norm_cfg, | |
act_cfg=act_cfg)) | |
self.bottom_up_blocks.append( | |
CSPLayer( | |
in_channels[idx] * 2, | |
in_channels[idx + 1], | |
num_blocks=num_csp_blocks, | |
add_identity=False, | |
use_depthwise=use_depthwise, | |
conv_cfg=conv_cfg, | |
norm_cfg=norm_cfg, | |
act_cfg=act_cfg)) | |
self.out_convs = nn.ModuleList() | |
for i in range(len(in_channels)): | |
self.out_convs.append( | |
ConvModule( | |
in_channels[i], | |
out_channels, | |
1, | |
conv_cfg=conv_cfg, | |
norm_cfg=norm_cfg, | |
act_cfg=act_cfg)) | |
def forward(self, inputs): | |
""" | |
Args: | |
inputs (tuple[Tensor]): input features. | |
Returns: | |
tuple[Tensor]: YOLOXPAFPN features. | |
""" | |
assert len(inputs) == len(self.in_channels) | |
# top-down path | |
inner_outs = [inputs[-1]] | |
for idx in range(len(self.in_channels) - 1, 0, -1): | |
feat_heigh = inner_outs[0] | |
feat_low = inputs[idx - 1] | |
feat_heigh = self.reduce_layers[len(self.in_channels) - 1 - idx]( | |
feat_heigh) | |
inner_outs[0] = feat_heigh | |
upsample_feat = self.upsample(feat_heigh) | |
inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx]( | |
torch.cat([upsample_feat, feat_low], 1)) | |
inner_outs.insert(0, inner_out) | |
# bottom-up path | |
outs = [inner_outs[0]] | |
for idx in range(len(self.in_channels) - 1): | |
feat_low = outs[-1] | |
feat_height = inner_outs[idx + 1] | |
downsample_feat = self.downsamples[idx](feat_low) | |
out = self.bottom_up_blocks[idx]( | |
torch.cat([downsample_feat, feat_height], 1)) | |
outs.append(out) | |
# out convs | |
for idx, conv in enumerate(self.out_convs): | |
outs[idx] = conv(outs[idx]) | |
return tuple(outs) | |