kirch's picture
Duplicate from PAIR/Text2Video-Zero
508927a
import annotator.uniformer.mmcv as mmcv
import torch.nn as nn
from annotator.uniformer.mmcv.cnn import ConvModule
from .make_divisible import make_divisible
class SELayer(nn.Module):
"""Squeeze-and-Excitation Module.
Args:
channels (int): The input (and output) channels of the SE layer.
ratio (int): Squeeze ratio in SELayer, the intermediate channel will be
``int(channels/ratio)``. Default: 16.
conv_cfg (None or dict): Config dict for convolution layer.
Default: None, which means using conv2d.
act_cfg (dict or Sequence[dict]): Config dict for activation layer.
If act_cfg is a dict, two activation layers will be configured
by this dict. If act_cfg is a sequence of dicts, the first
activation layer will be configured by the first dict and the
second activation layer will be configured by the second dict.
Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
divisor=6.0)).
"""
def __init__(self,
channels,
ratio=16,
conv_cfg=None,
act_cfg=(dict(type='ReLU'),
dict(type='HSigmoid', bias=3.0, divisor=6.0))):
super(SELayer, self).__init__()
if isinstance(act_cfg, dict):
act_cfg = (act_cfg, act_cfg)
assert len(act_cfg) == 2
assert mmcv.is_tuple_of(act_cfg, dict)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.conv1 = ConvModule(
in_channels=channels,
out_channels=make_divisible(channels // ratio, 8),
kernel_size=1,
stride=1,
conv_cfg=conv_cfg,
act_cfg=act_cfg[0])
self.conv2 = ConvModule(
in_channels=make_divisible(channels // ratio, 8),
out_channels=channels,
kernel_size=1,
stride=1,
conv_cfg=conv_cfg,
act_cfg=act_cfg[1])
def forward(self, x):
out = self.global_avgpool(x)
out = self.conv1(out)
out = self.conv2(out)
return x * out