dblasko committed on
Commit
9b9b1dc
1 Parent(s): 559af64

Upload 11 files

model/MIRNet/ChannelAttention.py ADDED
@@ -0,0 +1,40 @@
+ import torch
+ import torch.nn as nn
+
+
+ class ChannelAttention(nn.Module):
+     """
+     Squeezes the input down to 1x1xC, applies the excitation operation and restores the C channels through a 1x1 convolution.
+
+     In: HxWxC
+     Out: HxWxC (the attention weights are applied by multiplying them with the original input)
+     """
+
+     def __init__(self, in_channels, reduction_ratio=8, bias=True):
+         super().__init__()
+         self.squeezing = nn.AdaptiveAvgPool2d(1)
+         self.excitation = nn.Sequential(
+             nn.Conv2d(
+                 in_channels,
+                 in_channels // reduction_ratio,
+                 kernel_size=1,
+                 padding=0,
+                 bias=bias,
+             ),
+             nn.PReLU(),
+             nn.Conv2d(
+                 in_channels // reduction_ratio,
+                 in_channels,
+                 kernel_size=1,
+                 padding=0,
+                 bias=bias,
+             ),
+             nn.Sigmoid(),
+         )
+
+     def forward(self, x):
+         squeezed_x = self.squeezing(x)  # 1x1xC
+         excitation = self.excitation(squeezed_x)  # 1x1xC attention weights
+         return excitation * x  # HxWxC, rescaled through the mult. with the original input
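For a quick sanity check, a minimal usage sketch (the 64-channel input and 8x reduction are illustrative values, not from the commit):

import torch
from model.MIRNet.ChannelAttention import ChannelAttention

ca = ChannelAttention(in_channels=64, reduction_ratio=8)
x = torch.randn(1, 64, 32, 32)  # NCHW input
out = ca(x)
assert out.shape == x.shape  # channel attention rescales but preserves the shape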
model/MIRNet/ChannelCompression.py ADDED
@@ -0,0 +1,16 @@
+ import torch
+ import torch.nn as nn
+
+
+ class ChannelCompression(nn.Module):
+     """
+     Reduces the input to 2 channels by concatenating the outputs of max pooling and average pooling applied along the channel dimension.
+
+     In: HxWxC
+     Out: HxWx2
+     """
+
+     def forward(self, x):
+         return torch.cat(
+             (torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1
+         )
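A minimal shape check (input size illustrative):

import torch
from model.MIRNet.ChannelCompression import ChannelCompression

cc = ChannelCompression()
x = torch.randn(1, 64, 32, 32)
out = cc(x)
assert out.shape == (1, 2, 32, 32)  # one max map + one mean map across the channels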
model/MIRNet/Downsampling.py ADDED
@@ -0,0 +1,135 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import numpy as np
+
+
+ class DownsamplingBlock(nn.Module):
+     """
+     Downsamples the input to halve the spatial dimensions while doubling the channels, through two parallel conv + antialiased-downsampling branches.
+
+     In: HxWxC
+     Out: H/2 x W/2 x 2C
+     """
+
+     def __init__(self, in_channels, bias=False):
+         super().__init__()
+         # 1x1 conv + PReLU -> 3x3 conv + PReLU -> antialiased downsampling -> 1x1 conv
+         self.branch1 = nn.Sequential(
+             nn.Conv2d(in_channels, in_channels, kernel_size=1, padding=0, bias=bias),
+             nn.PReLU(),
+             nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, bias=bias),
+             nn.PReLU(),
+             DownSample(channels=in_channels, filter_size=3, stride=2),
+             nn.Conv2d(in_channels, in_channels * 2, kernel_size=1, padding=0, bias=bias),
+         )
+         # antialiased downsampling -> 1x1 conv
+         self.branch2 = nn.Sequential(
+             DownSample(channels=in_channels, filter_size=3, stride=2),
+             nn.Conv2d(in_channels, in_channels * 2, kernel_size=1, padding=0, bias=bias),
+         )
+
+     def forward(self, x):
+         return self.branch1(x) + self.branch2(x)  # H/2 x W/2 x 2C
+
+
+ class DownsamplingModule(nn.Module):
+     """
+     Downsampling module of the network, composed of log2(scaling_factor) DownsamplingBlocks.
+
+     In: HxWxC
+     Out: H/s x W/s x sC, where s is the scaling factor
+     """
+
+     def __init__(self, in_channels, scaling_factor, stride=2):
+         super().__init__()
+         self.scaling_factor = int(np.log2(scaling_factor))
+
+         blocks = []
+         for i in range(self.scaling_factor):
+             blocks.append(DownsamplingBlock(in_channels))
+             in_channels = int(in_channels * stride)
+         self.blocks = nn.Sequential(*blocks)
+
+     def forward(self, x):
+         return self.blocks(x)  # H/s x W/s x sC
+
+
+ class DownSample(nn.Module):
+     """
+     Antialiased downsampling module using the blur-pooling method.
+
+     From Adobe's implementation available here: https://github.com/yilundu/improved_contrastive_divergence/blob/master/downsample.py
+     """
+
+     def __init__(
+         self, pad_type="reflect", filter_size=3, stride=2, channels=None, pad_off=0
+     ):
+         super().__init__()
+         self.filter_size = filter_size
+         self.stride = stride
+         self.pad_off = pad_off
+         self.channels = channels
+         self.pad_sizes = [
+             int(1.0 * (filter_size - 1) / 2),
+             int(np.ceil(1.0 * (filter_size - 1) / 2)),
+             int(1.0 * (filter_size - 1) / 2),
+             int(np.ceil(1.0 * (filter_size - 1) / 2)),
+         ]
+         self.pad_sizes = [pad_size + pad_off for pad_size in self.pad_sizes]
+         self.off = int((self.stride - 1) / 2.0)
+
+         # Binomial filter coefficients (rows of Pascal's triangle) for the blur kernel.
+         if self.filter_size == 1:
+             a = np.array([1.0])
+         elif self.filter_size == 2:
+             a = np.array([1.0, 1.0])
+         elif self.filter_size == 3:
+             a = np.array([1.0, 2.0, 1.0])
+         elif self.filter_size == 4:
+             a = np.array([1.0, 3.0, 3.0, 1.0])
+         elif self.filter_size == 5:
+             a = np.array([1.0, 4.0, 6.0, 4.0, 1.0])
+         elif self.filter_size == 6:
+             a = np.array([1.0, 5.0, 10.0, 10.0, 5.0, 1.0])
+         elif self.filter_size == 7:
+             a = np.array([1.0, 6.0, 15.0, 20.0, 15.0, 6.0, 1.0])
+         else:
+             raise ValueError("Filter size [%i] not supported" % self.filter_size)
+
+         filt = torch.Tensor(a[:, None] * a[None, :])
+         filt = filt / torch.sum(filt)
+         self.register_buffer(
+             "filt", filt[None, None, :, :].repeat((self.channels, 1, 1, 1))
+         )
+         self.pad = get_pad_layer(pad_type)(self.pad_sizes)
+
+     def forward(self, x):
+         if self.filter_size == 1:
+             if self.pad_off == 0:
+                 return x[:, :, :: self.stride, :: self.stride]
+             else:
+                 return self.pad(x)[:, :, :: self.stride, :: self.stride]
+         else:
+             # Depthwise convolution with the blur kernel, then strided subsampling.
+             return F.conv2d(
+                 self.pad(x), self.filt, stride=self.stride, groups=x.shape[1]
+             )
+
+
+ def get_pad_layer(pad_type):
+     if pad_type == "reflect":
+         pad_layer = nn.ReflectionPad2d
+     elif pad_type == "replication":
+         pad_layer = nn.ReplicationPad2d
+     else:
+         raise ValueError("Pad type [%s] not recognized" % pad_type)
+     return pad_layer
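A usage sketch for the module (channel count and scaling factor are illustrative; a scaling factor of 4 stacks two halving blocks):

import torch
from model.MIRNet.Downsampling import DownsamplingModule

down = DownsamplingModule(in_channels=64, scaling_factor=4)
x = torch.randn(1, 64, 32, 32)
out = down(x)
assert out.shape == (1, 256, 8, 8)  # H/4 x W/4, channels x4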
model/MIRNet/DualAttentionUnit.py ADDED
@@ -0,0 +1,39 @@
+ import torch
+ import torch.nn as nn
+
+ from model.MIRNet.ChannelAttention import ChannelAttention
+ from model.MIRNet.SpatialAttention import SpatialAttention
+
+
+ class DualAttentionUnit(nn.Module):
+     """
+     Combines the ChannelAttention and SpatialAttention modules.
+     (conv, PReLU, conv -> concatenate the SA & CA outputs -> conv -> skip connection from the input)
+
+     In: HxWxC
+     Out: HxWxC (the C channels are restored by the final 1x1 convolution; a skip connection adds the input to the block output)
+     """
+
+     def __init__(self, in_channels, kernel_size=3, reduction_ratio=8, bias=False):
+         super().__init__()
+         self.initial_convs = nn.Sequential(
+             nn.Conv2d(in_channels, in_channels, kernel_size, padding=1, bias=bias),
+             nn.PReLU(),
+             nn.Conv2d(in_channels, in_channels, kernel_size, padding=1, bias=bias),
+         )
+         self.channel_attention = ChannelAttention(in_channels, reduction_ratio, bias)
+         self.spatial_attention = SpatialAttention()
+         self.final_conv = nn.Conv2d(
+             in_channels * 2, in_channels, kernel_size=1, bias=bias
+         )
+         self.in_channels = in_channels
+
+     def forward(self, x):
+         initial_convs = self.initial_convs(x)  # HxWxC
+         channel_attention = self.channel_attention(initial_convs)  # HxWxC
+         spatial_attention = self.spatial_attention(initial_convs)  # HxWxC
+         attention = torch.cat((spatial_attention, channel_attention), dim=1)  # HxWx2C
+         block_output = self.final_conv(
+             attention
+         )  # HxWxC - the 1x1 conv. restores the C channels for the skip connection
+         return x + block_output  # the addition is the skip connection from the input
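A shape check for the unit (values illustrative):

import torch
from model.MIRNet.DualAttentionUnit import DualAttentionUnit

dau = DualAttentionUnit(in_channels=64)
x = torch.randn(1, 64, 32, 32)
assert dau(x).shape == x.shape  # residual block: the shape is preserved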
model/MIRNet/MultiScaleResidualBlock.py ADDED
@@ -0,0 +1,124 @@
+ import torch
+ import numpy as np
+ import torch.nn as nn
+
+ from model.MIRNet.Downsampling import DownsamplingModule
+ from model.MIRNet.DualAttentionUnit import DualAttentionUnit
+ from model.MIRNet.SelectiveKernelFeatureFusion import SelectiveKernelFeatureFusion
+ from model.MIRNet.Upsampling import UpsamplingModule
+
+
+ class MultiScaleResidualBlock(nn.Module):
+     """
+     Three parallel convolutional streams at different resolutions (`height` streams, `width` dual-attention stages per stream).
+     Information is exchanged across the streams through residual connections.
+     """
+
+     def __init__(self, num_features, height, width, stride, bias):
+         super().__init__()
+         self.num_features = num_features
+         self.height = height
+         self.width = width
+         features = [int((stride**i) * num_features) for i in range(height)]
+         scale = [2**i for i in range(1, height)]
+
+         # Note: each inner ModuleList repeats a single DualAttentionUnit instance,
+         # so the `width` stages of a stream share their weights.
+         self.dual_attention_units = nn.ModuleList(
+             [
+                 nn.ModuleList(
+                     [DualAttentionUnit(int(num_features * stride**i))] * width
+                 )
+                 for i in range(height)
+             ]
+         )
+         self.last_up = nn.ModuleDict()
+         for i in range(1, height):
+             self.last_up.update(
+                 {
+                     f"{i}": UpsamplingModule(
+                         in_channels=int(num_features * stride**i),
+                         scaling_factor=2**i,
+                         stride=stride,
+                     )
+                 }
+             )
+
+         # Downsampling paths between streams, keyed by "<input channels>_<scale>".
+         self.down = nn.ModuleDict()
+         i = 0
+         scale.reverse()
+         for f in features:
+             for s in scale[i:]:
+                 self.down.update({f"{f}_{s}": DownsamplingModule(f, s, stride)})
+             i += 1
+
+         # Upsampling paths between streams, keyed the same way.
+         self.up = nn.ModuleDict()
+         i = 0
+         features.reverse()
+         for f in features:
+             for s in scale[i:]:
+                 self.up.update({f"{f}_{s}": UpsamplingModule(f, s, stride)})
+             i += 1
+
+         self.out_conv = nn.Conv2d(
+             num_features, num_features, kernel_size=3, padding=1, bias=bias
+         )
+         self.skff_blocks = nn.ModuleList(
+             [
+                 SelectiveKernelFeatureFusion(num_features * stride**i, height)
+                 for i in range(height)
+             ]
+         )
+
+     def forward(self, x):
+         inp = x.clone()
+         out = []
+
+         # First stage: fill the streams top-down, downsampling by 2 between them.
+         for j in range(self.height):
+             if j == 0:
+                 inp = self.dual_attention_units[j][0](inp)
+             else:
+                 inp = self.dual_attention_units[j][0](
+                     self.down[f"{inp.size(1)}_{2}"](inp)
+                 )
+             out.append(inp)
+
+         # Remaining stages: fuse all streams with SKFF, then apply the stage's DAU.
+         for i in range(1, self.width):
+             temp = []
+             for j in range(self.height):
+                 tensors = []
+                 for k in range(self.height):
+                     tensors.append(self.select_up_down(out[k], j, k))
+                 temp.append(self.skff_blocks[j](tensors))
+
+             for j in range(self.height):
+                 out[j] = self.dual_attention_units[j][i](temp[j])
+
+         # Bring every stream back to full resolution, fuse, convolve, add the residual.
+         output = []
+         for k in range(self.height):
+             output.append(self.select_last_up(out[k], k))
+
+         output = self.skff_blocks[0](output)
+         output = self.out_conv(output)
+         return output + x
+
+     def select_up_down(self, tensor, j, k):
+         if j == k:
+             return tensor
+         else:
+             diff = 2 ** np.abs(j - k)
+             if j < k:
+                 return self.up[f"{tensor.size(1)}_{diff}"](tensor)
+             else:
+                 return self.down[f"{tensor.size(1)}_{diff}"](tensor)
+
+     def select_last_up(self, tensor, k):
+         if k == 0:
+             return tensor
+         else:
+             return self.last_up[f"{k}"](tensor)
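A usage sketch (hyperparameters mirror the defaults in model.py; the spatial size must be divisible by 2^(height-1)):

import torch
from model.MIRNet.MultiScaleResidualBlock import MultiScaleResidualBlock

msrb = MultiScaleResidualBlock(num_features=64, height=3, width=2, stride=2, bias=False)
x = torch.randn(1, 64, 32, 32)  # 32 is divisible by 2**(3-1) = 4
assert msrb(x).shape == x.shape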
model/MIRNet/ResidualRecurrentGroup.py ADDED
@@ -0,0 +1,34 @@
+ import torch
+ import torch.nn as nn
+
+ from model.MIRNet.MultiScaleResidualBlock import MultiScaleResidualBlock
+
+
+ class ResidualRecurrentGroup(nn.Module):
+     """
+     Group of multi-scale residual blocks followed by a convolutional layer. The group output is added back to its input through a residual connection.
+     """
+
+     def __init__(
+         self, num_features, number_msrb_blocks, height, width, stride, bias=False
+     ):
+         super().__init__()
+         blocks = [
+             MultiScaleResidualBlock(num_features, height, width, stride, bias)
+             for _ in range(number_msrb_blocks)
+         ]
+         blocks.append(
+             nn.Conv2d(
+                 num_features,
+                 num_features,
+                 kernel_size=3,
+                 padding=1,
+                 stride=1,
+                 bias=bias,
+             )
+         )
+         self.blocks = nn.Sequential(*blocks)
+
+     def forward(self, x):
+         output = self.blocks(x)
+         return x + output  # residual connection, HxWxC feature maps
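A corresponding shape check (illustrative values):

import torch
from model.MIRNet.ResidualRecurrentGroup import ResidualRecurrentGroup

rrg = ResidualRecurrentGroup(num_features=64, number_msrb_blocks=2, height=3, width=2, stride=2)
x = torch.randn(1, 64, 32, 32)
assert rrg(x).shape == x.shape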
model/MIRNet/SelectiveKernelFeatureFusion.py ADDED
@@ -0,0 +1,65 @@
+ import torch
+ import torch.nn as nn
+
+
+ class SelectiveKernelFeatureFusion(nn.Module):
+     """
+     Merges the outputs of the three different resolutions through self-attention.
+
+     All three inputs are summed -> global average pooling -> channel downscaling -> the signal is passed through 3 different convs to obtain three descriptors;
+     softmax is applied across the descriptors to get 3 attention activations used to recalibrate the three input feature maps.
+     """
+
+     def __init__(self, in_channels, reduction_ratio, bias=False):
+         super().__init__()
+         self.avg_pool = nn.AdaptiveAvgPool2d(1)
+         conv_out_channels = max(int(in_channels / reduction_ratio), 4)
+         self.convolution = nn.Sequential(
+             nn.Conv2d(
+                 in_channels, conv_out_channels, kernel_size=1, padding=0, bias=bias
+             ),
+             nn.PReLU(),
+         )
+
+         self.attention_convs = nn.ModuleList([])
+         for i in range(3):
+             self.attention_convs.append(
+                 nn.Conv2d(
+                     conv_out_channels, in_channels, kernel_size=1, stride=1, bias=bias
+                 )
+             )
+
+         self.softmax = nn.Softmax(dim=1)
+
+     def forward(self, x):
+         # x: list of three feature maps of identical shape (batch_size x n_features x H x W)
+         batch_size = x[0].shape[0]
+         n_features = x[0].shape[1]
+
+         x = torch.cat(
+             x, dim=1
+         )  # the three outputs of diff. res. are concatenated along the channel dimension
+         x = x.view(
+             batch_size, 3, n_features, x.shape[2], x.shape[3]
+         )  # batch_size x 3 x n_features x H x W
+
+         z = torch.sum(x, dim=1)  # batch_size x n_features x H x W
+         z = self.avg_pool(z)  # batch_size x n_features x 1 x 1
+         z = self.convolution(z)  # batch_size x conv_out_channels x 1 x 1
+
+         attention_activations = [
+             atn(z) for atn in self.attention_convs
+         ]  # 3 x (batch_size x n_features x 1 x 1)
+         attention_activations = torch.cat(
+             attention_activations, dim=1
+         )  # batch_size x 3*n_features x 1 x 1
+         attention_activations = attention_activations.view(
+             batch_size, 3, n_features, 1, 1
+         )  # batch_size x 3 x n_features x 1 x 1
+
+         attention_activations = self.softmax(
+             attention_activations
+         )  # batch_size x 3 x n_features x 1 x 1
+
+         return torch.sum(
+             x * attention_activations, dim=1
+         )  # batch_size x n_features x H x W (the three feature maps are recalibrated and summed)
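A minimal sketch of how the module is called; it expects a list of three same-shape feature maps (values illustrative):

import torch
from model.MIRNet.SelectiveKernelFeatureFusion import SelectiveKernelFeatureFusion

skff = SelectiveKernelFeatureFusion(in_channels=64, reduction_ratio=8)
streams = [torch.randn(1, 64, 32, 32) for _ in range(3)]
fused = skff(streams)
assert fused.shape == (1, 64, 32, 32)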
model/MIRNet/SpatialAttention.py ADDED
@@ -0,0 +1,24 @@
+ import torch
+ import torch.nn as nn
+
+ from model.MIRNet.ChannelCompression import ChannelCompression
+
+
+ class SpatialAttention(nn.Module):
+     """
+     Reduces the input to 2 channels with the ChannelCompression module and applies a 2D convolution with 1 output channel.
+
+     In: HxWxC
+     Out: HxWxC (the original channels are restored by multiplying the attention map with the original input)
+     """
+
+     def __init__(self):
+         super().__init__()
+         self.channel_compression = ChannelCompression()
+         self.conv = nn.Conv2d(2, 1, kernel_size=5, stride=1, padding=2)
+
+     def forward(self, x):
+         x_compressed = self.channel_compression(x)  # HxWx2
+         x_conv = self.conv(x_compressed)  # HxWx1
+         scaling_factor = torch.sigmoid(x_conv)
+         return x * scaling_factor  # HxWxC
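A shape check (illustrative input):

import torch
from model.MIRNet.SpatialAttention import SpatialAttention

sa = SpatialAttention()
x = torch.randn(1, 64, 32, 32)
assert sa(x).shape == x.shape  # the 1-channel attention map broadcasts over the channels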
model/MIRNet/Upsampling.py ADDED
@@ -0,0 +1,56 @@
+ import torch
+ import torch.nn as nn
+ import numpy as np
+
+
+ class UpsamplingBlock(nn.Module):
+     """
+     Upsamples the input to double the spatial dimensions while halving the channels, through two parallel conv + bilinear-upsampling branches.
+
+     In: HxWxC
+     Out: 2H x 2W x C/2
+     """
+
+     def __init__(self, in_channels, bias=False):
+         super().__init__()
+         # 1x1 conv + PReLU -> 3x3 conv + PReLU -> bilinear upsampling -> 1x1 conv
+         self.branch1 = nn.Sequential(
+             nn.Conv2d(in_channels, in_channels, kernel_size=1, padding=0, bias=bias),
+             nn.PReLU(),
+             nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, bias=bias),
+             nn.PReLU(),
+             nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False),
+             nn.Conv2d(in_channels, in_channels // 2, kernel_size=1, padding=0, bias=bias),
+         )
+         # bilinear upsampling -> 1x1 conv
+         self.branch2 = nn.Sequential(
+             nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False),
+             nn.Conv2d(in_channels, in_channels // 2, kernel_size=1, padding=0, bias=bias),
+         )
+
+     def forward(self, x):
+         return self.branch1(x) + self.branch2(x)  # 2H x 2W x C/2
+
+
+ class UpsamplingModule(nn.Module):
+     """
+     Upsampling module of the network, composed of log2(scaling_factor) UpsamplingBlocks.
+
+     In: HxWxC
+     Out: sH x sW x C/s, where s is the scaling factor
+     """
+
+     def __init__(self, in_channels, scaling_factor, stride=2):
+         # `stride` is kept for interface parity with DownsamplingModule; each block halves the channels.
+         super().__init__()
+         self.scaling_factor = int(np.log2(scaling_factor))
+
+         blocks = []
+         for i in range(self.scaling_factor):
+             blocks.append(UpsamplingBlock(in_channels))
+             in_channels = int(in_channels // 2)
+         self.blocks = nn.Sequential(*blocks)
+
+     def forward(self, x):
+         return self.blocks(x)  # sH x sW x C/s
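A usage sketch mirroring the downsampling example (illustrative values; a scaling factor of 4 stacks two doubling blocks):

import torch
from model.MIRNet.Upsampling import UpsamplingModule

up = UpsamplingModule(in_channels=256, scaling_factor=4)
x = torch.randn(1, 256, 8, 8)
out = up(x)
assert out.shape == (1, 64, 32, 32)  # 4H x 4W, channels / 4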
model/MIRNet/__init__.py ADDED
File without changes
model/MIRNet/model.py ADDED
@@ -0,0 +1,47 @@
+ import torch
+ import torch.nn as nn
+
+ from model.MIRNet.ResidualRecurrentGroup import ResidualRecurrentGroup
+
+
+ class MIRNet(nn.Module):
+     """
+     Low-level features are extracted through convolution and passed to n residual recurrent groups that operate at different resolutions.
+     Their output is added to the input image for restoration.
+
+     Please refer to the documentation of the different blocks of the model in this folder for detailed explanations.
+     """
+
+     def __init__(
+         self,
+         in_channels=3,
+         out_channels=3,
+         num_features=64,
+         kernel_size=3,
+         stride=2,
+         number_msrb=2,
+         number_rrg=3,
+         height=3,
+         width=2,
+         bias=False,
+     ):
+         super().__init__()
+         self.conv_start = nn.Conv2d(
+             in_channels, num_features, kernel_size, padding=1, bias=bias
+         )
+         msrb_blocks = [
+             ResidualRecurrentGroup(
+                 num_features, number_msrb, height, width, stride, bias
+             )
+             for _ in range(number_rrg)
+         ]
+         self.msrb_blocks = nn.Sequential(*msrb_blocks)
+         self.conv_end = nn.Conv2d(
+             num_features, out_channels, kernel_size, padding=1, bias=bias
+         )
+
+     def forward(self, x):
+         output = self.conv_start(x)
+         output = self.msrb_blocks(output)
+         output = self.conv_end(output)
+         return x + output  # restored image, HxWxC
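Finally, an end-to-end sketch (assumes the repository root is on PYTHONPATH; the 128x128 size is illustrative and must be divisible by 2^(height-1)):

import torch
from model.MIRNet.model import MIRNet

net = MIRNet()  # defaults: 3 RRGs of 2 MSRBs each, 64 features, 3 resolution streams
x = torch.randn(1, 3, 128, 128)
with torch.no_grad():
    restored = net(x)
assert restored.shape == x.shape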