Spaces:
Runtime error
Runtime error
""" | |
Copyright (c) 2019-present NAVER Corp. | |
MIT License | |
""" | |
from __future__ import annotations | |
from collections import namedtuple | |
from typing import Iterable, Tuple | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import torchvision | |
from packaging import version | |
from torchvision import models | |
# Container for the five feature maps produced by ``VGG16_BN.forward``, listed
# from the last stage ("fc7") back to the first ("relu2_2").
# The typename is kept identical to the module-level name: pickle locates a
# namedtuple class by looking its typename up in the defining module, so a
# mismatch ("VggOutputs" vs ``VGGOutputs``) makes instances unpicklable.
VGGOutputs = namedtuple(
    "VGGOutputs", ["fc7", "relu5_3", "relu4_3", "relu3_2", "relu2_2"]
)
def init_weights(modules: Iterable[nn.Module]) -> None:
    """Re-initialise conv, batch-norm and linear layers in place.

    * ``nn.Conv2d``: Xavier-uniform weight, zero bias.
    * ``nn.BatchNorm2d``: weight set to 1, bias set to 0.
    * ``nn.Linear``: weight drawn from N(0, 0.01), zero bias.

    Modules of any other type are left untouched. Optional parameters that
    are absent (``bias=False`` layers, ``affine=False`` batch-norm) are
    skipped instead of raising.

    Args:
        modules: Iterable of modules to visit, e.g. ``net.modules()``.
    """
    for m in modules:
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2d):
            # affine=False batch-norm has neither weight nor bias.
            if m.weight is not None:
                nn.init.constant_(m.weight, 1.0)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            # Linear(bias=False) stores bias as None.
            if m.bias is not None:
                nn.init.zeros_(m.bias)
class VGG16_BN(nn.Module):
    """VGG16-BN feature extractor split into five sequential stages.

    Stages ``slice1``..``slice4`` are consecutive index ranges taken from
    torchvision's ``vgg16_bn(...).features``; ``slice5`` is a newly built
    pooling + convolution head. ``forward`` returns the output of every
    stage as a ``VGGOutputs`` tuple.

    Args:
        pretrained: Ask torchvision for pretrained backbone weights. When
            False, stages 1-4 are re-initialised with ``init_weights``.
        freeze: Turn off ``requires_grad`` for every parameter in ``slice1``.
    """

    def __init__(self, pretrained: bool=True, freeze: bool=True):
        super().__init__()

        uses_weights_api = version.parse(torchvision.__version__) >= version.parse(
            "0.13"
        )
        if uses_weights_api:
            weights = models.VGG16_BN_Weights.DEFAULT if pretrained else None
            backbone = models.vgg16_bn(weights=weights).features
        else:  # torchvision.__version__ < 0.13
            # Rewrite the checkpoint URL from https to http before building.
            url_table = models.vgg.model_urls
            url_table["vgg16_bn"] = url_table["vgg16_bn"].replace(
                "https://", "http://"
            )
            backbone = models.vgg16_bn(pretrained=pretrained).features

        # (attribute, first index, one-past-last index) into ``backbone``.
        # Sub-modules keep their original index as name so parameter keys
        # stay the same.
        # NOTE(review): the previous comments labelled these cut points
        # conv2_2 / conv3_3 / conv4_3 / conv5_3 while the outputs are named
        # relu2_2 / relu3_2 / relu4_3 / relu5_3 -- confirm against
        # torchvision's layer order.
        stage_ranges = (
            ("slice1", 0, 12),
            ("slice2", 12, 19),
            ("slice3", 19, 29),
            ("slice4", 29, 39),
        )
        for attr, first, stop in stage_ranges:
            stage = torch.nn.Sequential()
            for idx in range(first, stop):
                stage.add_module(str(idx), backbone[idx])
            setattr(self, attr, stage)

        # Stand-in for fc6/fc7: 3x3 stride-1 max-pool, a 3x3 conv with
        # dilation 6 (512 -> 1024), then a 1x1 conv (1024 -> 1024).
        self.slice5 = torch.nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
            nn.Conv2d(1024, 1024, kernel_size=1),
        )

        # slice5 has no pretrained weights, so it is always initialised;
        # the backbone stages only when nothing was loaded for them.
        if pretrained:
            backbone_stages = ()
        else:
            backbone_stages = (self.slice1, self.slice2, self.slice3, self.slice4)
        for stage in (*backbone_stages, self.slice5):
            init_weights(stage.modules())

        if freeze:
            # Freezes every parameter of the first stage.
            for frozen in self.slice1.parameters():
                frozen.requires_grad = False

    def forward(self, x: torch.Tensor) -> VGGOutputs:
        """Run the five stages in order and return all intermediate outputs.

        Args:
            x: Input tensor fed to ``slice1``.

        Returns:
            ``VGGOutputs`` holding the outputs of slice5, slice4, slice3,
            slice2 and slice1 (in that field order).
        """
        relu2_2 = self.slice1(x)
        relu3_2 = self.slice2(relu2_2)
        relu4_3 = self.slice3(relu3_2)
        relu5_3 = self.slice4(relu4_3)
        fc7 = self.slice5(relu5_3)
        return VGGOutputs(
            fc7=fc7,
            relu5_3=relu5_3,
            relu4_3=relu4_3,
            relu3_2=relu3_2,
            relu2_2=relu2_2,
        )
class DoubleConv(nn.Module):
    """Two conv + batch-norm + ReLU stages applied back to back.

    The first stage is a 1x1 convolution that reduces ``in_ch + mid_ch``
    input channels to ``mid_ch``; the second is a 3x3 convolution
    (padding 1) that maps ``mid_ch`` to ``out_ch``.

    Args:
        in_ch: Channel count of one of the two concatenated inputs.
        mid_ch: Channel count of the other input and of the hidden stage.
        out_ch: Channel count of the result.
    """

    def __init__(self, in_ch: int, mid_ch: int, out_ch: int):
        super().__init__()
        # The caller concatenates two tensors before calling this block.
        joined_ch = in_ch + mid_ch
        layers = [
            nn.Conv2d(joined_ch, mid_ch, kernel_size=1),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply both stages to ``x`` and return the result."""
        return self.conv(x)
class CRAFT(nn.Module):
    """VGG16-BN backbone followed by a four-step upsampling decoder.

    The decoder repeatedly concatenates the running feature map with a
    backbone output, passes it through a ``DoubleConv`` and resizes it to
    the next backbone output. A small conv head then produces a
    two-channel map.

    Args:
        pretrained: Forwarded to ``VGG16_BN``.
        freeze: Forwarded to ``VGG16_BN``.
    """

    def __init__(self, pretrained: bool=False, freeze: bool=False):
        super().__init__()

        # Base network
        self.basenet = VGG16_BN(pretrained, freeze)

        # U network
        self.upconv1 = DoubleConv(1024, 512, 256)
        self.upconv2 = DoubleConv(512, 256, 128)
        self.upconv3 = DoubleConv(256, 128, 64)
        self.upconv4 = DoubleConv(128, 64, 32)

        # Prediction head: 32-channel decoder features -> num_class maps.
        num_class = 2
        head_layers = [
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, num_class, kernel_size=1),
        ]
        self.conv_cls = nn.Sequential(*head_layers)

        # Only the decoder and head are initialised here; the backbone
        # handles its own weights.
        for part in (
            self.upconv1,
            self.upconv2,
            self.upconv3,
            self.upconv4,
            self.conv_cls,
        ):
            init_weights(part.modules())

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Compute the prediction map and the final decoder features.

        Args:
            x: Input tensor passed to the backbone.

        Returns:
            A pair ``(scores, feature)``: ``scores`` is the head output with
            its channel axis moved last via ``permute(0, 2, 3, 1)``, and
            ``feature`` is the output of ``upconv4``.
        """
        # Base network
        fc7, relu5_3, relu4_3, relu3_2, relu2_2 = self.basenet(x)

        # U network: fuse the two deepest maps first ...
        y = self.upconv1(torch.cat([fc7, relu5_3], dim=1))

        # ... then resize to each earlier map, concatenate, and refine.
        decoder_steps = (
            (relu4_3, self.upconv2),
            (relu3_2, self.upconv3),
            (relu2_2, self.upconv4),
        )
        for skip, refine in decoder_steps:
            y = F.interpolate(
                y, size=skip.size()[2:], mode="bilinear", align_corners=False
            )
            y = refine(torch.cat([y, skip], dim=1))

        feature = y
        scores = self.conv_cls(feature)
        return scores.permute(0, 2, 3, 1), feature