from torch import nn import torch from torchvision import models class BGMotionPredictor(nn.Module): """ Module for background estimation, return single transformation, parametrized as 3x3 matrix. The third row is [0 0 1] """ def __init__(self): super(BGMotionPredictor, self).__init__() self.bg_encoder = models.resnet18(pretrained=False) self.bg_encoder.conv1 = nn.Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) num_features = self.bg_encoder.fc.in_features self.bg_encoder.fc = nn.Linear(num_features, 6) self.bg_encoder.fc.weight.data.zero_() self.bg_encoder.fc.bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)) def forward(self, source_image, driving_image): bs = source_image.shape[0] out = torch.eye(3).unsqueeze(0).repeat(bs, 1, 1).type(source_image.type()) prediction = self.bg_encoder(torch.cat([source_image, driving_image], dim=1)) out[:, :2, :] = prediction.view(bs, 2, 3) return out