# WiggleGAN/architectures.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import utils
class generator(nn.Module):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
# Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S
def __init__(self, input_dim=4, output_dim=1, input_shape=3, class_num=10, height=10, width=10):
super(generator, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
# print ("self.output_dim", self.output_dim)
self.class_num = class_num
self.input_shape = list(input_shape)
self.toPreDecov = 1024
self.toDecov = 1
self.height = height
self.width = width
        self.input_shape[1] = self.input_dim  # updated later for color channels
        # print("input shape gen", self.input_shape)
self.conv1 = nn.Sequential(
            nn.Conv2d(self.input_dim, 10, 4, 2, 1),  # arguably this stride of 2 should be 1
nn.Conv2d(10, 4, 4, 2, 1),
nn.BatchNorm2d(4),
nn.LeakyReLU(0.2),
nn.Conv2d(4, 3, 4, 2, 1),
nn.BatchNorm2d(3),
nn.LeakyReLU(0.2),
)
self.n_size = self._get_conv_output(self.input_shape)
# print ("self.n_size",self.n_size)
self.cubic = (self.n_size // 8192)
# print("self.cubic: ",self.cubic)
self.fc1 = nn.Sequential(
nn.Linear(self.n_size, self.n_size),
nn.BatchNorm1d(self.n_size),
nn.LeakyReLU(0.2),
)
self.preDeconv = nn.Sequential(
            # NOTE: deliberately tiny network so that everything runs despite RAM/memory limits
# nn.Linear(self.toPreDecov + self.zdim + self.class_num, 1024),
# nn.BatchNorm1d(1024),
# nn.LeakyReLU(0.2),
# nn.Linear(1024, self.toDecov * self.height // 64 * self.width// 64),
# nn.BatchNorm1d(self.toDecov * self.height // 64 * self.width// 64),
# nn.LeakyReLU(0.2),
# nn.Linear(self.toDecov * self.height // 64 * self.width // 64 , self.toDecov * self.height // 32 * self.width // 32),
# nn.BatchNorm1d(self.toDecov * self.height // 32 * self.width // 32),
# nn.LeakyReLU(0.2),
# nn.Linear(self.toDecov * self.height // 32 * self.width // 32,
# 1 * self.height * self.width),
# nn.BatchNorm1d(1 * self.height * self.width),
# nn.LeakyReLU(0.2),
nn.Linear(self.n_size + self.class_num, 400),
nn.BatchNorm1d(400),
nn.LeakyReLU(0.2),
nn.Linear(400, 800),
nn.BatchNorm1d(800),
nn.LeakyReLU(0.2),
nn.Linear(800, self.output_dim * self.height * self.width),
nn.BatchNorm1d(self.output_dim * self.height * self.width),
            nn.Tanh(),  # changed: treat this as the end of the network
)
"""
self.deconv = nn.Sequential(
nn.ConvTranspose2d(self.toDecov, 2, 4, 2, 0),
nn.BatchNorm2d(2),
nn.ReLU(),
nn.ConvTranspose2d(2, self.output_dim, 4, 2, 1),
            nn.Tanh(),  # Tanh is recommended as the generator's last activation: outputs in [-1, 1]
        )
)
"""
utils.initialize_weights(self)
def _get_conv_output(self, shape):
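        # NOTE: `shape` includes the batch dimension; the `// 4` below apparently
        # assumes a batch of 4 baked into shape[0] (see the demo sketch below).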
bs = 1
input = Variable(torch.rand(bs, *shape))
# print("inShape:",input.shape)
output_feat = self.conv1(input.squeeze())
# print ("output_feat",output_feat.shape)
n_size = output_feat.data.view(bs, -1).size(1)
# print ("n",n_size // 4)
return n_size // 4
    def forward(self, clase, im):
        ## Plan:
        # Concatenate the image with the depth map (here only the image is used)
        # print("H", self.height, "W", self.width)
        # imDep = imDep[:, None, :, :]
        # im = im[:, None, :, :]
        x = im
        # Run the conv stack on that input
        x = self.conv1(x)
        x = x.view(x.size(0), -1)
        # print("x:", x.shape)
        x = self.fc1(x)
        # print("x:", x.shape)
        # concatenate the noise and the class
        y = clase
        # print("cat of input and class", y.shape)  # could split this: join the class first, then the noise
        # Linear network joining the conv features with the previous cat
        x = torch.cat([x, y], 1)
        x = self.preDeconv(x)
        # print("before deconv", x.shape)
        x = x.view(-1, self.output_dim, self.height, self.width)
        # print("after view:", x.shape)
        # Network that produces the final image
        # x = self.deconv(x)
        # print("generator output:", x.shape)
        return x
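# Hypothetical usage sketch (not part of the original file). The shapes are
# assumptions: the `// 4` in _get_conv_output only lines up with fc1 when the
# batch size baked into input_shape[0] and the real batch are both 4.
def _demo_generator():
    G = generator(input_dim=3, output_dim=3, input_shape=[4, 3, 64, 64],
                  class_num=2, height=64, width=64)
    im = torch.rand(4, 3, 64, 64)                    # batch of 4 RGB images
    clase = torch.eye(2)[torch.randint(0, 2, (4,))]  # one-hot class labels
    return G(clase, im)                              # -> (4, 3, 64, 64), values in [-1, 1]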
class discriminator(nn.Module):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
# Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S
def __init__(self, input_dim=1, output_dim=1, input_shape=2, class_num=10):
super(discriminator, self).__init__()
        self.input_dim = input_dim * 2  # doubled: the origin image is given as well
self.output_dim = output_dim
self.input_shape = list(input_shape)
self.class_num = class_num
        self.input_shape[1] = self.input_dim  # updated later for color channels
        # print(self.input_shape)
"""""
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel - lo que se agarra para la conv
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): Zero-padding added to both sides of the input.
"""""
"""
        nn.Conv2d(self.input_dim, 64, 4, 2, 1),  # arguably this stride of 2 should be 1
nn.LeakyReLU(0.2),
nn.Conv2d(64, 32, 4, 2, 1),
nn.LeakyReLU(0.2),
nn.MaxPool2d(4, stride=2),
nn.Conv2d(32, 32, 4, 2, 1),
nn.LeakyReLU(0.2),
nn.MaxPool2d(4, stride=2),
nn.Conv2d(32, 20, 4, 2, 1),
nn.BatchNorm2d(20),
nn.LeakyReLU(0.2),
"""
self.conv = nn.Sequential(
            nn.Conv2d(self.input_dim, 4, 4, 2, 1),  # arguably this stride of 2 should be 1
nn.LeakyReLU(0.2),
nn.Conv2d(4, 8, 4, 2, 1),
nn.BatchNorm2d(8),
nn.LeakyReLU(0.2),
nn.Conv2d(8, 16, 4, 2, 1),
nn.BatchNorm2d(16),
)
self.n_size = self._get_conv_output(self.input_shape)
self.fc1 = nn.Sequential(
nn.Linear(self.n_size // 4, 1024),
nn.BatchNorm1d(1024),
nn.LeakyReLU(0.2),
nn.Linear(1024, 512),
nn.BatchNorm1d(512),
nn.LeakyReLU(0.2),
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.LeakyReLU(0.2),
nn.Linear(256, 128),
nn.BatchNorm1d(128),
nn.LeakyReLU(0.2),
nn.Linear(128, 64),
nn.BatchNorm1d(64),
nn.LeakyReLU(0.2),
)
self.dc = nn.Sequential(
nn.Linear(64, self.output_dim),
nn.Sigmoid(),
)
self.cl = nn.Sequential(
nn.Linear(64, self.class_num),
nn.Sigmoid(),
)
utils.initialize_weights(self)
# generate input sample and forward to get shape
def _get_conv_output(self, shape):
bs = 1
input = Variable(torch.rand(bs, *shape))
output_feat = self.conv(input.squeeze())
n_size = output_feat.data.view(bs, -1).size(1)
return n_size
    def forward(self, input, origen):
        # this will change once color is handled
        # if (len(input.shape) <= 3):
        #     input = input[:, None, :, :]
        #     im = im[:, None, :, :]
        # print("D in shape", input.shape)
        # print(input.shape)
        # print("this is X:", x)
        # print("now shape", x.shape)
x = input
x = x.type(torch.FloatTensor)
x = x.to(device='cuda:0')
x = torch.cat((x, origen), 1)
x = self.conv(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
d = self.dc(x)
c = self.cl(x)
return d, c
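# Hypothetical usage sketch (assumed shapes, not part of the original file).
# forward() hard-codes `cuda:0`, so this only runs on a CUDA machine; the
# `// 4` feeding fc1 assumes input_shape[0] == 4.
def _demo_discriminator():
    if not torch.cuda.is_available():
        return None
    D = discriminator(input_dim=3, output_dim=1,
                      input_shape=[4, 3, 64, 64], class_num=2).to('cuda:0')
    fake = torch.rand(4, 3, 64, 64)                     # moved to cuda inside forward
    origin = torch.rand(4, 3, 64, 64, device='cuda:0')  # must already be on cuda
    return D(fake, origin)  # d: (4, 1) real/fake, c: (4, 2) class scores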
#######################################################################################################################
class UnetConvBlock(nn.Module):
'''
Convolutional block of a U-Net:
Conv2d - Batch normalization - LeakyReLU
Conv2D - Batch normalization - LeakyReLU
Basic Dropout (optional)
'''
def __init__(self, in_size, out_size, dropout=0.0, stride=1, batch_norm = True):
'''
Constructor of the convolutional block
'''
super(UnetConvBlock, self).__init__()
# Convolutional layer with IN_SIZE --> OUT_SIZE
        conv1 = nn.Conv2d(in_channels=in_size, out_channels=out_size, kernel_size=3, stride=1,
                          padding=1)  # could apply stride 2 here
# Activation unit
activ_unit1 = nn.LeakyReLU(0.2)
# Add batch normalization if necessary
if batch_norm:
self.conv1 = nn.Sequential(conv1, nn.BatchNorm2d(out_size), activ_unit1)
else:
self.conv1 = nn.Sequential(conv1, activ_unit1)
# Convolutional layer with OUT_SIZE --> OUT_SIZE
        conv2 = nn.Conv2d(in_channels=out_size, out_channels=out_size, kernel_size=3, stride=stride,
                          padding=1)  # could apply stride 2 here
# Activation unit
activ_unit2 = nn.LeakyReLU(0.2)
# Add batch normalization
if batch_norm:
self.conv2 = nn.Sequential(conv2, nn.BatchNorm2d(out_size), activ_unit2)
else:
self.conv2 = nn.Sequential(conv2, activ_unit2)
# Dropout
if dropout > 0.0:
self.drop = nn.Dropout(dropout)
else:
self.drop = None
def forward(self, inputs):
'''
Do a forward pass
'''
outputs = self.conv1(inputs)
outputs = self.conv2(outputs)
if not (self.drop is None):
outputs = self.drop(outputs)
return outputs
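# Minimal sketch (assumed shapes): with stride=2 the second conv of the block
# halves the spatial resolution while conv1 changes the channel count.
def _demo_unet_conv_block():
    block = UnetConvBlock(in_size=3, out_size=8, stride=2)
    return block(torch.rand(4, 3, 64, 64))  # -> (4, 8, 32, 32)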
class UnetDeSingleConvBlock(nn.Module):
    '''
    Single convolutional block (decoder side of the U-Net):
    Conv2d - Batch normalization - LeakyReLU
    Basic Dropout (optional)
    '''
def __init__(self, in_size, out_size, dropout=0.0, stride=1, padding=1, batch_norm = True ):
'''
Constructor of the convolutional block
'''
super(UnetDeSingleConvBlock, self).__init__()
# Convolutional layer with IN_SIZE --> OUT_SIZE
conv1 = nn.Conv2d(in_channels=in_size, out_channels=out_size, kernel_size=3, stride=stride, padding=1)
# Activation unit
activ_unit1 = nn.LeakyReLU(0.2)
# Add batch normalization if necessary
if batch_norm:
self.conv1 = nn.Sequential(conv1, nn.BatchNorm2d(out_size), activ_unit1)
else:
self.conv1 = nn.Sequential(conv1, activ_unit1)
# Dropout
if dropout > 0.0:
self.drop = nn.Dropout(dropout)
else:
self.drop = None
def forward(self, inputs):
'''
Do a forward pass
'''
outputs = self.conv1(inputs)
if not (self.drop is None):
outputs = self.drop(outputs)
return outputs
class UnetDeconvBlock(nn.Module):
'''
DeConvolutional block of a U-Net:
UnetDeSingleConvBlock (skip_connection)
Cat last_layer + skip_connection
UnetDeSingleConvBlock ( Cat )
Basic Dropout (optional)
'''
def __init__(self, in_size_layer, in_size_skip_con, out_size, dropout=0.0):
'''
Constructor of the convolutional block
'''
super(UnetDeconvBlock, self).__init__()
self.conv1 = UnetDeSingleConvBlock(in_size_skip_con, in_size_skip_con, dropout)
self.conv2 = UnetDeSingleConvBlock(in_size_layer + in_size_skip_con, out_size, dropout)
# Dropout
if dropout > 0.0:
self.drop = nn.Dropout(dropout)
else:
self.drop = None
def forward(self, inputs_layer, inputs_skip):
'''
Do a forward pass
'''
outputs = self.conv1(inputs_skip)
#outputs = changeDim(outputs, inputs_layer)
outputs = torch.cat((inputs_layer, outputs), 1)
outputs = self.conv2(outputs)
return outputs
class UpBlock(nn.Module):
"""Upscaling then double conv"""
def __init__(self, in_size_layer, in_size_skip_con, out_size, bilinear=True):
super(UpBlock, self).__init__()
# if bilinear, use the normal convolutions to reduce the number of channels
if bilinear:
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
else:
self.up = nn.ConvTranspose2d(in_size_layer // 2, in_size_layer // 2, kernel_size=2, stride=2)
self.conv = UnetDeconvBlock(in_size_layer, in_size_skip_con, out_size)
def forward(self, inputs_layer, inputs_skip):
inputs_layer = self.up(inputs_layer)
# input is CHW
#inputs_layer = changeDim(inputs_layer, inputs_skip)
return self.conv(inputs_layer, inputs_skip)
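# Minimal sketch (assumed shapes): UpBlock doubles the resolution of the
# coarse features, then fuses them with the encoder skip connection.
def _demo_up_block():
    up = UpBlock(in_size_layer=16, in_size_skip_con=16, out_size=8)
    low = torch.rand(4, 16, 16, 16)   # coarse decoder features
    skip = torch.rand(4, 16, 32, 32)  # matching encoder skip connection
    return up(low, skip)              # -> (4, 8, 32, 32)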
class lastBlock(nn.Module):
    '''
    Final block of the U-Net:
    Conv2d - Batch normalization - Tanh
    Basic Dropout (optional)
    '''
def __init__(self, in_size, out_size, dropout=0.0):
'''
Constructor of the convolutional block
'''
super(lastBlock, self).__init__()
# Convolutional layer with IN_SIZE --> OUT_SIZE
conv1 = nn.Conv2d(in_channels=in_size, out_channels=out_size, kernel_size=3, stride=1, padding=1)
# Activation unit
activ_unit1 = nn.Tanh()
# Add batch normalization if necessary
self.conv1 = nn.Sequential(conv1, nn.BatchNorm2d(out_size), activ_unit1)
# Dropout
if dropout > 0.0:
self.drop = nn.Dropout(dropout)
else:
self.drop = None
def forward(self, inputs):
'''
Do a forward pass
'''
outputs = self.conv1(inputs)
if not (self.drop is None):
outputs = self.drop(outputs)
return outputs
################
class generator_UNet(nn.Module):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
# Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S
def __init__(self, input_dim=4, output_dim=1, input_shape=3, class_num=2, expand_net=3):
super(generator_UNet, self).__init__()
        self.input_dim = input_dim + 1  # +1 for the class layer
self.output_dim = output_dim
# print ("self.output_dim", self.output_dim)
self.class_num = class_num
self.input_shape = list(input_shape)
        self.input_shape[1] = self.input_dim  # updated later for color channels
self.expandNet = expand_net # 5
# Downsampling
self.conv1 = UnetConvBlock(self.input_dim, pow(2, self.expandNet), stride=1)
# self.maxpool1 = nn.MaxPool2d(kernel_size=2)
self.conv2 = UnetConvBlock(pow(2, self.expandNet), pow(2, self.expandNet + 1), stride=2)
# self.maxpool2 = nn.MaxPool2d(kernel_size=2)
self.conv3 = UnetConvBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 2), stride=2)
# self.maxpool3 = nn.MaxPool2d(kernel_size=2)
# Middle ground
self.conv4 = UnetDeSingleConvBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), stride=2)
# UpSampling
self.up1 = UpBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), pow(2, self.expandNet + 1),
bilinear=True)
self.up2 = UpBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 1), pow(2, self.expandNet),
bilinear=True)
self.up3 = UpBlock(pow(2, self.expandNet), pow(2, self.expandNet), 8, bilinear=True)
self.last = lastBlock(8, self.output_dim)
utils.initialize_weights(self)
def _get_conv_output(self, shape):
bs = 1
input = Variable(torch.rand(bs, *shape))
# print("inShape:",input.shape)
        output_feat = self.conv1(input.squeeze())  ## FIXME: change this
# print ("output_feat",output_feat.shape)
n_size = output_feat.data.view(bs, -1).size(1)
# print ("n",n_size // 4)
return n_size // 4
    def forward(self, clase, im):
        x = im
        ## extract the shift class
        cl = (clase == 1)
        cl = cl[:, 1]
        cl = cl.type(torch.FloatTensor)
        max_idx = clase.size()[1] - 1
        cl = cl / float(max_idx)
        ## build the shift-class layer image
        tam = im.size()
        layerClase = torch.ones([tam[0], tam[2], tam[3]], dtype=torch.float32, device="cuda:0")
        for idx, item in enumerate(layerClase):
            layerClase[idx] = item * cl[idx]
        layerClase = layerClase.unsqueeze(0)
        layerClase = layerClase.transpose(1, 0)
        ## concatenate the class layer with the image RGB
        x = torch.cat((x, layerClase), 1)
x1 = self.conv1(x)
x2 = self.conv2(x1) # self.maxpool1(x1))
x3 = self.conv3(x2) # self.maxpool2(x2))
x4 = self.conv4(x3) # self.maxpool3(x3))
x = self.up1(x4, x3)
x = self.up2(x, x2)
x = self.up3(x, x1)
x = changeDim(x, im)
x = self.last(x)
return x
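# Hypothetical usage sketch (assumed shapes, not part of the original file).
# forward() allocates the class layer on `cuda:0`, so both the model and the
# inputs must live on the GPU.
def _demo_generator_unet():
    if not torch.cuda.is_available():
        return None
    G = generator_UNet(input_dim=3, output_dim=3,
                       input_shape=[4, 3, 64, 64], class_num=2).cuda()
    im = torch.rand(4, 3, 64, 64, device='cuda:0')
    clase = torch.eye(2)[torch.randint(0, 2, (4,))].to('cuda:0')  # one-hot labels
    return G(clase, im)  # -> (4, 3, 64, 64), Tanh output in [-1, 1]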
class discriminator_UNet(nn.Module):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
# Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S
def __init__(self, input_dim=1, output_dim=1, input_shape=[2, 2], class_num=10, expand_net = 2):
super(discriminator_UNet, self).__init__()
        self.input_dim = input_dim * 2  # doubled: the origin image is given as well
self.output_dim = output_dim
self.input_shape = list(input_shape)
self.class_num = class_num
        self.input_shape[1] = self.input_dim  # updated later for color channels
self.expandNet = expand_net # 4
# Downsampling
self.conv1 = UnetConvBlock(self.input_dim, pow(2, self.expandNet), stride=1, dropout=0.3)
self.conv2 = UnetConvBlock(pow(2, self.expandNet), pow(2, self.expandNet + 1), stride=2, dropout=0.5)
self.conv3 = UnetConvBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 2), stride=2, dropout=0.4)
# Middle ground
self.conv4 = UnetDeSingleConvBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), stride=2,
dropout=0.3)
self.n_size = self._get_conv_output(self.input_shape)
self.fc1 = nn.Sequential(
nn.Linear(self.n_size // 4, 1024),
nn.BatchNorm1d(1024),
nn.LeakyReLU(0.2),
)
self.dc = nn.Sequential(
nn.Linear(1024, self.output_dim),
# nn.Sigmoid(),
)
self.cl = nn.Sequential(
nn.Linear(1024, self.class_num),
            nn.Softmax(dim=1),  # so the class outputs sum to 1
)
utils.initialize_weights(self)
# generate input sample and forward to get shape
def _get_conv_output(self, shape):
bs = 1
input = Variable(torch.rand(bs, *shape))
x = input.squeeze()
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
n_size = x.data.view(bs, -1).size(1)
return n_size
    def forward(self, input, origen):
        # this will change once color is handled
        # if (len(input.shape) <= 3):
        #     input = input[:, None, :, :]
        #     im = im[:, None, :, :]
        # print("D in shape", input.shape)
        # print(input.shape)
        # print("this is X:", x)
        # print("now shape", x.shape)
        x = input
x = x.type(torch.FloatTensor)
x = x.to(device='cuda:0')
x = torch.cat((x, origen), 1)
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
d = self.dc(x)
c = self.cl(x)
return d, c
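# Hypothetical usage sketch (assumed shapes); CUDA is required because
# forward() hard-codes `cuda:0`. The `// 4` feeding fc1 assumes
# input_shape[0] == 4.
def _demo_discriminator_unet():
    if not torch.cuda.is_available():
        return None
    D = discriminator_UNet(input_dim=3, output_dim=1,
                           input_shape=[4, 3, 64, 64], class_num=2).cuda()
    fake = torch.rand(4, 3, 64, 64)
    origin = torch.rand(4, 3, 64, 64, device='cuda:0')
    return D(fake, origin)  # d: (4, 1) raw score, c: (4, 2) softmax classes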
def changeDim(x, y):
    ''' Zero-pad x so its spatial size matches y's '''
    diffY = y.size()[2] - x.size()[2]
    diffX = y.size()[3] - x.size()[3]
    x = F.pad(x, [diffX // 2, diffX - diffX // 2,
                  diffY // 2, diffY - diffY // 2])
    return x
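# Minimal sketch: zero-pad a 30x30 feature map up to a 32x33 reference.
def _demo_change_dim():
    x = torch.rand(1, 3, 30, 30)
    y = torch.rand(1, 3, 32, 33)
    return changeDim(x, y)  # -> (1, 3, 32, 33)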
######################################## ACGAN ###########################################################
class depth_generator(nn.Module):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
# Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S
def __init__(self, input_dim=4, output_dim=1, input_shape=3, class_num=10, zdim=1, height=10, width=10):
super(depth_generator, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
self.class_num = class_num
# print ("self.output_dim", self.output_dim)
self.input_shape = list(input_shape)
self.zdim = zdim
self.toPreDecov = 1024
self.toDecov = 1
self.height = height
self.width = width
        self.input_shape[1] = self.input_dim  # updated later for color channels
        # print("input shape gen", self.input_shape)
self.conv1 = nn.Sequential(
            # NOTE: deliberately tiny network so that everything runs despite RAM/memory limits
            nn.Conv2d(self.input_dim, 2, 4, 2, 1),  # arguably this stride of 2 should be 1
nn.Conv2d(2, 1, 4, 2, 1),
nn.BatchNorm2d(1),
nn.LeakyReLU(0.2),
)
self.n_size = self._get_conv_output(self.input_shape)
# print ("self.n_size",self.n_size)
self.cubic = (self.n_size // 8192)
# print("self.cubic: ",self.cubic)
self.fc1 = nn.Sequential(
nn.Linear(self.n_size, self.n_size),
nn.BatchNorm1d(self.n_size),
nn.LeakyReLU(0.2),
)
self.preDeconv = nn.Sequential(
            # NOTE: deliberately tiny network so that everything runs despite RAM/memory limits
# nn.Linear(self.toPreDecov + self.zdim + self.class_num, 1024),
# nn.BatchNorm1d(1024),
# nn.LeakyReLU(0.2),
# nn.Linear(1024, self.toDecov * self.height // 64 * self.width// 64),
# nn.BatchNorm1d(self.toDecov * self.height // 64 * self.width// 64),
# nn.LeakyReLU(0.2),
# nn.Linear(self.toDecov * self.height // 64 * self.width // 64 , self.toDecov * self.height // 32 * self.width // 32),
# nn.BatchNorm1d(self.toDecov * self.height // 32 * self.width // 32),
# nn.LeakyReLU(0.2),
# nn.Linear(self.toDecov * self.height // 32 * self.width // 32,
# 1 * self.height * self.width),
# nn.BatchNorm1d(1 * self.height * self.width),
# nn.LeakyReLU(0.2),
nn.Linear(self.n_size + self.zdim + self.class_num, 50),
nn.BatchNorm1d(50),
nn.LeakyReLU(0.2),
nn.Linear(50, 200),
nn.BatchNorm1d(200),
nn.LeakyReLU(0.2),
nn.Linear(200, self.output_dim * self.height * self.width),
nn.BatchNorm1d(self.output_dim * self.height * self.width),
            nn.Tanh(),  # changed: treat this as the end of the network
)
"""
self.deconv = nn.Sequential(
nn.ConvTranspose2d(self.toDecov, 2, 4, 2, 0),
nn.BatchNorm2d(2),
nn.ReLU(),
nn.ConvTranspose2d(2, self.output_dim, 4, 2, 1),
            nn.Tanh(),  # Tanh is recommended as the generator's last activation: outputs in [-1, 1]
        )
)
"""
utils.initialize_weights(self)
def _get_conv_output(self, shape):
bs = 1
input = Variable(torch.rand(bs, *shape))
# print("inShape:",input.shape)
output_feat = self.conv1(input.squeeze())
# print ("output_feat",output_feat.shape)
n_size = output_feat.data.view(bs, -1).size(1)
# print ("n",n_size // 4)
return n_size // 4
    def forward(self, input, clase, im, imDep):
        ## Plan:
        # Concatenate the image with the depth map
        # print("H", self.height, "W", self.width)
        # imDep = imDep[:, None, :, :]
        # im = im[:, None, :, :]
        # print("imdep", imDep.shape)
        # print("im", im.shape)
        x = torch.cat([im, imDep], 1)
        # Run the conv stack on that cat
        x = self.conv1(x)
        x = x.view(x.size(0), -1)
        # print("x:", x.shape)
        x = self.fc1(x)
        # print("x:", x.shape)
        # concatenate the noise and the class
        y = torch.cat([input, clase], 1)
        # print("cat of input and class", y.shape)  # could split this: join the class first, then the noise
        # Linear network joining the conv features with the previous cat
        x = torch.cat([x, y], 1)
        x = self.preDeconv(x)
        # print("before deconv", x.shape)
        x = x.view(-1, self.output_dim, self.height, self.width)
        # print("after view:", x.shape)
        # Network that produces the final image
        # x = self.deconv(x)
        # print("generator output:", x.shape)
        return x
class depth_discriminator(nn.Module):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
# Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S
def __init__(self, input_dim=1, output_dim=1, input_shape=2, class_num=10):
super(depth_discriminator, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
self.input_shape = list(input_shape)
self.class_num = class_num
        self.input_shape[1] = self.input_dim  # updated later for color channels
        # print(self.input_shape)
"""""
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the convolution
kernel_size (int or tuple): Size of the convolving kernel - lo que se agarra para la conv
stride (int or tuple, optional): Stride of the convolution. Default: 1
padding (int or tuple, optional): Zero-padding added to both sides of the input.
"""""
"""
        nn.Conv2d(self.input_dim, 64, 4, 2, 1),  # arguably this stride of 2 should be 1
nn.LeakyReLU(0.2),
nn.Conv2d(64, 32, 4, 2, 1),
nn.LeakyReLU(0.2),
nn.MaxPool2d(4, stride=2),
nn.Conv2d(32, 32, 4, 2, 1),
nn.LeakyReLU(0.2),
nn.MaxPool2d(4, stride=2),
nn.Conv2d(32, 20, 4, 2, 1),
nn.BatchNorm2d(20),
nn.LeakyReLU(0.2),
"""
self.conv = nn.Sequential(
            nn.Conv2d(self.input_dim, 4, 4, 2, 1),  # arguably this stride of 2 should be 1
nn.LeakyReLU(0.2),
nn.Conv2d(4, 8, 4, 2, 1),
nn.BatchNorm2d(8),
nn.LeakyReLU(0.2),
nn.Conv2d(8, 16, 4, 2, 1),
nn.BatchNorm2d(16),
)
self.n_size = self._get_conv_output(self.input_shape)
self.fc1 = nn.Sequential(
nn.Linear(self.n_size // 4, 1024),
nn.BatchNorm1d(1024),
nn.LeakyReLU(0.2),
nn.Linear(1024, 512),
nn.BatchNorm1d(512),
nn.LeakyReLU(0.2),
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.LeakyReLU(0.2),
nn.Linear(256, 128),
nn.BatchNorm1d(128),
nn.LeakyReLU(0.2),
nn.Linear(128, 64),
nn.BatchNorm1d(64),
nn.LeakyReLU(0.2),
)
self.dc = nn.Sequential(
nn.Linear(64, self.output_dim),
nn.Sigmoid(),
)
self.cl = nn.Sequential(
nn.Linear(64, self.class_num),
nn.Sigmoid(),
)
utils.initialize_weights(self)
# generate input sample and forward to get shape
def _get_conv_output(self, shape):
bs = 1
input = Variable(torch.rand(bs, *shape))
output_feat = self.conv(input.squeeze())
n_size = output_feat.data.view(bs, -1).size(1)
return n_size
    def forward(self, input, im):
        # this will change once color is handled
        # if (len(input.shape) <= 3):
        #     input = input[:, None, :, :]
        #     im = im[:, None, :, :]
        # print("D in shape", input.shape)
        # print("D im shape", im.shape)
        x = torch.cat([input, im], 1)
        # print(input.shape)
        # print("this is X:", x)
        # print("now shape", x.shape)
        x = x.type(torch.FloatTensor)
        x = x.to(device='cuda:0')
x = self.conv(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
d = self.dc(x)
c = self.cl(x)
return d, c
class depth_generator_UNet(nn.Module):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
# Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S
def __init__(self, input_dim=4, output_dim=1, class_num=10, expand_net=3, depth=True):
super(depth_generator_UNet, self).__init__()
if depth:
self.input_dim = input_dim + 1
else:
self.input_dim = input_dim
self.output_dim = output_dim
self.class_num = class_num
# print ("self.output_dim", self.output_dim)
self.expandNet = expand_net # 5
self.depth = depth
# Downsampling
self.conv1 = UnetConvBlock(self.input_dim, pow(2, self.expandNet))
# self.maxpool1 = nn.MaxPool2d(kernel_size=2)
self.conv2 = UnetConvBlock(pow(2, self.expandNet), pow(2, self.expandNet + 1), stride=2)
# self.maxpool2 = nn.MaxPool2d(kernel_size=2)
self.conv3 = UnetConvBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 2), stride=2)
# self.maxpool3 = nn.MaxPool2d(kernel_size=2)
# Middle ground
self.conv4 = UnetDeSingleConvBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), stride=2)
# UpSampling
self.up1 = UpBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), pow(2, self.expandNet + 1))
self.up2 = UpBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 1), pow(2, self.expandNet))
self.up3 = UpBlock(pow(2, self.expandNet), pow(2, self.expandNet), 8)
self.last = lastBlock(8, self.output_dim)
if depth:
self.upDep1 = UpBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), pow(2, self.expandNet + 1))
self.upDep2 = UpBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 1), pow(2, self.expandNet))
self.upDep3 = UpBlock(pow(2, self.expandNet), pow(2, self.expandNet), 8)
self.lastDep = lastBlock(8, 1)
utils.initialize_weights(self)
    def forward(self, clase, im, imDep):
        ## Do something with the noise z?
        # print(im.shape)
        # print(z.shape)
        # print(z)
        # imz = torch.repeat_interleave(z, repeats=torch.tensor([2, 2]), dim=1)
        # print(imz.shape)
        # print(imz)
if self.depth:
x = torch.cat([im, imDep], 1)
x = torch.cat((x, clase), 1)
else:
x = torch.cat((im, clase), 1)
        ## concatenate the class layer with the image RGB
x1 = self.conv1(x)
x2 = self.conv2(x1) # self.maxpool1(x1))
x3 = self.conv3(x2) # self.maxpool2(x2))
x4 = self.conv4(x3) # self.maxpool3(x3))
x = self.up1(x4, x3)
x = self.up2(x, x2)
x = self.up3(x, x1)
        # x = changeDim(x, im)
        x = self.last(x)
        # x = x[:, :3, :, :]  # theoretical change
if self.depth:
dep = self.upDep1(x4, x3)
dep = self.upDep2(dep, x2)
dep = self.upDep3(dep, x1)
# x = changeDim(x, im)
dep = self.lastDep(dep)
return x, dep
        else:
            return x, imDep
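# Hypothetical usage sketch (assumed shapes, not part of the original file).
# `clase` is a one-channel spatial layer here: channels are RGB (3) + class
# layer (1) = input_dim, and depth=True adds the depth map (+1).
def _demo_depth_generator_unet():
    G = depth_generator_UNet(input_dim=4, output_dim=3, class_num=2,
                             expand_net=3, depth=True)
    im = torch.rand(4, 3, 64, 64)            # RGB view
    imDep = torch.rand(4, 1, 64, 64)         # depth map
    clase = torch.full((4, 1, 64, 64), 0.5)  # shift-class layer
    return G(clase, im, imDep)  # -> (4, 3, 64, 64) image and (4, 1, 64, 64) depth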
class depth_discriminator_UNet(nn.Module):
def __init__(self, input_dim=1, output_dim=1, input_shape=[8, 7, 128, 128], class_num=2, expand_net=2):
super(depth_discriminator_UNet, self).__init__()
        self.input_dim = input_dim * 2 + 1  # the origin image and the depth map are given as well
        # discriminator_UNet.__init__(self, input_dim=self.input_dim, output_dim=output_dim, input_shape=input_shape,
        #                             class_num=class_num, expand_net=expand_net)
        self.output_dim = output_dim
        self.input_shape = list(input_shape)
        self.class_num = class_num
        self.expandNet = expand_net
self.conv1 = UnetConvBlock(self.input_dim, pow(2, self.expandNet), stride=1, dropout=0.3)
self.conv2 = UnetConvBlock(pow(2, self.expandNet), pow(2, self.expandNet + 1), stride=2, dropout=0.2)
self.conv3 = UnetConvBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 2), stride=2, dropout=0.2)
self.conv4 = UnetDeSingleConvBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), stride=2,
dropout=0.3)
self.input_shape[1] = self.input_dim
self.n_size = self._get_conv_output(self.input_shape)
self.fc1 = nn.Sequential(
nn.Linear(self.n_size, 1024),
)
self.BnLr = nn.Sequential(
nn.BatchNorm1d(1024),
nn.LeakyReLU(0.2),
)
self.dc = nn.Sequential(
nn.Linear(1024, self.output_dim),
#nn.Sigmoid(),
)
self.cl = nn.Sequential(
nn.Linear(1024, self.class_num),
            # nn.Softmax(dim=1),  # so the class outputs sum to 1
)
utils.initialize_weights(self)
def _get_conv_output(self, shape):
bs = 1
input = Variable(torch.rand(bs, *shape))
x = input.squeeze()
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = x.view(x.size(0), -1)
return x.shape[1]
    def forward(self, input, origen, dep):
        # this will change once color is handled
        # if (len(input.shape) <= 3):
        #     input = input[:, None, :, :]
        #     im = im[:, None, :, :]
        # print("D in shape", input.shape)
        # print(input.shape)
        # print("this is X:", x)
        # print("now shape", x.shape)
        x = input
x = torch.cat((x, origen), 1)
x = torch.cat((x, dep), 1)
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = x.view(x.size(0), -1)
features = self.fc1(x)
x = self.BnLr(features)
d = self.dc(x)
c = self.cl(x)
return d, c, features
class depth_discriminator_noclass_UNet(nn.Module):
def __init__(self, input_dim=1, output_dim=1, input_shape=[8, 7, 128, 128], class_num=2, expand_net=2, depth=True, wgan = False):
super(depth_discriminator_noclass_UNet, self).__init__()
#discriminator_UNet.__init__(self, input_dim=self.input_dim, output_dim=output_dim, input_shape=input_shape,
# class_num=class_num, expand_net = expand_net)
self.output_dim = output_dim
self.input_shape = list(input_shape)
self.class_num = class_num
self.expandNet = expand_net
self.depth = depth
self.wgan = wgan
        if depth:
            self.input_dim = input_dim * 2 + 2  # the origin image, depth map and class layer are given as well
        else:
            self.input_dim = input_dim * 2 + 1  # the origin image and class layer are given as well
self.conv1 = UnetConvBlock(self.input_dim, pow(2, self.expandNet), stride=1, dropout=0.0, batch_norm = False )
self.conv2 = UnetConvBlock(pow(2, self.expandNet), pow(2, self.expandNet + 1), stride=2, dropout=0.0, batch_norm = False )
self.conv3 = UnetConvBlock(pow(2, self.expandNet + 1), pow(2, self.expandNet + 2), stride=2, dropout=0.0, batch_norm = False )
self.conv4 = UnetConvBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 3), stride=2, dropout=0.0, batch_norm = False )
self.conv5 = UnetDeSingleConvBlock(pow(2, self.expandNet + 3), pow(2, self.expandNet + 2), stride=1, dropout=0.0, batch_norm = False )
        self.lastconvs = nn.ModuleList()  # registered so .parameters()/.cuda() reach these layers
        imagesize = self.input_shape[2] / 8  # spatial size after the three stride-2 blocks
        while imagesize > 4:
            self.lastconvs.append(UnetDeSingleConvBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 2), stride=2, dropout=0.0, batch_norm=False))
            imagesize = imagesize / 2
        else:
            # while/else: this runs once, after the loop finishes
            self.lastconvs.append(UnetDeSingleConvBlock(pow(2, self.expandNet + 2), pow(2, self.expandNet + 1), stride=1, dropout=0.0, batch_norm=False))
        self.input_shape[1] = self.input_dim
        self.n_size = self._get_conv_output(self.input_shape)
self.fc1 = nn.Sequential(
nn.Linear(self.n_size, 256),
)
self.BnLr = nn.Sequential(
nn.BatchNorm1d(256),
nn.LeakyReLU(0.2),
)
self.dc = nn.Sequential(
nn.Linear(256, self.output_dim),
#nn.Sigmoid(),
)
utils.initialize_weights(self)
def _get_conv_output(self, shape):
bs = 1
input = Variable(torch.rand(bs, *shape))
x = input.squeeze()
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
for layer in self.lastconvs:
x = layer(x)
x = x.view(x.size(0), -1)
return x.shape[1]
    def forward(self, input, origen, dep, clase):
        # this will change once color is handled
        # if (len(input.shape) <= 3):
        #     input = input[:, None, :, :]
        #     im = im[:, None, :, :]
        # print("D in shape", input.shape)
        # print(input.shape)
        # print("this is X:", x)
        # print("now shape", x.shape)
        x = input
        ## concatenate the class layer with the image RGB
x = torch.cat((x, clase), 1)
x = torch.cat((x, origen), 1)
if self.depth:
x = torch.cat((x, dep), 1)
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
for layer in self.lastconvs:
x = layer(x)
feature_vector = x.view(x.size(0), -1)
x = self.fc1(feature_vector)
x = self.BnLr(x)
d = self.dc(x)
return d, feature_vector
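# Hypothetical usage sketch (assumed shapes, not part of the original file).
# With depth=True the input channels are: generated RGB (3) + class layer (1)
# + origin RGB (3) + depth map (1) = input_dim * 2 + 2 = 8.
def _demo_depth_discriminator_noclass_unet():
    D = depth_discriminator_noclass_UNet(input_dim=3, output_dim=1,
                                         input_shape=[4, 8, 64, 64],
                                         class_num=2, expand_net=2)
    fake = torch.rand(4, 3, 64, 64)
    origin = torch.rand(4, 3, 64, 64)
    dep = torch.rand(4, 1, 64, 64)
    clase = torch.full((4, 1, 64, 64), 0.5)  # shift-class layer
    return D(fake, origin, dep, clase)  # d: (4, 1) raw score, features: (4, 128)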