"""
Deprecated network.py module. This file only exists to support backwards-compatibility
with old pickle files. See lib/__init__.py for more information.
"""
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.parameter import Parameter
# UPGRADING TO INPUT -> OUTPUT -> TARGET
# Todo:
# [X] Output attending to input
# [X] Target attending to output
# [ ] check passing hidden state between encoders/decoder (+ pass c?)
# [ ] add v_output
def choose(matrix, idxs):
if isinstance(idxs, Variable):
idxs = idxs.data
assert(matrix.ndimension() == 2)
unrolled_idxs = idxs + \
torch.arange(0, matrix.size(0)).type_as(idxs) * matrix.size(1)
return matrix.view(matrix.nelement())[unrolled_idxs]
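# choose() gathers one entry per batch row: choose(matrix, idxs)[b] == matrix[b, idxs[b]].
# A minimal worked example (toy values assumed):
#
#   m = torch.Tensor([[0.1, 0.2, 0.3],
#                     [0.4, 0.5, 0.6]])
#   choose(m, torch.LongTensor([2, 0]))  # -> [0.3, 0.4]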
class Network(nn.Module):
"""
Todo:
- Beam search
- check if this is right? attend during P->FC rather than during softmax->P?
- allow length 0 inputs/targets
- give n_examples as input to FC
- Initialise new weights randomly, rather than as zeroes
"""
def __init__(
self,
input_vocabulary,
target_vocabulary,
hidden_size=512,
embedding_size=128,
cell_type="LSTM"):
"""
:param list input_vocabulary: list of possible inputs
:param list target_vocabulary: list of possible targets
"""
super(Network, self).__init__()
self.h_input_encoder_size = hidden_size
self.h_output_encoder_size = hidden_size
self.h_decoder_size = hidden_size
self.embedding_size = embedding_size
self.input_vocabulary = input_vocabulary
self.target_vocabulary = target_vocabulary
# Number of tokens in input vocabulary
self.v_input = len(input_vocabulary)
# Number of tokens in target vocabulary
self.v_target = len(target_vocabulary)
self.cell_type = cell_type
if cell_type == 'GRU':
self.input_encoder_cell = nn.GRUCell(
input_size=self.v_input + 1,
hidden_size=self.h_input_encoder_size,
bias=True)
self.input_encoder_init = Parameter(
torch.rand(1, self.h_input_encoder_size))
            self.output_encoder_cell = nn.GRUCell(
                input_size=self.v_input + 1 + self.h_input_encoder_size,
                hidden_size=self.h_output_encoder_size,
                bias=True)
self.decoder_cell = nn.GRUCell(
input_size=self.v_target + 1,
hidden_size=self.h_decoder_size,
bias=True)
if cell_type == 'LSTM':
self.input_encoder_cell = nn.LSTMCell(
input_size=self.v_input + 1,
hidden_size=self.h_input_encoder_size,
bias=True)
            self.input_encoder_init = nn.ParameterList(
                [Parameter(torch.rand(1, self.h_input_encoder_size)),
                 Parameter(torch.rand(1, self.h_input_encoder_size))])
            self.output_encoder_cell = nn.LSTMCell(
                input_size=self.v_input + 1 + self.h_input_encoder_size,
                hidden_size=self.h_output_encoder_size,
                bias=True)
self.output_encoder_init_c = Parameter(
torch.rand(1, self.h_output_encoder_size))
self.decoder_cell = nn.LSTMCell(
input_size=self.v_target + 1,
hidden_size=self.h_decoder_size,
bias=True)
self.decoder_init_c = Parameter(torch.rand(1, self.h_decoder_size))
        self.W = nn.Linear(
            self.h_output_encoder_size + self.h_decoder_size,
            self.embedding_size)
self.V = nn.Linear(self.embedding_size, self.v_target + 1)
self.input_A = nn.Bilinear(
self.h_input_encoder_size,
self.h_output_encoder_size,
1,
bias=False)
self.output_A = nn.Bilinear(
self.h_output_encoder_size,
self.h_decoder_size,
1,
bias=False)
self.input_EOS = torch.zeros(1, self.v_input + 1)
self.input_EOS[:, -1] = 1
self.input_EOS = Parameter(self.input_EOS)
self.output_EOS = torch.zeros(1, self.v_input + 1)
self.output_EOS[:, -1] = 1
self.output_EOS = Parameter(self.output_EOS)
self.target_EOS = torch.zeros(1, self.v_target + 1)
self.target_EOS[:, -1] = 1
self.target_EOS = Parameter(self.target_EOS)
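    # A minimal construction sketch (toy vocabularies assumed):
    #
    #   net = Network(input_vocabulary=list('abc'),
    #                 target_vocabulary=list('xy'),
    #                 hidden_size=64, embedding_size=16, cell_type='LSTM')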
    def __getstate__(self):
        if hasattr(self, 'opt'):
            # Drop the live optimiser and store its state dict instead, so the
            # network pickles cleanly; get_optimiser() reattaches it later.
            return dict([(k, v) for k, v in self.__dict__.items()
                         if k != 'opt'] + [('optstate', self.opt.state_dict())])
            # return {**{k: v for k, v in self.__dict__.items() if k != 'opt'},
            #         'optstate': self.opt.state_dict()}
        else:
            return self.__dict__
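    # Pickle round-trip sketch: the optimiser state survives as 'optstate' and
    # is reattached lazily by get_optimiser() (assumes a net built as above):
    #
    #   import pickle
    #   net.get_optimiser()
    #   net2 = pickle.loads(pickle.dumps(net))
    #   net2.get_optimiser()   # Adam resumes from the saved state dict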
def __setstate__(self, state):
self.__dict__.update(state)
# Legacy:
if isinstance(self.input_encoder_init, tuple):
self.input_encoder_init = nn.ParameterList(
list(self.input_encoder_init))
def clear_optimiser(self):
if hasattr(self, 'opt'):
del self.opt
if hasattr(self, 'optstate'):
del self.optstate
def get_optimiser(self):
self.opt = torch.optim.Adam(self.parameters(), lr=0.001)
if hasattr(self, 'optstate'):
self.opt.load_state_dict(self.optstate)
def optimiser_step(self, inputs, outputs, target):
if not hasattr(self, 'opt'):
self.get_optimiser()
score = self.score(inputs, outputs, target, autograd=True).mean()
(-score).backward()
self.opt.step()
self.opt.zero_grad()
return score.data[0]
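    # Training-loop sketch (shapes assumed: inputs/outputs are nBatch * nExamples
    # lists of token sequences, targets a list of nBatch sequences; the return
    # value is the mean log-likelihood as a Python float under the legacy
    # PyTorch API this module targets):
    #
    #   for step in range(n_steps):
    #       avg_logp = net.optimiser_step(batch_inputs, batch_outputs,
    #                                     batch_targets)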
def set_target_vocabulary(self, target_vocabulary):
if target_vocabulary == self.target_vocabulary:
return
V_weight = []
V_bias = []
decoder_ih = []
for i in range(len(target_vocabulary)):
if target_vocabulary[i] in self.target_vocabulary:
j = self.target_vocabulary.index(target_vocabulary[i])
V_weight.append(self.V.weight.data[j:j + 1])
V_bias.append(self.V.bias.data[j:j + 1])
decoder_ih.append(self.decoder_cell.weight_ih.data[:, j:j + 1])
else:
V_weight.append(torch.zeros(1, self.V.weight.size(1)))
V_bias.append(torch.ones(1) * -10)
decoder_ih.append(
torch.zeros(
self.decoder_cell.weight_ih.data.size(0), 1))
V_weight.append(self.V.weight.data[-1:])
V_bias.append(self.V.bias.data[-1:])
decoder_ih.append(self.decoder_cell.weight_ih.data[:, -1:])
self.target_vocabulary = target_vocabulary
self.v_target = len(target_vocabulary)
self.target_EOS.data = torch.zeros(1, self.v_target + 1)
self.target_EOS.data[:, -1] = 1
self.V.weight.data = torch.cat(V_weight, dim=0)
self.V.bias.data = torch.cat(V_bias, dim=0)
self.V.out_features = self.V.bias.data.size(0)
self.decoder_cell.weight_ih.data = torch.cat(decoder_ih, dim=1)
self.decoder_cell.input_size = self.decoder_cell.weight_ih.data.size(1)
self.clear_optimiser()
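    # Example: tokens shared with the old vocabulary keep their learned rows;
    # genuinely new tokens start with zero weights and a bias of -10, i.e.
    # near-zero initial probability (a sketch, vocabularies assumed):
    #
    #   net.set_target_vocabulary(['x', 'y', 'z'])  # 'z' new; 'x', 'y' kept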
def input_encoder_get_init(self, batch_size):
if self.cell_type == "GRU":
return self.input_encoder_init.repeat(batch_size, 1)
if self.cell_type == "LSTM":
return tuple(x.repeat(batch_size, 1)
for x in self.input_encoder_init)
def output_encoder_get_init(self, input_encoder_h):
if self.cell_type == "GRU":
return input_encoder_h
if self.cell_type == "LSTM":
return (
input_encoder_h,
self.output_encoder_init_c.repeat(
input_encoder_h.size(0),
1))
def decoder_get_init(self, output_encoder_h):
if self.cell_type == "GRU":
return output_encoder_h
if self.cell_type == "LSTM":
return (
output_encoder_h,
self.decoder_init_c.repeat(
output_encoder_h.size(0),
1))
def cell_get_h(self, cell_state):
if self.cell_type == "GRU":
return cell_state
if self.cell_type == "LSTM":
return cell_state[0]
def score(self, inputs, outputs, target, autograd=False):
inputs = self.inputsToTensors(inputs)
outputs = self.inputsToTensors(outputs)
target = self.targetToTensor(target)
target, score = self.run(inputs, outputs, target=target, mode="score")
# target = self.tensorToOutput(target)
if autograd:
return score
else:
return score.data
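    # Usage sketch: per-task log-likelihoods, differentiable when autograd=True
    # (names assumed as in the construction sketch above):
    #
    #   logp = net.score(inputs, outputs, targets)                 # batch_size
    #   loss = -net.score(inputs, outputs, targets, autograd=True).mean()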
def sample(self, inputs, outputs):
inputs = self.inputsToTensors(inputs)
outputs = self.inputsToTensors(outputs)
target, score = self.run(inputs, outputs, mode="sample")
target = self.tensorToOutput(target)
return target
def sampleAndScore(self, inputs, outputs, nRepeats=None):
inputs = self.inputsToTensors(inputs)
outputs = self.inputsToTensors(outputs)
if nRepeats is None:
target, score = self.run(inputs, outputs, mode="sample")
target = self.tensorToOutput(target)
return target, score.data
else:
target = []
score = []
for i in range(nRepeats):
# print("repeat %d" % i)
t, s = self.run(inputs, outputs, mode="sample")
t = self.tensorToOutput(t)
target.extend(t)
score.extend(list(s.data))
return target, score
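    # Usage sketch: drawing nRepeats samples per task flattens the results, so
    # with B tasks both returned lists have length nRepeats * B:
    #
    #   samples, scores = net.sampleAndScore(inputs, outputs, nRepeats=10)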
def run(self, inputs, outputs, target=None, mode="sample"):
"""
:param mode: "score" returns log p(target|input), "sample" returns target ~ p(-|input)
:param List[LongTensor] inputs: n_examples * (max_length_input * batch_size)
:param List[LongTensor] target: max_length_target * batch_size
"""
assert((mode == "score" and target is not None) or mode == "sample")
n_examples = len(inputs)
max_length_input = [inputs[j].size(0) for j in range(n_examples)]
max_length_output = [outputs[j].size(0) for j in range(n_examples)]
max_length_target = target.size(0) if target is not None else 10
batch_size = inputs[0].size(1)
score = Variable(torch.zeros(batch_size))
        # n_examples * (max_length_input * batch_size * (v_input + 1))
        inputs_scatter = [Variable(torch.zeros(
            max_length_input[j], batch_size, self.v_input + 1
        ).scatter_(2, inputs[j][:, :, None], 1)) for j in range(n_examples)]
        # n_examples * (max_length_output * batch_size * (v_input + 1))
        outputs_scatter = [Variable(torch.zeros(
            max_length_output[j], batch_size, self.v_input + 1
        ).scatter_(2, outputs[j][:, :, None], 1)) for j in range(n_examples)]
        if target is not None:
            # max_length_target * batch_size * (v_target + 1)
            target_scatter = Variable(torch.zeros(
                max_length_target, batch_size, self.v_target + 1
            ).scatter_(2, target[:, :, None], 1))
# -------------- Input Encoder -------------
# n_examples * (max_length_input * batch_size * h_encoder_size)
input_H = []
input_embeddings = [] # h for example at INPUT_EOS
# 0 until (and including) INPUT_EOS, then -inf
input_attention_mask = []
for j in range(n_examples):
active = torch.Tensor(max_length_input[j], batch_size).byte()
active[0, :] = 1
state = self.input_encoder_get_init(batch_size)
hs = []
for i in range(max_length_input[j]):
state = self.input_encoder_cell(
inputs_scatter[j][i, :, :], state)
if i + 1 < max_length_input[j]:
active[i + 1, :] = active[i, :] * \
(inputs[j][i, :] != self.v_input)
h = self.cell_get_h(state)
hs.append(h[None, :, :])
input_H.append(torch.cat(hs, 0))
embedding_idx = active.sum(0).long() - 1
embedding = input_H[j].gather(0, Variable(
embedding_idx[None, :, None].repeat(1, 1, self.h_input_encoder_size)))[0]
input_embeddings.append(embedding)
input_attention_mask.append(Variable(active.float().log()))
# -------------- Output Encoder -------------
        def input_attend(j, h_out):
            """
            'general' attention from https://arxiv.org/pdf/1508.04025.pdf
            :param j: Index of example
            :param h_out: batch_size * h_output_encoder_size
            """
            scores = self.input_A(
                input_H[j].view(max_length_input[j] * batch_size,
                                self.h_input_encoder_size),
                h_out.view(batch_size, self.h_output_encoder_size).repeat(
                    max_length_input[j], 1)
            ).view(max_length_input[j], batch_size) + input_attention_mask[j]
            c = (F.softmax(scores[:, :, None], dim=0) * input_H[j]).sum(0)
            return c
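        # In Luong-style 'general' attention the score is the bilinear form
        #   score(h_i, h_out) = h_i^T W_a h_out   (W_a is input_A, no bias)
        # and the context is the attention-weighted sum of encoder states,
        #   c = sum_i softmax_i(score + mask) * h_i,
        # where the mask puts -inf on positions after INPUT_EOS.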
# n_examples * (max_length_input * batch_size * h_encoder_size)
output_H = []
        output_embeddings = []  # h for example at OUTPUT_EOS
        # 0 until (and including) OUTPUT_EOS, then -inf
        output_attention_mask = []
for j in range(n_examples):
active = torch.Tensor(max_length_output[j], batch_size).byte()
active[0, :] = 1
state = self.output_encoder_get_init(input_embeddings[j])
hs = []
h = self.cell_get_h(state)
for i in range(max_length_output[j]):
state = self.output_encoder_cell(torch.cat(
[outputs_scatter[j][i, :, :], input_attend(j, h)], 1), state)
if i + 1 < max_length_output[j]:
active[i + 1, :] = active[i, :] * \
(outputs[j][i, :] != self.v_input)
h = self.cell_get_h(state)
hs.append(h[None, :, :])
output_H.append(torch.cat(hs, 0))
embedding_idx = active.sum(0).long() - 1
embedding = output_H[j].gather(0, Variable(
embedding_idx[None, :, None].repeat(1, 1, self.h_output_encoder_size)))[0]
output_embeddings.append(embedding)
output_attention_mask.append(Variable(active.float().log()))
# ------------------ Decoder -----------------
        def output_attend(j, h_dec):
            """
            'general' attention from https://arxiv.org/pdf/1508.04025.pdf
            :param j: Index of example
            :param h_dec: batch_size * h_decoder_size
            """
            scores = self.output_A(
                output_H[j].view(max_length_output[j] * batch_size,
                                 self.h_output_encoder_size),
                h_dec.view(batch_size, self.h_decoder_size).repeat(
                    max_length_output[j], 1)
            ).view(max_length_output[j], batch_size) + output_attention_mask[j]
            c = (F.softmax(scores[:, :, None], dim=0) * output_H[j]).sum(0)
            return c
# Multi-example pooling: Figure 3, https://arxiv.org/pdf/1703.07469.pdf
target = target if mode == "score" else torch.zeros(
max_length_target, batch_size).long()
decoder_states = [
self.decoder_get_init(
output_embeddings[j]) for j in range(n_examples)] # P
active = torch.ones(batch_size).byte()
for i in range(max_length_target):
FC = []
for j in range(n_examples):
h = self.cell_get_h(decoder_states[j])
p_aug = torch.cat([h, output_attend(j, h)], 1)
FC.append(F.tanh(self.W(p_aug)[None, :, :]))
# batch_size * embedding_size
m = torch.max(torch.cat(FC, 0), 0)[0]
logsoftmax = F.log_softmax(self.V(m), dim=1)
if mode == "sample":
target[i, :] = torch.multinomial(
logsoftmax.data.exp(), 1)[:, 0]
score = score + \
choose(logsoftmax, target[i, :]) * Variable(active.float())
active *= (target[i, :] != self.v_target)
for j in range(n_examples):
if mode == "score":
target_char_scatter = target_scatter[i, :, :]
elif mode == "sample":
target_char_scatter = Variable(torch.zeros(
batch_size, self.v_target + 1).scatter_(1, target[i, :, None], 1))
decoder_states[j] = self.decoder_cell(
target_char_scatter, decoder_states[j])
return target, score
    def inputsToTensors(self, inputss):
        """
        :param inputss: nBatch * nExamples nested lists of token sequences
        :return: list of nExamples LongTensors, each (max_length + 1) * nBatch,
            padded with the EOS index self.v_input
        """
tensors = []
for j in range(len(inputss[0])):
inputs = [x[j] for x in inputss]
maxlen = max(len(s) for s in inputs)
t = torch.ones(
1 if maxlen == 0 else maxlen + 1,
len(inputs)).long() * self.v_input
for i in range(len(inputs)):
s = inputs[i]
if len(s) > 0:
t[:len(s), i] = torch.LongTensor(
[self.input_vocabulary.index(x) for x in s])
tensors.append(t)
return tensors
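    # Worked example (input_vocabulary ['a', 'b', 'c'] assumed, so EOS == 3):
    #
    #   net.inputsToTensors([[['a'], ['b', 'c']],
    #                        [['c'], []]])
    #   # -> [LongTensor [[0, 2], [3, 3]],          # example 0: 2 * nBatch
    #   #     LongTensor [[1, 3], [2, 3], [3, 3]]]  # example 1: 3 * nBatch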
    def targetToTensor(self, targets):
        """
        :param targets: list (length batch_size) of token sequences
        :return: LongTensor of size (max_length + 1) * batch_size,
            padded with the EOS index self.v_target
        """
maxlen = max(len(s) for s in targets)
t = torch.ones(
1 if maxlen == 0 else maxlen + 1,
len(targets)).long() * self.v_target
for i in range(len(targets)):
s = targets[i]
if len(s) > 0:
t[:len(s), i] = torch.LongTensor(
[self.target_vocabulary.index(x) for x in s])
return t
def tensorToOutput(self, tensor):
"""
:param tensor: max_length * batch_size
"""
out = []
        for i in range(tensor.size(1)):
            l = tensor[:, i].tolist()
            if l[0] == self.v_target:
                out.append([])
            elif self.v_target in l:
                final = l.index(self.v_target)
                out.append([self.target_vocabulary[x]
                            for x in l[:final]])
else:
out.append([self.target_vocabulary[x] for x in tensor[:, i]])
return out
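# Round-trip sketch: targetToTensor followed by tensorToOutput recovers the
# original sequences (toy net assumed, target_vocabulary ['x', 'y'], EOS == 2):
#
#   t = net.targetToTensor([['x', 'y'], ['y']])  # rows: [0, 1], [1, 2], [2, 2]
#   net.tensorToOutput(t)                        # -> [['x', 'y'], ['y']]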