# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A library showing off sequence recognition and generation with the simple | |
example of names. | |
We use recurrent neural nets to learn complex functions able to recognize and | |
generate sequences of a given form. This can be used for natural language | |
syntax recognition, dynamically generating maps or puzzles and of course | |
baby name generation. | |
Before using this module, it is recommended to read the Tensorflow tutorial on | |
recurrent neural nets, as it explains the basic concepts of this model, and | |
will show off another module, the PTB module on which this model bases itself. | |
Here is an overview of the functions available in this module: | |
* RNN Module for sequence functions based on PTB | |
* Name recognition specifically for recognizing names, but can be adapted to | |
recognizing sequence patterns | |
* Name generations specifically for generating names, but can be adapted to | |
generating arbitrary sequence patterns | |
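
Example usage (a minimal sketch; it assumes name data in the data_utils
format at data/SmallNames.txt and a writable model/ directory, as in the
__main__ block at the bottom of this file):

    train("data/SmallNames.txt", "model/namignizer", SmallConfig)
    namignize(["mary", "bob"],
              tf.train.latest_checkpoint("model"), SmallConfig)
    namignator(tf.train.latest_checkpoint("model"), SmallConfig)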
""" | |

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf

from model import NamignizerModel
import data_utils

class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 20
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 27
    epoch_size = 100


class LargeConfig(object):
    """Large config."""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 650
    max_epoch = 6
    max_max_epoch = 39
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    vocab_size = 27
    epoch_size = 100


class TestConfig(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 27
    epoch_size = 100
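

def get_config(size):
    """A convenience sketch, not part of the original module: maps a size name
    to one of the config classes above, mirroring the PTB example's
    get_config(). Note that vocab_size is 27 throughout: the 26 lowercase
    letters plus the end-of-name symbol 0.
    """
    configs = {"small": SmallConfig, "large": LargeConfig, "test": TestConfig}
    if size not in configs:
        raise ValueError("Unknown config size: %s" % size)
    return configs[size]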


def run_epoch(session, m, names, counts, epoch_size, eval_op, verbose=False):
    """Runs the model on the given data for one epoch.

    Args:
        session: the tf session holding the model graph
        m: an instance of the NamignizerModel
        names: a set of lowercase names composed of the 26 letters a-z
        counts: a list of the frequency of the above names
        epoch_size: the number of batches to run
        eval_op: the op to run alongside the cost, e.g. the training op that
            updates the parameters, or tf.no_op() for evaluation only

    Kwargs:
        verbose: whether to print out the state of training during the epoch

    Returns:
        cost: the average cost during the last stage of the epoch
    """
    start_time = time.time()
    costs = 0.0
    iters = 0
    for step, (x, y) in enumerate(data_utils.namignizer_iterator(
            names, counts, m.batch_size, m.num_steps, epoch_size)):
        # all positions are weighted equally during training
        cost, _ = session.run([m.cost, eval_op],
                              {m.input_data: x,
                               m.targets: y,
                               m.weights: np.ones(m.batch_size * m.num_steps)})
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 9:
            print("%.3f perplexity: %.3f speed: %.0f lps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

        if step >= epoch_size:
            break

    return np.exp(costs / iters)
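

# For intuition: the perplexity returned above is the exponentiated average
# per-step cross-entropy. A quick worked example with made-up numbers:
#
#     costs = 20 * np.log(10)   # 20 steps, each with cost ln(10)
#     iters = 20
#     np.exp(costs / iters)     # == 10.0: the model is as uncertain as a
#                               # uniform choice over 10 characters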


def train(data_dir, checkpoint_path, config):
    """Trains the model with the given data.

    Args:
        data_dir: path to the data for the model (see data_utils for the data
            format)
        checkpoint_path: the path to save the trained model checkpoints to
        config: one of the above configs that specifies the model and how it
            should be run and trained

    Returns:
        None
    """
    # Prepare Name data.
    print("Reading Name data in %s" % data_dir)
    names, counts = data_utils.read_names(data_dir)

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = NamignizerModel(is_training=True, config=config)

        tf.global_variables_initializer().run()

        for i in range(config.max_max_epoch):
            # decay the learning rate once we are past max_epoch epochs
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, names, counts,
                                         config.epoch_size, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1,
                                                        train_perplexity))

            m.saver.save(session, checkpoint_path, global_step=i)


def namignize(names, checkpoint_path, config):
    """Recognizes names by printing the model's perplexity for each name in
    the list.

    Args:
        names: a list of names in the model format
        checkpoint_path: the path to restore the trained model from; this is
            the checkpoint prefix (e.g. as returned by
            tf.train.latest_checkpoint), not a directory
        config: one of the above configs that specifies the model and how it
            should be run

    Returns:
        None
    """
    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model"):
            m = NamignizerModel(is_training=False, config=config)

        m.saver.restore(session, checkpoint_path)

        for name in names:
            x, y = data_utils.name_to_batch(name, m.batch_size, m.num_steps)

            # weight only the positions covered by the name; zero out the
            # padding that fills the rest of the batch
            cost, loss, _ = session.run(
                [m.cost, m.loss, tf.no_op()],
                {m.input_data: x,
                 m.targets: y,
                 m.weights: np.concatenate((
                     np.ones(len(name)),
                     np.zeros(m.batch_size * m.num_steps - len(name))))})

            print("Name {} gives us a perplexity of {}".format(
                name, np.exp(cost)))
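

# Interpretation sketch: a trained model should assign markedly lower
# perplexity to plausible names (e.g. "mary") than to noise (e.g.
# "gazorbazorb"), so thresholding the printed perplexity (with a cutoff tuned
# on held-out data) is a simple way to turn this into a yes/no recognizer.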


def namignator(checkpoint_path, config):
    """Generates names randomly according to a given model.

    Args:
        checkpoint_path: the path to restore the trained model from; this is
            the checkpoint prefix (e.g. as returned by
            tf.train.latest_checkpoint), not a directory
        config: one of the above configs that specifies the model and how it
            should be run

    Returns:
        None
    """
    # mutate the config to become a name generator config
    config.num_steps = 1
    config.batch_size = 1

    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model"):
            m = NamignizerModel(is_training=False, config=config)

        m.saver.restore(session, checkpoint_path)

        # prime the model with a zero input to get the distribution over the
        # first letter (0 is the end-of-name symbol; 1-26 map to a-z)
        activations, final_state, _ = session.run(
            [m.activations, m.final_state, tf.no_op()],
            {m.input_data: np.zeros((1, 1)),
             m.targets: np.zeros((1, 1)),
             m.weights: np.ones(1)})

        # sample from our softmax activations
        next_letter = np.random.choice(27, p=activations[0])
        name = [next_letter]
        while next_letter != 0:
            # feed the sampled letter and the recurrent state back in, and
            # keep sampling until the end-of-name symbol comes up
            activations, final_state, _ = session.run(
                [m.activations, m.final_state, tf.no_op()],
                {m.input_data: [[next_letter]],
                 m.targets: np.zeros((1, 1)),
                 m.initial_state: final_state,
                 m.weights: np.ones(1)})

            next_letter = np.random.choice(27, p=activations[0])
            name += [next_letter]

        # drop the terminating 0 and map 1-26 back to a-z
        print("".join(chr(c + 96) for c in name[:-1]))
if __name__ == "__main__": | |
train("data/SmallNames.txt", "model/namignizer", SmallConfig) | |
namignize(["mary", "ida", "gazorbazorb", "mmmhmm", "bob"], | |
tf.train.latest_checkpoint("model"), SmallConfig) | |
namignator(tf.train.latest_checkpoint("model"), SmallConfig) | |