# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for the policy of the agents use for navigation.""" | |
import abc | |
import tensorflow as tf | |
from absl import logging | |
import embedders | |
from envs import task_env | |
slim = tf.contrib.slim | |
def _print_debug_ios(history, goal, output):
  """Logs the shapes of the history, goal and output tensors."""
  if history is not None:
    shape = history.get_shape().as_list()
    logging.info('history embedding shape ')
    logging.info(shape)
    if len(shape) != 3:
      raise ValueError('history Tensor must have rank=3')
  if goal is not None:
    logging.info('goal embedding shape ')
    logging.info(goal.get_shape().as_list())
  if output is not None:
    logging.info('targets shape ')
    logging.info(output.get_shape().as_list())
class Policy(object):
  """Represents the policy of the agent for navigation tasks.

  Instantiates a policy that takes embedders for each modality and builds a
  model to infer the actions.
  """
  __metaclass__ = abc.ABCMeta

  def __init__(self, embedders_dict, action_size):
    """Instantiates the policy.

    Args:
      embedders_dict: Dictionary of embedders for different modalities. Keys
        should be identical to the keys of the observation modalities.
      action_size: Number of possible actions.
    """
    self._embedders = embedders_dict
    self._action_size = action_size

  def build(self, observations, prev_state):
    """Builds the model that represents the policy of the agent.

    Args:
      observations: Dictionary of observations from different modalities. Keys
        are the names of the modalities.
      prev_state: The tensor of the previous state of the model. Should be set
        to None if the policy is stateless.

    Returns:
      Tuple of (action, state) where action is the action logits and state is
      the state of the model after taking the new observation.
    """
    raise NotImplementedError(
        'Needs implementation as part of Policy interface')
class LSTMPolicy(Policy):
  """Represents the implementation of the LSTM-based policy.

  The architecture of the model is as follows. Each observation is embedded
  with its embedder, and the embeddings of all modalities are concatenated and
  fed through two fully connected layers. The LSTM takes these features,
  together with the previous action and a bit indicating whether that action
  was successful, and the value for each action is predicted from the LSTM
  output. Although the class name has the word LSTM in it, it also supports a
  mode that builds the network without the LSTM, purely for comparison
  purposes.
  """

  def __init__(self,
               modality_names,
               embedders_dict,
               action_size,
               params,
               max_episode_length,
               feedforward_mode=False):
    """Instantiates the LSTM policy.

    Args:
      modality_names: List of modality names. The ordering of the
        concatenation follows the order of this list. Each modality needs to
        be in embedders_dict.
      embedders_dict: Dictionary of embedders for different modalities. Keys
        should be identical to the keys of the observation modalities. Values
        should be instances of the Embedder class. Every observation except
        PREV_ACTION requires an embedder.
      action_size: Number of possible actions.
      params: Instance of tf.HParams containing the hyperparameters for the
        policy network.
      max_episode_length: Integer specifying the maximum length of each
        episode.
      feedforward_mode: If True, the LSTM is not added to the model. It should
        only be set to True for comparison between LSTM and feedforward
        models.
    """
    super(LSTMPolicy, self).__init__(embedders_dict, action_size)
    self._modality_names = modality_names
    self._lstm_state_size = params.lstm_state_size
    self._fc_channels = params.fc_channels
    self._weight_decay = params.weight_decay
    self._target_embedding_size = params.target_embedding_size
    self._max_episode_length = max_episode_length
    self._feedforward_mode = feedforward_mode
  def _build_lstm(self, encoded_inputs, prev_state, episode_length,
                  prev_action=None):
    """Builds an LSTM on top of the encoded inputs.

    If prev_action is not None, it is concatenated to the input of the LSTM.

    Args:
      encoded_inputs: The embedding of the observations and goal.
      prev_state: Previous state of the LSTM.
      episode_length: Tensor that contains the length of the sequence for each
        element of the batch.
      prev_action: Tensor of the previously chosen action and an additional
        bit indicating whether the previous action was successful or not.

    Returns:
      A tuple of (lstm output, lstm state).
    """
    # Add the previous action and its success bit to the embeddings of the
    # modalities.
    if prev_action is not None:
      encoded_inputs = tf.concat([encoded_inputs, prev_action], axis=-1)
    with tf.variable_scope('LSTM'):
      lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self._lstm_state_size)
      if prev_state is None:
        # If prev_state is None, a state of all zeros is passed as the
        # previous value for the cell. Should be used for the first step of
        # each episode.
        tf_prev_state = lstm_cell.zero_state(
            encoded_inputs.get_shape().as_list()[0], dtype=tf.float32)
      else:
        tf_prev_state = tf.nn.rnn_cell.LSTMStateTuple(prev_state[0],
                                                      prev_state[1])
      lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
          cell=lstm_cell,
          inputs=encoded_inputs,
          sequence_length=episode_length,
          initial_state=tf_prev_state,
          dtype=tf.float32,
      )
      lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])
    return lstm_outputs, lstm_state
  def build(
      self,
      observations,
      prev_state,
  ):
    """Builds the model that represents the policy of the agent.

    Args:
      observations: Dictionary of observations from different modalities. Keys
        are the names of the modalities. Observations should have the
        following key-values.
        observations['goal']: One-hot tensor that indicates the semantic
          category of the goal. The shape should be
          (batch_size x max_sequence_length x goals).
        observations[task_env.ModalityTypes.PREV_ACTION]: Has action_size + 1
          elements where the first action_size numbers are the one-hot vector
          of the previous action and the last element indicates whether the
          previous action was successful or not. If
          task_env.ModalityTypes.PREV_ACTION is not in the observations, it
          will not be used in the policy.
      prev_state: Previous state of the model. It should be a tuple of (c, h)
        where c and h are the previous cell value and hidden state of the
        LSTM. Each element of the tuple has shape of
        (batch_size x lstm_cell_size). If it is set to None, the state of the
        LSTM is initialized with all zeros.

    Returns:
      Tuple of (action, state) where action is the action logits and state is
      the state of the model after taking the new observation.

    Raises:
      ValueError: If any of the modality names is not in observations or
        embedders_dict.
      ValueError: If 'goal' is not in the observations.
    """
    for modality_name in self._modality_names:
      if modality_name not in observations:
        raise ValueError('modality name does not exist in observations: {} '
                         'not in {}'.format(modality_name,
                                            observations.keys()))
      if modality_name not in self._embedders:
        if modality_name == task_env.ModalityTypes.PREV_ACTION:
          continue
        raise ValueError('modality name does not have corresponding embedder'
                         ' {} not in {}'.format(modality_name,
                                                self._embedders.keys()))

    if task_env.ModalityTypes.GOAL not in observations:
      raise ValueError('goal should be provided in the observations')

    goal = observations[task_env.ModalityTypes.GOAL]
    prev_action = None
    if task_env.ModalityTypes.PREV_ACTION in observations:
      prev_action = observations[task_env.ModalityTypes.PREV_ACTION]

    with tf.variable_scope('policy'):
      with slim.arg_scope(
          [slim.fully_connected],
          activation_fn=tf.nn.relu,
          weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
          weights_regularizer=slim.l2_regularizer(self._weight_decay)):

        # Concatenate the embedding of each modality by applying the embedders
        # to the corresponding observations.
        def embed(name):
          with tf.variable_scope('embed_{}'.format(name)):
            # logging.info('Policy uses embedding %s', name)
            return self._embedders[name].build(observations[name])

        all_inputs = [
            embed(x) for x in self._modality_names
            if x != task_env.ModalityTypes.PREV_ACTION
        ]

        # Compute the goal embedding.
        shape = goal.get_shape().as_list()
        with tf.variable_scope('embed_goal'):
          encoded_goal = tf.reshape(goal, [shape[0] * shape[1], -1])
          encoded_goal = slim.fully_connected(encoded_goal,
                                              self._target_embedding_size)
          encoded_goal = tf.reshape(encoded_goal, [shape[0], shape[1], -1])
        all_inputs.append(encoded_goal)

        # Concatenate all the modalities and the goal.
        all_inputs = tf.concat(all_inputs, axis=-1, name='concat_embeddings')
        shape = all_inputs.get_shape().as_list()
        all_inputs = tf.reshape(all_inputs, [shape[0] * shape[1], shape[2]])

        # Apply fully connected layers.
        encoded_inputs = slim.fully_connected(all_inputs, self._fc_channels)
        encoded_inputs = slim.fully_connected(encoded_inputs,
                                              self._fc_channels)

        if not self._feedforward_mode:
          encoded_inputs = tf.reshape(encoded_inputs,
                                      [shape[0], shape[1], self._fc_channels])
          lstm_outputs, lstm_state = self._build_lstm(
              encoded_inputs=encoded_inputs,
              prev_state=prev_state,
              episode_length=tf.ones((shape[0],), dtype=tf.float32) *
              self._max_episode_length,
              prev_action=prev_action,
          )
        else:
          # If feedforward_mode=True, bypass the LSTM computation entirely.
          lstm_outputs = encoded_inputs

        lstm_outputs = slim.fully_connected(lstm_outputs, self._fc_channels)
        action_values = slim.fully_connected(
            lstm_outputs, self._action_size, activation_fn=None)
        action_values = tf.reshape(action_values, [shape[0], shape[1], -1])
        if not self._feedforward_mode:
          return action_values, lstm_state
        else:
          return action_values, None
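# ------------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the original interface). It
# shows one way LSTMPolicy.build could be wired up in a TF1 graph. The hparams
# fields mirror the ones read in LSTMPolicy.__init__; the layer sizes, tensor
# shapes, modality choice and the helper name itself are assumptions made for
# this example.
# ------------------------------------------------------------------------------
def _example_lstm_policy_graph(batch_size=2, seq_len=20, num_goal_classes=10,
                               action_size=8):
  """Builds a hypothetical LSTMPolicy graph over placeholder observations."""
  goal_key = task_env.ModalityTypes.GOAL
  prev_action_key = task_env.ModalityTypes.PREV_ACTION

  params = tf.contrib.training.HParams(
      lstm_state_size=256,
      fc_channels=128,
      weight_decay=0.0002,
      target_embedding_size=32)

  policy = LSTMPolicy(
      modality_names=[goal_key, prev_action_key],
      # Only the goal modality gets an embedder here; PREV_ACTION is consumed
      # directly by the LSTM. The MLPEmbedder layer sizes are assumptions.
      embedders_dict={goal_key: embedders.MLPEmbedder(layers=[32])},
      action_size=action_size,
      params=params,
      max_episode_length=seq_len)

  observations = {
      goal_key: tf.placeholder(tf.float32,
                               [batch_size, seq_len, num_goal_classes]),
      prev_action_key: tf.placeholder(tf.float32,
                                      [batch_size, seq_len, action_size + 1]),
  }
  # prev_state=None initializes the LSTM state with zeros (start of an
  # episode); the returned state is the LSTM state after the sequence.
  action_logits, lstm_state = policy.build(observations, prev_state=None)
  return action_logits, lstm_state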
class TaskPolicy(Policy):
  """A convenience abstract class providing functionality to deal with Tasks."""

  def __init__(self,
               task_config,
               model_hparams=None,
               embedder_hparams=None,
               train_hparams=None):
    """Constructs a policy which knows how to work with tasks (see tasks.py).

    It allows reading the task history, goal and outputs consistently with the
    task config.

    Args:
      task_config: An object of type tasks.TaskIOConfig (see tasks.py).
      model_hparams: A tf.HParams object containing parameters pertaining to
        the model (these are implementation specific).
      embedder_hparams: A tf.HParams object containing parameters pertaining
        to the history and goal embedders (these are implementation specific).
      train_hparams: A tf.HParams object containing parameters pertaining to
        training (these are implementation specific).
    """
    super(TaskPolicy, self).__init__(None, None)
    self._model_hparams = model_hparams
    self._embedder_hparams = embedder_hparams
    self._train_hparams = train_hparams
    self._task_config = task_config
    self._extra_train_ops = []

  def extra_train_ops(self):
    """Training ops in addition to the loss, e.g. batch norm updates.

    Returns:
      A list of tf ops.
    """
    return self._extra_train_ops
  def _embed_task_ios(self, streams):
    """Embeds a list of heterogeneous streams.

    These streams correspond to the task history, goal and output. The number
    of streams is equal to the total number of history inputs, plus one for
    the goal if present, plus one for the output. If the number of history
    inputs is k, then the first k streams are the history.

    The embedders used depend on the input (or goal) types. If an input is an
    image, a ResNet embedder is used, otherwise an MLPEmbedder
    (see embedders.py).

    Args:
      streams: A list of Tensors.

    Returns:
      Three float Tensors: history, goal, output. If there is no history, or
      no goal, the corresponding returned value is None. The shape of the
      embedded history is batch_size x sequence_length x the sum of all
      embedding dimensions over the history inputs. The shape of the goal is
      the embedding dimension.
    """
    # Embed history.
    index = 0
    inps = []
    scopes = []
    for c in self._task_config.inputs:
      if c == task_env.ModalityTypes.IMAGE:
        scope_name = 'image_embedder/image'
        reuse = scope_name in scopes
        scopes.append(scope_name)
        with tf.variable_scope(scope_name, reuse=reuse):
          resnet_embedder = embedders.ResNet(self._embedder_hparams.image)
          image_embeddings = resnet_embedder.build(streams[index])
          # Uncover batch norm ops.
          if self._embedder_hparams.image.is_train:
            self._extra_train_ops += resnet_embedder.extra_train_ops
          inps.append(image_embeddings)
          index += 1
      else:
        scope_name = 'input_embedder/vector'
        reuse = scope_name in scopes
        scopes.append(scope_name)
        with tf.variable_scope(scope_name, reuse=reuse):
          input_vector_embedder = embedders.MLPEmbedder(
              layers=self._embedder_hparams.vector)
          vector_embedder = input_vector_embedder.build(streams[index])
          inps.append(vector_embedder)
          index += 1
    history = tf.concat(inps, axis=2) if inps else None

    # Embed goal.
    goal = None
    if self._task_config.query is not None:
      scope_name = 'image_embedder/query'
      reuse = scope_name in scopes
      scopes.append(scope_name)
      with tf.variable_scope(scope_name, reuse=reuse):
        resnet_goal_embedder = embedders.ResNet(self._embedder_hparams.goal)
        goal = resnet_goal_embedder.build(streams[index])
        if self._embedder_hparams.goal.is_train:
          self._extra_train_ops += resnet_goal_embedder.extra_train_ops
        index += 1

    # Embed true targets if needed (tbd).
    true_target = streams[index]

    return history, goal, true_target

  def build(self, feeds, prev_state):
    pass
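# ------------------------------------------------------------------------------
# Illustration only (not part of the original interface): the layout of the
# `streams` list consumed by TaskPolicy._embed_task_ios, as described in its
# docstring -- the k history inputs come first (in the order of
# task_config.inputs), then the goal/query stream if present, then the output
# stream. The concrete modalities and shapes below are assumptions.
# ------------------------------------------------------------------------------
def _example_task_streams(batch_size=4, seq_len=8):
  """Returns a hypothetical streams list: history inputs, goal, output."""
  image_history = tf.placeholder(
      tf.float32, [batch_size, seq_len, 224, 224, 3])  # history input 0, image
  pose_history = tf.placeholder(
      tf.float32, [batch_size, seq_len, 3])            # history input 1, vector
  query_image = tf.placeholder(
      tf.float32, [batch_size, 224, 224, 3])           # goal (query) stream
  true_targets = tf.placeholder(
      tf.float32, [batch_size, 4])                     # output stream
  return [image_history, pose_history, query_image, true_targets]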
class ReactivePolicy(TaskPolicy):
  """A policy which ignores history.

  It processes only the current observation (last element in history) and the
  goal to output a prediction.
  """

  def __init__(self, *args, **kwargs):
    super(ReactivePolicy, self).__init__(*args, **kwargs)

  # The current implementation ignores the prev_state as it is purely
  # reactive. It returns None for the current state.
  def build(self, feeds, prev_state):
    history, goal, _ = self._embed_task_ios(feeds)
    _print_debug_ios(history, goal, None)

    with tf.variable_scope('output_decoder'):
      # Concatenate the embeddings of the current observation and the goal.
      reactive_input = tf.concat([tf.squeeze(history[:, -1, :]), goal],
                                 axis=1)
      oconfig = self._task_config.output.shape
      assert len(oconfig) == 1
      decoder = embedders.MLPEmbedder(
          layers=self._embedder_hparams.predictions.layer_sizes + oconfig)
      predictions = decoder.build(reactive_input)

    return predictions, None
class RNNPolicy(TaskPolicy):
  """A policy which takes the full history into account via an RNN.

  The implementation might and will change.

  The history, together with the goal, is processed using a stacked LSTM. The
  output of the last LSTM step is used to produce a prediction. Currently,
  only a single step output is supported.
  """

  def __init__(self, lstm_hparams, *args, **kwargs):
    super(RNNPolicy, self).__init__(*args, **kwargs)
    self._lstm_hparams = lstm_hparams

  # prev_state is ignored as, for now, the full history is specified as the
  # first element of the feeds. It might turn out to be beneficial to keep the
  # state as part of the policy object.
  def build(self, feeds, state):
    history, goal, _ = self._embed_task_ios(feeds)
    _print_debug_ios(history, goal, None)

    params = self._lstm_hparams
    cell = lambda: tf.contrib.rnn.BasicLSTMCell(params.cell_size)
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(
        [cell() for _ in range(params.num_layers)])
    # history is of shape batch_size x seq_len x embedding_dimension.
    batch_size, seq_len, _ = tuple(history.get_shape().as_list())

    if state is None:
      state = stacked_lstm.zero_state(batch_size, tf.float32)
    for t in range(seq_len):
      if params.concat_goal_everywhere:
        lstm_input = tf.concat([tf.squeeze(history[:, t, :]), goal], axis=1)
      else:
        lstm_input = tf.squeeze(history[:, t, :])
      output, state = stacked_lstm(lstm_input, state)

    with tf.variable_scope('output_decoder'):
      oconfig = self._task_config.output.shape
      assert len(oconfig) == 1
      features = tf.concat([output, goal], axis=1)
      assert len(output.get_shape().as_list()) == 2
      assert len(goal.get_shape().as_list()) == 2
      decoder = embedders.MLPEmbedder(
          layers=self._embedder_hparams.predictions.layer_sizes + oconfig)
      # The prediction is made from the last-step LSTM output and the goal.
      predictions = decoder.build(features)

    return predictions, state
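# ------------------------------------------------------------------------------
# Standalone sketch (illustration only): the stacked-LSTM unroll pattern used
# in RNNPolicy.build above, applied to placeholder tensors. The sizes below are
# assumptions chosen for the example; calling the same MultiRNNCell object at
# each step shares its variables across time steps, as in RNNPolicy.build.
# ------------------------------------------------------------------------------
def _example_stacked_lstm_unroll(batch_size=2, seq_len=5, embedding_dim=16,
                                 goal_dim=8, cell_size=32, num_layers=2):
  """Steps a stacked LSTM over a history, concatenating the goal each step."""
  history = tf.placeholder(tf.float32, [batch_size, seq_len, embedding_dim])
  goal = tf.placeholder(tf.float32, [batch_size, goal_dim])

  stacked_lstm = tf.contrib.rnn.MultiRNNCell(
      [tf.contrib.rnn.BasicLSTMCell(cell_size) for _ in range(num_layers)])
  state = stacked_lstm.zero_state(batch_size, tf.float32)
  output = None
  for t in range(seq_len):
    # Same per-step input construction as RNNPolicy.build when
    # concat_goal_everywhere is True.
    lstm_input = tf.concat([history[:, t, :], goal], axis=1)
    output, state = stacked_lstm(lstm_input, state)
  # `output` holds the last-step activations that feed the prediction decoder.
  return output, state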