# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluation utility functions. | |
""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import numpy as np | |
import tensorflow as tf | |
from collections import namedtuple | |
logging = tf.logging | |
import gin.tf | |


def evaluate_checkpoint_repeatedly(checkpoint_dir,
                                   evaluate_checkpoint_fn,
                                   eval_interval_secs=600,
                                   max_number_of_evaluations=None,
                                   checkpoint_timeout=None,
                                   timeout_fn=None):
  """Evaluates a checkpointed model at a set interval.

  `evaluate_checkpoint_fn` takes a checkpoint path and returns True when
  evaluation should stop.
  """
  if max_number_of_evaluations is not None and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive number.')
  number_of_evaluations = 0
  for checkpoint_path in tf.contrib.training.checkpoints_iterator(
      checkpoint_dir,
      min_interval_secs=eval_interval_secs,
      timeout=checkpoint_timeout,
      timeout_fn=timeout_fn):
    should_stop = False
    retries = 3
    for _ in range(retries):
      try:
        should_stop = evaluate_checkpoint_fn(checkpoint_path)
        break
      except tf.errors.DataLossError:
        logging.warn(
            'Encountered a DataLossError while evaluating a checkpoint. This '
            'can happen when reading a checkpoint before it is fully written. '
            'Retrying...')
        time.sleep(2.0)

    # Honor the stop signal from the evaluation callback and the optional
    # evaluation budget.
    number_of_evaluations += 1
    if should_stop or (max_number_of_evaluations is not None and
                       number_of_evaluations >= max_number_of_evaluations):
      logging.info('Stopping evaluation after %d evaluations.',
                   number_of_evaluations)
      break
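

# Illustrative sketch, not part of the original module: one way to build an
# `evaluate_checkpoint_fn` for `evaluate_checkpoint_repeatedly`. The `saver`
# and `metric_op` arguments are assumptions supplied by the caller; returning
# False keeps the loop polling for new checkpoints.
def _make_example_evaluate_checkpoint_fn(saver, metric_op):
  def _evaluate(checkpoint_path):
    with tf.Session() as sess:
      saver.restore(sess, checkpoint_path)
      metric_value = sess.run(metric_op)
      logging.info('Evaluated %s: metric = %s', checkpoint_path, metric_value)
    return False  # Never request an early stop.
  return _evaluate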


def compute_model_loss(sess, model_rollout_fn, states, actions):
  """Computes model loss."""
  preds, losses = [], []
  preds.append(states[0])
  losses.append(0)
  for state, action in zip(states[1:], actions[1:]):
    pred = model_rollout_fn(sess, preds[-1], action)
    loss = np.sqrt(np.sum((state - pred) ** 2))
    preds.append(pred)
    losses.append(loss)
  return preds, losses
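

# Illustrative sketch, not part of the original module: a `model_rollout_fn`
# with the signature `compute_model_loss` expects, i.e. it maps
# (sess, state, action) to a predicted next state. The placeholders and the
# prediction tensor are assumptions built by the caller's dynamics model.
def _make_example_model_rollout_fn(state_ph, action_ph, next_state_pred):
  def _rollout(sess, state, action):
    return sess.run(next_state_pred,
                    feed_dict={state_ph: state, action_ph: action})
  return _rollout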


def compute_average_reward(sess, env_base, step_fn, gamma, num_steps,
                           num_episodes):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    env_base: A python environment.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, transition_type, reward, meta_reward, discount, ...]
      values; the final two entries are unused here.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
    num_episodes: number of episodes to average the reward over.
  Returns:
    average_reward: average cumulative discounted reward.
    average_last_reward: average of the reward received at the final step.
    average_meta_reward: average cumulative discounted meta reward.
    average_last_meta_reward: average of the meta reward at the final step.
    average_success: fraction of episodes counted as successful.
    states: list of states from the final episode.
    actions: list of actions from the final episode.
  """
  average_reward = 0
  average_last_reward = 0
  average_meta_reward = 0
  average_last_meta_reward = 0
  average_success = 0.
  states, actions = None, None
  for i in range(num_episodes):
    env_base.end_episode()
    env_base.begin_episode()
    (reward, last_reward, meta_reward, last_meta_reward,
     states, actions) = compute_reward(
         sess, step_fn, gamma, num_steps)
    s_reward = last_meta_reward  # Navigation
    success = (s_reward > -5.0)  # When using diff=False
    logging.info('Episode = %d, reward = %s, meta_reward = %f, '
                 'last_reward = %s, last meta_reward = %f, success = %s',
                 i, reward, meta_reward, last_reward, last_meta_reward,
                 success)
    average_reward += reward
    average_last_reward += last_reward
    average_meta_reward += meta_reward
    average_last_meta_reward += last_meta_reward
    average_success += success
  average_reward /= num_episodes
  average_last_reward /= num_episodes
  average_meta_reward /= num_episodes
  average_last_meta_reward /= num_episodes
  average_success /= num_episodes
  return (average_reward, average_last_reward,
          average_meta_reward, average_last_meta_reward,
          average_success,
          states, actions)
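

# Illustrative sketch, not part of the original module: calling
# `compute_average_reward` from an evaluation script. The environment,
# `step_fn`, and the hyperparameter values are assumptions; the unpacking
# order matches the tuple returned above.
def _example_log_average_reward(sess, env_base, step_fn):
  (avg_reward, avg_last_reward, avg_meta_reward, avg_last_meta_reward,
   avg_success, _, _) = compute_average_reward(
       sess, env_base, step_fn, gamma=0.99, num_steps=500, num_episodes=10)
  logging.info('avg reward = %s, avg last reward = %s, avg meta reward = %s, '
               'avg last meta reward = %s, success rate = %s',
               avg_reward, avg_last_reward, avg_meta_reward,
               avg_last_meta_reward, avg_success)
  return avg_success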


def compute_reward(sess, step_fn, gamma, num_steps):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, transition_type, reward, meta_reward, discount, ...]
      values; the final two entries are unused here.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
  Returns:
    total_reward: cumulative discounted reward.
    reward: reward received at the final step.
    total_meta_reward: cumulative discounted meta reward.
    meta_reward: meta reward received at the final step.
    states: list of states visited.
    actions: list of actions taken.
  """
  total_reward = 0
  total_meta_reward = 0
  gamma_step = 1
  states = []
  actions = []
  for _ in range(num_steps):
    state, action, transition_type, reward, meta_reward, discount, _, _ = step_fn(sess)
    total_reward += reward * gamma_step * discount
    total_meta_reward += meta_reward * gamma_step * discount
    gamma_step *= gamma
    states.append(state)
    actions.append(action)
  return (total_reward, reward, total_meta_reward, meta_reward,
          states, actions)
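

# Illustrative sketch, not part of the original module: a `step_fn` that
# returns the 8-tuple `compute_reward` unpacks. The environment accessors,
# the policy lookup, and the trailing (next_state, done) entries are
# assumptions made for illustration; `compute_reward` ignores the last two
# values.
def _make_example_step_fn(env_base, policy_fn):
  def _step(sess):
    state = env_base.current_obs()   # Assumed observation accessor.
    action = policy_fn(sess, state)  # Assumed policy lookup.
    next_state, reward, done, info = env_base.step(action)
    meta_reward = info.get('meta_reward', 0.0)
    discount = 0.0 if done else 1.0
    transition_type = 0  # Placeholder transition type.
    return (state, action, transition_type, reward, meta_reward, discount,
            next_state, done)
  return _step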