# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluation utility functions. | |
""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import numpy as np | |
import tensorflow as tf | |
from collections import namedtuple | |
logging = tf.logging | |
import gin.tf | |


def evaluate_checkpoint_repeatedly(checkpoint_dir,
                                   evaluate_checkpoint_fn,
                                   eval_interval_secs=600,
                                   max_number_of_evaluations=None,
                                   checkpoint_timeout=None,
                                   timeout_fn=None):
  """Evaluates a checkpointed model at a set interval.

  `evaluate_checkpoint_fn` takes a checkpoint path and returns True when
  evaluation should stop.
  """
  if max_number_of_evaluations is not None and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive number.')
  number_of_evaluations = 0
  for checkpoint_path in tf.contrib.training.checkpoints_iterator(
      checkpoint_dir,
      min_interval_secs=eval_interval_secs,
      timeout=checkpoint_timeout,
      timeout_fn=timeout_fn):
    should_stop = False
    retries = 3
    for _ in range(retries):
      try:
        should_stop = evaluate_checkpoint_fn(checkpoint_path)
        break
      except tf.errors.DataLossError:
        logging.warn(
            'Encountered a DataLossError while evaluating a checkpoint. This '
            'can happen when reading a checkpoint before it is fully written. '
            'Retrying...')
        time.sleep(2.0)

    # Honor the stop signal from the evaluation callback and the optional
    # evaluation budget.
    number_of_evaluations += 1
    if should_stop or (max_number_of_evaluations is not None and
                       number_of_evaluations >= max_number_of_evaluations):
      logging.info('Stopping evaluation after %d evaluations.',
                   number_of_evaluations)
      break
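

# Illustrative sketch, not part of the original module: one way to build an
# `evaluate_checkpoint_fn` for `evaluate_checkpoint_repeatedly`. The `saver`
# and `metric_op` arguments are assumptions supplied by the caller; returning
# False keeps the loop polling for new checkpoints.
def _make_example_evaluate_checkpoint_fn(saver, metric_op):
  def _evaluate(checkpoint_path):
    with tf.Session() as sess:
      saver.restore(sess, checkpoint_path)
      metric_value = sess.run(metric_op)
      logging.info('Evaluated %s: metric = %s', checkpoint_path, metric_value)
    return False  # Never request an early stop.
  return _evaluate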


def compute_model_loss(sess, model_rollout_fn, states, actions):
  """Computes model loss."""
  preds, losses = [], []
  preds.append(states[0])
  losses.append(0)
  for state, action in zip(states[1:], actions[1:]):
    pred = model_rollout_fn(sess, preds[-1], action)
    loss = np.sqrt(np.sum((state - pred) ** 2))
    preds.append(pred)
    losses.append(loss)
  return preds, losses
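

# Illustrative sketch, not part of the original module: a `model_rollout_fn`
# with the signature `compute_model_loss` expects, i.e. it maps
# (sess, state, action) to a predicted next state. The placeholders and the
# prediction tensor are assumptions built by the caller's dynamics model.
def _make_example_model_rollout_fn(state_ph, action_ph, next_state_pred):
  def _rollout(sess, state, action):
    return sess.run(next_state_pred,
                    feed_dict={state_ph: state, action_ph: action})
  return _rollout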


def compute_average_reward(sess, env_base, step_fn, gamma, num_steps,
                           num_episodes):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    env_base: A python environment.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, transition_type, reward, meta_reward, discount, ...]
      values; the final two entries are unused here.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
    num_episodes: number of episodes to average the reward over.
  Returns:
    average_reward: average cumulative discounted reward.
    average_last_reward: average of the reward received at the final step.
    average_meta_reward: average cumulative discounted meta reward.
    average_last_meta_reward: average of the meta reward at the final step.
    average_success: fraction of episodes counted as successful.
    states: list of states from the final episode.
    actions: list of actions from the final episode.
  """
  average_reward = 0
  average_last_reward = 0
  average_meta_reward = 0
  average_last_meta_reward = 0
  average_success = 0.
  states, actions = None, None
  for i in range(num_episodes):
    env_base.end_episode()
    env_base.begin_episode()
    (reward, last_reward, meta_reward, last_meta_reward,
     states, actions) = compute_reward(
         sess, step_fn, gamma, num_steps)
    s_reward = last_meta_reward  # Navigation
    success = (s_reward > -5.0)  # When using diff=False
    logging.info('Episode = %d, reward = %s, meta_reward = %f, '
                 'last_reward = %s, last meta_reward = %f, success = %s',
                 i, reward, meta_reward, last_reward, last_meta_reward,
                 success)
    average_reward += reward
    average_last_reward += last_reward
    average_meta_reward += meta_reward
    average_last_meta_reward += last_meta_reward
    average_success += success
  average_reward /= num_episodes
  average_last_reward /= num_episodes
  average_meta_reward /= num_episodes
  average_last_meta_reward /= num_episodes
  average_success /= num_episodes
  return (average_reward, average_last_reward,
          average_meta_reward, average_last_meta_reward,
          average_success,
          states, actions)
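

# Illustrative sketch, not part of the original module: calling
# `compute_average_reward` from an evaluation script. The environment,
# `step_fn`, and the hyperparameter values are assumptions; the unpacking
# order matches the tuple returned above.
def _example_log_average_reward(sess, env_base, step_fn):
  (avg_reward, avg_last_reward, avg_meta_reward, avg_last_meta_reward,
   avg_success, _, _) = compute_average_reward(
       sess, env_base, step_fn, gamma=0.99, num_steps=500, num_episodes=10)
  logging.info('avg reward = %s, avg last reward = %s, avg meta reward = %s, '
               'avg last meta reward = %s, success rate = %s',
               avg_reward, avg_last_reward, avg_meta_reward,
               avg_last_meta_reward, avg_success)
  return avg_success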


def compute_reward(sess, step_fn, gamma, num_steps):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, transition_type, reward, meta_reward, discount, ...]
      values; the final two entries are unused here.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
  Returns:
    total_reward: cumulative discounted reward.
    reward: reward received at the final step.
    total_meta_reward: cumulative discounted meta reward.
    meta_reward: meta reward received at the final step.
    states: list of states visited.
    actions: list of actions taken.
  """
  total_reward = 0
  total_meta_reward = 0
  gamma_step = 1
  states = []
  actions = []
  for _ in range(num_steps):
    state, action, transition_type, reward, meta_reward, discount, _, _ = step_fn(sess)
    total_reward += reward * gamma_step * discount
    total_meta_reward += meta_reward * gamma_step * discount
    gamma_step *= gamma
    states.append(state)
    actions.append(action)
  return (total_reward, reward, total_meta_reward, meta_reward,
          states, actions)
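

# Illustrative sketch, not part of the original module: a `step_fn` that
# returns the 8-tuple `compute_reward` unpacks. The environment accessors,
# the policy lookup, and the trailing (next_state, done) entries are
# assumptions made for illustration; `compute_reward` ignores the last two
# values.
def _make_example_step_fn(env_base, policy_fn):
  def _step(sess):
    state = env_base.current_obs()   # Assumed observation accessor.
    action = policy_fn(sess, state)  # Assumed policy lookup.
    next_state, reward, done, info = env_base.step(action)
    meta_reward = info.get('meta_reward', 0.0)
    discount = 0.0 if done else 1.0
    transition_type = 0  # Placeholder transition type.
    return (state, action, transition_type, reward, meta_reward, discount,
            next_state, done)
  return _step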