# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Simple example of contextual bandits simulation. | |
Code corresponding to: | |
Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks | |
for Thompson Sampling, by Carlos Riquelme, George Tucker, and Jasper Snoek. | |
https://arxiv.org/abs/1802.09127 | |
""" | |
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import app
from absl import flags
import numpy as np
import tensorflow as tf

from bandits.algorithms.bootstrapped_bnn_sampling import BootstrappedBNNSampling
from bandits.algorithms.fixed_policy_sampling import FixedPolicySampling
from bandits.algorithms.linear_full_posterior_sampling import LinearFullPosteriorSampling
from bandits.algorithms.neural_linear_sampling import NeuralLinearPosteriorSampling
from bandits.algorithms.parameter_noise_sampling import ParameterNoiseSampling
from bandits.algorithms.posterior_bnn_sampling import PosteriorBNNSampling
from bandits.algorithms.uniform_sampling import UniformSampling
from bandits.core.contextual_bandit import run_contextual_bandit
from bandits.data.data_sampler import sample_adult_data
from bandits.data.data_sampler import sample_census_data
from bandits.data.data_sampler import sample_covertype_data
from bandits.data.data_sampler import sample_jester_data
from bandits.data.data_sampler import sample_mushroom_data
from bandits.data.data_sampler import sample_statlog_data
from bandits.data.data_sampler import sample_stock_data
from bandits.data.synthetic_data_sampler import sample_linear_data
from bandits.data.synthetic_data_sampler import sample_sparse_linear_data
from bandits.data.synthetic_data_sampler import sample_wheel_bandit_data

# Set up the paths to the data files.
base_route = os.getcwd()
data_route = 'contextual_bandits/datasets'

FLAGS = flags.FLAGS
FLAGS.set_default('alsologtostderr', True)
flags.DEFINE_string('logdir', '/tmp/bandits/', 'Base directory to save output')
flags.DEFINE_string(
    'mushroom_data',
    os.path.join(base_route, data_route, 'mushroom.data'),
    'Path to the Mushroom data file.')
flags.DEFINE_string(
    'financial_data',
    os.path.join(base_route, data_route, 'raw_stock_contexts'),
    'Path to the Financial data file.')
flags.DEFINE_string(
    'jester_data',
    os.path.join(base_route, data_route, 'jester_data_40jokes_19181users.npy'),
    'Path to the Jester data file.')
flags.DEFINE_string(
    'statlog_data',
    os.path.join(base_route, data_route, 'shuttle.trn'),
    'Path to the Statlog data file.')
flags.DEFINE_string(
    'adult_data',
    os.path.join(base_route, data_route, 'adult.full'),
    'Path to the Adult data file.')
flags.DEFINE_string(
    'covertype_data',
    os.path.join(base_route, data_route, 'covtype.data'),
    'Path to the Covertype data file.')
flags.DEFINE_string(
    'census_data',
    os.path.join(base_route, data_route, 'USCensus1990.data.txt'),
    'Path to the Census data file.')
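
# Any of the dataset paths above can be overridden on the command line via
# the corresponding flag, e.g. (illustrative path):
#   --mushroom_data=/path/to/mushroom.data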


def sample_data(data_type, num_contexts=None):
  """Samples data from the given 'data_type'.

  Args:
    data_type: Dataset from which to sample.
    num_contexts: Number of contexts to sample.

  Returns:
    dataset: Sampled matrix with rows: (context, reward_1, ..., reward_num_act).
    opt_rewards: Vector of expected optimal reward for each context.
    opt_actions: Vector of optimal action for each context.
    num_actions: Number of available actions.
    context_dim: Dimension of each context.
  """
  if data_type == 'linear':
    # Create linear dataset.
    num_actions = 8
    context_dim = 10
    noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
    dataset, _, opt_linear = sample_linear_data(
        num_contexts, context_dim, num_actions, sigma=noise_stds)
    opt_rewards, opt_actions = opt_linear
  elif data_type == 'sparse_linear':
    # Create sparse linear dataset.
    num_actions = 7
    context_dim = 10
    noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
    num_nnz_dims = int(context_dim / 3.0)
    dataset, _, opt_sparse_linear = sample_sparse_linear_data(
        num_contexts, context_dim, num_actions, num_nnz_dims, sigma=noise_stds)
    opt_rewards, opt_actions = opt_sparse_linear
  elif data_type == 'mushroom':
    # Create mushroom dataset.
    num_actions = 2
    context_dim = 117
    file_name = FLAGS.mushroom_data
    dataset, opt_mushroom = sample_mushroom_data(file_name, num_contexts)
    opt_rewards, opt_actions = opt_mushroom
  elif data_type == 'financial':
    num_actions = 8
    context_dim = 21
    num_contexts = min(3713, num_contexts)
    noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
    file_name = FLAGS.financial_data
    dataset, opt_financial = sample_stock_data(
        file_name, context_dim, num_actions, num_contexts, noise_stds,
        shuffle_rows=True)
    opt_rewards, opt_actions = opt_financial
  elif data_type == 'jester':
    num_actions = 8
    context_dim = 32
    num_contexts = min(19181, num_contexts)
    file_name = FLAGS.jester_data
    dataset, opt_jester = sample_jester_data(
        file_name, context_dim, num_actions, num_contexts,
        shuffle_rows=True, shuffle_cols=True)
    opt_rewards, opt_actions = opt_jester
  elif data_type == 'statlog':
    file_name = FLAGS.statlog_data
    num_actions = 7
    num_contexts = min(43500, num_contexts)
    sampled_vals = sample_statlog_data(file_name, num_contexts,
                                       shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'adult':
    file_name = FLAGS.adult_data
    num_actions = 14
    num_contexts = min(45222, num_contexts)
    sampled_vals = sample_adult_data(file_name, num_contexts,
                                     shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'covertype':
    file_name = FLAGS.covertype_data
    num_actions = 7
    num_contexts = min(150000, num_contexts)
    sampled_vals = sample_covertype_data(file_name, num_contexts,
                                         shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'census':
    file_name = FLAGS.census_data
    num_actions = 9
    num_contexts = min(150000, num_contexts)
    sampled_vals = sample_census_data(file_name, num_contexts,
                                      shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'wheel':
    delta = 0.95
    num_actions = 5
    context_dim = 2
    mean_v = [1.0, 1.0, 1.0, 1.0, 1.2]
    std_v = [0.05, 0.05, 0.05, 0.05, 0.05]
    mu_large = 50
    std_large = 0.01
    dataset, opt_wheel = sample_wheel_bandit_data(
        num_contexts, delta, mean_v, std_v, mu_large, std_large)
    opt_rewards, opt_actions = opt_wheel
  else:
    raise ValueError('Unknown data_type: {}'.format(data_type))

  return dataset, opt_rewards, opt_actions, num_actions, context_dim
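

# A minimal sanity-check sketch (illustrative, not part of the original
# pipeline): each row of 'dataset' is a context followed by one reward per
# action, so the tuple returned by sample_data can be validated as follows.
def sanity_check_sampled_data(dataset, opt_rewards, opt_actions, num_actions,
                              context_dim):
  """Checks the shapes of the tuple returned by sample_data."""
  assert dataset.shape[1] == context_dim + num_actions
  assert len(opt_rewards) == dataset.shape[0]
  assert len(opt_actions) == dataset.shape[0]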


def display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, name):
  """Displays summary statistics of the performance of each algorithm."""

  print('---------------------------------------------------')
  print('---------------------------------------------------')
  print('{} bandit completed after {} seconds.'.format(
      name, time.time() - t_init))
  print('---------------------------------------------------')

  performance_pairs = []
  for j, a in enumerate(algos):
    performance_pairs.append((a.name, np.sum(h_rewards[:, j])))
  performance_pairs = sorted(performance_pairs,
                             key=lambda elt: elt[1],
                             reverse=True)
  for i, (name, reward) in enumerate(performance_pairs):
    print('{:3}) {:20}| \t \t total reward = {:10}.'.format(i, name, reward))

  print('---------------------------------------------------')
  print('Optimal total reward = {}.'.format(np.sum(opt_rewards)))
  print('Frequency of optimal actions (action, frequency):')
  print([[elt, list(opt_actions).count(elt)] for elt in set(opt_actions)])
  print('---------------------------------------------------')
  print('---------------------------------------------------')
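

# An illustrative helper (an assumption, not part of the original code):
# cumulative regret of each algorithm, i.e. the optimal total reward minus
# the total reward the algorithm actually collected. h_rewards is the
# (num_contexts, num_algorithms) matrix of observed rewards used above.
def cumulative_regret(opt_rewards, h_rewards):
  """Returns a vector with one cumulative-regret value per algorithm."""
  return np.sum(opt_rewards) - np.sum(h_rewards, axis=0)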


def main(_):
  # Problem parameters
  num_contexts = 2000

  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  # statlog, adult, covertype, census, wheel}
  data_type = 'mushroom'

  # Create dataset
  sampled_vals = sample_data(data_type, num_contexts)
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

  # Define hyperparameters and algorithms
  hparams = tf.contrib.training.HParams(num_actions=num_actions)

  hparams_linear = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      a0=6,
      b0=6,
      lambda_prior=0.25,
      initial_pulls=2)

  hparams_rms = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      init_scale=0.3,
      activation=tf.nn.relu,
      layer_sizes=[50],
      batch_size=512,
      activate_decay=True,
      initial_lr=0.1,
      max_grad_norm=5.0,
      show_training=False,
      freq_summary=1000,
      buffer_s=-1,
      initial_pulls=2,
      optimizer='RMS',
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=50,
      training_epochs=100,
      p=0.95,
      q=3)

  hparams_dropout = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      init_scale=0.3,
      activation=tf.nn.relu,
      layer_sizes=[50],
      batch_size=512,
      activate_decay=True,
      initial_lr=0.1,
      max_grad_norm=5.0,
      show_training=False,
      freq_summary=1000,
      buffer_s=-1,
      initial_pulls=2,
      optimizer='RMS',
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=50,
      training_epochs=100,
      use_dropout=True,
      keep_prob=0.80)

  hparams_bbb = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      init_scale=0.3,
      activation=tf.nn.relu,
      layer_sizes=[50],
      batch_size=512,
      activate_decay=True,
      initial_lr=0.1,
      max_grad_norm=5.0,
      show_training=False,
      freq_summary=1000,
      buffer_s=-1,
      initial_pulls=2,
      optimizer='RMS',
      use_sigma_exp_transform=True,
      cleared_times_trained=10,
      initial_training_steps=100,
      noise_sigma=0.1,
      reset_lr=False,
      training_freq=50,
      training_epochs=100)

  hparams_nlinear = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      init_scale=0.3,
      activation=tf.nn.relu,
      layer_sizes=[50],
      batch_size=512,
      activate_decay=True,
      initial_lr=0.1,
      max_grad_norm=5.0,
      show_training=False,
      freq_summary=1000,
      buffer_s=-1,
      initial_pulls=2,
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=1,
      training_freq_network=50,
      training_epochs=100,
      a0=6,
      b0=6,
      lambda_prior=0.25)

  hparams_nlinear2 = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      init_scale=0.3,
      activation=tf.nn.relu,
      layer_sizes=[50],
      batch_size=512,
      activate_decay=True,
      initial_lr=0.1,
      max_grad_norm=5.0,
      show_training=False,
      freq_summary=1000,
      buffer_s=-1,
      initial_pulls=2,
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=10,
      training_freq_network=50,
      training_epochs=100,
      a0=6,
      b0=6,
      lambda_prior=0.25)

  hparams_pnoise = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      init_scale=0.3,
      activation=tf.nn.relu,
      layer_sizes=[50],
      batch_size=512,
      activate_decay=True,
      initial_lr=0.1,
      max_grad_norm=5.0,
      show_training=False,
      freq_summary=1000,
      buffer_s=-1,
      initial_pulls=2,
      optimizer='RMS',
      reset_lr=True,
      lr_decay_rate=0.5,
      training_freq=50,
      training_epochs=100,
      noise_std=0.05,
      eps=0.1,
      d_samples=300)

  hparams_alpha_div = tf.contrib.training.HParams(
      num_actions=num_actions,
      context_dim=context_dim,
      init_scale=0.3,
      activation=tf.nn.relu,
      layer_sizes=[50],
      batch_size=512,
      activate_decay=True,
      initial_lr=0.1,
      max_grad_norm=5.0,
      show_training=False,
      freq_summary=1000,
      buffer_s=-1,
      initial_pulls=2,
      optimizer='RMS',
      use_sigma_exp_transform=True,
      cleared_times_trained=10,
      initial_training_steps=100,
      noise_sigma=0.1,
      reset_lr=False,
      training_freq=50,
      training_epochs=100,
      alpha=1.0,
      k=20,
      prior_variance=0.1)

  hparams_gp = tf.contrib.training.HParams(
      num_actions=num_actions,
      num_outputs=num_actions,
      context_dim=context_dim,
      reset_lr=False,
      learn_embeddings=True,
      max_num_points=1000,
      show_training=False,
      freq_summary=1000,
      batch_size=512,
      keep_fixed_after_max_obs=True,
      training_freq=50,
      initial_pulls=2,
      training_epochs=100,
      lr=0.01,
      buffer_s=-1,
      initial_lr=0.001,
      lr_decay_rate=0.0,
      optimizer='RMS',
      task_latent_dim=5,
      activate_decay=False)

  algos = [
      UniformSampling('Uniform Sampling', hparams),
      UniformSampling('Uniform Sampling 2', hparams),
      FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
      FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
      PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      BootstrappedBNNSampling('BootRMS', hparams_rms),
      ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
      PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
  ]
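
  # For a quick smoke test, one can run a smaller subset of the algorithms
  # instead, e.g. (illustrative):
  #   algos = [UniformSampling('Uniform Sampling', hparams),
  #            LinearFullPosteriorSampling('LinFullPost', hparams_linear)]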

  # Run contextual bandit problem
  t_init = time.time()
  results = run_contextual_bandit(context_dim, num_actions, dataset, algos)
  _, h_rewards = results

  # Display results
  display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type)


if __name__ == '__main__':
  app.run(main)