# Baseline settings. Presumably the named sections in this file override
# individual keys from here per behavior — confirm against the config loader.
default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    # Reward sources, keyed by signal name.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99
# Settings for the FoodCollector section.
FoodCollector:
    normalize: false
    beta: 5.0e-3
    batch_size: 1024
    buffer_size: 10240
    max_steps: 2.0e6
# Settings for the Bouncer section.
Bouncer:
    normalize: true
    max_steps: 7.0e6
    num_layers: 2
    hidden_units: 64
# Settings for the PushBlock section.
PushBlock:
    max_steps: 1.5e7
    batch_size: 128
    buffer_size: 2048
    beta: 1.0e-2
    hidden_units: 256
    summary_freq: 60000
    time_horizon: 64
    num_layers: 2
# Settings for the SmallWallJump section.
SmallWallJump:
    # NOTE(review): no decimal point in the mantissa; YAML 1.1 loaders such as
    # PyYAML may read this as a string, not a float — confirm the consumer
    # converts it.
    max_steps: 5e6
    batch_size: 128
    buffer_size: 2048
    beta: 5.0e-3
    hidden_units: 256
    summary_freq: 20000
    time_horizon: 128
    num_layers: 2
    normalize: false
# Settings for the BigWallJump section.
BigWallJump:
    # NOTE(review): no decimal point in the mantissa; YAML 1.1 loaders such as
    # PyYAML may read this as a string, not a float — confirm the consumer
    # converts it.
    max_steps: 2e7
    batch_size: 128
    buffer_size: 2048
    beta: 5.0e-3
    hidden_units: 256
    summary_freq: 20000
    time_horizon: 128
    num_layers: 2
    normalize: false
# Settings for the Pyramids section.
Pyramids:
    summary_freq: 30000
    time_horizon: 128
    batch_size: 128
    buffer_size: 2048
    hidden_units: 512
    num_layers: 2
    beta: 1.0e-2
    max_steps: 1.0e7
    num_epoch: 3
    # Two reward sources are configured here: extrinsic and curiosity.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99
        curiosity:
            strength: 0.02
            gamma: 0.99
            encoding_size: 256
# Settings for the VisualPyramids section.
VisualPyramids:
    time_horizon: 128
    batch_size: 64
    # NOTE(review): 2024 is not a multiple of batch_size (64) — confirm this
    # value is intended rather than 2048.
    buffer_size: 2024
    hidden_units: 256
    num_layers: 1
    beta: 1.0e-2
    max_steps: 1.0e7
    num_epoch: 3
    # Two reward sources are configured here: extrinsic and curiosity.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99
        curiosity:
            strength: 0.01
            gamma: 0.99
            encoding_size: 256
# Settings for the 3DBall section.
3DBall:
    normalize: true
    batch_size: 64
    buffer_size: 12000
    summary_freq: 12000
    time_horizon: 1000
    lambd: 0.99
    beta: 0.001
# Settings for the 3DBallHard section.
3DBallHard:
    normalize: true
    batch_size: 1200
    buffer_size: 12000
    summary_freq: 12000
    time_horizon: 1000
    max_steps: 5.0e5
    beta: 0.001
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.995
# Settings for the Tennis section, including a self_play sub-section.
Tennis:
    normalize: true
    max_steps: 5.0e7
    learning_rate_schedule: constant
    batch_size: 1024
    buffer_size: 10240
    hidden_units: 256
    time_horizon: 1000
    self_play:
        window: 10
        play_against_current_self_ratio: 0.5
        save_steps: 50000
        swap_steps: 50000
# Settings for the Soccer section, including a self_play sub-section.
Soccer:
    normalize: false
    max_steps: 5.0e7
    learning_rate_schedule: constant
    batch_size: 2048
    buffer_size: 20480
    hidden_units: 512
    time_horizon: 1000
    num_layers: 2
    self_play:
        window: 10
        play_against_current_self_ratio: 0.5
        save_steps: 50000
        swap_steps: 50000
# Settings for the CrawlerStatic section.
CrawlerStatic:
    normalize: true
    num_epoch: 3
    time_horizon: 1000
    batch_size: 2024
    buffer_size: 20240
    # NOTE(review): no decimal point in the mantissa; YAML 1.1 loaders such as
    # PyYAML may read this as a string, not a float — confirm the consumer
    # converts it.
    max_steps: 1e7
    summary_freq: 30000
    num_layers: 3
    hidden_units: 512
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.995
# Settings for the CrawlerDynamic section.
CrawlerDynamic:
    normalize: true
    num_epoch: 3
    time_horizon: 1000
    batch_size: 2024
    buffer_size: 20240
    # NOTE(review): no decimal point in the mantissa; YAML 1.1 loaders such as
    # PyYAML may read this as a string, not a float — confirm the consumer
    # converts it.
    max_steps: 1e7
    summary_freq: 30000
    num_layers: 3
    hidden_units: 512
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.995
# Settings for the Walker section.
Walker:
    normalize: true
    num_epoch: 3
    time_horizon: 1000
    batch_size: 2048
    buffer_size: 20480
    # NOTE(review): no decimal point in the mantissa; YAML 1.1 loaders such as
    # PyYAML may read this as a string, not a float — confirm the consumer
    # converts it.
    max_steps: 2e7
    summary_freq: 30000
    num_layers: 3
    hidden_units: 512
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.995
# Settings for the Reacher section.
Reacher:
    normalize: true
    num_epoch: 3
    time_horizon: 1000
    batch_size: 2024
    buffer_size: 20240
    # NOTE(review): no decimal point in the mantissa; YAML 1.1 loaders such as
    # PyYAML may read this as a string, not a float — confirm the consumer
    # converts it.
    max_steps: 2e7
    summary_freq: 60000
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.995
# Settings for the Hallway section; use_recurrent is enabled here, with
# sequence_length and memory_size set alongside it.
Hallway:
    use_recurrent: true
    sequence_length: 64
    num_layers: 2
    hidden_units: 128
    memory_size: 256
    beta: 1.0e-2
    num_epoch: 3
    buffer_size: 1024
    batch_size: 128
    max_steps: 1.0e7
    summary_freq: 10000
    time_horizon: 64
# Settings for the VisualHallway section; use_recurrent is enabled here, with
# sequence_length and memory_size set alongside it.
VisualHallway:
    use_recurrent: true
    sequence_length: 64
    num_layers: 1
    hidden_units: 128
    memory_size: 256
    beta: 1.0e-2
    num_epoch: 3
    buffer_size: 1024
    batch_size: 64
    max_steps: 1.0e7
    summary_freq: 10000
    time_horizon: 64
# Settings for the VisualPushBlock section; use_recurrent is enabled here,
# with sequence_length and memory_size set alongside it.
VisualPushBlock:
    use_recurrent: true
    sequence_length: 32
    num_layers: 1
    hidden_units: 128
    memory_size: 256
    beta: 1.0e-2
    num_epoch: 3
    buffer_size: 1024
    batch_size: 64
    max_steps: 3.0e6
    summary_freq: 60000
    time_horizon: 64
# Settings for the GridWorld section.
GridWorld:
    batch_size: 32
    normalize: false
    num_layers: 1
    hidden_units: 256
    beta: 5.0e-3
    buffer_size: 256
    max_steps: 500000
    summary_freq: 20000
    time_horizon: 5
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.9
# Settings for the Basic section.
Basic:
    batch_size: 32
    normalize: false
    num_layers: 1
    hidden_units: 20
    beta: 5.0e-3
    buffer_size: 256
    max_steps: 5.0e5
    summary_freq: 2000
    time_horizon: 3
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.9
# Settings for the AircraftLearning section.
# NOTE(review): the trailing "# <number>" comments look like earlier values
# kept for reference — confirm before relying on them.
AircraftLearning:
    summary_freq: 32000
    time_horizon: 128
    batch_size: 2048  # 512
    buffer_size: 20480  # 4096
    hidden_units: 256  # 128
    num_layers: 2
    beta: 1.0e-2
    max_steps: 5.0e7
    num_epoch: 3
    # Extrinsic reward only.
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99