# PPO hyperparameters (per-environment).
# Source: https://github.com/sgoodfriend/rl-algo-impls/tree/5598ebc4b03054f16eebe76792486ba7bcacfc5c
# Commit: 341188c
# CartPole-v1 — also the base config (via the &cartpole-defaults anchor)
# that CartPole-v0 below merges from.
CartPole-v1: &cartpole-defaults
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 8
  algo_hyperparams:
    n_steps: 32
    batch_size: 256
    n_epochs: 20
    gae_lambda: 0.8
    gamma: 0.98
    ent_coef: 0.0
    learning_rate: 0.001
    learning_rate_decay: linear
    clip_range: 0.2
    clip_range_decay: linear
  eval_params:
    step_freq: !!float 2.5e4
    n_episodes: 10
    save_best: true
# CartPole-v0 — reuses the CartPole-v1 settings, only shortening training.
# NOTE: YAML `<<` merge is shallow; nested mappings come through unchanged
# from the anchor unless overridden wholesale.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 5e4
MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    normalize: true
    n_envs: 16
  algo_hyperparams:
    n_steps: 16
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.99
    ent_coef: 0.0
MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    normalize: true
    n_envs: 4
  policy_hyperparams:
    init_layers_orthogonal: false
    # log_std_init: -3.29
  algo_hyperparams:
    n_steps: 512
    batch_size: 256
    n_epochs: 10
    learning_rate: !!float 7.77e-5
    ent_coef: 0.01 # 0.00429
    ent_coef_decay: linear
    clip_range: 0.1
    gae_lambda: 0.9
    max_grad_norm: 5
    vf_coef: 0.19
    # use_sde: true
  eval_params:
    step_freq: 5000
    n_episodes: 10
    save_best: true
Acrobot-v1:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  algo_hyperparams:
    n_steps: 256
    n_epochs: 4
    gae_lambda: 0.94
    gamma: 0.99
    ent_coef: 0.0
LunarLander-v2:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
  algo_hyperparams:
    n_steps: 1024
    batch_size: 64
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.999
    ent_coef: 0.01
    ent_coef_decay: linear
    normalize_advantage: false
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# CarRacing-v0 — pixel-observation env: stacked frames plus a CNN feature
# extractor, and gSDE-style exploration (use_sde / sde_sample_freq).
CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
  algo_hyperparams:
    n_steps: 512
    batch_size: 128
    n_epochs: 10
    learning_rate: !!float 1e-4
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    clip_range: 0.2
# Shared Atari defaults — presumably selected by name for envs such as:
#   BreakoutNoFrameskip-v4
#   PongNoFrameskip-v4
#   SpaceInvadersNoFrameskip-v4
#   QbertNoFrameskip-v4
atari: &atari-defaults
  n_timesteps: !!float 1e7
  policy_hyperparams:
    activation_fn: relu
  env_hyperparams: &atari-env-defaults
    n_envs: 8
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: subproc
  algo_hyperparams:
    n_steps: 128
    batch_size: 256
    n_epochs: 4
    learning_rate: !!float 2.5e-4
    learning_rate_decay: linear
    clip_range: 0.1
    clip_range_decay: linear
    vf_coef: 0.5
    ent_coef: 0.01
  eval_params:
    deterministic: false
# HalfCheetah — also the base for the other PyBullet locomotion envs below
# (&pybullet-defaults plus per-section anchors for env/policy/algo params).
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams: &pybullet-env-defaults
    n_envs: 16
    normalize: true
  policy_hyperparams: &pybullet-policy-defaults
    pi_hidden_sizes: [256, 256]
    v_hidden_sizes: [256, 256]
    activation_fn: relu
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 512
    batch_size: 128
    n_epochs: 20
    gamma: 0.99
    gae_lambda: 0.9
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    learning_rate: !!float 3e-5
    clip_range: 0.4
# NOTE(review): the policy/algo overrides here merge the same anchors they
# would inherit anyway, so with plain YAML merge semantics this entry is
# equivalent to `<<: *pybullet-defaults` alone — kept as-is for safety in
# case the config loader deep-merges differently.
AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    <<: *pybullet-policy-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
# Walker2D — PyBullet defaults plus a decaying clip range.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
# Hopper — PyBullet defaults plus a decaying clip range.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
# Humanoid — hardest PyBullet task: 5x the timesteps, longer rollouts,
# smaller batches, and a gentler clip range than the shared defaults.
HumanoidBulletEnv-v0:
  <<: *pybullet-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    <<: *pybullet-env-defaults
    n_envs: 8
  policy_hyperparams:
    <<: *pybullet-policy-defaults
    # log_std_init: -1
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    n_steps: 2048
    batch_size: 64
    n_epochs: 10
    gae_lambda: 0.95
    learning_rate: !!float 2.5e-4
    clip_range: 0.2