File size: 3,121 Bytes
6f3bdf9 971403f 6f3bdf9 f050c92 6f3bdf9 f050c92 971403f 6f3bdf9 971403f 6f3bdf9 f050c92 971403f f050c92 6f3bdf9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# Settings for CartPole-v1. Anchored as `cartpole-defaults` so that other
# entries can pull the same mapping in through a merge key.
CartPole-v1: &cartpole-defaults
  # Explicit !!float tag: a plain `5e5` has no decimal point, so YAML 1.1
  # loaders such as PyYAML would otherwise resolve it as a string.
  n_timesteps: !!float 5e5
  env_hyperparams:
    n_envs: 8
# CartPole-v0 reuses every key of the `cartpole-defaults` anchor unchanged.
# The merge key must be nested under the entry; at column 0 it would merge the
# defaults into the document root instead.
CartPole-v0:
  <<: *cartpole-defaults
# Settings for MountainCar-v0.
MountainCar-v0:
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
# Settings for MountainCarContinuous-v0.
MountainCarContinuous-v0:
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 4
    normalize: true
  # Disabled policy overrides, kept commented out for reference.
  # policy_hyperparams:
  #   use_sde: true
  #   log_std_init: 0.0
  #   init_layers_orthogonal: false
  algo_hyperparams:
    n_steps: 100
    # NOTE(review): sde_sample_freq is still set although use_sde is commented
    # out above; confirm whether it has any effect in this configuration.
    sde_sample_freq: 16
# Settings for Acrobot-v1.
Acrobot-v1:
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 5e5
  env_hyperparams:
    normalize: true
    n_envs: 16
# Tuned
# Settings for LunarLander-v2; the only entry in this file that sets `device`.
LunarLander-v2:
  device: cpu
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  algo_hyperparams:
    n_steps: 2
    gamma: 0.9955517404308908
    gae_lambda: 0.9875340918797773
    learning_rate: 0.0013814130817068916
    learning_rate_decay: linear
    ent_coef: !!float 3.388369146384422e-7
    # Plain scalar `none` is the string "none", not YAML null.
    ent_coef_decay: none
    max_grad_norm: 3.33982095073364
    normalize_advantage: true
    vf_coef: 0.1667838310548184
# Settings for BipedalWalker-v3.
BipedalWalker-v3:
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 5e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  algo_hyperparams:
    ent_coef: 0
    max_grad_norm: 0.5
    n_steps: 8
    gae_lambda: 0.9
    vf_coef: 0.4
    gamma: 0.99
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear
# Settings for HalfCheetahBulletEnv-v0. Anchored as `pybullet-defaults` so the
# other *BulletEnv entries can merge the whole mapping.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 2e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  # Anchored separately so an entry can reuse only the algorithm settings;
  # no alias to it appears in the visible part of the file.
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 8
    ent_coef: 0
    max_grad_norm: 0.5
    gae_lambda: 0.9
    gamma: 0.99
    vf_coef: 0.4
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear
# AntBulletEnv-v0 takes every key from the `pybullet-defaults` anchor.
# The merge key is nested so it applies to this entry, not the document root.
AntBulletEnv-v0:
  <<: *pybullet-defaults
# Walker2DBulletEnv-v0 takes every key from the `pybullet-defaults` anchor.
# The merge key is nested so it applies to this entry, not the document root.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
# HopperBulletEnv-v0 takes every key from the `pybullet-defaults` anchor.
# The merge key is nested so it applies to this entry, not the document root.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
# Tuned
# Settings for CarRacing-v0.
CarRacing-v0:
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 4
    frame_stack: 4
    normalize: true
    # Normalization is enabled above, but only for rewards; observations are
    # explicitly excluded.
    normalize_kwargs:
      norm_obs: false
      norm_reward: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -4.839609092563
    init_layers_orthogonal: true
    activation_fn: tanh
    share_features_extractor: false
    cnn_flatten_dim: 256
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 64
    learning_rate: 0.000018971962220405576
    gamma: 0.9942776405534832
    gae_lambda: 0.9549244758833236
    ent_coef: 0.0000015666550584860516
    ent_coef_decay: linear
    vf_coef: 0.12164696385898476
    max_grad_norm: 2.2574480552177127
    normalize_advantage: false
    use_rms_prop: false
    sde_sample_freq: 16
# Shared Atari settings, anchored as `atari-defaults`.
# NOTE(review): the leading underscore suggests this is a template entry
# rather than a real environment id; confirm how the loader treats it.
_atari: &atari-defaults
  # Tagged !!float so that exponent notation without a decimal point is not
  # read as a string by YAML 1.1 loaders.
  n_timesteps: !!float 1e7
  # The env and policy sub-mappings carry their own anchors so they can be
  # merged individually; no alias to them appears in the visible part of the
  # file.
  env_hyperparams: &atari-env-defaults
    n_envs: 16
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: async
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    ent_coef: 0.01
    vf_coef: 0.25
|