File size: 4,036 Bytes
6f3bdf9 f050c92 6f3bdf9 f050c92 6f3bdf9 f050c92 6f3bdf9 f050c92 6f3bdf9 971403f 6f3bdf9 f050c92 6f3bdf9 971403f 6f3bdf9 971403f 6f3bdf9 971403f 6f3bdf9 f050c92 6f3bdf9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
# Baseline CartPole settings. The &cartpole-defaults anchor exposes this whole
# mapping so other entries can pull it in through a YAML merge key (<<).
CartPole-v1: &cartpole-defaults
  # !!float forces numeric parsing of the dot-less exponent form.
  n_timesteps: !!float 4e5
  algo_hyperparams:
    n_steps: 4096
    gamma: 0.99
    gae_lambda: 1
    # Policy and value learning rates are set independently.
    pi_lr: 0.01
    val_lr: 0.01
    train_v_iters: 80
  eval_hyperparams:
    step_freq: !!float 2.5e4
# Starts from the CartPole-v1 defaults, then overrides the step budget and the
# algorithm settings.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 1e5
  # YAML merges are shallow: redefining algo_hyperparams replaces the inherited
  # mapping wholesale, so every field is restated here (only n_steps differs).
  # eval_hyperparams is not redefined and therefore comes from the defaults.
  algo_hyperparams:
    n_steps: 1024
    gamma: 0.99
    gae_lambda: 1
    pi_lr: 0.01
    val_lr: 0.01
    train_v_iters: 80
# MountainCar (discrete): 16 normalized environments, 200 steps per rollout.
MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  algo_hyperparams:
    n_steps: 200
    gamma: 0.99
    gae_lambda: 0.97
    pi_lr: 0.005
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
# MountainCar (continuous actions): 4 normalized environments.
MountainCarContinuous-v0:
  n_timesteps: !!float 3e5
  env_hyperparams:
    n_envs: 4
    normalize: true
  # Disabled policy overrides, kept for reference:
  # policy_hyperparams:
  #   init_layers_orthogonal: false
  #   log_std_init: -3.29
  #   use_sde: true
  algo_hyperparams:
    n_steps: 1000
    gamma: 0.99
    gae_lambda: 0.9
    pi_lr: !!float 5e-4
    val_lr: !!float 1e-3
    train_v_iters: 80
    # Clipping threshold here is 5, versus 0.5 in the other entries that set it.
    max_grad_norm: 5
  eval_hyperparams:
    step_freq: 5000
# Acrobot: only the algorithm settings are specified; no env, policy or eval
# overrides are given for this entry.
Acrobot-v1:
  n_timesteps: !!float 2e5
  algo_hyperparams:
    n_steps: 2048
    gamma: 0.99
    gae_lambda: 0.97
    pi_lr: 0.005
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
# LunarLander: two 256-unit hidden layers, small learning rates, and
# non-deterministic evaluation.
LunarLander-v2:
  n_timesteps: !!float 4e6
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    n_steps: 2048
    gamma: 0.999
    gae_lambda: 0.97
    pi_lr: 0.0001
    val_lr: 0.0001
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_hyperparams:
    deterministic: false
# BipedalWalker: 16 normalized environments, two 256-unit hidden layers, and
# non-deterministic evaluation.
BipedalWalker-v3:
  n_timesteps: !!float 10e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    n_steps: 1600
    gamma: 0.99
    gae_lambda: 0.95
    pi_lr: !!float 1e-4
    val_lr: !!float 1e-4
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_hyperparams:
    deterministic: false
# CarRacing: 4 frame-stacked environments in a synchronous vector env.
CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 4
    frame_stack: 4
    vec_env_class: sync
  policy_hyperparams:
    activation_fn: relu
    cnn_flatten_dim: 256
    hidden_sizes: [256]
    init_layers_orthogonal: false
    # use_sde and log_std_init here pair with sde_sample_freq in
    # algo_hyperparams below.
    use_sde: true
    log_std_init: -2
  algo_hyperparams:
    n_steps: 1000
    gamma: 0.99
    gae_lambda: 0.95
    pi_lr: !!float 5e-5
    val_lr: !!float 1e-4
    # 40 here; every other entry in this file uses 80.
    train_v_iters: 40
    max_grad_norm: 0.5
    sde_sample_freq: 4
# Reference PyBullet entry. Four anchors are published so sibling entries can
# merge either the whole mapping (&pybullet-defaults) or a single nested
# section (&pybullet-env-defaults, &pybullet-policy-defaults,
# &pybullet-algo-defaults).
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams: &pybullet-env-defaults
    normalize: true
  policy_hyperparams: &pybullet-policy-defaults
    hidden_sizes: [256, 256]
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 4000
    gamma: 0.99
    gae_lambda: 0.97
    pi_lr: !!float 3e-4
    val_lr: !!float 1e-3
    train_v_iters: 80
    max_grad_norm: 0.5
# Ant: PyBullet defaults with different hidden layer sizes and learning rates.
AntBulletEnv-v0:
  <<: *pybullet-defaults
  # Each nested section re-merges its own defaults because a top-level merge is
  # shallow: redefining a section would otherwise drop its inherited fields.
  policy_hyperparams:
    <<: *pybullet-policy-defaults
    hidden_sizes: [400, 300]
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    pi_lr: !!float 7e-4
    val_lr: !!float 7e-3
# Hopper: takes the PyBullet defaults unchanged, with no overrides.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
# Walker2D: takes the PyBullet defaults unchanged, with no overrides.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
# FrozenLake on the 8x8 slippery map, with a single 64-unit hidden layer.
FrozenLake-v1:
  n_timesteps: !!float 8e5
  # This section was previously keyed `env_params`. Every other entry in this
  # file configures the environment under `env_hyperparams`, so the odd key out
  # is treated as a typo that would keep make_kwargs from being applied.
  # NOTE(review): confirm the loader accepts make_kwargs under env_hyperparams.
  env_hyperparams:
    make_kwargs:
      map_name: 8x8
      is_slippery: true
  policy_hyperparams:
    hidden_sizes: [64]
  algo_hyperparams:
    n_steps: 2048
    gamma: 0.99
    gae_lambda: 0.98
    pi_lr: 0.01
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_hyperparams:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# Shared Atari settings published through the &atari-defaults anchor.
# NOTE(review): no entry in the portion of the file shown here merges this
# anchor, and the leading underscore suggests a template rather than an
# environment id. Confirm how the loader treats this key.
_atari: &atari-defaults
  n_timesteps: !!float 10e6
  env_hyperparams:
    n_envs: 2
    frame_stack: 4
    vec_env_class: async
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
  policy_hyperparams:
    activation_fn: relu
  algo_hyperparams:
    n_steps: 3072
    gamma: 0.99
    gae_lambda: 0.95
    pi_lr: !!float 5e-5
    val_lr: !!float 1e-4
    train_v_iters: 80
    max_grad_norm: 0.5
    # ent_coef is set only in this entry.
    ent_coef: 0.01
  eval_hyperparams:
    deterministic: false
|