andrewzhang505 committed
Commit: 84a6a5e
Parent: 744e253

Upload with huggingface_hub

.summary/0/events.out.tfevents.1657162976.andrew-gpu ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b7024cdf292809b25d392d27b6be6f6f4cf1468349966d23d948704c79963dd
+ size 3511481
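
Note: the three-line block above is a Git LFS pointer stub, not the TensorBoard event file itself; the repository tracks only the LFS spec version, the payload's SHA-256, and its size (3511481 bytes, about 3.5 MB). A minimal sketch of fetching the resolved binary with the huggingface_hub client; the repo id is not shown in this commit view, so the one below is a placeholder:

    # Sketch: resolve an LFS-backed file from this upload via huggingface_hub.
    # "andrewzhang505/<repo-name>" is a placeholder; the commit view omits the repo id.
    from huggingface_hub import hf_hub_download

    events_path = hf_hub_download(
        repo_id="andrewzhang505/<repo-name>",
        filename=".summary/0/events.out.tfevents.1657162976.andrew-gpu",
    )
    print(events_path)  # local cache path of the downloaded 3.5 MB event file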
cfg.json ADDED
@@ -0,0 +1,113 @@
+ {
+     "help": false,
+     "algo": "APPO",
+     "env": "mujoco_ant",
+     "experiment": "04_mujoco_all_envs_see_1111_env_mujoco_ant",
+     "train_dir": "./train_dir",
+     "experiments_root": "mujoco_all_envs/mujoco_all_envs_crl_4",
+     "device": "gpu",
+     "seed": 1111,
+     "num_policies": 1,
+     "async_rl": false,
+     "serial_mode": false,
+     "batched_sampling": false,
+     "num_batches_to_accumulate": 2,
+     "worker_num_splits": 2,
+     "policy_workers_per_policy": 1,
+     "max_policy_lag": 10000,
+     "num_workers": 8,
+     "num_envs_per_worker": 8,
+     "batch_size": 1024,
+     "num_batches_per_epoch": 4,
+     "num_epochs": 2,
+     "rollout": 64,
+     "recurrence": 1,
+     "shuffle_minibatches": false,
+     "gamma": 0.99,
+     "reward_scale": 1,
+     "reward_clip": 1000.0,
+     "value_bootstrap": false,
+     "normalize_returns": true,
+     "exploration_loss_coeff": 0.0,
+     "value_loss_coeff": 1.3,
+     "kl_loss_coeff": 0.1,
+     "exploration_loss": "entropy",
+     "gae_lambda": 0.95,
+     "ppo_clip_ratio": 0.2,
+     "ppo_clip_value": 1.0,
+     "with_vtrace": false,
+     "vtrace_rho": 1.0,
+     "vtrace_c": 1.0,
+     "optimizer": "adam",
+     "adam_eps": 1e-06,
+     "adam_beta1": 0.9,
+     "adam_beta2": 0.999,
+     "max_grad_norm": 3.5,
+     "learning_rate": 0.00295,
+     "lr_schedule": "linear_decay",
+     "lr_schedule_kl_threshold": 0.008,
+     "obs_subtract_mean": 0.0,
+     "obs_scale": 1.0,
+     "normalize_input": true,
+     "decorrelate_experience_max_seconds": 10,
+     "decorrelate_envs_on_one_worker": true,
+     "actor_worker_gpus": [],
+     "set_workers_cpu_affinity": true,
+     "force_envs_single_thread": true,
+     "default_niceness": 0,
+     "experiment_summaries_interval": 3,
+     "stats_avg": 100,
+     "train_for_env_steps": 10000000,
+     "train_for_seconds": 10000000000,
+     "save_every_sec": 15,
+     "keep_checkpoints": 3,
+     "load_checkpoint_kind": "latest",
+     "save_milestones_sec": -1,
+     "save_best_every_sec": 5,
+     "save_best_metric": "reward",
+     "save_best_after": 100000,
+     "benchmark": false,
+     "encoder_type": "mlp",
+     "encoder_subtype": "mlp_mujoco",
+     "encoder_custom": null,
+     "encoder_extra_fc_layers": 0,
+     "hidden_size": 64,
+     "nonlinearity": "tanh",
+     "policy_initialization": "torch_default",
+     "policy_init_gain": 1.0,
+     "actor_critic_share_weights": true,
+     "adaptive_stddev": false,
+     "initial_stddev": 1.0,
+     "use_rnn": false,
+     "rnn_type": "gru",
+     "rnn_num_layers": 1,
+     "env_gpu_actions": false,
+     "env_frameskip": 1,
+     "env_framestack": 4,
+     "pixel_format": "CHW",
+     "with_wandb": true,
+     "wandb_user": null,
+     "wandb_project": "sample_factory",
+     "wandb_group": null,
+     "wandb_job_type": "SF",
+     "wandb_tags": [
+         "mujoco",
+         "runner_crl_4"
+     ],
+     "command_line": "--algo=APPO --with_wandb=True --wandb_tags mujoco runner_crl_4 --seed=1111 --env=mujoco_ant --experiment=04_mujoco_all_envs_see_1111_env_mujoco_ant --train_dir=./train_dir --experiments_root=mujoco_all_envs/mujoco_all_envs_crl_4",
+     "cli_args": {
+         "algo": "APPO",
+         "env": "mujoco_ant",
+         "experiment": "04_mujoco_all_envs_see_1111_env_mujoco_ant",
+         "train_dir": "./train_dir",
+         "experiments_root": "mujoco_all_envs/mujoco_all_envs_crl_4",
+         "seed": 1111,
+         "with_wandb": true,
+         "wandb_tags": [
+             "mujoco",
+             "runner_crl_4"
+         ]
+     },
+     "git_hash": "4e69a02b57fff18dbbf96054866d0d759f70c5fa",
+     "git_repo_name": "https://github.com/andrewzhang505/sample-factory.git"
+ }
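
cfg.json pins the full APPO hyperparameter set for this mujoco_ant run (seed 1111, batch size 1024, learning rate 0.00295 with linear decay over 10M env steps). As a worked check, the decay horizon follows from three of these fields via the formula in the LinearDecayScheduler added in git.diff below:

    # Sketch (not part of the upload): derive the linear-decay horizon from cfg.json,
    # using the num_updates formula from the LinearDecayScheduler in git.diff below.
    import json

    with open("cfg.json") as f:
        cfg = json.load(f)

    num_updates = cfg["train_for_env_steps"] // cfg["batch_size"] * cfg["num_epochs"]
    print(num_updates)  # 10_000_000 // 1024 * 2 = 19530 minibatch updates
    # i.e. the learning rate anneals from 0.00295 to 0 over ~19.5k updates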
checkpoint_p0/best_000019072_9764864_reward_5662.000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74113794f2dbffc9d9b1f360b21044fa716b96c449a1d64d2ace49498513dbb5
+ size 89474
checkpoint_p0/checkpoint_000019440_9953280.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc4ef594dd0d76b923c0d486b68843386a767f29e7efa21e0c7e4e3720588ba3
+ size 89474
checkpoint_p0/checkpoint_000019488_9977856.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b0eb8009a55aca0f55e4a7466207e74da2f9342e07ec0b4b69bb3970370b429
+ size 89474
checkpoint_p0/checkpoint_000019536_10002432.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79926c513f96e8ef257fd695a58a5778b4fcf3aff9a165ee5599e71599dc4b38
+ size 89474
env_info_mujoco_ant ADDED
Binary file (1.62 kB).
git.diff ADDED
@@ -0,0 +1,328 @@
+ diff --git a/sample_factory/algo/learning/learner.py b/sample_factory/algo/learning/learner.py
+ index 178d2ab..20bb937 100644
+ --- a/sample_factory/algo/learning/learner.py
+ +++ b/sample_factory/algo/learning/learner.py
+ @@ -110,6 +110,20 @@ class KlAdaptiveSchedulerPerEpoch(KlAdaptiveScheduler):
+     def invoke_after_each_epoch(self):
+         return True
+
+ +class LinearDecayScheduler(LearningRateScheduler):
+ +    def __init__(self, cfg):
+ +        num_updates = cfg.train_for_env_steps // cfg.batch_size * cfg.num_epochs
+ +        self.linear_decay = LinearDecay([(0, cfg.learning_rate), (num_updates, 0)])
+ +        self.step = 0
+ +
+ +    def invoke_after_each_minibatch(self):
+ +        return True
+ +
+ +    def update(self, current_lr, recent_kls):
+ +        self.step += 1
+ +        lr = self.linear_decay.at(self.step)
+ +        return lr
+ +
+
+ def get_lr_scheduler(cfg) -> LearningRateScheduler:
+     if cfg.lr_schedule == "constant":
+ @@ -118,6 +132,8 @@ def get_lr_scheduler(cfg) -> LearningRateScheduler:
+         return KlAdaptiveSchedulerPerMinibatch(cfg)
+     elif cfg.lr_schedule == "kl_adaptive_epoch":
+         return KlAdaptiveSchedulerPerEpoch(cfg)
+ +    elif cfg.lr_schedule == "linear_decay":
+ +        return LinearDecayScheduler(cfg)
+     else:
+         raise RuntimeError(f"Unknown scheduler {cfg.lr_schedule}")
+
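Reviewer note: the LinearDecayScheduler above steps once per minibatch and anneals the learning rate from cfg.learning_rate at update 0 to zero at num_updates. A self-contained approximation of that rule, assuming the LinearDecay helper does straight-line interpolation between its (step, value) knots with clamping at the endpoints (its actual implementation lives elsewhere in sample-factory):

    # Standalone sketch of the assumed decay rule; not sample-factory's LinearDecay.
    def lr_at(step: int, lr0: float = 0.00295, num_updates: int = 19530) -> float:
        frac = min(max(step / num_updates, 0.0), 1.0)  # clamp progress to [0, 1]
        return lr0 * (1.0 - frac)

    print(lr_at(0))      # 0.00295 (initial learning rate from cfg.json)
    print(lr_at(9765))   # 0.001475, halfway through training
    print(lr_at(19530))  # 0.0 at the final update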
+ diff --git a/sample_factory/envs/mujoco/mujoco_params.py b/sample_factory/envs/mujoco/mujoco_params.py
+ index ef0b486..cb4b977 100644
+ --- a/sample_factory/envs/mujoco/mujoco_params.py
+ +++ b/sample_factory/envs/mujoco/mujoco_params.py
+ @@ -1,117 +1,155 @@
+ +# def mujoco_override_defaults(env, parser):
+ +#     parser.set_defaults(
+ +#         batched_sampling=False,
+ +#         num_workers=8,
+ +#         num_envs_per_worker=16,
+ +#         worker_num_splits=2,
+ +#         train_for_env_steps=1000000,
+ +#         encoder_type="mlp",
+ +#         encoder_subtype="mlp_mujoco",
+ +#         hidden_size=64,
+ +#         encoder_extra_fc_layers=0,
+ +#         env_frameskip=1,
+ +#         nonlinearity="tanh",
+ +#         batch_size=64,
+ +#         kl_loss_coeff=0.1,
+ +#         use_rnn=False,
+ +#         adaptive_stddev=False,
+ +#         policy_initialization="torch_default",
+ +#         reward_scale=0.01,
+ +#         rollout=8,
+ +#         max_grad_norm=0.0,
+ +#         ppo_epochs=10,
+ +#         num_batches_per_epoch=32,
+ +#         ppo_clip_ratio=0.2,
+ +#         value_loss_coeff=2.0,
+ +#         exploration_loss_coeff=0.0,
+ +#         learning_rate=3e-3,
+ +#         lr_schedule="constant",
+ +#         shuffle_minibatches=True,
+ +#         gamma=0.99,
+ +#         gae_lambda=0.95,
+ +#         with_vtrace=False,
+ +#         recurrence=1,
+ +#         value_bootstrap=False,
+ +#         normalize_input=True,
+ +#         experiment_summaries_interval=3,
+ +#         save_every_sec=15,
+ +#         serial_mode=False,
+ +#         async_rl=False,
+ +#     )
+ +
+ +#     # environment specific overrides
+ +#     env_name = "_".join(env.split("_")[1:]).lower()
+ +
+ +#     if env_name == "halfcheetah":
+ +#         parser.set_defaults(
+ +#             reward_scale=0.1,
+ +#             learning_rate=3e-3,
+ +#             lr_schedule="kl_adaptive_epoch",
+ +#             lr_schedule_kl_threshold=3e-2,
+ +#             normalize_input=False,
+ +#             num_batches_per_epoch=1,
+ +#         )
+ +#     if env_name == "humanoid":
+ +#         parser.set_defaults(
+ +#             learning_rate=3e-4,
+ +#         )
+ +#     if env_name == "hopper":
+ +#         parser.set_defaults(
+ +#             reward_scale=0.1,
+ +#             learning_rate=3e-3,
+ +#             lr_schedule="kl_adaptive_epoch",
+ +#             lr_schedule_kl_threshold=3e-2,
+ +#             # normalize_input=False,
+ +#             # num_batches_per_epoch=1,
+ +#             # normalize_returns=True,
+ +#             # hidden_size=128,
+ +#         )
+ +#     if env_name == "doublependulum":
+ +#         parser.set_defaults(
+ +#             reward_scale=0.01,
+ +#             learning_rate=3e-3,
+ +#             lr_schedule="kl_adaptive_epoch",
+ +#             lr_schedule_kl_threshold=3e-2,
+ +#         )
+ +#     if env_name == "pendulum":
+ +#         parser.set_defaults(
+ +#             # reward_scale=0.01,
+ +#             learning_rate=3e-4,
+ +#             lr_schedule="kl_adaptive_epoch",
+ +#             lr_schedule_kl_threshold=3e-3,
+ +#         )
+ +#     if env_name == "reacher":
+ +#         parser.set_defaults(
+ +#             reward_scale=0.1,
+ +#             learning_rate=3e-3,
+ +#             lr_schedule="kl_adaptive_epoch",
+ +#             lr_schedule_kl_threshold=3e-2,
+ +#             normalize_input=False,
+ +#             num_batches_per_epoch=1,
+ +#         )
+ +#     if env_name == "swimmer":
+ +#         parser.set_defaults(
+ +#             reward_scale=1,
+ +#             # learning_rate=3e-3,
+ +#             # lr_schedule="kl_adaptive_epoch",
+ +#             # lr_schedule_kl_threshold=3e-2,
+ +#             # gamma=0.9995,
+ +#             rollout=128,
+ +#             batch_size=128,
+ +#         )
+ +#     if env_name == "walker":
+ +#         parser.set_defaults(
+ +#             reward_scale=0.1,
+ +#             learning_rate=3e-3,
+ +#             lr_schedule="kl_adaptive_epoch",
+ +#             lr_schedule_kl_threshold=3e-2,
+ +#         )
+ +
+ def mujoco_override_defaults(env, parser):
+     parser.set_defaults(
+         batched_sampling=False,
+         num_workers=8,
+ -        num_envs_per_worker=16,
+ +        num_envs_per_worker=8,
+         worker_num_splits=2,
+ -        train_for_env_steps=1000000,
+ +        train_for_env_steps=10000000,
+         encoder_type="mlp",
+         encoder_subtype="mlp_mujoco",
+         hidden_size=64,
+         encoder_extra_fc_layers=0,
+         env_frameskip=1,
+         nonlinearity="tanh",
+ -        batch_size=64,
+ +        batch_size=1024,
+         kl_loss_coeff=0.1,
+ -
+         use_rnn=False,
+         adaptive_stddev=False,
+         policy_initialization="torch_default",
+ -        reward_scale=0.01,
+ -        rollout=8,
+ -        max_grad_norm=0.0,
+ -        ppo_epochs=10,
+ -        num_batches_per_epoch=32,
+ +        reward_scale=1,
+ +        rollout=64,
+ +        max_grad_norm=3.5,
+ +        num_epochs=2,
+ +        num_batches_per_epoch=4,
+         ppo_clip_ratio=0.2,
+ -        value_loss_coeff=2.0,
+ +        value_loss_coeff=1.3,
+         exploration_loss_coeff=0.0,
+ -        learning_rate=3e-3,
+ -        lr_schedule="constant",
+ -        shuffle_minibatches=True,
+ +        learning_rate=0.00295,
+ +        lr_schedule="linear_decay",
+ +        shuffle_minibatches=False,
+         gamma=0.99,
+         gae_lambda=0.95,
+         with_vtrace=False,
+         recurrence=1,
+         value_bootstrap=False,
+         normalize_input=True,
+ +        normalize_returns=True,
+         experiment_summaries_interval=3,
+         save_every_sec=15,
+ -
+         serial_mode=False,
+         async_rl=False,
+     )
+
+ -    # environment specific overrides
+ -    env_name = "_".join(env.split("_")[1:]).lower()
+ -
+ -    if env_name == "halfcheetah":
+ -        parser.set_defaults(
+ -            reward_scale=0.1,
+ -            learning_rate=3e-3,
+ -            lr_schedule="kl_adaptive_epoch",
+ -            lr_schedule_kl_threshold=3e-2,
+ -            normalize_input=False,
+ -            num_batches_per_epoch=1,
+ -        )
+ -    if env_name == "humanoid":
+ -        parser.set_defaults(
+ -            learning_rate=3e-4,
+ -        )
+ -    if env_name == "hopper":
+ -        parser.set_defaults(
+ -            reward_scale=0.1,
+ -            learning_rate=3e-3,
+ -            lr_schedule="kl_adaptive_epoch",
+ -            lr_schedule_kl_threshold=3e-2,
+ -            # normalize_input=False,
+ -            # num_batches_per_epoch=1,
+ -            # normalize_returns=True,
+ -            # hidden_size=128,
+ -        )
+ -    if env_name == "doublependulum":
+ -        parser.set_defaults(
+ -            reward_scale=0.01,
+ -            learning_rate=3e-3,
+ -            lr_schedule="kl_adaptive_epoch",
+ -            lr_schedule_kl_threshold=3e-2,
+ -        )
+ -    if env_name == "pendulum":
+ -        parser.set_defaults(
+ -            # reward_scale=0.01,
+ -            learning_rate=3e-4,
+ -            lr_schedule="kl_adaptive_epoch",
+ -            lr_schedule_kl_threshold=3e-3,
+ -        )
+ -    if env_name == "reacher":
+ -        parser.set_defaults(
+ -            reward_scale=0.1,
+ -            learning_rate=3e-3,
+ -            lr_schedule="kl_adaptive_epoch",
+ -            lr_schedule_kl_threshold=3e-2,
+ -            normalize_input=False,
+ -            num_batches_per_epoch=1,
+ -        )
+ -    if env_name == "swimmer":
+ -        parser.set_defaults(
+ -            reward_scale=1,
+ -            learning_rate=3e-4,
+ -            lr_schedule="kl_adaptive_epoch",
+ -            lr_schedule_kl_threshold=3e-3,
+ -            # normalize_input=False,
+ -            # num_batches_per_epoch=1,
+ -            normalize_returns=True,
+ -            hidden_size=128,
+ -        )
+ -    if env_name == "walker":
+ -        parser.set_defaults(
+ -            reward_scale=0.1,
+ -            learning_rate=3e-3,
+ -            lr_schedule="kl_adaptive_epoch",
+ -            lr_schedule_kl_threshold=3e-2,
+ -            # normalize_returns=True,
+ -            # normalize_input=False,
+ -            # num_batches_per_epoch=1,
+ -        )
+ +
+
+
+ # noinspection PyUnusedLocal
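Net effect of the hunk above: the per-environment branches (halfcheetah, hopper, pendulum, ...) are dropped or kept only as comments, and one shared default set (batch size 1024, rollout 64, lr 0.00295 with linear decay, value_loss_coeff 1.3, normalized returns) now applies to every MuJoCo env. The overrides go through argparse's set_defaults, so values passed on the command line still win; a minimal sketch of that mechanism with illustrative flags (not sample-factory's actual parser setup):

    # Sketch of the set_defaults pattern used by mujoco_override_defaults.
    # The two flags are illustrative, not sample-factory's real argument list.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--learning_rate", type=float, default=3e-3)
    parser.add_argument("--lr_schedule", type=str, default="constant")

    # what the new shared defaults do:
    parser.set_defaults(learning_rate=0.00295, lr_schedule="linear_decay")

    print(parser.parse_args([]))                           # picks up the new defaults
    print(parser.parse_args(["--learning_rate", "1e-4"]))  # explicit CLI value still wins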
+ diff --git a/sample_factory/model/model_utils.py b/sample_factory/model/model_utils.py
+ index df6c82c..d8226d8 100644
+ --- a/sample_factory/model/model_utils.py
+ +++ b/sample_factory/model/model_utils.py
+ @@ -276,7 +276,7 @@ class MlpEncoder(EncoderBase):
+         self.init_fc_blocks(fc_encoder_layer)
+
+     def forward(self, obs_dict):
+ -        x = self.mlp_head(obs_dict['obs'].float())
+ +        x = self.mlp_head(obs_dict["obs"].float())
+         x = self.forward_fc_blocks(x)
+         return x
+
+ diff --git a/sample_factory/runner/runs/mujoco_all_envs.py b/sample_factory/runner/runs/mujoco_all_envs.py
+ index 3ac67ce..5cbaa1a 100644
+ --- a/sample_factory/runner/runs/mujoco_all_envs.py
+ +++ b/sample_factory/runner/runs/mujoco_all_envs.py
+ @@ -8,12 +8,12 @@ _params = ParamGrid(
+         [
+             "mujoco_ant",
+             "mujoco_halfcheetah",
+ -            "mujoco_hopper",
+ +            # "mujoco_hopper",
+             "mujoco_humanoid",
+ -            "mujoco_doublependulum",
+ -            "mujoco_pendulum",
+ -            "mujoco_reacher",
+ -            "mujoco_swimmer",
+ +            # "mujoco_doublependulum",
+ +            # "mujoco_pendulum",
+ +            # "mujoco_reacher",
+ +            # "mujoco_swimmer",
+             "mujoco_walker",
+         ],
+     ),
+ @@ -23,11 +23,11 @@ _params = ParamGrid(
+ _experiments = [
+     Experiment(
+         "mujoco_all_envs",
+ -        "python -m sample_factory_examples.mujoco_examples.train_mujoco --algo=APPO --with_wandb=True --wandb_tags mujoco runner_4",
+ +        "python -m sample_factory_examples.mujoco_examples.train_mujoco --algo=APPO --with_wandb=True --wandb_tags mujoco runner_crl_4",
+         _params.generate_params(randomize=False),
+     ),
+ ]
+
+
+ RUN_DESCRIPTION = RunDescription("mujoco_all_envs", experiments=_experiments)
+ -# python -m sample_factory.runner.run --run=mujoco_all_envs --runner=processes --max_parallel=8 --pause_between=1 --experiments_per_gpu=10000 --num_gpus=1 --experiment_suffix=4
+ +# python -m sample_factory.runner.run --run=mujoco_all_envs --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=32 --num_gpus=1 --experiment_suffix=crl_3
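
The runner change narrows the grid to four environments (ant, halfcheetah, humanoid, walker) and retags the runs as runner_crl_4; the trailing comment is the launch command for the grid. Roughly, a ParamGrid expands into the cross-product of its axes and the runner starts one experiment per combination; an illustration with itertools (not the sample_factory.runner API, and any seeds beyond the 1111 visible in this upload are assumed):

    # Illustration only: the cross-product a ParamGrid-style runner expands into.
    # Seeds other than 1111 are hypothetical; this upload shows seed 1111 alone.
    from itertools import product

    seeds = [1111]
    envs = ["mujoco_ant", "mujoco_halfcheetah", "mujoco_humanoid", "mujoco_walker"]

    for i, (seed, env) in enumerate(product(seeds, envs)):
        print(f"{i:02d}: --seed={seed} --env={env}")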
sf_log.txt ADDED
The diff for this file is too large to render.