Upload . with huggingface_hub
- .gitattributes +1 -0
- .summary/0/events.out.tfevents.1673492200.brain2.usc.edu +3 -0
- README.md +56 -0
- checkpoint_p0/best_000014838_97320960_reward_12530.994.pth +3 -0
- checkpoint_p0/checkpoint_000015058_98762752.pth +3 -0
- checkpoint_p0/checkpoint_000015258_100073472.pth +3 -0
- config.json +147 -0
- git.diff +0 -0
- replay.mp4 +3 -0
- sf_log.txt +326 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1673492200.brain2.usc.edu
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5cbc79a9ea0ca3bcdcd8fcd24d550e316c6963c5d569d7f18776b1875138a5b2
size 205847
README.md
ADDED
@@ -0,0 +1,56 @@
---
library_name: sample-factory
tags:
- deep-reinforcement-learning
- reinforcement-learning
- sample-factory
model-index:
- name: APPO
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: ant
      type: ant
    metrics:
    - type: mean_reward
      value: 12233.03 +/- 3798.23
      name: mean_reward
      verified: false
---

An **APPO** model trained on the **ant** environment.

This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
Documentation on how to use Sample-Factory can be found at https://www.samplefactory.dev/

## Downloading the model

After installing Sample-Factory, download the model with:
```
python -m sample_factory.huggingface.load_from_hub -r apetrenko/sample_factory_brax_ant
```
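If you would rather fetch the files programmatically, the following is a minimal sketch using the `huggingface_hub` Python library (it is not part of the official Sample-Factory instructions above, and the target directory is only an illustrative choice):

```python
from huggingface_hub import snapshot_download

# Minimal sketch: download every file in this repository (checkpoints, config.json,
# replay.mp4, logs) into a local folder. The local_dir value is an assumption --
# point it wherever you want Sample-Factory's --train_dir to look.
snapshot_download(
    repo_id="apetrenko/sample_factory_brax_ant",
    local_dir="./train_dir/sample_factory_brax_ant",
)
```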

## Using the model

To run the model after download, use the `enjoy` script corresponding to this environment:
```
python -m sf_examples.brax.enjoy_brax --algo=APPO --env=ant --train_dir=./train_dir --experiment=sample_factory_brax_ant
```
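This repository also ships the raw PyTorch checkpoints under `checkpoint_p0/` (the best-policy file encodes its training step and reward in the filename). If you want to peek inside one outside of Sample-Factory, the snippet below is only an illustrative sketch and assumes the files have been downloaded locally:

```python
import torch

# Illustrative sketch: open the best checkpoint shipped in this commit on CPU and
# list its top-level entries. The filename encodes ~97.3M env steps and the best
# observed reward of ~12531; the exact layout of the dict is defined by Sample-Factory.
# Depending on your PyTorch version you may need to pass weights_only=False.
ckpt = torch.load(
    "checkpoint_p0/best_000014838_97320960_reward_12530.994.pth",
    map_location="cpu",
)
print(sorted(ckpt.keys()))
```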

You can also upload models to the Hugging Face Hub by running the same script with the `--push_to_hub` flag.
See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.

## Training with this model

To continue training with this model, use the `train` script corresponding to this environment:
```
python -m sf_examples.brax.train_brax --algo=APPO --env=ant --train_dir=./train_dir --experiment=sample_factory_brax_ant --restart_behavior=resume --train_for_env_steps=10000000000
```

Note that you may need to set `--train_for_env_steps` to a suitably high number, since the experiment resumes from the step count at which it previously stopped (the checkpoints in this repository end at roughly 100M environment steps, so the target must exceed that).
checkpoint_p0/best_000014838_97320960_reward_12530.994.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bdea51de26ab48b950efd7b8a5916f05c52104b15e694c0610cb373939a1eb2b
size 788471
checkpoint_p0/checkpoint_000015058_98762752.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:20c1a00a5eaaa28aeb04b33a1d3f45724f3103c23b5cbe03ed36e2c51e1c2878
size 788847
checkpoint_p0/checkpoint_000015258_100073472.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8de2e2737c38ff9e7c846f2432fca3ba08ed00db926a3ac5b32eed1812a86112
size 788847
config.json
ADDED
@@ -0,0 +1,147 @@
{
    "help": false,
    "algo": "APPO",
    "env": "ant",
    "experiment": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5",
    "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
    "restart_behavior": "resume",
    "device": "gpu",
    "seed": 2322090,
    "num_policies": 1,
    "async_rl": false,
    "serial_mode": true,
    "batched_sampling": true,
    "num_batches_to_accumulate": 2,
    "worker_num_splits": 1,
    "policy_workers_per_policy": 1,
    "max_policy_lag": 1000,
    "num_workers": 1,
    "num_envs_per_worker": 1,
    "batch_size": 32768,
    "num_batches_per_epoch": 2,
    "num_epochs": 5,
    "rollout": 32,
    "recurrence": 1,
    "shuffle_minibatches": false,
    "gamma": 0.99,
    "reward_scale": 0.01,
    "reward_clip": 1000.0,
    "value_bootstrap": true,
    "normalize_returns": true,
    "exploration_loss_coeff": 0.0,
    "value_loss_coeff": 2.0,
    "kl_loss_coeff": 0.0,
    "exploration_loss": "entropy",
    "gae_lambda": 0.95,
    "ppo_clip_ratio": 0.2,
    "ppo_clip_value": 1.0,
    "with_vtrace": false,
    "vtrace_rho": 1.0,
    "vtrace_c": 1.0,
    "optimizer": "adam",
    "adam_eps": 1e-06,
    "adam_beta1": 0.9,
    "adam_beta2": 0.999,
    "max_grad_norm": 1.0,
    "learning_rate": 0.0003,
    "lr_schedule": "kl_adaptive_epoch",
    "lr_schedule_kl_threshold": 0.008,
    "lr_adaptive_min": 1e-06,
    "lr_adaptive_max": 0.002,
    "obs_subtract_mean": 0.0,
    "obs_scale": 1.0,
    "normalize_input": true,
    "normalize_input_keys": null,
    "decorrelate_experience_max_seconds": 0,
    "decorrelate_envs_on_one_worker": true,
    "actor_worker_gpus": [
        0
    ],
    "set_workers_cpu_affinity": true,
    "force_envs_single_thread": false,
    "default_niceness": 0,
    "log_to_file": true,
    "experiment_summaries_interval": 3,
    "flush_summaries_interval": 30,
    "stats_avg": 100,
    "summaries_use_frameskip": true,
    "heartbeat_interval": 20,
    "heartbeat_reporting_interval": 180,
    "train_for_env_steps": 100000000,
    "train_for_seconds": 10000000000,
    "save_every_sec": 15,
    "keep_checkpoints": 2,
    "load_checkpoint_kind": "latest",
    "save_milestones_sec": -1,
    "save_best_every_sec": 5,
    "save_best_metric": "reward",
    "save_best_after": 5000000,
    "benchmark": false,
    "encoder_mlp_layers": [
        256,
        128,
        64
    ],
    "encoder_conv_architecture": "convnet_simple",
    "encoder_conv_mlp_layers": [
        512
    ],
    "use_rnn": false,
    "rnn_size": 512,
    "rnn_type": "gru",
    "rnn_num_layers": 1,
    "decoder_mlp_layers": [],
    "nonlinearity": "elu",
    "policy_initialization": "torch_default",
    "policy_init_gain": 1.0,
    "actor_critic_share_weights": true,
    "adaptive_stddev": false,
    "continuous_tanh_scale": 0.0,
    "initial_stddev": 1.0,
    "use_env_info_cache": false,
    "env_gpu_actions": true,
    "env_gpu_observations": true,
    "env_frameskip": 1,
    "env_framestack": 1,
    "pixel_format": "CHW",
    "use_record_episode_statistics": false,
    "with_wandb": true,
    "wandb_user": null,
    "wandb_project": "sample_factory",
    "wandb_group": null,
    "wandb_job_type": "SF",
    "wandb_tags": [],
    "with_pbt": false,
    "pbt_mix_policies_in_one_env": true,
    "pbt_period_env_steps": 5000000,
    "pbt_start_mutation": 20000000,
    "pbt_replace_fraction": 0.3,
    "pbt_mutation_rate": 0.15,
    "pbt_replace_reward_gap": 0.1,
    "pbt_replace_reward_gap_absolute": 1e-06,
    "pbt_optimize_gamma": false,
    "pbt_target_objective": "true_objective",
    "pbt_perturb_min": 1.1,
    "pbt_perturb_max": 1.5,
    "env_agents": 2048,
    "clamp_actions": false,
    "clamp_rew_obs": false,
    "command_line": "--actor_worker_gpus 0 --wandb_project=sample_factory --with_wandb=True --seed=2322090 --env=ant --use_rnn=False --num_epochs=5 --experiment=00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5 --train_dir=./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
    "cli_args": {
        "env": "ant",
        "experiment": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5",
        "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
        "seed": 2322090,
        "num_epochs": 5,
        "actor_worker_gpus": [
            0
        ],
        "use_rnn": false,
        "with_wandb": true,
        "wandb_project": "sample_factory"
    },
    "git_hash": "6aa87f2d416b9fad874b299d864a522c887c238a",
    "git_repo_name": "git@github.com:alex-petrenko/sample-factory.git",
    "train_script": "sf_examples.brax.train_brax",
    "wandb_unique_id": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5_20230111_185633_673782"
}
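A quick way to cross-check the hyperparameters listed above against the model card is to load this `config.json` directly. The snippet below is only an illustrative sketch; the path assumes the repository was downloaded as in the README example earlier in this commit.

```python
import json

# Illustrative sketch: read the experiment config uploaded with this model and
# print a few of the hyperparameters shown above. The file path is an assumption
# based on the download example in the README.
with open("./train_dir/sample_factory_brax_ant/config.json") as f:
    cfg = json.load(f)

print(cfg["algo"], cfg["env"])                   # APPO ant
print(cfg["batch_size"], cfg["num_epochs"])      # 32768 5
print(cfg["learning_rate"], cfg["lr_schedule"])  # 0.0003 kl_adaptive_epoch
```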
git.diff
ADDED
File without changes
replay.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8ac1b8101df4100c1291f8ec18054be94feae73f23047ffc3b2cbd0aaa2dc3ba
size 1424460
sf_log.txt
ADDED
@@ -0,0 +1,326 @@
1 |
+
[2023-01-11 18:56:45,735][451905] Saving configuration to ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/config.json...
|
2 |
+
[2023-01-11 18:56:45,916][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
3 |
+
[2023-01-11 18:56:45,917][451905] Rollout worker 0 uses device cuda:0
|
4 |
+
[2023-01-11 18:56:45,918][451905] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
|
5 |
+
[2023-01-11 18:56:45,958][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
6 |
+
[2023-01-11 18:56:45,959][451905] InferenceWorker_p0-w0: min num requests: 1
|
7 |
+
[2023-01-11 18:56:45,960][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
8 |
+
[2023-01-11 18:56:45,961][451905] WARNING! It is generally recommended to enable Fixed KL loss (https://arxiv.org/pdf/1707.06347.pdf) for continuous action tasks to avoid potential numerical issues. I.e. set --kl_loss_coeff=0.1
|
9 |
+
[2023-01-11 18:56:45,962][451905] Setting fixed seed 2322090
|
10 |
+
[2023-01-11 18:56:45,962][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
11 |
+
[2023-01-11 18:56:45,963][451905] Initializing actor-critic model on device cuda:0
|
12 |
+
[2023-01-11 18:56:45,963][451905] RunningMeanStd input shape: (87,)
|
13 |
+
[2023-01-11 18:56:45,964][451905] RunningMeanStd input shape: (1,)
|
14 |
+
[2023-01-11 18:56:46,032][451905] Created Actor Critic model with architecture:
|
15 |
+
[2023-01-11 18:56:46,033][451905] ActorCriticSharedWeights(
|
16 |
+
(obs_normalizer): ObservationNormalizer(
|
17 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
18 |
+
(running_mean_std): ModuleDict(
|
19 |
+
(obs): RunningMeanStdInPlace()
|
20 |
+
)
|
21 |
+
)
|
22 |
+
)
|
23 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
24 |
+
(encoder): MultiInputEncoder(
|
25 |
+
(encoders): ModuleDict(
|
26 |
+
(obs): MlpEncoder(
|
27 |
+
(mlp_head): RecursiveScriptModule(
|
28 |
+
original_name=Sequential
|
29 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
30 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
31 |
+
(2): RecursiveScriptModule(original_name=Linear)
|
32 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
33 |
+
(4): RecursiveScriptModule(original_name=Linear)
|
34 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
35 |
+
)
|
36 |
+
)
|
37 |
+
)
|
38 |
+
)
|
39 |
+
(core): ModelCoreIdentity()
|
40 |
+
(decoder): MlpDecoder(
|
41 |
+
(mlp): Identity()
|
42 |
+
)
|
43 |
+
(critic_linear): Linear(in_features=64, out_features=1, bias=True)
|
44 |
+
(action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
|
45 |
+
(distribution_linear): Linear(in_features=64, out_features=8, bias=True)
|
46 |
+
)
|
47 |
+
)
|
48 |
+
[2023-01-11 18:56:46,035][451905] Using optimizer <class 'torch.optim.adam.Adam'>
|
49 |
+
[2023-01-11 18:56:46,039][451905] No checkpoints found
|
50 |
+
[2023-01-11 18:56:46,039][451905] Did not load from checkpoint, starting from scratch!
|
51 |
+
[2023-01-11 18:56:46,040][451905] Initialized policy 0 weights for model version 0
|
52 |
+
[2023-01-11 18:56:46,040][451905] LearnerWorker_p0 finished initialization!
|
53 |
+
[2023-01-11 18:56:46,042][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
54 |
+
[2023-01-11 18:56:46,053][451905] Inference worker 0-0 is ready!
|
55 |
+
[2023-01-11 18:56:46,054][451905] All inference workers are ready! Signal rollout workers to start!
|
56 |
+
[2023-01-11 18:56:46,054][451905] EnvRunner 0-0 uses policy 0
|
57 |
+
[2023-01-11 18:56:47,539][451905] Resetting env <VectorGymWrapper instance> with 2048 parallel agents...
|
58 |
+
[2023-01-11 18:56:53,140][451905] reset() done, obs.shape=torch.Size([2048, 87])!
|
59 |
+
[2023-01-11 18:56:53,149][451905] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
60 |
+
[2023-01-11 18:57:02,209][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 226.0. Samples: 2048. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
61 |
+
[2023-01-11 18:57:10,799][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 232.1. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
62 |
+
[2023-01-11 18:57:10,804][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth...
|
63 |
+
[2023-01-11 18:57:10,813][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 231.9. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
64 |
+
[2023-01-11 18:57:10,821][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 347.7. Samples: 6144. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
65 |
+
[2023-01-11 18:57:10,826][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth...
|
66 |
+
[2023-01-11 18:57:10,833][451905] Heartbeat connected on Batcher_0
|
67 |
+
[2023-01-11 18:57:10,833][451905] Heartbeat connected on LearnerWorker_p0
|
68 |
+
[2023-01-11 18:57:10,834][451905] Heartbeat connected on InferenceWorker_p0-w0
|
69 |
+
[2023-01-11 18:57:10,834][451905] Heartbeat connected on RolloutWorker_w0
|
70 |
+
[2023-01-11 18:57:15,256][451905] Fps is (10 sec: 191771.0, 60 sec: 38537.5, 300 sec: 38537.5). Total num frames: 851968. Throughput: 0: 10838.7. Samples: 239616. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
|
71 |
+
[2023-01-11 18:57:15,257][451905] Avg episode reward: [(0, '-548.169')]
|
72 |
+
[2023-01-11 18:57:20,255][451905] Fps is (10 sec: 243129.2, 60 sec: 84619.1, 300 sec: 84619.1). Total num frames: 2293760. Throughput: 0: 73437.3. Samples: 1990656. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
|
73 |
+
[2023-01-11 18:57:20,256][451905] Avg episode reward: [(0, '-1154.421')]
|
74 |
+
[2023-01-11 18:57:25,255][451905] Fps is (10 sec: 294955.1, 60 sec: 118391.7, 300 sec: 118391.7). Total num frames: 3801088. Throughput: 0: 116924.6. Samples: 3753984. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
|
75 |
+
[2023-01-11 18:57:25,256][451905] Avg episode reward: [(0, '44.684')]
|
76 |
+
[2023-01-11 18:57:25,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000580_3801088.pth...
|
77 |
+
[2023-01-11 18:57:30,257][451905] Fps is (10 sec: 294876.7, 60 sec: 141286.8, 300 sec: 141286.8). Total num frames: 5242880. Throughput: 0: 124895.3. Samples: 4634624. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
|
78 |
+
[2023-01-11 18:57:30,258][451905] Avg episode reward: [(0, '737.538')]
|
79 |
+
[2023-01-11 18:57:30,260][451905] Saving new best policy, reward=737.538!
|
80 |
+
[2023-01-11 18:57:35,255][451905] Fps is (10 sec: 288356.1, 60 sec: 158757.8, 300 sec: 158757.8). Total num frames: 6684672. Throughput: 0: 151559.2. Samples: 6381568. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
81 |
+
[2023-01-11 18:57:35,255][451905] Avg episode reward: [(0, '1378.020')]
|
82 |
+
[2023-01-11 18:57:35,261][451905] Saving new best policy, reward=1378.020!
|
83 |
+
[2023-01-11 18:57:40,255][451905] Fps is (10 sec: 288413.4, 60 sec: 172513.8, 300 sec: 172513.8). Total num frames: 8126464. Throughput: 0: 213380.6. Samples: 8120320. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
84 |
+
[2023-01-11 18:57:40,256][451905] Avg episode reward: [(0, '2120.758')]
|
85 |
+
[2023-01-11 18:57:40,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001240_8126464.pth...
|
86 |
+
[2023-01-11 18:57:40,284][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth
|
87 |
+
[2023-01-11 18:57:40,286][451905] Saving new best policy, reward=2120.758!
|
88 |
+
[2023-01-11 18:57:45,256][451905] Fps is (10 sec: 288325.2, 60 sec: 183626.2, 300 sec: 183626.2). Total num frames: 9568256. Throughput: 0: 260450.5. Samples: 8978432. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
|
89 |
+
[2023-01-11 18:57:45,257][451905] Avg episode reward: [(0, '2764.891')]
|
90 |
+
[2023-01-11 18:57:45,261][451905] Saving new best policy, reward=2764.891!
|
91 |
+
[2023-01-11 18:57:50,256][451905] Fps is (10 sec: 288317.5, 60 sec: 192794.8, 300 sec: 192794.8). Total num frames: 11010048. Throughput: 0: 271299.6. Samples: 10704896. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
92 |
+
[2023-01-11 18:57:50,257][451905] Avg episode reward: [(0, '3270.653')]
|
93 |
+
[2023-01-11 18:57:50,259][451905] Saving new best policy, reward=3270.653!
|
94 |
+
[2023-01-11 18:57:55,273][451905] Fps is (10 sec: 294393.9, 60 sec: 235889.2, 300 sec: 201487.4). Total num frames: 12517376. Throughput: 0: 279978.7. Samples: 12451840. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
95 |
+
[2023-01-11 18:57:55,274][451905] Avg episode reward: [(0, '3854.397')]
|
96 |
+
[2023-01-11 18:57:55,281][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001910_12517376.pth...
|
97 |
+
[2023-01-11 18:57:55,297][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000580_3801088.pth
|
98 |
+
[2023-01-11 18:57:55,299][451905] Saving new best policy, reward=3854.397!
|
99 |
+
[2023-01-11 18:58:00,311][451905] Fps is (10 sec: 293300.4, 60 sec: 281933.6, 300 sec: 207841.6). Total num frames: 13959168. Throughput: 0: 289960.7. Samples: 13303808. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
|
100 |
+
[2023-01-11 18:58:00,312][451905] Avg episode reward: [(0, '4070.903')]
|
101 |
+
[2023-01-11 18:58:00,314][451905] Saving new best policy, reward=4070.903!
|
102 |
+
[2023-01-11 18:58:05,256][451905] Fps is (10 sec: 269160.0, 60 sec: 279271.9, 300 sec: 210856.2). Total num frames: 15204352. Throughput: 0: 286578.4. Samples: 14886912. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
|
103 |
+
[2023-01-11 18:58:05,257][451905] Avg episode reward: [(0, '4564.800')]
|
104 |
+
[2023-01-11 18:58:05,259][451905] Saving new best policy, reward=4564.800!
|
105 |
+
[2023-01-11 18:58:10,257][451905] Fps is (10 sec: 270162.6, 60 sec: 280069.5, 300 sec: 215880.0). Total num frames: 16646144. Throughput: 0: 286432.4. Samples: 16644096. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
|
106 |
+
[2023-01-11 18:58:10,258][451905] Avg episode reward: [(0, '5311.048')]
|
107 |
+
[2023-01-11 18:58:10,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002540_16646144.pth...
|
108 |
+
[2023-01-11 18:58:10,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001240_8126464.pth
|
109 |
+
[2023-01-11 18:58:10,294][451905] Saving new best policy, reward=5311.048!
|
110 |
+
[2023-01-11 18:58:15,298][451905] Fps is (10 sec: 293691.4, 60 sec: 288157.9, 300 sec: 220981.6). Total num frames: 18153472. Throughput: 0: 285594.1. Samples: 17498112. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
111 |
+
[2023-01-11 18:58:15,299][451905] Avg episode reward: [(0, '5501.038')]
|
112 |
+
[2023-01-11 18:58:15,301][451905] Saving new best policy, reward=5501.038!
|
113 |
+
[2023-01-11 18:58:20,257][451905] Fps is (10 sec: 294904.3, 60 sec: 288350.3, 300 sec: 224952.2). Total num frames: 19595264. Throughput: 0: 286340.2. Samples: 19267584. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
114 |
+
[2023-01-11 18:58:20,258][451905] Avg episode reward: [(0, '6154.250')]
|
115 |
+
[2023-01-11 18:58:20,260][451905] Saving new best policy, reward=6154.250!
|
116 |
+
[2023-01-11 18:58:25,256][451905] Fps is (10 sec: 289554.9, 60 sec: 287257.1, 300 sec: 228395.8). Total num frames: 21037056. Throughput: 0: 286253.8. Samples: 21002240. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
117 |
+
[2023-01-11 18:58:25,257][451905] Avg episode reward: [(0, '6654.922')]
|
118 |
+
[2023-01-11 18:58:25,264][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003210_21037056.pth...
|
119 |
+
[2023-01-11 18:58:25,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001910_12517376.pth
|
120 |
+
[2023-01-11 18:58:25,283][451905] Saving new best policy, reward=6654.922!
|
121 |
+
[2023-01-11 18:58:30,257][451905] Fps is (10 sec: 288365.8, 60 sec: 287265.0, 300 sec: 231482.2). Total num frames: 22478848. Throughput: 0: 286394.7. Samples: 21866496. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
122 |
+
[2023-01-11 18:58:30,257][451905] Avg episode reward: [(0, '6893.634')]
|
123 |
+
[2023-01-11 18:58:30,263][451905] Saving new best policy, reward=6893.634!
|
124 |
+
[2023-01-11 18:58:35,257][451905] Fps is (10 sec: 288352.7, 60 sec: 287256.5, 300 sec: 234267.8). Total num frames: 23920640. Throughput: 0: 286716.7. Samples: 23607296. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
125 |
+
[2023-01-11 18:58:35,257][451905] Avg episode reward: [(0, '7570.934')]
|
126 |
+
[2023-01-11 18:58:35,259][451905] Saving new best policy, reward=7570.934!
|
127 |
+
[2023-01-11 18:58:40,305][451905] Fps is (10 sec: 293488.6, 60 sec: 288115.2, 300 sec: 237296.8). Total num frames: 25427968. Throughput: 0: 286698.4. Samples: 25362432. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
128 |
+
[2023-01-11 18:58:40,306][451905] Avg episode reward: [(0, '7604.016')]
|
129 |
+
[2023-01-11 18:58:40,313][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003880_25427968.pth...
|
130 |
+
[2023-01-11 18:58:40,335][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002540_16646144.pth
|
131 |
+
[2023-01-11 18:58:40,337][451905] Saving new best policy, reward=7604.016!
|
132 |
+
[2023-01-11 18:58:45,304][451905] Fps is (10 sec: 293524.2, 60 sec: 288127.3, 300 sec: 239576.2). Total num frames: 26869760. Throughput: 0: 286947.6. Samples: 26214400. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
133 |
+
[2023-01-11 18:58:45,305][451905] Avg episode reward: [(0, '7992.428')]
|
134 |
+
[2023-01-11 18:58:45,307][451905] Saving new best policy, reward=7992.428!
|
135 |
+
[2023-01-11 18:58:50,311][451905] Fps is (10 sec: 288192.1, 60 sec: 288094.3, 300 sec: 241643.3). Total num frames: 28311552. Throughput: 0: 290188.9. Samples: 27961344. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
136 |
+
[2023-01-11 18:58:50,312][451905] Avg episode reward: [(0, '8068.034')]
|
137 |
+
[2023-01-11 18:58:50,314][451905] Saving new best policy, reward=8068.034!
|
138 |
+
[2023-01-11 18:58:55,254][451905] Fps is (10 sec: 289807.7, 60 sec: 287359.4, 300 sec: 243669.4). Total num frames: 29753344. Throughput: 0: 290106.7. Samples: 29698048. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
139 |
+
[2023-01-11 18:58:55,254][451905] Avg episode reward: [(0, '8315.286')]
|
140 |
+
[2023-01-11 18:58:55,265][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000004540_29753344.pth...
|
141 |
+
[2023-01-11 18:58:55,288][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003210_21037056.pth
|
142 |
+
[2023-01-11 18:58:55,290][451905] Saving new best policy, reward=8315.286!
|
143 |
+
[2023-01-11 18:59:00,277][451905] Fps is (10 sec: 289339.8, 60 sec: 287428.4, 300 sec: 245382.4). Total num frames: 31195136. Throughput: 0: 290311.6. Samples: 30556160. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
144 |
+
[2023-01-11 18:59:00,278][451905] Avg episode reward: [(0, '8176.998')]
|
145 |
+
[2023-01-11 18:59:05,255][451905] Fps is (10 sec: 288321.9, 60 sec: 290547.9, 300 sec: 247049.9). Total num frames: 32636928. Throughput: 0: 289827.1. Samples: 32309248. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
146 |
+
[2023-01-11 18:59:05,256][451905] Avg episode reward: [(0, '8691.556')]
|
147 |
+
[2023-01-11 18:59:05,261][451905] Saving new best policy, reward=8691.556!
|
148 |
+
[2023-01-11 18:59:10,255][451905] Fps is (10 sec: 288987.4, 60 sec: 290549.8, 300 sec: 248555.8). Total num frames: 34078720. Throughput: 0: 289957.8. Samples: 34050048. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
149 |
+
[2023-01-11 18:59:10,256][451905] Avg episode reward: [(0, '9060.014')]
|
150 |
+
[2023-01-11 18:59:10,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005200_34078720.pth...
|
151 |
+
[2023-01-11 18:59:10,289][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003880_25427968.pth
|
152 |
+
[2023-01-11 18:59:10,291][451905] Saving new best policy, reward=9060.014!
|
153 |
+
[2023-01-11 18:59:15,256][451905] Fps is (10 sec: 288344.1, 60 sec: 289653.9, 300 sec: 249955.9). Total num frames: 35520512. Throughput: 0: 289822.3. Samples: 34908160. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
154 |
+
[2023-01-11 18:59:15,256][451905] Avg episode reward: [(0, '9002.059')]
|
155 |
+
[2023-01-11 18:59:16,089][451905] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000003
|
156 |
+
[2023-01-11 18:59:20,257][451905] Fps is (10 sec: 288305.1, 60 sec: 289449.8, 300 sec: 251258.4). Total num frames: 36962304. Throughput: 0: 289901.7. Samples: 36653056. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
157 |
+
[2023-01-11 18:59:20,258][451905] Avg episode reward: [(0, '9044.166')]
|
158 |
+
[2023-01-11 18:59:25,255][451905] Fps is (10 sec: 294934.7, 60 sec: 290550.4, 300 sec: 252912.7). Total num frames: 38469632. Throughput: 0: 290595.6. Samples: 38424576. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
159 |
+
[2023-01-11 18:59:25,255][451905] Avg episode reward: [(0, '9184.393')]
|
160 |
+
[2023-01-11 18:59:25,267][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005864_38469632.pth...
|
161 |
+
[2023-01-11 18:59:25,283][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000004540_29753344.pth
|
162 |
+
[2023-01-11 18:59:25,286][451905] Saving new best policy, reward=9184.393!
|
163 |
+
[2023-01-11 18:59:30,254][451905] Fps is (10 sec: 294993.9, 60 sec: 290554.3, 300 sec: 254041.4). Total num frames: 39911424. Throughput: 0: 290907.9. Samples: 39290880. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
164 |
+
[2023-01-11 18:59:30,255][451905] Avg episode reward: [(0, '9521.790')]
|
165 |
+
[2023-01-11 18:59:30,260][451905] Saving new best policy, reward=9521.790!
|
166 |
+
[2023-01-11 18:59:35,256][451905] Fps is (10 sec: 288339.3, 60 sec: 290548.1, 300 sec: 255098.2). Total num frames: 41353216. Throughput: 0: 291540.0. Samples: 41064448. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
167 |
+
[2023-01-11 18:59:35,256][451905] Avg episode reward: [(0, '9662.545')]
|
168 |
+
[2023-01-11 18:59:35,260][451905] Saving new best policy, reward=9662.545!
|
169 |
+
[2023-01-11 18:59:36,895][451905] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000000
|
170 |
+
[2023-01-11 18:59:40,254][451905] Fps is (10 sec: 294911.6, 60 sec: 290789.3, 300 sec: 256487.2). Total num frames: 42860544. Throughput: 0: 291631.3. Samples: 42821632. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
171 |
+
[2023-01-11 18:59:40,255][451905] Avg episode reward: [(0, '10113.781')]
|
172 |
+
[2023-01-11 18:59:40,265][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006528_42860544.pth...
|
173 |
+
[2023-01-11 18:59:40,626][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005200_34078720.pth
|
174 |
+
[2023-01-11 18:59:40,725][451905] Saving new best policy, reward=10113.781!
|
175 |
+
[2023-01-11 18:59:45,302][451905] Fps is (10 sec: 273976.5, 60 sec: 287274.9, 300 sec: 256199.9). Total num frames: 44105728. Throughput: 0: 286379.5. Samples: 43450368. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
176 |
+
[2023-01-11 18:59:45,303][451905] Avg episode reward: [(0, '9794.349')]
|
177 |
+
[2023-01-11 18:59:50,256][451905] Fps is (10 sec: 268652.8, 60 sec: 287529.4, 300 sec: 257174.2). Total num frames: 45547520. Throughput: 0: 286895.6. Samples: 45219840. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
|
178 |
+
[2023-01-11 18:59:50,257][451905] Avg episode reward: [(0, '10065.353')]
|
179 |
+
[2023-01-11 18:59:55,299][451905] Fps is (10 sec: 295009.9, 60 sec: 288142.9, 300 sec: 258329.9). Total num frames: 47054848. Throughput: 0: 287262.5. Samples: 46989312. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
|
180 |
+
[2023-01-11 18:59:55,300][451905] Avg episode reward: [(0, '9859.390')]
|
181 |
+
[2023-01-11 18:59:55,307][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007168_47054848.pth...
|
182 |
+
[2023-01-11 18:59:55,604][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005864_38469632.pth
|
183 |
+
[2023-01-11 19:00:00,257][451905] Fps is (10 sec: 281782.9, 60 sec: 286270.4, 300 sec: 258489.5). Total num frames: 48365568. Throughput: 0: 285255.5. Samples: 47745024. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
|
184 |
+
[2023-01-11 19:00:00,258][451905] Avg episode reward: [(0, '10446.865')]
|
185 |
+
[2023-01-11 19:00:00,265][451905] Saving new best policy, reward=10441.701!
|
186 |
+
[2023-01-11 19:00:05,257][451905] Fps is (10 sec: 269818.3, 60 sec: 285071.8, 300 sec: 258925.4). Total num frames: 49741824. Throughput: 0: 283580.0. Samples: 49414144. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
187 |
+
[2023-01-11 19:00:05,258][451905] Avg episode reward: [(0, '10311.739')]
|
188 |
+
[2023-01-11 19:00:10,288][451905] Fps is (10 sec: 287452.8, 60 sec: 286016.4, 300 sec: 259963.3). Total num frames: 51249152. Throughput: 0: 283322.9. Samples: 51183616. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
189 |
+
[2023-01-11 19:00:10,289][451905] Avg episode reward: [(0, '10161.780')]
|
190 |
+
[2023-01-11 19:00:10,297][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007808_51249152.pth...
|
191 |
+
[2023-01-11 19:00:10,609][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006528_42860544.pth
|
192 |
+
[2023-01-11 19:00:15,254][451905] Fps is (10 sec: 281893.6, 60 sec: 283996.8, 300 sec: 260061.5). Total num frames: 52559872. Throughput: 0: 281033.7. Samples: 51937280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
193 |
+
[2023-01-11 19:00:15,255][451905] Avg episode reward: [(0, '10633.747')]
|
194 |
+
[2023-01-11 19:00:15,260][451905] Saving new best policy, reward=10633.747!
|
195 |
+
[2023-01-11 19:00:20,254][451905] Fps is (10 sec: 269625.8, 60 sec: 282912.3, 300 sec: 260428.3). Total num frames: 53936128. Throughput: 0: 278947.0. Samples: 53616640. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
196 |
+
[2023-01-11 19:00:20,255][451905] Avg episode reward: [(0, '10862.651')]
|
197 |
+
[2023-01-11 19:00:20,260][451905] Saving new best policy, reward=10862.651!
|
198 |
+
[2023-01-11 19:00:25,258][451905] Fps is (10 sec: 275143.2, 60 sec: 280698.0, 300 sec: 260772.8). Total num frames: 55312384. Throughput: 0: 276913.6. Samples: 55283712. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
199 |
+
[2023-01-11 19:00:25,259][451905] Avg episode reward: [(0, '10839.951')]
|
200 |
+
[2023-01-11 19:00:25,271][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000008428_55312384.pth...
|
201 |
+
[2023-01-11 19:00:25,297][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007168_47054848.pth
|
202 |
+
[2023-01-11 19:00:30,257][451905] Fps is (10 sec: 281733.7, 60 sec: 280702.9, 300 sec: 261409.8). Total num frames: 56754176. Throughput: 0: 282500.2. Samples: 56150016. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
203 |
+
[2023-01-11 19:00:30,257][451905] Avg episode reward: [(0, '11148.982')]
|
204 |
+
[2023-01-11 19:00:30,263][451905] Saving new best policy, reward=11148.982!
|
205 |
+
[2023-01-11 19:00:35,256][451905] Fps is (10 sec: 288406.0, 60 sec: 280708.8, 300 sec: 262016.7). Total num frames: 58195968. Throughput: 0: 281075.5. Samples: 57868288. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
|
206 |
+
[2023-01-11 19:00:35,257][451905] Avg episode reward: [(0, '10900.463')]
|
207 |
+
[2023-01-11 19:00:40,256][451905] Fps is (10 sec: 288368.4, 60 sec: 279612.4, 300 sec: 262596.8). Total num frames: 59637760. Throughput: 0: 280796.1. Samples: 59613184. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
|
208 |
+
[2023-01-11 19:00:40,257][451905] Avg episode reward: [(0, '10984.909')]
|
209 |
+
[2023-01-11 19:00:40,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009088_59637760.pth...
|
210 |
+
[2023-01-11 19:00:40,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007808_51249152.pth
|
211 |
+
[2023-01-11 19:00:45,256][451905] Fps is (10 sec: 288366.8, 60 sec: 283114.2, 300 sec: 263152.0). Total num frames: 61079552. Throughput: 0: 283175.7. Samples: 60487680. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
|
212 |
+
[2023-01-11 19:00:45,257][451905] Avg episode reward: [(0, '10910.111')]
|
213 |
+
[2023-01-11 19:00:50,257][451905] Fps is (10 sec: 288340.4, 60 sec: 282894.0, 300 sec: 263682.6). Total num frames: 62521344. Throughput: 0: 284082.9. Samples: 62197760. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
214 |
+
[2023-01-11 19:00:50,258][451905] Avg episode reward: [(0, '11054.682')]
|
215 |
+
[2023-01-11 19:00:55,254][451905] Fps is (10 sec: 288421.9, 60 sec: 282015.9, 300 sec: 264195.5). Total num frames: 63963136. Throughput: 0: 283706.8. Samples: 63940608. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
216 |
+
[2023-01-11 19:00:55,255][451905] Avg episode reward: [(0, '11399.716')]
|
217 |
+
[2023-01-11 19:00:55,264][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009748_63963136.pth...
|
218 |
+
[2023-01-11 19:00:55,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000008428_55312384.pth
|
219 |
+
[2023-01-11 19:00:55,283][451905] Saving new best policy, reward=11399.716!
|
220 |
+
[2023-01-11 19:01:00,283][451905] Fps is (10 sec: 294139.5, 60 sec: 284957.5, 300 sec: 264918.3). Total num frames: 65470464. Throughput: 0: 286125.9. Samples: 64821248. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
221 |
+
[2023-01-11 19:01:00,284][451905] Avg episode reward: [(0, '11540.237')]
|
222 |
+
[2023-01-11 19:01:00,286][451905] Saving new best policy, reward=11540.237!
|
223 |
+
[2023-01-11 19:01:05,257][451905] Fps is (10 sec: 294816.1, 60 sec: 286174.4, 300 sec: 265410.5). Total num frames: 66912256. Throughput: 0: 288020.1. Samples: 66578432. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
224 |
+
[2023-01-11 19:01:05,258][451905] Avg episode reward: [(0, '11359.174')]
|
225 |
+
[2023-01-11 19:01:10,256][451905] Fps is (10 sec: 289131.9, 60 sec: 285234.3, 300 sec: 265857.5). Total num frames: 68354048. Throughput: 0: 289734.5. Samples: 68321280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
226 |
+
[2023-01-11 19:01:10,257][451905] Avg episode reward: [(0, '11490.578')]
|
227 |
+
[2023-01-11 19:01:10,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000010418_68354048.pth...
|
228 |
+
[2023-01-11 19:01:10,285][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009088_59637760.pth
|
229 |
+
[2023-01-11 19:01:15,256][451905] Fps is (10 sec: 288397.2, 60 sec: 287258.0, 300 sec: 266287.3). Total num frames: 69795840. Throughput: 0: 289956.3. Samples: 69197824. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
230 |
+
[2023-01-11 19:01:15,256][451905] Avg episode reward: [(0, '11371.602')]
|
231 |
+
[2023-01-11 19:01:20,286][451905] Fps is (10 sec: 294033.6, 60 sec: 289295.5, 300 sec: 266915.4). Total num frames: 71303168. Throughput: 0: 290714.1. Samples: 70959104. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
232 |
+
[2023-01-11 19:01:20,287][451905] Avg episode reward: [(0, '11555.144')]
|
233 |
+
[2023-01-11 19:01:20,289][451905] Saving new best policy, reward=11555.144!
|
234 |
+
[2023-01-11 19:01:25,284][451905] Fps is (10 sec: 294087.5, 60 sec: 290418.0, 300 sec: 267311.8). Total num frames: 72744960. Throughput: 0: 290182.9. Samples: 72679424. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
235 |
+
[2023-01-11 19:01:25,284][451905] Avg episode reward: [(0, '11422.452')]
|
236 |
+
[2023-01-11 19:01:25,292][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011088_72744960.pth...
|
237 |
+
[2023-01-11 19:01:25,308][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009748_63963136.pth
|
238 |
+
[2023-01-11 19:01:30,307][451905] Fps is (10 sec: 287771.5, 60 sec: 290300.8, 300 sec: 267669.5). Total num frames: 74186752. Throughput: 0: 289626.0. Samples: 73535488. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
239 |
+
[2023-01-11 19:01:30,307][451905] Avg episode reward: [(0, '11773.974')]
|
240 |
+
[2023-01-11 19:01:30,310][451905] Saving new best policy, reward=11773.974!
|
241 |
+
[2023-01-11 19:01:35,256][451905] Fps is (10 sec: 282588.7, 60 sec: 289452.0, 300 sec: 267851.8). Total num frames: 75563008. Throughput: 0: 290365.8. Samples: 75264000. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
242 |
+
[2023-01-11 19:01:35,257][451905] Avg episode reward: [(0, '11651.489')]
|
243 |
+
[2023-01-11 19:01:40,257][451905] Fps is (10 sec: 283208.3, 60 sec: 289446.6, 300 sec: 268208.0). Total num frames: 77004800. Throughput: 0: 290294.8. Samples: 77004800. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
|
244 |
+
[2023-01-11 19:01:40,258][451905] Avg episode reward: [(0, '11539.934')]
|
245 |
+
[2023-01-11 19:01:40,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011738_77004800.pth...
|
246 |
+
[2023-01-11 19:01:40,284][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000010418_68354048.pth
|
247 |
+
[2023-01-11 19:01:45,256][451905] Fps is (10 sec: 288367.2, 60 sec: 289452.1, 300 sec: 268554.1). Total num frames: 78446592. Throughput: 0: 289854.2. Samples: 77856768. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
|
248 |
+
[2023-01-11 19:01:45,256][451905] Avg episode reward: [(0, '11773.561')]
|
249 |
+
[2023-01-11 19:01:50,310][451905] Fps is (10 sec: 293362.0, 60 sec: 290286.2, 300 sec: 277520.3). Total num frames: 79953920. Throughput: 0: 288884.3. Samples: 79593472. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
|
250 |
+
[2023-01-11 19:01:50,311][451905] Avg episode reward: [(0, '11745.324')]
|
251 |
+
[2023-01-11 19:01:55,256][451905] Fps is (10 sec: 288354.0, 60 sec: 289440.8, 300 sec: 285913.7). Total num frames: 81330176. Throughput: 0: 289089.3. Samples: 81330176. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
|
252 |
+
[2023-01-11 19:01:55,257][451905] Avg episode reward: [(0, '11664.784')]
|
253 |
+
[2023-01-11 19:01:55,263][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012398_81330176.pth...
|
254 |
+
[2023-01-11 19:01:55,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011088_72744960.pth
|
255 |
+
[2023-01-11 19:02:00,257][451905] Fps is (10 sec: 289899.3, 60 sec: 289577.9, 300 sec: 286195.9). Total num frames: 82837504. Throughput: 0: 288761.8. Samples: 82192384. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
|
256 |
+
[2023-01-11 19:02:00,257][451905] Avg episode reward: [(0, '11688.261')]
|
257 |
+
[2023-01-11 19:02:05,257][451905] Fps is (10 sec: 301442.7, 60 sec: 290545.1, 300 sec: 286462.8). Total num frames: 84344832. Throughput: 0: 290369.5. Samples: 84017152. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
|
258 |
+
[2023-01-11 19:02:05,257][451905] Avg episode reward: [(0, '12067.370')]
|
259 |
+
[2023-01-11 19:02:05,263][451905] Saving new best policy, reward=12067.370!
|
260 |
+
[2023-01-11 19:02:10,291][451905] Fps is (10 sec: 293899.0, 60 sec: 290374.2, 300 sec: 287879.7). Total num frames: 85786624. Throughput: 0: 289767.1. Samples: 85721088. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
|
261 |
+
[2023-01-11 19:02:10,292][451905] Avg episode reward: [(0, '12220.248')]
|
262 |
+
[2023-01-11 19:02:10,298][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013078_85786624.pth...
|
263 |
+
[2023-01-11 19:02:10,315][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011738_77004800.pth
|
264 |
+
[2023-01-11 19:02:10,317][451905] Saving new best policy, reward=12220.248!
|
265 |
+
[2023-01-11 19:02:15,254][451905] Fps is (10 sec: 281876.8, 60 sec: 289458.6, 300 sec: 287693.2). Total num frames: 87162880. Throughput: 0: 290062.1. Samples: 86573056. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
|
266 |
+
[2023-01-11 19:02:15,255][451905] Avg episode reward: [(0, '11914.534')]
|
267 |
+
[2023-01-11 19:02:20,257][451905] Fps is (10 sec: 289339.0, 60 sec: 289590.2, 300 sec: 287689.2). Total num frames: 88670208. Throughput: 0: 290580.4. Samples: 88340480. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
|
268 |
+
[2023-01-11 19:02:20,258][451905] Avg episode reward: [(0, '11682.962')]
|
269 |
+
[2023-01-11 19:02:25,257][451905] Fps is (10 sec: 294827.6, 60 sec: 289580.1, 300 sec: 287691.6). Total num frames: 90112000. Throughput: 0: 290270.3. Samples: 90066944. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
270 |
+
[2023-01-11 19:02:25,258][451905] Avg episode reward: [(0, '12117.807')]
|
271 |
+
[2023-01-11 19:02:25,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013738_90112000.pth...
|
272 |
+
[2023-01-11 19:02:25,285][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012398_81330176.pth
|
273 |
+
[2023-01-11 19:02:30,254][451905] Fps is (10 sec: 288451.7, 60 sec: 289704.3, 300 sec: 287692.5). Total num frames: 91553792. Throughput: 0: 290508.3. Samples: 90929152. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
274 |
+
[2023-01-11 19:02:30,255][451905] Avg episode reward: [(0, '12096.269')]
|
275 |
+
[2023-01-11 19:02:35,313][451905] Fps is (10 sec: 293260.2, 60 sec: 291357.3, 300 sec: 287856.9). Total num frames: 93061120. Throughput: 0: 291476.5. Samples: 92710912. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
276 |
+
[2023-01-11 19:02:35,314][451905] Avg episode reward: [(0, '12384.883')]
|
277 |
+
[2023-01-11 19:02:35,316][451905] Saving new best policy, reward=12384.883!
|
278 |
+
[2023-01-11 19:02:40,308][451905] Fps is (10 sec: 293345.9, 60 sec: 291390.4, 300 sec: 287863.7). Total num frames: 94502912. Throughput: 0: 290937.8. Samples: 94437376. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
279 |
+
[2023-01-11 19:02:40,308][451905] Avg episode reward: [(0, '12423.622')]
|
280 |
+
[2023-01-11 19:02:40,315][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000014408_94502912.pth...
|
281 |
+
[2023-01-11 19:02:40,338][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013078_85786624.pth
|
282 |
+
[2023-01-11 19:02:40,340][451905] Saving new best policy, reward=12423.622!
|
283 |
+
[2023-01-11 19:02:45,257][451905] Fps is (10 sec: 283393.6, 60 sec: 290535.8, 300 sec: 287690.8). Total num frames: 95879168. Throughput: 0: 291040.3. Samples: 95289344. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
284 |
+
[2023-01-11 19:02:45,258][451905] Avg episode reward: [(0, '12489.164')]
|
285 |
+
[2023-01-11 19:02:45,264][451905] Saving new best policy, reward=12489.164!
|
286 |
+
[2023-01-11 19:02:50,257][451905] Fps is (10 sec: 283244.4, 60 sec: 289707.8, 300 sec: 287486.1). Total num frames: 97320960. Throughput: 0: 288358.6. Samples: 96993280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
287 |
+
[2023-01-11 19:02:50,257][451905] Avg episode reward: [(0, '12530.994')]
|
288 |
+
[2023-01-11 19:02:50,259][451905] Saving new best policy, reward=12530.994!
|
289 |
+
[2023-01-11 19:02:55,256][451905] Fps is (10 sec: 288402.6, 60 sec: 290543.9, 300 sec: 287523.7). Total num frames: 98762752. Throughput: 0: 289269.2. Samples: 98727936. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
|
290 |
+
[2023-01-11 19:02:55,256][451905] Avg episode reward: [(0, '11972.568')]
|
291 |
+
[2023-01-11 19:02:55,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015058_98762752.pth...
|
292 |
+
[2023-01-11 19:02:55,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013738_90112000.pth
|
293 |
+
[2023-01-11 19:02:59,748][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015258_100073472.pth...
|
294 |
+
[2023-01-11 19:02:59,764][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000014408_94502912.pth
|
295 |
+
[2023-01-11 19:02:59,766][451905] Stopping InferenceWorker_p0-w0...
|
296 |
+
[2023-01-11 19:02:59,767][451905] Stopping RolloutWorker_w0...
|
297 |
+
[2023-01-11 19:02:59,767][451905] Stopping Batcher_0...
|
298 |
+
[2023-01-11 19:02:59,767][451905] Component InferenceWorker_p0-w0 stopped!
|
299 |
+
[2023-01-11 19:02:59,768][451905] Component RolloutWorker_w0 stopped!
|
300 |
+
[2023-01-11 19:02:59,768][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015258_100073472.pth...
|
301 |
+
[2023-01-11 19:02:59,783][451905] Stopping LearnerWorker_p0...
|
302 |
+
[2023-01-11 19:02:59,784][451905] Component Batcher_0 stopped!
|
303 |
+
[2023-01-11 19:02:59,784][451905] Component LearnerWorker_p0 stopped!
|
304 |
+
[2023-01-11 19:02:59,784][451905] Batcher 0 profile tree view:
|
305 |
+
batching: 0.3719, releasing_batches: 0.0723
|
306 |
+
[2023-01-11 19:02:59,785][451905] InferenceWorker_p0-w0 profile tree view:
|
307 |
+
update_model: 0.4822
|
308 |
+
one_step: 0.0012
|
309 |
+
handle_policy_step: 62.2539
|
310 |
+
deserialize: 0.5489, stack: 0.0690, obs_to_device_normalize: 11.2582, forward: 39.1646, prepare_outputs: 6.9404, send_messages: 0.8335
|
311 |
+
[2023-01-11 19:02:59,785][451905] Learner 0 profile tree view:
|
312 |
+
misc: 0.0067, prepare_batch: 5.9127
|
313 |
+
train: 90.4802
|
314 |
+
epoch_init: 0.0649, minibatch_init: 1.0155, losses_postprocess: 3.0830, kl_divergence: 5.8746, after_optimizer: 0.3524
|
315 |
+
calculate_losses: 18.6587
|
316 |
+
losses_init: 0.0395, forward_head: 3.0733, bptt_initial: 0.1318, bptt: 0.1409, tail: 9.1108, advantages_returns: 1.1916, losses: 3.6217
|
317 |
+
update: 59.4683
|
318 |
+
clip: 9.0914
|
319 |
+
[2023-01-11 19:02:59,785][451905] RolloutWorker_w0 profile tree view:
|
320 |
+
wait_for_trajectories: 0.0886, enqueue_policy_requests: 5.8259, process_policy_outputs: 3.7482, env_step: 157.1659, finalize_trajectories: 0.1642, complete_rollouts: 0.0683
|
321 |
+
post_env_step: 20.3344
|
322 |
+
process_env_step: 8.2187
|
323 |
+
[2023-01-11 19:02:59,785][451905] Loop Runner_EvtLoop terminating...
|
324 |
+
[2023-01-11 19:02:59,786][451905] Runner profile tree view:
|
325 |
+
main_loop: 373.8249
|
326 |
+
[2023-01-11 19:02:59,786][451905] Collected {0: 100073472}, FPS: 267701.5
|