Commit
·
6839927
1
Parent(s):
0c417fd
Initial commit
Browse files- README.md +2 -2
- args.yml +2 -2
- env_kwargs.yml +1 -0
- replay.mp4 +2 -2
- results.json +1 -1
- tqc-LiftCube-v0.zip +2 -2
- tqc-LiftCube-v0/actor.optimizer.pth +1 -1
- tqc-LiftCube-v0/critic.optimizer.pth +1 -1
- tqc-LiftCube-v0/data +17 -17
- tqc-LiftCube-v0/ent_coef_optimizer.pth +1 -1
- tqc-LiftCube-v0/policy.pth +1 -1
- tqc-LiftCube-v0/pytorch_variables.pth +1 -1
- train_eval_metrics.zip +2 -2
README.md
CHANGED
@@ -16,7 +16,7 @@ model-index:
|
|
16 |
type: LiftCube-v0
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
-
value: 5.
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
@@ -70,5 +70,5 @@ OrderedDict([('n_envs', 16),
|
|
70 |
|
71 |
# Environment Arguments
|
72 |
```python
|
73 |
-
{'render_mode': 'rgb_array'}
|
74 |
```
|
|
|
16 |
type: LiftCube-v0
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
+
value: 5.06 +/- 4.05
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
|
|
70 |
|
71 |
# Environment Arguments
|
72 |
```python
|
73 |
+
{'observation_mode': 'state', 'render_mode': 'rgb_array'}
|
74 |
```
|
args.yml
CHANGED
@@ -8,7 +8,7 @@
|
|
8 |
- - env
|
9 |
- LiftCube-v0
|
10 |
- - env_kwargs
|
11 |
-
-
|
12 |
- - eval_env_kwargs
|
13 |
- null
|
14 |
- - eval_episodes
|
@@ -56,7 +56,7 @@
|
|
56 |
- - save_replay_buffer
|
57 |
- false
|
58 |
- - seed
|
59 |
-
-
|
60 |
- - storage
|
61 |
- null
|
62 |
- - study_name
|
|
|
8 |
- - env
|
9 |
- LiftCube-v0
|
10 |
- - env_kwargs
|
11 |
+
- observation_mode: state
|
12 |
- - eval_env_kwargs
|
13 |
- null
|
14 |
- - eval_episodes
|
|
|
56 |
- - save_replay_buffer
|
57 |
- false
|
58 |
- - seed
|
59 |
+
- 3781158540
|
60 |
- - storage
|
61 |
- null
|
62 |
- - study_name
|
env_kwargs.yml
CHANGED
@@ -1 +1,2 @@
|
|
|
|
1 |
render_mode: rgb_array
|
|
|
1 |
+
observation_mode: state
|
2 |
render_mode: rgb_array
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddf3ea12662016b574a6ca85dbf3d3c12e4586e3529e7fdd9847ffd269863c83
|
3 |
+
size 94052
|
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"mean_reward": 5.
|
|
|
1 |
+
{"mean_reward": 5.0577847, "std_reward": 4.045849521693313, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-06-08T17:12:13.368558"}
|
tqc-LiftCube-v0.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62d4ed3d52f662b50f0bd7776263386bca2fda650a0f53010bdf33d8f654f597
|
3 |
+
size 3419894
|
tqc-LiftCube-v0/actor.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 590670
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df8e866b92a241cd1639024e83cffda815c93e43f11a366a96ee6fbd571fa4eb
|
3 |
size 590670
|
tqc-LiftCube-v0/critic.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1255594
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b96409c8b6cffbb6d6837461313e058449528a2764f84870f5b87198388de536
|
3 |
size 1255594
|
tqc-LiftCube-v0/data
CHANGED
@@ -4,20 +4,20 @@
|
|
4 |
":serialized:": "gAWVMQAAAAAAAACMGHNiM19jb250cmliLnRxYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu",
|
5 |
"__module__": "sb3_contrib.tqc.policies",
|
6 |
"__doc__": "\n Policy class (with both actor and critic) for TQC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_quantiles: Number of quantiles for the critic.\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
|
7 |
-
"__init__": "<function MultiInputPolicy.__init__ at
|
8 |
"__abstractmethods__": "frozenset()",
|
9 |
-
"_abc_impl": "<_abc._abc_data object at
|
10 |
},
|
11 |
"verbose": 1,
|
12 |
"policy_kwargs": {
|
13 |
"use_sde": false
|
14 |
},
|
15 |
-
"num_timesteps":
|
16 |
"_total_timesteps": 1000000,
|
17 |
"_num_timesteps_at_start": 0,
|
18 |
"seed": 0,
|
19 |
"action_noise": null,
|
20 |
-
"start_time":
|
21 |
"learning_rate": 0.0003,
|
22 |
"tensorboard_log": null,
|
23 |
"_last_obs": null,
|
@@ -27,25 +27,25 @@
|
|
27 |
},
|
28 |
"_last_original_obs": {
|
29 |
":type:": "<class 'collections.OrderedDict'>",
|
30 |
-
":serialized:": "
|
31 |
-
"arm_qpos": "[[
|
32 |
-
"arm_qvel": "[[
|
33 |
-
"cube_pos": "[[
|
34 |
},
|
35 |
-
"_episode_num":
|
36 |
"use_sde": false,
|
37 |
"sde_sample_freq": -1,
|
38 |
-
"_current_progress_remaining": 0.
|
39 |
"_stats_window_size": 100,
|
40 |
"ep_info_buffer": {
|
41 |
":type:": "<class 'collections.deque'>",
|
42 |
-
":serialized:": "
|
43 |
},
|
44 |
"ep_success_buffer": {
|
45 |
":type:": "<class 'collections.deque'>",
|
46 |
":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="
|
47 |
},
|
48 |
-
"_n_updates":
|
49 |
"observation_space": {
|
50 |
":type:": "<class 'gymnasium.spaces.dict.Dict'>",
|
51 |
":serialized:": "gAWVzAMAAAAAAACMFWd5bW5hc2l1bS5zcGFjZXMuZGljdJSMBERpY3SUk5QpgZR9lCiMBnNwYWNlc5SMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojAhhcm1fcXBvc5SMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBgAAAAAAAAABAQEBAQGUaBOMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLBoWUjAFDlHSUUpSMDWJvdW5kZWRfYWJvdmWUaBwolgYAAAAAAAAAAQEBAQEBlGggSwaFlGgkdJRSlIwGX3NoYXBllEsGhZSMA2xvd5RoHCiWGAAAAAAAAADbD0nA2w9JwNsPScDbD0nA2w9JwNsPScCUaBZLBoWUaCR0lFKUjARoaWdolGgcKJYYAAAAAAAAANsPSUDbD0lA2w9JQNsPSUDbD0lA2w9JQJRoFksGhZRoJHSUUpSMCGxvd19yZXBylIwKLTMuMTQxNTkyN5SMCWhpZ2hfcmVwcpSMCTMuMTQxNTkyN5SMCl9ucF9yYW5kb22UTnVijAhhcm1fcXZlbJRoDSmBlH2UKGgQaBZoGWgcKJYGAAAAAAAAAAEBAQEBAZRoIEsGhZRoJHSUUpRoJ2gcKJYGAAAAAAAAAAEBAQEBAZRoIEsGhZRoJHSUUpRoLEsGhZRoLmgcKJYYAAAAAAAAAAAAIMEAACDBAAAgwQAAIMEAACDBAAAgwZRoFksGhZRoJHSUUpRoM2gcKJYYAAAAAAAAAAAAIEEAACBBAAAgQQAAIEEAACBBAAAgQZRoFksGhZRoJHSUUpRoOIwFLTEwLjCUaDqMBDEwLjCUaDxOdWKMCGN1YmVfcG9zlGgNKYGUfZQoaBBoFmgZaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgnaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgsSwOFlGguaBwolgwAAAAAAAAAAAAgwQAAIMEAACDBlGgWSwOFlGgkdJRSlGgzaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YnVoLE5oEE5oPE51Yi4=",
|
@@ -83,12 +83,12 @@
|
|
83 |
"__module__": "stable_baselines3.common.buffers",
|
84 |
"__annotations__": "{'observation_space': <class 'gymnasium.spaces.dict.Dict'>, 'obs_shape': typing.Dict[str, typing.Tuple[int, ...]], 'observations': typing.Dict[str, numpy.ndarray], 'next_observations': typing.Dict[str, numpy.ndarray]}",
|
85 |
"__doc__": "\n Dict Replay buffer used in off-policy algorithms like SAC/TD3.\n Extends the ReplayBuffer to use dictionary observations\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
86 |
-
"__init__": "<function DictReplayBuffer.__init__ at
|
87 |
-
"add": "<function DictReplayBuffer.add at
|
88 |
-
"sample": "<function DictReplayBuffer.sample at
|
89 |
-
"_get_samples": "<function DictReplayBuffer._get_samples at
|
90 |
"__abstractmethods__": "frozenset()",
|
91 |
-
"_abc_impl": "<_abc._abc_data object at
|
92 |
},
|
93 |
"replay_buffer_kwargs": {},
|
94 |
"train_freq": {
|
|
|
4 |
":serialized:": "gAWVMQAAAAAAAACMGHNiM19jb250cmliLnRxYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu",
|
5 |
"__module__": "sb3_contrib.tqc.policies",
|
6 |
"__doc__": "\n Policy class (with both actor and critic) for TQC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_quantiles: Number of quantiles for the critic.\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
|
7 |
+
"__init__": "<function MultiInputPolicy.__init__ at 0x7f4b161af370>",
|
8 |
"__abstractmethods__": "frozenset()",
|
9 |
+
"_abc_impl": "<_abc._abc_data object at 0x7f4b161bf0c0>"
|
10 |
},
|
11 |
"verbose": 1,
|
12 |
"policy_kwargs": {
|
13 |
"use_sde": false
|
14 |
},
|
15 |
+
"num_timesteps": 1000000,
|
16 |
"_total_timesteps": 1000000,
|
17 |
"_num_timesteps_at_start": 0,
|
18 |
"seed": 0,
|
19 |
"action_noise": null,
|
20 |
+
"start_time": 1717866068576169168,
|
21 |
"learning_rate": 0.0003,
|
22 |
"tensorboard_log": null,
|
23 |
"_last_obs": null,
|
|
|
27 |
},
|
28 |
"_last_original_obs": {
|
29 |
":type:": "<class 'collections.OrderedDict'>",
|
30 |
+
":serialized:": "gAWVrwQAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojAhhcm1fcXBvc5SMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJaAAQAAAAAAAD/7CT7WOUM+PrZ0PAAAgD/0dherUli7sCku3zzVaGA+PbZ0PAAAgD/U7R0wpYuLMEAxAr5kuBQ+BX+NPA0pLj9B6BM/F/mUPsiAoL3ZSoA+dpB0PLP/fz+f5W85YTciuLNV6bxLbVE+KsfFPFs2PT945S08zXISv6X7vL0mVOA+9BfOPI45d79u5nQ9yVSOPf/s/TvcPQY+VM90PN8nfz/CeBK2hT5sN2bfhLsWhgI+1SSzPFcfYT8xuLM9KsXNvud6K706B9o9PrZ0PAAAgD/YTR2mQAjSpW87qz2+65I+IlRiPE3Wnj6qAfW7CzQROzihwD4oWQQ/YaWiPGN+Cz8UPFK/TtYOPtj9XT2FqUs+brZ0PAAAgD+W4L40NyxvNZI+Gr4NdkA+HahtPDXtfz82WJs8iHZRu3bkdT2C9M49N8V0PP//fz9we5O0gepTuTWMez3H7hM+PbZ0PAAAgD8+BcYwcQ9isDXkMr4pwIo+ENPZPfuV574jTai+5SmbPpSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLEEsGhpSMAUOUdJRSlIwIYXJtX3F2ZWyUaAcoloABAAAAAAAABz/HMNQWIauIX9M6hpclLmXRzDODgQClAGmUsDf2JzDOX9M6mKgss3OPmLPWPxmkhp0fPswegT3JMiy+dw0FQRwjq0CeJ4fAJA6qN05ehjmeSvY6j/qAvPFAHjs/dyo2rwZUv8jbuDyFgxK9xqYlvxe6vsHzE0pBKBwsPHNDAkAw9Pq+wuOkwUXQqsDldlRCb4x2t1gKbLa2Jrg6f/aWOMPrgboQZ4QzBKxTvqDOX7wew3c800hJQOVLKMHj4zdAFRnfJe5CJ6Z1X9M6RBUsKX9Y5Shqpg0iata0PjAvjz6W95496RoHQPIqBr87EnlBzKR2P8d5jz//V6g+4184wXlUSD+403e/slx+tWj/yjTvKtM6l6zQt6K8griJRSsnPqLzvGUvzTwHzCg6MW9DvsvnFr2SH1k+TYlhOW8Cs7S4D8M6zS6KNzvAZzw1qU6xwWtwMJGZ0jC6X9M6Nn3Ys+MkdzMX+UOi+TR1wPPFMT+JolY/Bg7XweW24UF0rDnClGgOSxBLBoaUaBJ0lFKUjAhjdWJlX3Bvc5RoByiWwAAAAAAAAAAaFtAiZYm4PoLU6D1e2pmjkiTYviVwi7/QZrA+M5z2PkgVSz8bcEa7/meyvSzLnL+p/bU+nvGiPhPm5rzNrXi+34zkvouBYz6AMKY9QsXDPsPsgz+Yd3U+gxwjP/IqHb/elQWi9MqLvmuslb98XHM/GXCyvrakIb9KkcS9HvmOPn5TWT+nIxwwMXBpvowVT7+Z0mk8xro3vbIXCj9nBEm5um00PyLdsr+mCDWgXHUUP9btR7/rjUW/yNfBPxiWrr+UaA5LEEsDhpRoEnSUUpR1Lg==",
|
31 |
+
"arm_qpos": "[[ 1.34747490e-01 1.90650314e-01 1.49360280e-02 1.00000000e+00\n -5.38110572e-13 -1.36311429e-09]\n [ 2.72436906e-02 2.19149902e-01 1.49360271e-02 1.00000000e+00\n 5.74542414e-10 1.01532704e-09]\n [-1.27140999e-01 1.45234644e-01 1.72724817e-02 6.80313885e-01\n 5.77762663e-01 2.90962905e-01]\n [-7.83706307e-02 2.50571042e-01 1.49270203e-02 9.99995410e-01\n 2.28783567e-04 -3.86753854e-05]\n [-2.84832474e-02 2.04518482e-01 2.41428204e-02 7.39110649e-01\n 1.06137916e-02 -5.72064221e-01]\n [-9.22768489e-02 4.38142002e-01 2.51579061e-02 -9.65721965e-01\n 5.97900674e-02 6.94976524e-02]\n [ 7.74919940e-03 1.31095350e-01 1.49420090e-02 9.96702135e-01\n -2.18259856e-06 1.40812526e-05]\n [-4.05495148e-03 1.27464622e-01 2.18681488e-02 8.79384458e-01\n 8.77536610e-02 -4.01894867e-01]\n [-4.18652557e-02 1.06459096e-01 1.49360280e-02 1.00000000e+00\n -5.45758151e-16 -3.64347834e-16]\n [ 8.36094543e-02 2.86954820e-01 1.38140041e-02 3.10228735e-01\n -7.47700501e-03 2.21562642e-03]\n [ 3.76230001e-01 5.16985416e-01 1.98542494e-02 5.44897258e-01\n -8.21229219e-01 1.39489383e-01]\n [ 5.41971624e-02 1.98888853e-01 1.49360728e-02 1.00000000e+00\n 3.55536656e-07 8.90987792e-07]\n [-1.50629312e-01 1.87950328e-01 1.45054134e-02 9.99713242e-01\n 1.89629607e-02 -3.19615193e-03]\n [ 6.00323305e-02 1.01052299e-01 1.49395978e-02 9.99999940e-01\n -2.74706963e-07 -2.02098876e-04]\n [ 6.14130087e-02 1.44465551e-01 1.49360271e-02 1.00000000e+00\n 1.44078860e-09 -8.22402646e-10]\n [-1.74698666e-01 2.70997316e-01 1.06359601e-01 -4.52316135e-01\n -3.28713506e-01 3.03054005e-01]]",
|
32 |
+
"arm_qvel": "[[ 1.44970691e-09 -5.72303706e-13 1.61264930e-03 3.76512363e-11\n 9.53757890e-08 -1.11461105e-16]\n [-1.07982601e-09 6.11041384e-10 1.61265745e-03 -4.02002058e-08\n -7.10414483e-08 -3.32306576e-17]\n [ 1.55874342e-01 6.30470216e-02 -1.68162480e-01 8.31578732e+00\n 5.34803581e+00 -4.22358608e+00]\n [ 2.02721640e-05 2.56287341e-04 1.87905482e-03 -1.57444756e-02\n 2.41475948e-03 2.54013844e-06]\n [-8.28226984e-01 2.25657374e-02 -3.57699580e-02 -6.47076011e-01\n -2.38408642e+01 1.26298704e+01]\n [ 1.05047598e-02 2.03536677e+00 -4.90144253e-01 -2.06112099e+01\n -5.33792353e+00 5.31161079e+01]\n [-1.46954399e-05 -3.51727613e-06 1.40496227e-03 7.19847085e-05\n -9.91218141e-04 6.16547595e-08]\n [-2.06710875e-01 -1.36601031e-02 1.51222032e-02 3.14506984e+00\n -1.05185289e+01 2.87328410e+00]\n [ 3.87013298e-16 -5.80304707e-16 1.61264709e-03 3.82101170e-14\n 2.54624863e-14 1.91971622e-18]\n [ 3.53198349e-01 2.79656887e-01 7.76206702e-02 2.11101747e+00\n -5.24092793e-01 1.55669508e+01]\n [ 9.63452101e-01 1.12090385e+00 3.28796357e-01 -1.15234098e+01\n 7.82538950e-01 -9.68074322e-01]\n [-9.47572630e-07 3.78112645e-07 1.61108177e-03 -2.48759006e-05\n -6.23401866e-05 2.37687122e-15]\n [-2.97404490e-02 2.50470135e-02 6.43909385e-04 -1.90853849e-01\n -3.68421488e-02 2.12034494e-01]\n [ 2.15088206e-04 -3.33431188e-07 1.48820039e-03 1.64726753e-05\n 1.41449524e-02 -3.00731284e-09]\n [ 8.74646189e-10 1.53231572e-09 1.61265512e-03 -1.00810595e-07\n 5.75427173e-08 -2.65592950e-18]\n [-3.83135819e+00 6.94426715e-01 8.38417590e-01 -2.68818474e+01\n 2.82143040e+01 -4.64184113e+01]]",
|
33 |
+
"cube_pos": "[[ 5.6401914e-18 3.6042324e-01 1.1368658e-01]\n [-1.6680775e-17 -4.2215401e-01 -1.0893599e+00]\n [ 3.4453440e-01 4.8166046e-01 7.9329348e-01]\n [-3.0279222e-03 -8.7112412e-02 -1.2249503e+00]\n [ 3.5545090e-01 3.1824964e-01 -2.8185880e-02]\n [-2.4285050e-01 -4.4638726e-01 2.2217385e-01]\n [ 8.1147194e-02 3.8236433e-01 1.0306629e+00]\n [ 2.3971403e-01 6.3715380e-01 -6.1393654e-01]\n [-1.8104200e-18 -2.7303278e-01 -1.1693243e+00]\n [ 9.5062995e-01 -3.4851149e-01 -6.3141954e-01]\n [-9.5980242e-02 2.7924436e-01 8.4893024e-01]\n [ 5.6803134e-10 -2.2796704e-01 -8.0892253e-01]\n [ 1.4271402e-02 -4.4855855e-02 5.3942406e-01]\n [-1.9170494e-04 7.0479929e-01 -1.3973734e+00]\n [-1.5334158e-19 5.7991576e-01 -7.8097284e-01]\n [-7.7169675e-01 1.5143976e+00 -1.3639555e+00]]"
|
34 |
},
|
35 |
+
"_episode_num": 4992,
|
36 |
"use_sde": false,
|
37 |
"sde_sample_freq": -1,
|
38 |
+
"_current_progress_remaining": 0.0,
|
39 |
"_stats_window_size": 100,
|
40 |
"ep_info_buffer": {
|
41 |
":type:": "<class 'collections.deque'>",
|
42 |
+
":serialized:": "gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQAm6lUIcBEOMAWyUS8iMAXSUR0CDZc8yvcJudX2UKGgGR0AIalchTwUhaAdLyGgIR0CDZOJqIrOJdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDY/xffGdadX2UKGgGR0ATdlvqC6H1aAdLyGgIR0CDYt7MPjGUdX2UKGgGR0AIUKmbb1yvaAdLyGgIR0CDgCnEVFhHdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDf0TM7lq8dX2UKGgGR0AXV6OYIBzWaAdLyGgIR0CDfmCeVcD9dX2UKGgGR0AIRakhzNliaAdLyGgIR0CDfX4mkWRBdX2UKGgGR0AQ5ElVtGd7aAdLyGgIR0CDfJv2GqPwdX2UKGgGR0AM4plSS/0vaAdLyGgIR0CDe7lXA/LUdX2UKGgGR0AIbZcs189faAdLyGgIR0CDetYW+GoKdX2UKGgGR0AXZNsWO6uoaAdLyGgIR0CDefLvkRzzdX2UKGgGR0Ae9N7BwdbQaAdLyGgIR0CDeQ9q1w5vdX2UKGgGR0AkYHwgDA8CaAdLyGgIR0CDeCxX4j8ldX2UKGgGR0AR5mBe5WilaAdLyGgIR0CDd0mXPZ7HdX2UKGgGR0AQ8v24/eLvaAdLyGgIR0CDdmaxX4j9dX2UKGgGR0AMjhNucc2jaAdLyGgIR0CDdXviLl3hdX2UKGgGR0AIZbbDdgv2aAdLyGgIR0CDdI8zQ/ordX2UKGgGR0AJXZf2K2roaAdLyGgIR0CDc6lDWsijdX2UKGgGR0ARpy8zyjHoaAdLyGgIR0CDcovIOpbVdX2UKGgGR0AIdfoicG1QaAdLyGgIR0CDkJ/NqxkedX2UKGgGR0AQXKYAsCkoaAdLyGgIR0CDj7p48loldX2UKGgGR0ATUSxqwhW6aAdLyGgIR0CDjtX1anrIdX2UKGgGR0AIiJO32EkCaAdLyGgIR0CDjfN9ph4MdX2UKGgGR0AOBk/bCaZyaAdLyGgIR0CDjRE2Hck/dX2UKGgGR0AJYuoP07KaaAdLyGgIR0CDjC6BiCrcdX2UKGgGR0AJPlp48lolaAdLyGgIR0CDi0sjmjj8dX2UKGgGR0AM6yQgcLjQaAdLyGgIR0CDimfvnbItdX2UKGgGR0AULT7VJ+UhaAdLyGgIR0CDiYRHww0wdX2UKGgGR0ANSIk7fYSQaAdLyGgIR0CDiKEhaC+UdX2UKGgGR0AQcf3evZAZaAdLyGgIR0CDh75yEL6UdX2UKGgGR0AJILRa5f+kaAdLyGgIR0CDhtt4zJp4dX2UKGgGR0AQ611GLDQ7aAdLyGgIR0CDhfCtRvWIdX2UKGgGR0AM29DhLoOhaAdLyGgIR0CDhQP5pJwsdX2UKGgGR0AL/eSB9TgmaAdLyGgIR0CDhB3lCCz1dX2UKGgGR0AN5zRx95QhaAdLyGgIR0CDgwBS1maqdX2UKGgGR0AIbY9Pk7wKaAdLyGgIR0CDoHAD7qIKdX2UKGgGR0Ao0FQl8gIQaAdLyGgIR0CDn4qo60Y1dX2UKGgGR0AROCg9Net0aAdLyGgIR0CDnqYyfthNdX2UKGgGR0AL5P2wmmcfaAdLyGgIR0CDncOqebuudX2UKGgGR0AIcyJsO5J9aAdLyGgIR0CDnOF5fMOgdX2UKGgGR0AJTi++M6zWaAdLyGgIR0CDm/7mdRR/dX2UKGgGR0AI8aqCHymRaAdLyGgIR0CDmxuYx+KCdX2UKGgGR0AIc1ZTyauwaAdLyGgIR0CDmjhgmZ3LdX2UKGgGR0AMh6rvLHMmaAdLyGgIR0CDmVTF2mpEdX2UKGgGR0AJoskIHC40aAdLyGgIR0CDmHG1hLGrdX2UKGgGR0AQOP5pJwsHaAdLyGgIR0CDl47f51vEdX2UKGgGR0AMgGr0aqCIaAdLyGgIR0CDlqvTPSlWdX2UKGgGR0AJIXl8w5/9aAdLyGgIR0CDlcDtgKF7dX2UKGgGR0AIqRwIdELIaAdLyGgIR0CDlNRHf/FSdX2UKGgGR0AKqs0YTCcgaAdLyGgIR0CDk+5YHPeIdX2UKGgGR0AsXpSrHU+caAdLyGgIR0CDktDk2gnMdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDsCg/1QIldX2UKGgGR0AIbWTX8O0+aAdLyGgIR0CDr0MrEtNBdX2UKGgGR0BDaWLYPGyYaAdLyGgIR0CDrl668QI2dX2UKGgGR0BCRl67dznzaAdLyGgIR0CDrXw1BMSLdX2UKGgGR0AJdzhgmZ3LaAdLyGgIR0CDrJn3+MqCdX2UKGgGR0BD98x0uDjBaAdLyGgIR0CDq7dbgTAWdX2UKGgGR0ANDPGACnxbaAdLyGgIR0CDqtQ0GeMAdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDqfEYO2AodX2UKGgGR0AJum+CbtqpaAdLyGgIR0CDqQ2CuloEdX2UKGgGR0AIfJq7AckuaAdLyGgIR0CDqCp6yB07dX2UKGgGR0AUlhw2l2vCaAdLyGgIR0CDp0e2d/aydX2UKGgGR0AIftv4ubqhaAdLyGgIR0CDpmTGHYYjdX2UKGgGR0AIpf0EovzwaAdLyGgIR0CDpXn8sMAndX2UKGgGR0AIbX4CZF5OaAdLyGgIR0CDpI1Muez2dX2UKGgGR0AJ4I+nqFAWaAdLyGgIR0CDo6dPLxI8dX2UKGgGR0AIrFsHjZL7aAdLyGgIR0CDoonw5NoKdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDv/aIvalDdX2UKGgGR0ALEZ1mrbQDaAdLyGgIR0CDvxErGza9dX2UKGgGR0AW0xsVLzwuaAdLyGgIR0CDviynDR+jdX2UKGgGR0AS8/fO2RaHaAdLyGgIR0CDvUo4uK4ydX2UKGgGR0AIhsMy8BdVaAdLyGgIR0CDvGgJTl1bdX2UKGgGR0AR8yAQQL/kaAdLyGgIR0CDu4XBxgiNdX2UKGgGR0AV+qdYnv2HaAdLyGgIR0CDuqKMNtqIdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDub9m6GxmdX2UKGgGR0AIbb+Lm6oVaAdLyGgIR0CDuNva11GLdX2UKGgGR0AIbWI42jwhaAdLyGgIR0CDt/i83++/dX2UKGgGR0APIskIHC40aAdLyGgIR0CDtxXp4bCKdX2UKGgGR0AYzQKKHfuUaAdLyGgIR0CDtjLXcxj8dX2UKGgGR0AIcUwi7kGSaAdLyGgIR0CDtUfnwG4adX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDtFtZV4ordX2UKGgGR0ALs3qAz544aAdLyGgIR0CDs3VT72tddX2UKGgGR0AIb/ZM+NcXaAdLyGgIR0CDslfv4M4MdX2UKGgGR0AIb+1jRUm2aAdLyGgIR0CDz7FmWdEtdX2UKGgGR0AJM1VHWjGlaAdLyGgIR0CDzswD/2kBdX2UKGgGR0ASnbSJCSieaAdLyGgIR0CDzeeK8+RpdX2UKGgGR0AOClJpWV/uaAdLyGgIR0CDzQUILPUsdX2UKGgGR0AJMgQpWmxdaAdLyGgIR0CDzCK8cuJ2dX2UKGgGR0AIoi9qUNayaAdLyGgIR0CDy0ADJU5udX2UKGgGR0AQH9aUzKs/aAdLyGgIR0CDylynUDuCdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDyXl/6O5sdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDyJYK6WgOdX2UKGgGR0AIbX6InBtUaAdLyGgIR0CDx7LqUu+RdX2UKGgGR0ARunR9gF5faAdLyGgIR0CDxtAVO9FndX2UKGgGR0AKKW9lEqlQaAdLyGgIR0CDxez3RG+cdX2UKGgGR0AIqnJkoWpIaAdLyGgIR0CDxQILPUrkdX2UKGgGR0AMgR7JGOMmaAdLyGgIR0CDxBVsDW9UdX2UKGgGR0AImDpTuOS4aAdLyGgIR0CDwy9X9zfadX2UKGgGR0AIgvJzT4L1aAdLyGgIR0CDwhHmzSkTdWUu"
|
43 |
},
|
44 |
"ep_success_buffer": {
|
45 |
":type:": "<class 'collections.deque'>",
|
46 |
":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="
|
47 |
},
|
48 |
+
"_n_updates": 62494,
|
49 |
"observation_space": {
|
50 |
":type:": "<class 'gymnasium.spaces.dict.Dict'>",
|
51 |
":serialized:": "gAWVzAMAAAAAAACMFWd5bW5hc2l1bS5zcGFjZXMuZGljdJSMBERpY3SUk5QpgZR9lCiMBnNwYWNlc5SMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojAhhcm1fcXBvc5SMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBgAAAAAAAAABAQEBAQGUaBOMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLBoWUjAFDlHSUUpSMDWJvdW5kZWRfYWJvdmWUaBwolgYAAAAAAAAAAQEBAQEBlGggSwaFlGgkdJRSlIwGX3NoYXBllEsGhZSMA2xvd5RoHCiWGAAAAAAAAADbD0nA2w9JwNsPScDbD0nA2w9JwNsPScCUaBZLBoWUaCR0lFKUjARoaWdolGgcKJYYAAAAAAAAANsPSUDbD0lA2w9JQNsPSUDbD0lA2w9JQJRoFksGhZRoJHSUUpSMCGxvd19yZXBylIwKLTMuMTQxNTkyN5SMCWhpZ2hfcmVwcpSMCTMuMTQxNTkyN5SMCl9ucF9yYW5kb22UTnVijAhhcm1fcXZlbJRoDSmBlH2UKGgQaBZoGWgcKJYGAAAAAAAAAAEBAQEBAZRoIEsGhZRoJHSUUpRoJ2gcKJYGAAAAAAAAAAEBAQEBAZRoIEsGhZRoJHSUUpRoLEsGhZRoLmgcKJYYAAAAAAAAAAAAIMEAACDBAAAgwQAAIMEAACDBAAAgwZRoFksGhZRoJHSUUpRoM2gcKJYYAAAAAAAAAAAAIEEAACBBAAAgQQAAIEEAACBBAAAgQZRoFksGhZRoJHSUUpRoOIwFLTEwLjCUaDqMBDEwLjCUaDxOdWKMCGN1YmVfcG9zlGgNKYGUfZQoaBBoFmgZaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgnaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgsSwOFlGguaBwolgwAAAAAAAAAAAAgwQAAIMEAACDBlGgWSwOFlGgkdJRSlGgzaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YnVoLE5oEE5oPE51Yi4=",
|
|
|
83 |
"__module__": "stable_baselines3.common.buffers",
|
84 |
"__annotations__": "{'observation_space': <class 'gymnasium.spaces.dict.Dict'>, 'obs_shape': typing.Dict[str, typing.Tuple[int, ...]], 'observations': typing.Dict[str, numpy.ndarray], 'next_observations': typing.Dict[str, numpy.ndarray]}",
|
85 |
"__doc__": "\n Dict Replay buffer used in off-policy algorithms like SAC/TD3.\n Extends the ReplayBuffer to use dictionary observations\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
86 |
+
"__init__": "<function DictReplayBuffer.__init__ at 0x7f4b16b71f30>",
|
87 |
+
"add": "<function DictReplayBuffer.add at 0x7f4b16b71fc0>",
|
88 |
+
"sample": "<function DictReplayBuffer.sample at 0x7f4b16b72050>",
|
89 |
+
"_get_samples": "<function DictReplayBuffer._get_samples at 0x7f4b16b720e0>",
|
90 |
"__abstractmethods__": "frozenset()",
|
91 |
+
"_abc_impl": "<_abc._abc_data object at 0x7f4b16b5f440>"
|
92 |
},
|
93 |
"replay_buffer_kwargs": {},
|
94 |
"train_freq": {
|
tqc-LiftCube-v0/ent_coef_optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1940
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1d7f3d79f0f32571353353d01699068c9ce0698a5cc0b07efcf50e306208fac
|
3 |
size 1940
|
tqc-LiftCube-v0/policy.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1549366
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05fe0b4128a38657e7462f37884ff8a9b455b43320485f069b8d02f274a93b65
|
3 |
size 1549366
|
tqc-LiftCube-v0/pytorch_variables.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:012ca703a3c2f6c27851894f0e6c258716fb34edfff1fdef626e60bf8acf89c5
|
3 |
size 1180
|
train_eval_metrics.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88dd9f1f69232bf23e6d6d1815149ed55b2ed511e79d56f077127b5fb5fcca7d
|
3 |
+
size 130174
|