Initial commit
Browse files- README.md +1 -1
- args.yml +1 -3
- dqn-LunarLander-v2.zip +2 -2
- dqn-LunarLander-v2/_stable_baselines3_version +1 -1
- dqn-LunarLander-v2/data +18 -19
- dqn-LunarLander-v2/policy.optimizer.pth +1 -1
- dqn-LunarLander-v2/policy.pth +1 -1
- dqn-LunarLander-v2/system_info.txt +1 -1
- replay.mp4 +2 -2
- results.json +1 -1
- train_eval_metrics.zip +2 -2
README.md
CHANGED
@@ -16,7 +16,7 @@ model-index:
|
|
16 |
type: LunarLander-v2
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
-
value:
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
|
|
16 |
type: LunarLander-v2
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
+
value: 144.49 +/- 16.12
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
args.yml
CHANGED
@@ -9,8 +9,6 @@
|
|
9 |
- LunarLander-v2
|
10 |
- - env_kwargs
|
11 |
- null
|
12 |
-
- - eval_env_kwargs
|
13 |
-
- null
|
14 |
- - eval_episodes
|
15 |
- 5
|
16 |
- - eval_freq
|
@@ -56,7 +54,7 @@
|
|
56 |
- - save_replay_buffer
|
57 |
- false
|
58 |
- - seed
|
59 |
-
-
|
60 |
- - storage
|
61 |
- null
|
62 |
- - study_name
|
|
|
9 |
- LunarLander-v2
|
10 |
- - env_kwargs
|
11 |
- null
|
|
|
|
|
12 |
- - eval_episodes
|
13 |
- 5
|
14 |
- - eval_freq
|
|
|
54 |
- - save_replay_buffer
|
55 |
- false
|
56 |
- - seed
|
57 |
+
- 2913073355
|
58 |
- - storage
|
59 |
- null
|
60 |
- - study_name
|
dqn-LunarLander-v2.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56f66be88551f986692b6c59836ded4828716a2d9952a5ef680bd90329772091
|
3 |
+
size 1133306
|
dqn-LunarLander-v2/_stable_baselines3_version
CHANGED
@@ -1 +1 @@
|
|
1 |
-
2.
|
|
|
1 |
+
2.1.0
|
dqn-LunarLander-v2/data
CHANGED
@@ -5,15 +5,15 @@
|
|
5 |
"__module__": "stable_baselines3.dqn.policies",
|
6 |
"__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
|
7 |
"__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
8 |
-
"__init__": "<function DQNPolicy.__init__ at
|
9 |
-
"_build": "<function DQNPolicy._build at
|
10 |
-
"make_q_net": "<function DQNPolicy.make_q_net at
|
11 |
-
"forward": "<function DQNPolicy.forward at
|
12 |
-
"_predict": "<function DQNPolicy._predict at
|
13 |
-
"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at
|
14 |
-
"set_training_mode": "<function DQNPolicy.set_training_mode at
|
15 |
"__abstractmethods__": "frozenset()",
|
16 |
-
"_abc_impl": "<_abc._abc_data object at
|
17 |
},
|
18 |
"verbose": 1,
|
19 |
"policy_kwargs": {
|
@@ -27,7 +27,7 @@
|
|
27 |
"_num_timesteps_at_start": 0,
|
28 |
"seed": 0,
|
29 |
"action_noise": null,
|
30 |
-
"start_time":
|
31 |
"learning_rate": {
|
32 |
":type:": "<class 'function'>",
|
33 |
":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS9vcHQvY29uZGEvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuDQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvb3B0L2NvbmRhL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz9EpNKyv9tNhZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
|
@@ -40,16 +40,16 @@
|
|
40 |
},
|
41 |
"_last_original_obs": {
|
42 |
":type:": "<class 'numpy.ndarray'>",
|
43 |
-
":serialized:": "
|
44 |
},
|
45 |
-
"_episode_num":
|
46 |
"use_sde": false,
|
47 |
"sde_sample_freq": -1,
|
48 |
"_current_progress_remaining": 0.0,
|
49 |
"_stats_window_size": 100,
|
50 |
"ep_info_buffer": {
|
51 |
":type:": "<class 'collections.deque'>",
|
52 |
-
":serialized:": "
|
53 |
},
|
54 |
"ep_success_buffer": {
|
55 |
":type:": "<class 'collections.deque'>",
|
@@ -92,15 +92,14 @@
|
|
92 |
":type:": "<class 'abc.ABCMeta'>",
|
93 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
94 |
"__module__": "stable_baselines3.common.buffers",
|
95 |
-
"__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
|
96 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
97 |
-
"__init__": "<function ReplayBuffer.__init__ at
|
98 |
-
"add": "<function ReplayBuffer.add at
|
99 |
-
"sample": "<function ReplayBuffer.sample at
|
100 |
-
"_get_samples": "<function ReplayBuffer._get_samples at
|
101 |
-
"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at
|
102 |
"__abstractmethods__": "frozenset()",
|
103 |
-
"_abc_impl": "<_abc._abc_data object at
|
104 |
},
|
105 |
"replay_buffer_kwargs": {},
|
106 |
"train_freq": {
|
|
|
5 |
"__module__": "stable_baselines3.dqn.policies",
|
6 |
"__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
|
7 |
"__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
8 |
+
"__init__": "<function DQNPolicy.__init__ at 0x788a1bb46d40>",
|
9 |
+
"_build": "<function DQNPolicy._build at 0x788a1bb46dd0>",
|
10 |
+
"make_q_net": "<function DQNPolicy.make_q_net at 0x788a1bb46e60>",
|
11 |
+
"forward": "<function DQNPolicy.forward at 0x788a1bb46ef0>",
|
12 |
+
"_predict": "<function DQNPolicy._predict at 0x788a1bb46f80>",
|
13 |
+
"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x788a1bb47010>",
|
14 |
+
"set_training_mode": "<function DQNPolicy.set_training_mode at 0x788a1bb470a0>",
|
15 |
"__abstractmethods__": "frozenset()",
|
16 |
+
"_abc_impl": "<_abc._abc_data object at 0x788a1bb5da40>"
|
17 |
},
|
18 |
"verbose": 1,
|
19 |
"policy_kwargs": {
|
|
|
27 |
"_num_timesteps_at_start": 0,
|
28 |
"seed": 0,
|
29 |
"action_noise": null,
|
30 |
+
"start_time": 1709533817958590946,
|
31 |
"learning_rate": {
|
32 |
":type:": "<class 'function'>",
|
33 |
":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS9vcHQvY29uZGEvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuDQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvb3B0L2NvbmRhL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz9EpNKyv9tNhZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
|
|
|
40 |
},
|
41 |
"_last_original_obs": {
|
42 |
":type:": "<class 'numpy.ndarray'>",
|
43 |
+
":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAGY52jzDiUK6UoKyN6t2iTEwc6M61WrPtgAAgD8AAIA/lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="
|
44 |
},
|
45 |
+
"_episode_num": 246,
|
46 |
"use_sde": false,
|
47 |
"sde_sample_freq": -1,
|
48 |
"_current_progress_remaining": 0.0,
|
49 |
"_stats_window_size": 100,
|
50 |
"ep_info_buffer": {
|
51 |
":type:": "<class 'collections.deque'>",
|
52 |
+
":serialized:": "gAWVCAwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQD41lxwQ17+MAWyUS9WMAXSUR0B2RoMVk+X7dX2UKGgGR0Bt/NOZb6gvaAdNgAJoCEdAdoQbiqABk3V9lChoBkdAQsM+zMRpUWgHS+RoCEdAdpsprULDynV9lChoBkdAQTKySmqHXWgHS5RoCEdAdqmvbXYlIHV9lChoBkdALRzrmhdt22gHS3JoCEdAdrUU+cH4XXV9lChoBkfABh9uP3i71GgHS3poCEdAdsEDZUT+N3V9lChoBkdAYh/B5X2du2gHTegDaAhHQHcp2kN4JNV1fZQoaAZHwC1WY8dPtUpoB0txaAhHQHc17KvFFUh1fZQoaAZHwAcAP3BYV7BoB0uAaAhHQHdCGy1NQCV1fZQoaAZHQHNNalDWsiloB011AmgIR0B3fqHh0hePdX2UKGgGR8Bw5kvnKW9laAdNAQFoCEdAd5jFzuF6A3V9lChoBkfAbf7yjHn2ZmgHS4poCEdAd6bkCmuTzXV9lChoBkdAO377CSA6MmgHS5BoCEdAd7VwKSgXdnV9lChoBkdAbyLO6/ZdwGgHTTcDaAhHQHgHW0qpcX51fZQoaAZHQDN0Wi1y/9JoB0uUaAhHQHgWK94/u9h1fZQoaAZHQDC6MKkVN6BoB0uBaAhHQHgjS7btZ3d1fZQoaAZHQDUfRnezlcRoB0t9aAhHQHgvXeenQ6Z1fZQoaAZHQGNDmQCCBf9oB03oA2gIR0B4kwh1Tzd2dX2UKGgGR0A6YieumrKeaAdLfWgIR0B4n4sNDtw8dX2UKGgGR0Bky/73wkPdaAdN6ANoCEdAeQ0cZ9/jKnV9lChoBkfAL0IVdonKGWgHS35oCEdAeR4h4dIXj3V9lChoBkdAclpBqbjLjmgHTQ0BaAhHQHk8eWWyC4B1fZQoaAZHQCOHWattALRoB0uGaAhHQHlLZqynk1d1fZQoaAZHQHDSfovBacJoB00UAWgIR0B5ayB06o2odX2UKGgGR0BwB8nkT6BRaAdNCQFoCEdAeYr6K+BYm3V9lChoBkfAJOYxL0z0pWgHS7JoCEdAeZ/PGhmGunV9lChoBkdAcfdjxCpm3GgHTTkCaAhHQHnos3hn8Kp1fZQoaAZHQHHOxfOUt7NoB029AWgIR0B6KLoV2zOYdX2UKGgGR0A5LbutwJgLaAdLamgIR0B6Nx/J/5LzdX2UKGgGR0BxZGICU5dXaAdNXgFoCEdAelwBCUornXV9lChoBkdAaFMBOpKjBWgHTegDaAhHQHrD+XJHRTl1fZQoaAZHQG+ikcKgIyFoB032AmgIR0B7FkQJ5VwQdX2UKGgGR0AsmNLlFMIvaAdLaWgIR0B7IL/hl18tdX2UKGgGR0BkdZYgaFVUaAdN6ANoCEdAe4jOmixmkHV9lChoBkfAQ6vPHDJlrmgHS2poCEdAe5Um+TNdJXV9lChoBkdAbnpLOiWVvGgHTckBaAhHQHvF5mEoOQR1fZQoaAZHwDSm17Y02tNoB0tSaAhHQHvOumixmkF1fZQoaAZHQGCNAkcCHRFoB03oA2gIR0B8TwmdAgPmdX2UKGgGR0A8z21lXiiqaAdLaWgIR0B8WayWzF/AdX2UKGgGR0BEkf1HvttzaAdLVmgIR0B8YkOx0MgEdX2UKGgGR0A7gijtXxOMaAdLgmgIR0B8b0fFJg9edX2UKGgGR0Bjfy2KEWZaaAdN6ANoCEdAfNiChvitJXV9lChoBkdAQ+meg+Qlr2gHS11oCEdAfOJGRmseXHV9lChoBkdAMsS1Z1V5r2gHS3loCEdAfO6iV0Lc9HV9lChoBkdAPwjqfOD8L2gHS2toCEdAfPvy9mHxjXV9lChoBkdARZRhc7hegWgHS2poCEdAfQiTZxrBTHV9lChoBkdAQoFtQ9A5aWgHS2toCEdAfRQUTtb9qHV9lChoBkdANvpuMuOCG2gHS3FoCEdAfSAMmnfl63V9lChoBkdAMq/BacI7eWgHS29oCEdAfSuby6MBIXV9lChoBkdAK22eQMhHLGgHS29oCEdAfTcaIeo1k3V9lChoBkfAQocIC2c8T2gHS09oCEdAfT8NdZ7ojnV9lChoBkdAYFTE/jbSJGgHTegDaAhHQH2oM7MgU111fZQoaAZHQEAeFFDv3JxoB0ttaAhHQH2zfkRzzVd1fZQoaAZHQD+E8xKxs2xoB0t/aAhHQH3A8HGCI1t1fZQoaAZHQGHKeHrQgLZoB03oA2gIR0B+KRVHWjGldX2UKGgGR0BEo73Gn4wiaAdLlGgIR0B+OIO+ZgG9dX2UKGgGR0BcfO5e7cwhaAdN6ANoCEdAfqD9XcQAdXV9lChoBkdAb3HVlwtJ4GgHTRMBaAhHQH6+iIP9UCJ1fZQoaAZHwFDCr2g3975oB0traAhHQH7JKMefZmJ1fZQoaAZHQF0qSHM2WIJoB03oA2gIR0B/NZD1GsmwdX2UKGgGR0BLyPf8/D+BaAdLaGgIR0B/P7f51vETdX2UKGgGR0BmL2pXIU8FaAdN6ANoCEdAf6p29tdiUnV9lChoBkfALunfMwDeTGgHS19oCEdAf7S8XN1QqXV9lChoBkfAVU0svqTr3WgHS3FoCEdAf8CiblRxcXV9lChoBkdAcXIbSqlxfmgHS65oCEdAf9ME3sHB13V9lChoBkfAKfqeTV2A5WgHS4FoCEdAf9/71ZkkKXV9lChoBkdAaB4ojOcDsGgHTdoBaAhHQIAJOPtD2J11fZQoaAZHQCPS39aUzKtoB0tgaAhHQIAOUuanaWZ1fZQoaAZHwEM6mICU5dZoB0t0aAhHQIAUf114gRt1fZQoaAZHQA7UdBBzFMtoB0uMaAhHQIAbnWBjFyd1fZQoaAZHQED2QPqcEvFoB0tsaAhHQIAhegezUqh1fZQoaAZHwA/F1KXfIjpoB0unaAhHQIAp30Cih391fZQoaAZHwFtdnqmj0thoB0tuaAhHQIAvzhFVktp1fZQoaAZHQEUeO2iL2pRoB0tnaAhHQIA1J5iVjZt1fZQoaAZHQEmri0fHPu5oB0t8aAhHQIA7i2phnap1fZQoaAZHwDitZ0Syt3hoB0uAaAhHQIBCntOVPep1fZQoaAZHwFhdvicXm/5oB0tyaAhHQIBIwEhaC+V1fZQoaAZHwGQpIAGSpzdoB00dAWgIR0CAV65cTrVwdX2UKGgGR8BRIG6ClJpWaAdLdGgIR0CAXYgPmPo3dX2UKGgGR0BiafQ8fV7QaAdN6ANoCEdAgJSNs3yZr3V9lChoBkfAS/Si22G7BmgHS3doCEdAgJsRCIDYAnV9lChoBkdAZK9+85CF9WgHTegDaAhHQIDQtutOmBR1fZQoaAZHwDXVGax5cC5oB0uLaAhHQIDX1pmEoOR1fZQoaAZHwCiyXfIjnmtoB0tKaAhHQIDbdLnLaEl1fZQoaAZHQGYWnh86V+toB03oA2gIR0CBEGreZXuFdX2UKGgGR0BkxohMajveaAdN6ANoCEdAgURSimEXcnV9lChoBkdAEPslLOAy22gHS5FoCEdAgUwCX6ZYxXV9lChoBkdAYHoPe54GEGgHTegDaAhHQIGBRMvh60J1fZQoaAZHQEP7JqZc9ntoB0uCaAhHQIGH9vMr3Cd1fZQoaAZHQC0PJA+pwS9oB0txaAhHQIGN9senyd51fZQoaAZHQGPsog3cYZVoB03oA2gIR0CBw+h5gPVedX2UKGgGR0BtDgFxGUfQaAdLwmgIR0CBzdNyo4uLdX2UKGgGR0Bed6DsdDIBaAdN6ANoCEdAggJKISDh+HV9lChoBkdAOSMasIVuaWgHS15oCEdAggbx8c+7lXV9lChoBkdAYiFNWU8mr2gHTegDaAhHQII7nAj6eoV1fZQoaAZHQGVrpN0vGqBoB03oA2gIR0CCcPPC2tuDdX2UKGgGR0BjW+fbsWweaAdN6ANoCEdAgqq40EX+EXV9lChoBkdAZqPlCCz1LGgHTegDaAhHQILgvMOf/WF1fZQoaAZHQGgpifYjB2xoB03oA2gIR0CDFuu01IiDdX2UKGgGR0BxVSMGX5WSaAdNhQFoCEdAgyx2+PBBRnVlLg=="
|
53 |
},
|
54 |
"ep_success_buffer": {
|
55 |
":type:": "<class 'collections.deque'>",
|
|
|
92 |
":type:": "<class 'abc.ABCMeta'>",
|
93 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
94 |
"__module__": "stable_baselines3.common.buffers",
|
|
|
95 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
96 |
+
"__init__": "<function ReplayBuffer.__init__ at 0x788a1bb3f2e0>",
|
97 |
+
"add": "<function ReplayBuffer.add at 0x788a1bb3f370>",
|
98 |
+
"sample": "<function ReplayBuffer.sample at 0x788a1bb3f400>",
|
99 |
+
"_get_samples": "<function ReplayBuffer._get_samples at 0x788a1bb3f490>",
|
100 |
+
"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x788a1bb3f520>)>",
|
101 |
"__abstractmethods__": "frozenset()",
|
102 |
+
"_abc_impl": "<_abc._abc_data object at 0x7889f0758e40>"
|
103 |
},
|
104 |
"replay_buffer_kwargs": {},
|
105 |
"train_freq": {
|
dqn-LunarLander-v2/policy.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 558240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf4230806be65a290aa57b69ac011a0708e96ce191ee415fadc962bd13d89f9d
|
3 |
size 558240
|
dqn-LunarLander-v2/policy.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557362
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65cc6087098a2d089cd52f69c9809570511d1401bbd8fc7a91ef5e8f4ea3ab47
|
3 |
size 557362
|
dqn-LunarLander-v2/system_info.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
- OS: Linux-5.15.133+-x86_64-with-glibc2.31 # 1 SMP Tue Dec 19 13:14:11 UTC 2023
|
2 |
- Python: 3.10.13
|
3 |
-
- Stable-Baselines3: 2.
|
4 |
- PyTorch: 2.1.2+cpu
|
5 |
- GPU Enabled: False
|
6 |
- Numpy: 1.26.4
|
|
|
1 |
- OS: Linux-5.15.133+-x86_64-with-glibc2.31 # 1 SMP Tue Dec 19 13:14:11 UTC 2023
|
2 |
- Python: 3.10.13
|
3 |
+
- Stable-Baselines3: 2.1.0
|
4 |
- PyTorch: 2.1.2+cpu
|
5 |
- GPU Enabled: False
|
6 |
- Numpy: 1.26.4
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ff849d502541544eca96ce26d9fee4e170d2054274ef02e9f12e50bcbadee21
|
3 |
+
size 157380
|
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"mean_reward":
|
|
|
1 |
+
{"mean_reward": 144.49050929999999, "std_reward": 16.122904644961487, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-03-04T06:59:27.458864"}
|
train_eval_metrics.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57ecbdd22f3478d66d8984a789392627b7629bffdb979b149e52bd0c61f2ef38
|
3 |
+
size 7889
|