nsanghi committed
Commit 6279d6f
1 Parent(s): e503e2a

Initial commit

README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
   type: LunarLander-v2
   metrics:
   - type: mean_reward
- value: -22.33 +/- 212.36
+ value: 144.49 +/- 16.12
   name: mean_reward
   verified: false
   ---
args.yml CHANGED
@@ -9,8 +9,6 @@
   - LunarLander-v2
   - - env_kwargs
   - null
- - - eval_env_kwargs
- - null
   - - eval_episodes
   - 5
   - - eval_freq
@@ -56,7 +54,7 @@
   - - save_replay_buffer
   - false
   - - seed
- - 3780163126
+ - 2913073355
   - - storage
   - null
   - - study_name
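
args.yml is rl-baselines3-zoo's record of the training arguments, stored as a YAML list of [name, value] pairs; this commit drops the eval_env_kwargs entry and changes the recorded seed. A minimal sketch for reading those pairs back into a dict for inspection (assuming PyYAML; depending on the zoo version the file may carry a !!python/object/apply:collections.OrderedDict tag, which yaml.safe_load rejects, hence UnsafeLoader — only use it on files you trust):

```python
# Sketch only: read the recorded training arguments back into a plain dict.
# Assumes PyYAML and the rl-baselines3-zoo args.yml layout shown above.
import yaml

with open("args.yml") as f:
    loaded = yaml.load(f, Loader=yaml.UnsafeLoader)  # a dict or a list of [name, value] pairs

args = loaded if isinstance(loaded, dict) else dict(loaded)
print(args.get("seed"), args.get("eval_episodes"))  # 2913073355 5 after this commit
```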
dqn-LunarLander-v2.zip CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7fbf33f0ea50b5be5a3c6f7821b1c69f09c54778d9fa10ca0357d98807e09f2c
- size 1133611
+ oid sha256:56f66be88551f986692b6c59836ded4828716a2d9952a5ef680bd90329772091
+ size 1133306
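
dqn-LunarLander-v2.zip is the full stable-baselines3 save archive for this model. A minimal sketch of loading it and rolling out the greedy policy for one episode (assuming stable-baselines3 and gymnasium with the Box2D extra are installed locally):

```python
# Sketch: load the uploaded SB3 checkpoint and run one deterministic episode.
# Assumes `pip install stable-baselines3 gymnasium[box2d]` on the local machine.
import gymnasium as gym
from stable_baselines3 import DQN

model = DQN.load("dqn-LunarLander-v2.zip")
env = gym.make("LunarLander-v2")

obs, _ = env.reset(seed=0)
done, episode_return = False, 0.0
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(action)
    episode_return += reward
    done = terminated or truncated
print(f"episode return: {episode_return:.1f}")
```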
dqn-LunarLander-v2/_stable_baselines3_version CHANGED
@@ -1 +1 @@
- 2.3.0a2
+ 2.1.0
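
This version stamp, like the data file whose diff follows, is also stored as an entry inside dqn-LunarLander-v2.zip. A minimal sketch for reading both straight from the archive without loading the model (assuming the standard stable-baselines3 zip layout):

```python
# Sketch: peek at metadata inside the SB3 save archive without deserializing
# the model. Assumes the standard SB3 layout, with "_stable_baselines3_version"
# and "data" (JSON) entries alongside policy.pth and policy.optimizer.pth.
import json
import zipfile

with zipfile.ZipFile("dqn-LunarLander-v2.zip") as archive:
    sb3_version = archive.read("_stable_baselines3_version").decode().strip()
    data = json.loads(archive.read("data"))

print(sb3_version)           # 2.1.0 after this commit
print(data["_episode_num"])  # 246 after this commit
print(data["seed"])          # 0, the algorithm seed stored with the model
```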
dqn-LunarLander-v2/data CHANGED
@@ -5,15 +5,15 @@
  "__module__": "stable_baselines3.dqn.policies",
  "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
  "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
- "__init__": "<function DQNPolicy.__init__ at 0x7b5031033010>",
- "_build": "<function DQNPolicy._build at 0x7b50310330a0>",
- "make_q_net": "<function DQNPolicy.make_q_net at 0x7b5031033130>",
- "forward": "<function DQNPolicy.forward at 0x7b50310331c0>",
- "_predict": "<function DQNPolicy._predict at 0x7b5031033250>",
- "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7b50310332e0>",
- "set_training_mode": "<function DQNPolicy.set_training_mode at 0x7b5031033370>",
  "__abstractmethods__": "frozenset()",
- "_abc_impl": "<_abc._abc_data object at 0x7b5031201b00>"
  },
  "verbose": 1,
  "policy_kwargs": {
@@ -27,7 +27,7 @@
  "_num_timesteps_at_start": 0,
  "seed": 0,
  "action_noise": null,
- "start_time": 1709528506095300901,
  "learning_rate": {
  ":type:": "<class 'function'>",
  ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS9vcHQvY29uZGEvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuDQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvb3B0L2NvbmRhL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz9EpNKyv9tNhZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
@@ -40,16 +40,16 @@
  },
  "_last_original_obs": {
  ":type:": "<class 'numpy.ndarray'>",
- ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAIBFAr4qvrE/Gkgsv5A7lb4fInY9l5uBPQAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="
  },
- "_episode_num": 192,
  "use_sde": false,
  "sde_sample_freq": -1,
  "_current_progress_remaining": 0.0,
  "_stats_window_size": 100,
  "ep_info_buffer": {
  ":type:": "<class 'collections.deque'>",
- ":serialized:": "gAWVKAwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQClAY3vQWveMAWyUS4uMAXSUR0Bumt6mfoRqdX2UKGgGR0Bb3k1uR9w4aAdN6ANoCEdAb2uU1yeZonV9lChoBkdAcREPRzBAOmgHTeoBaAhHQG/P5NGmUGF1fZQoaAZHQHBvG7J4jbBoB00OA2gIR0BwNpfJFLFodX2UKGgGR0Bqe58pkPMCaAdNXQFoCEdAcFvp++dsi3V9lChoBkdAcFGr0aqCH2gHTWwBaAhHQHCBAHmig011fZQoaAZHQGmjK3uuzQhoB02HA2gIR0Bw2ZZNfw7UdX2UKGgGR0BwxV1ZDArQaAdNpwFoCEdAcQQnFYMfBHV9lChoBkfASARtDUmUn2gHS7NoCEdAcRTmdAgPmXV9lChoBkfAMeVsUIsyz2gHS4xoCEdAcSOxgy/KyXV9lChoBkdAUGM5PuXu3WgHTegDaAhHQHGRRCQcPvt1fZQoaAZHQHABr8Jlar5oB02VAmgIR0Bx1MqWkadddX2UKGgGR0ATWWNWEK3NaAdN6ANoCEdAcj0Ifr8iwHV9lChoBkdAcVYDqGDcumgHS8doCEdAclLy2x6fJ3V9lChoBkdASYEafjCHh2gHTegDaAhHQHK+09lmOEN1fZQoaAZHQFXVT6i0v5BoB03oA2gIR0BzJ+qgh8pkdX2UKGgGR0AwUP9UCJXRaAdN6ANoCEdAc5iQ9A5aNnV9lChoBkdALuMcp9ZzP2gHS7loCEdAc62/hl18s3V9lChoBkfAKSaN2ki2UmgHTegDaAhHQHQX930PH1h1fZQoaAZHQGE5lVcUuctoB03oA2gIR0B0mB5TqB3BdX2UKGgGR8A5fLDQ7cO9aAdN6ANoCEdAdPkD7qIJq3V9lChoBkdAYhRD/EOy3WgHTegDaAhHQHVlyYLLIPt1fZQoaAZHwDCjGGVRk3FoB03oA2gIR0B10+Rhc7hfdX2UKGgGR0BYGfJvHcUNaAdN6ANoCEdAdj3DnNgSe3V9lChoBkdAVRpYcNpdr2gHTegDaAhHQHawN/BnBcl1fZQoaAZHwEL3kxREWqNoB03oA2gIR0B3HizWwu/UdX2UKGgGR0AxGalk6LflaAdN6ANoCEdAd5GLE1l5GHV9lChoBkdAYR3AO8TSLWgHTegDaAhHQHgCd7a7EpB1fZQoaAZHQFBRwUg0TDhoB03oA2gIR0B4czfaYeDGdX2UKGgGR0BYdoUrTYukaAdN6ANoCEdAeOi+ee4Cp3V9lChoBkdAaLeaS9ugpWgHTfcBaAhHQHklF67dzn11fZQoaAZHQF6l6Mzdk8RoB03oA2gIR0B5nf779AHFdX2UKGgGR7/zGp6yB06paAdN6ANoCEdAegLCxNZeRnV9lChoBkdAbg0SxJNCaGgHTTsBaAhHQHojEl/pdKN1fZQoaAZHQHGAVEmY0EZoB00ZAWgIR0B6QiuPmxMWdX2UKGgGR0A4U7wazeGgaAdLgWgIR0B6T6JqIrOJdX2UKGgGR0Bux3JkoWpIaAdN5QJoCEdAeqOOPNmlInV9lChoBkdANPQZKnNxEWgHS3BoCEdAeq+SiM5wO3V9lChoBkdAcZXgwXZXdWgHS/doCEdAesvUM5OrQ3V9lChoBkdAYBAnG8274GgHTegDaAhHQHs+dmg8KXx1fZQoaAZHwCMfgLqlgtxoB03oA2gIR0B7rEWKuSwGdX2UKGgGR0BtNGNHYpUhaAdN4gFoCEdAe9+OJ+DvmnV9lChoBkdAce/trKvFFWgHTZgCaAhHQHwqWHUMG5d1fZQoaAZHQEWpcIJJGvxoB0u1aAhHQHw+xTGYKIB1fZQoaAZHQDhL9n9NvfloB0t+aAhHQHxNX/xUedV1fZQoaAZHQEQlpN9H+ZRoB0tVaAhHQHxW2o3rD651fZQoaAZHQCHsoc7yQPtoB03oA2gIR0B8wdHz6JqJdX2UKGgGR8B4x4XenAIqaAdNBQFoCEdAfNxTOxB3R3V9lChoBkdAcGC5KvmozmgHS9JoCEdAfPUD4QBgeHV9lChoBkdAZDB8neBQN2gHTegDaAhHQH1ssXWOIZZ1fZQoaAZHQCitOqNp/PRoB03oA2gIR0B93b3Zf2K3dX2UKGgGR0Bs3zvb48EFaAdNKQNoCEdAfjgkIHC40HV9lChoBkdAbYoj3225QWgHTRYCaAhHQH502gi/wiJ1fZQoaAZHQGsOpSzgMttoB00JA2gIR0B+0GWnjyWidX2UKGgGR0BvpdeQdS2qaAdNaQFoCEdAfvxon8baRXV9lChoBkdAZSA+s5n14GgHTegDaAhHQH+tFMqSX+l1fZQoaAZHQGet++M6zVtoB03oA2gIR0CAESQzUI9ldX2UKGgGR0BxfjDKoybhaAdN3gNoCEdAgEqQN0/4ZnV9lChoBkdAYfmd+XqqwWgHTegDaAhHQICE4nH/9511fZQoaAZHQHBVJ0wJw85oB03cA2gIR0CAwzaFEiMYdX2UKGgGR0BivCe05U97aAdN6ANoCEdAgP2eZXuE3HV9lChoBkfAOzUQPI4lyGgHTSgBaAhHQIEPrAUL2Ht1fZQoaAZHQECMbo8p1A9oB0t9aAhHQIEW9FlTWG11fZQoaAZHQGxbS2QXAM5oB016A2gIR0CBTAfzSThYdX2UKGgGR0BwGUprk8zRaAdNGQFoCEdAgVwL/sE7n3V9lChoBkdAaGA052hZhmgHTXUBaAhHQIFxfDxb0OF1fZQoaAZHQGD7FSsKb8ZoB03oA2gIR0CBsaVW0Z3tdX2UKGgGR0BtO6aCtihGaAdNXwNoCEdAgeNXz19ORHV9lChoBkdAQ+y3Td+G5GgHS4hoCEdAgetsNtqHoHV9lChoBkdAYJY+yJKraWgHTegDaAhHQIIoEXHim2t1fZQoaAZHQGJ6DLB9Cu5oB03oA2gIR0CCX2WZZ0SzdX2UKGgGR8ALWHi3ocJdaAdLbWgIR0CCZTYe1a4ddX2UKGgGR0BG99FnZkCnaAdLcGgIR0CCaytp22XtdX2UKGgGR0By/bhfjS5RaAdNCwFoCEdAgnnj15B1LnV9lChoBkfAE0LuhK15SmgHS4BoCEdAgoF8tXgccXV9lChoBkdAQP4U5+6RQ2gHS4doCEdAgolbA+IM0HV9lChoBkdAb6uFtbcGkmgHTa4BaAhHQIKiivHLidd1fZQoaAZHQHDhyGN70FtoB00QA2gIR0CCzekAPuohdX2UKGgGR0BdO7XYlIEsaAdN6ANoCEdAgwYtnPE873V9lChoBkdAH7Q9zOoo/mgHS2loCEdAgwx2SU1Q7HV9lChoBkdANnTKT0QK8mgHS35oCEdAgxN+j/MnqnV9lChoBkfASX8FEAo5P2gHS3hoCEdAgxph5ooNNXV9lChoBkdATQHH5rP+oGgHS31oCEdAgyC9Qfp2U3V9lChoBkdAcQSoLXtjTmgHTSECaAhHQIM/WhZha1V1fZQoaAZHQGGnQAMlTm5oB03oA2gI
R0CDd7HkLhJidX2UKGgGR0BobatozvZzaAdN6ANoCEdAg7M5Ec81XXV9lChoBkdAMtFv/BFd9mgHS7xoCEdAg71RyXD3unV9lChoBkdAcMWVs1sLv2gHTfkBaAhHQIPbszl90A91fZQoaAZHQDSquyNXHR1oB0uKaAhHQIPjeHgxagV1fZQoaAZHQGMP83l0YCRoB03oA2gIR0CEHZkz41xbdX2UKGgGR0A4ul0HQhOhaAdLYWgIR0CEI5XdTHbRdX2UKGgGR0BBRWa+evpyaAdLhWgIR0CEK2KdhAnldX2UKGgGRz/1u8scyWRjaAdLqGgIR0CENa3CsOoYdX2UKGgGR0A9VlyBClabaAdLdGgIR0CEPCdsi0OWdX2UKGgGR0Bm4sipvP1MaAdN6ANoCEdAhHZ1z6rNn3V9lChoBkdAcSm8D0UXYWgHTZMDaAhHQISuN+/gzgx1fZQoaAZHQGlpsmF8G9poB02cA2gIR0CE4lcyFfzCdX2UKGgGR0BwxGJ79hqkaAdNWAFoCEdAhPZJ17pmmXV9lChoBkdAYqoWnCO3lWgHTegDaAhHQIUuuvMbFS91fZQoaAZHQGF17JGOMl1oB03oA2gIR0CFZvpt78ekdWUu"
  },
  "ep_success_buffer": {
  ":type:": "<class 'collections.deque'>",
@@ -92,15 +92,14 @@
  ":type:": "<class 'abc.ABCMeta'>",
  ":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
  "__module__": "stable_baselines3.common.buffers",
- "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
  "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
- "__init__": "<function ReplayBuffer.__init__ at 0x7b5031184820>",
- "add": "<function ReplayBuffer.add at 0x7b50311848b0>",
- "sample": "<function ReplayBuffer.sample at 0x7b5031184940>",
- "_get_samples": "<function ReplayBuffer._get_samples at 0x7b50311849d0>",
- "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7b5031184a60>)>",
  "__abstractmethods__": "frozenset()",
- "_abc_impl": "<_abc._abc_data object at 0x7b50311108c0>"
  },
  "replay_buffer_kwargs": {},
  "train_freq": {
 
  "__module__": "stable_baselines3.dqn.policies",
  "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
  "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
+ "__init__": "<function DQNPolicy.__init__ at 0x788a1bb46d40>",
+ "_build": "<function DQNPolicy._build at 0x788a1bb46dd0>",
+ "make_q_net": "<function DQNPolicy.make_q_net at 0x788a1bb46e60>",
+ "forward": "<function DQNPolicy.forward at 0x788a1bb46ef0>",
+ "_predict": "<function DQNPolicy._predict at 0x788a1bb46f80>",
+ "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x788a1bb47010>",
+ "set_training_mode": "<function DQNPolicy.set_training_mode at 0x788a1bb470a0>",
  "__abstractmethods__": "frozenset()",
+ "_abc_impl": "<_abc._abc_data object at 0x788a1bb5da40>"
  },
  "verbose": 1,
  "policy_kwargs": {
 
  "_num_timesteps_at_start": 0,
  "seed": 0,
  "action_noise": null,
+ "start_time": 1709533817958590946,
  "learning_rate": {
  ":type:": "<class 'function'>",
  ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS9vcHQvY29uZGEvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuDQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvb3B0L2NvbmRhL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz9EpNKyv9tNhZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
 
  },
  "_last_original_obs": {
  ":type:": "<class 'numpy.ndarray'>",
+ ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAGY52jzDiUK6UoKyN6t2iTEwc6M61WrPtgAAgD8AAIA/lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="
  },
+ "_episode_num": 246,
  "use_sde": false,
  "sde_sample_freq": -1,
  "_current_progress_remaining": 0.0,
  "_stats_window_size": 100,
  "ep_info_buffer": {
  ":type:": "<class 'collections.deque'>",
+ ":serialized:": "gAWVCAwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHQD41lxwQ17+MAWyUS9WMAXSUR0B2RoMVk+X7dX2UKGgGR0Bt/NOZb6gvaAdNgAJoCEdAdoQbiqABk3V9lChoBkdAQsM+zMRpUWgHS+RoCEdAdpsprULDynV9lChoBkdAQTKySmqHXWgHS5RoCEdAdqmvbXYlIHV9lChoBkdALRzrmhdt22gHS3JoCEdAdrUU+cH4XXV9lChoBkfABh9uP3i71GgHS3poCEdAdsEDZUT+N3V9lChoBkdAYh/B5X2du2gHTegDaAhHQHcp2kN4JNV1fZQoaAZHwC1WY8dPtUpoB0txaAhHQHc17KvFFUh1fZQoaAZHwAcAP3BYV7BoB0uAaAhHQHdCGy1NQCV1fZQoaAZHQHNNalDWsiloB011AmgIR0B3fqHh0hePdX2UKGgGR8Bw5kvnKW9laAdNAQFoCEdAd5jFzuF6A3V9lChoBkfAbf7yjHn2ZmgHS4poCEdAd6bkCmuTzXV9lChoBkdAO377CSA6MmgHS5BoCEdAd7VwKSgXdnV9lChoBkdAbyLO6/ZdwGgHTTcDaAhHQHgHW0qpcX51fZQoaAZHQDN0Wi1y/9JoB0uUaAhHQHgWK94/u9h1fZQoaAZHQDC6MKkVN6BoB0uBaAhHQHgjS7btZ3d1fZQoaAZHQDUfRnezlcRoB0t9aAhHQHgvXeenQ6Z1fZQoaAZHQGNDmQCCBf9oB03oA2gIR0B4kwh1Tzd2dX2UKGgGR0A6YieumrKeaAdLfWgIR0B4n4sNDtw8dX2UKGgGR0Bky/73wkPdaAdN6ANoCEdAeQ0cZ9/jKnV9lChoBkfAL0IVdonKGWgHS35oCEdAeR4h4dIXj3V9lChoBkdAclpBqbjLjmgHTQ0BaAhHQHk8eWWyC4B1fZQoaAZHQCOHWattALRoB0uGaAhHQHlLZqynk1d1fZQoaAZHQHDSfovBacJoB00UAWgIR0B5ayB06o2odX2UKGgGR0BwB8nkT6BRaAdNCQFoCEdAeYr6K+BYm3V9lChoBkfAJOYxL0z0pWgHS7JoCEdAeZ/PGhmGunV9lChoBkdAcfdjxCpm3GgHTTkCaAhHQHnos3hn8Kp1fZQoaAZHQHHOxfOUt7NoB029AWgIR0B6KLoV2zOYdX2UKGgGR0A5LbutwJgLaAdLamgIR0B6Nx/J/5LzdX2UKGgGR0BxZGICU5dXaAdNXgFoCEdAelwBCUornXV9lChoBkdAaFMBOpKjBWgHTegDaAhHQHrD+XJHRTl1fZQoaAZHQG+ikcKgIyFoB032AmgIR0B7FkQJ5VwQdX2UKGgGR0AsmNLlFMIvaAdLaWgIR0B7IL/hl18tdX2UKGgGR0BkdZYgaFVUaAdN6ANoCEdAe4jOmixmkHV9lChoBkfAQ6vPHDJlrmgHS2poCEdAe5Um+TNdJXV9lChoBkdAbnpLOiWVvGgHTckBaAhHQHvF5mEoOQR1fZQoaAZHwDSm17Y02tNoB0tSaAhHQHvOumixmkF1fZQoaAZHQGCNAkcCHRFoB03oA2gIR0B8TwmdAgPmdX2UKGgGR0A8z21lXiiqaAdLaWgIR0B8WayWzF/AdX2UKGgGR0BEkf1HvttzaAdLVmgIR0B8YkOx0MgEdX2UKGgGR0A7gijtXxOMaAdLgmgIR0B8b0fFJg9edX2UKGgGR0Bjfy2KEWZaaAdN6ANoCEdAfNiChvitJXV9lChoBkdAQ+meg+Qlr2gHS11oCEdAfOJGRmseXHV9lChoBkdAMsS1Z1V5r2gHS3loCEdAfO6iV0Lc9HV9lChoBkdAPwjqfOD8L2gHS2toCEdAfPvy9mHxjXV9lChoBkdARZRhc7hegWgHS2poCEdAfQiTZxrBTHV9lChoBkdAQoFtQ9A5aWgHS2toCEdAfRQUTtb9qHV9lChoBkdANvpuMuOCG2gHS3FoCEdAfSAMmnfl63V9lChoBkdAMq/BacI7eWgHS29oCEdAfSuby6MBIXV9lChoBkdAK22eQMhHLGgHS29oCEdAfTcaIeo1k3V9lChoBkfAQocIC2c8T2gHS09oCEdAfT8NdZ7ojnV9lChoBkdAYFTE/jbSJGgHTegDaAhHQH2oM7MgU111fZQoaAZHQEAeFFDv3JxoB0ttaAhHQH2zfkRzzVd1fZQoaAZHQD+E8xKxs2xoB0t/aAhHQH3A8HGCI1t1fZQoaAZHQGHKeHrQgLZoB03oA2gIR0B+KRVHWjGldX2UKGgGR0BEo73Gn4wiaAdLlGgIR0B+OIO+ZgG9dX2UKGgGR0BcfO5e7cwhaAdN6ANoCEdAfqD9XcQAdXV9lChoBkdAb3HVlwtJ4GgHTRMBaAhHQH6+iIP9UCJ1fZQoaAZHwFDCr2g3975oB0traAhHQH7JKMefZmJ1fZQoaAZHQF0qSHM2WIJoB03oA2gIR0B/NZD1GsmwdX2UKGgGR0BLyPf8/D+BaAdLaGgIR0B/P7f51vETdX2UKGgGR0BmL2pXIU8FaAdN6ANoCEdAf6p29tdiUnV9lChoBkfALunfMwDeTGgHS19oCEdAf7S8XN1QqXV9lChoBkfAVU0svqTr3WgHS3FoCEdAf8CiblRxcXV9lChoBkdAcXIbSqlxfmgHS65oCEdAf9ME3sHB13V9lChoBkfAKfqeTV2A5WgHS4FoCEdAf9/71ZkkKXV9lChoBkdAaB4ojOcDsGgHTdoBaAhHQIAJOPtD2J11fZQoaAZHQCPS39aUzKtoB0tgaAhHQIAOUuanaWZ1fZQoaAZHwEM6mICU5dZoB0t0aAhHQIAUf114gRt1fZQoaAZHQA7UdBBzFMtoB0uMaAhHQIAbnWBjFyd1fZQoaAZHQED2QPqcEvFoB0tsaAhHQIAhegezUqh1fZQoaAZHwA/F1KXfIjpoB0unaAhHQIAp30Cih391fZQoaAZHwFtdnqmj0thoB0tuaAhHQIAvzhFVktp1fZQoaAZHQEUeO2iL2pRoB0tnaAhHQIA1J5iVjZt1fZQoaAZHQEmri0fHPu5oB0t8aAhHQIA7i2phnap1fZQoaAZHwDitZ0Syt3hoB0uAaAhHQIBCntOVPep1fZQoaAZHwFhdvicXm/5oB0tyaAhHQIBIwEhaC+V1fZQoaAZHwGQpIAGSpzdoB00dAWgIR0CAV65cTrVwdX2UKGgGR8BRIG6ClJpWaAdLdGgIR0CAXYgPmPo3dX2UKGgGR0BiafQ8fV7QaAdN6ANoCEdAgJSNs3yZr3V9lChoBkfAS/Si22G7BmgHS3doCEdAgJsRCIDYAnV9lChoBkdAZK9+85CF9WgHTegDaAhHQIDQtutOmBR1fZQoaAZHwDXVGax5cC5oB0uLaAhHQIDX1pmEoOR1fZQoaAZHwCiyXfIjnmtoB0tKaAhHQIDbdLnLaEl1fZQoaAZHQGYWnh86V+toB03oA2gIR0CBEGreZXuFdX2UKGgGR0BkxohMajveaAdN6ANoCEdA
gURSimEXcnV9lChoBkdAEPslLOAy22gHS5FoCEdAgUwCX6ZYxXV9lChoBkdAYHoPe54GEGgHTegDaAhHQIGBRMvh60J1fZQoaAZHQEP7JqZc9ntoB0uCaAhHQIGH9vMr3Cd1fZQoaAZHQC0PJA+pwS9oB0txaAhHQIGN9senyd51fZQoaAZHQGPsog3cYZVoB03oA2gIR0CBw+h5gPVedX2UKGgGR0BtDgFxGUfQaAdLwmgIR0CBzdNyo4uLdX2UKGgGR0Bed6DsdDIBaAdN6ANoCEdAggJKISDh+HV9lChoBkdAOSMasIVuaWgHS15oCEdAggbx8c+7lXV9lChoBkdAYiFNWU8mr2gHTegDaAhHQII7nAj6eoV1fZQoaAZHQGVrpN0vGqBoB03oA2gIR0CCcPPC2tuDdX2UKGgGR0BjW+fbsWweaAdN6ANoCEdAgqq40EX+EXV9lChoBkdAZqPlCCz1LGgHTegDaAhHQILgvMOf/WF1fZQoaAZHQGgpifYjB2xoB03oA2gIR0CDFuu01IiDdX2UKGgGR0BxVSMGX5WSaAdNhQFoCEdAgyx2+PBBRnVlLg=="
  },
  "ep_success_buffer": {
  ":type:": "<class 'collections.deque'>",
 
  ":type:": "<class 'abc.ABCMeta'>",
  ":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
  "__module__": "stable_baselines3.common.buffers",
  "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
+ "__init__": "<function ReplayBuffer.__init__ at 0x788a1bb3f2e0>",
+ "add": "<function ReplayBuffer.add at 0x788a1bb3f370>",
+ "sample": "<function ReplayBuffer.sample at 0x788a1bb3f400>",
+ "_get_samples": "<function ReplayBuffer._get_samples at 0x788a1bb3f490>",
+ "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x788a1bb3f520>)>",
  "__abstractmethods__": "frozenset()",
+ "_abc_impl": "<_abc._abc_data object at 0x7889f0758e40>"
  },
  "replay_buffer_kwargs": {},
  "train_freq": {
dqn-LunarLander-v2/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c7ba741ae5322af53138d3a4f55f0663c0ac508903de1f5bb19039b4b1c88b5
+ oid sha256:bf4230806be65a290aa57b69ac011a0708e96ce191ee415fadc962bd13d89f9d
  size 558240
dqn-LunarLander-v2/policy.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8c1fefe666115b44f038cd18401e6e75fcd8f68cbce019bd2a4b1e027b07fffd
+ oid sha256:65cc6087098a2d089cd52f69c9809570511d1401bbd8fc7a91ef5e8f4ea3ab47
  size 557362
dqn-LunarLander-v2/system_info.txt CHANGED
@@ -1,6 +1,6 @@
  - OS: Linux-5.15.133+-x86_64-with-glibc2.31 # 1 SMP Tue Dec 19 13:14:11 UTC 2023
  - Python: 3.10.13
- - Stable-Baselines3: 2.3.0a2
+ - Stable-Baselines3: 2.1.0
  - PyTorch: 2.1.2+cpu
  - GPU Enabled: False
  - Numpy: 1.26.4
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e87dadba66a7944471083d46b934faa79d46ec8f65828a81847d3f2cf935b1b4
- size 198019
+ oid sha256:3ff849d502541544eca96ce26d9fee4e170d2054274ef02e9f12e50bcbadee21
+ size 157380
results.json CHANGED
@@ -1 +1 @@
- {"mean_reward": -22.331476100000003, "std_reward": 212.3630177373386, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-03-04T05:27:51.729317"}
+ {"mean_reward": 144.49050929999999, "std_reward": 16.122904644961487, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-03-04T06:59:27.458864"}
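
results.json holds the evaluation behind the mean_reward value in the README: 10 deterministic episodes, giving 144.49 +/- 16.12 after this commit. A minimal sketch of how such figures are typically produced with stable-baselines3's evaluate_policy (the exact script used to generate results.json is not part of this diff):

```python
# Sketch: compute a mean/std reward like the one recorded in results.json.
# n_eval_episodes=10 and deterministic=True match the fields shown above;
# the actual evaluation script is not included in this commit.
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

model = DQN.load("dqn-LunarLander-v2.zip")
eval_env = gym.make("LunarLander-v2")

mean_reward, std_reward = evaluate_policy(
    model, eval_env, n_eval_episodes=10, deterministic=True
)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
```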
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:46336f0f1d38e22725c1bc9028c41f3825089c99fcefb173d8147d32488ac342
- size 6493
+ oid sha256:57ecbdd22f3478d66d8984a789392627b7629bffdb979b149e52bd0c61f2ef38
+ size 7889