Push to Hub
Browse files- README.md +1 -1
- config.json +1 -1
- dqn-LunarLander-v2.zip +2 -2
- dqn-LunarLander-v2/data +14 -14
- replay.mp4 +2 -2
- results.json +1 -1
README.md
CHANGED
@@ -16,7 +16,7 @@ model-index:
|
|
16 |
type: LunarLander-v2
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
-
value:
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
|
|
16 |
type: LunarLander-v2
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
+
value: 73.77 +/- 85.42
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmRxbi5wb2xpY2llc5SMCURRTlBvbGljeZSTlC4=", "__module__": "stable_baselines3.dqn.policies", "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}", "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ", "__init__": "<function DQNPolicy.__init__ at 0x7c9c9f40a0e0>", "_build": "<function DQNPolicy._build at 0x7c9c9f40a170>", "make_q_net": "<function DQNPolicy.make_q_net at 0x7c9c9f40a200>", "forward": "<function DQNPolicy.forward at 0x7c9c9f40a290>", "_predict": "<function DQNPolicy._predict at 0x7c9c9f40a320>", "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x7c9c9f40a3b0>", "set_training_mode": "<function DQNPolicy.set_training_mode at 0x7c9c9f40a440>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7c9c9f415b40>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 100000, "_total_timesteps": 100000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1689233132123819670, "learning_rate": 0.0001, "tensorboard_log": null, "_last_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAOY44L1XPw08DfINvtVJdr61cs68bpCUvAAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdAAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYBAAAAAAAAAAGUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwGFlIwBQ5R0lFKULg=="}, "_last_original_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAFpd3b3X52U8SgEMvvS6i74UBce8prlovAAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_episode_num": 643, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": 0.0, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVHAwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwGCvuA7PppyMAWyUS1SMAXSUR0A9OOCoS+QEdX2UKGgGR8CBAt+nZTQ3aAdLTWgIR0A9UQfp2U0OdX2UKGgGR8CGP3NBWxQjaAdLV2gIR0A9b2a2F36idX2UKGgGR8CKgA3mV7hOaAdLWmgIR0A9ktrbg0j1dX2UKGgGR8CINZdMTN+taAdLW2gIR0A9tN7SiM5wdX2UKGgGR8B72EqPOpsHaAdLTGgIR0A90N2ki2UjdX2UKGgGR8CCraVgQYk3aAdLR2gIR0A96yuZCv5hdX2UKGgGR8CFzNwYLsrvaAdLUGgIR0A+CN21UlzEdX2UKGgGR8B6BcrsjVx0aAdLYWgIR0A+L0SAYpDvdX2UKGgGR8CBLbpFCswMaAdLUGgIR0A+THerMkhSdX2UKGgGR8B8EN3pwCKaaAdLUWgIR0A+diCrcTJydX2UKGgGR8CC7RBInSfEaAdLUGgIR0A+n2RaHKwIdX2UKGgGR8CAlVK5kK/maAdLVWgIR0A+yErXlKbsdX2UKGgGR8B8ydFtsN2DaAdLUWgIR0A+9gR9PUKBdX2UKGgGR8CA2HuYx+KCaAdLh2gIR0A/OtfXwsoVdX2UKGgGR8CBnX/PPcBVaAdLXGgIR0A/Z/3nIQvpdX2UKGgGR8CIBgZeAuqWaAdLiGgIR0A/rKEnLJS0dX2UKGgGR8B8X60QbuMNaAdLTWgIR0A/02pyZKFqdX2UKGgGR8CAuffAsTWYaAdLZ2gIR0BAA7ngYP5IdX2UKGgGR8CGQ3yvLX+VaAdLcGgIR0BAIKWTot+TdX2UKGgGR8CAz35B1LamaAdLUmgIR0BAN8iwB5oodX2UKGgGR8Cgjv2xhUiqaAdNEwFoCEdAQISpBHCoCXV9lChoBkfAhQtT5O8CgmgHS29oCEdAQJlcKPXCj3V9lChoBkfAiIsDslb/wWgHS3xoCEdAQLIacZtNz3V9lChoBkfAhmPNPHktE2gHS3xoCEdAQMiUcGTs6nV9lChoBkfAfUbV0cOsk2gHS01oCEdAQNZf+jua4XV9lChoBkfAeml5vLowEmgHS09oCEdAQOYwj+rEL3V9lChoBkfAgOWft6X0G2gHS1loCEdAQPd7OVxCIHV9lChoBkfAdXimzSkTH2gHS25oCEdAQQ29alk6LnV9lChoBkfAiEnRbr1M/WgHS4xoCEdAQSfjOs1baHV9lChoBkfAgcHxLCemN2gHS01oCEdAQTh0GNaQm3V9lChoBkfAggUy13MY/GgHS1VoCEdAQUeKyfL9uXV9lChoBkfAgeU+bExZdWgHS2ZoCEdAQVlcjZ+QVHV9lChoBkfAh9BI0qH45GgHS2loCEdAQWwsunMt9XV9lChoBkfAdmOPqLS/kGgHS1JoCEdAQX18eCCjDnV9lChoBkfAk3l/ysjmjmgHS91oCEdAQavBvaURnXV9lChoBkfAgtGAQQL/j2gHS3VoCEdAQcL6YVqN63V9lChoBkfAhzkTshPj42gHS5doCEdAQd+CROk+HXV9lChoBkfAgaQDw6QvH2gHS7toCEdAQgU56t1ZDHV9lChoBkfAiUm8EvCdjGgHTWsBaAhHQEJe3c580DV1fZQoaAZHwHdNcNx2jfxoB02eAWgIR0BCyoLG7z06dX2UKGgGR8BmxdZxJd0JaAdLxWgIR0BC8bBfrrxBdX2UKGgGR8A9U85CF9KFaAdN6ANoCEdARBeO4oZydXV9lChoBkfAS5mSKWLP2WgHTegDaAhHQEWe4yXUpd91fZQoaAZHwFYEpztCzC1oB03oA2gIR0BHQ90A93bFdX2UKGgGR8BUODpHI6sAaAdN6ANoCEdASLOiDdxhlXV9lChoBkfARuvLcKw6hmgHTegDaAhHQEpZTF2mpER1fZQoaAZHwFkm19v0h/1oB03oA2gIR0BMCys8xKxtdX2UKGgGR8BJwtgBtDUmaAdN6ANoCEdATZa+zt1IRXV9lChoBkfAVUVSaVlf7mgHTegDaAhHQE7avduYQat1fZQoaAZHwGdiH003wTdoB03oA2gIR0BQD9As052hdX2UKGgGR8BkWWeYlY2baAdN6ANoCEdAUOZNIsiB5HV9lChoBkfAacP/G2kSEmgHTegDaAhHQFHRM3ZPEbZ1fZQoaAZHwGgtr7wazeJoB03oA2gIR0BSnBDb8FY/dX2UKGgGR8Bqb2D8LrooaAdN6ANoCEdAU1olWwNb1XV9lChoBkfAZEx2Qnx8UmgHTegDaAhHQFQGE+xGDth1fZQoaAZHwGAphSk0rLBoB03oA2gIR0BU6DTz/ZM+dX2UKGgGR8BlXDLQokRjaAdN6ANoCEdAVY+/dqL0jHV9lChoBkfAUdu3KB/ZumgHTegDaAhHQFY259E1EVp1fZQoaAZHwE94EBbOeJ5oB03oA2gIR0BXBFX3g1m8dX2UKGgGR8BPYDst03fiaAdN6ANoCEdAV9AGA08/2XV9lChoBkdAa6JGecx0uGgHTZgCaAhHQFhGIWgvlEJ1fZQoaAZHwFQ5CPZIxxloB035AmgIR0BYxxp5/smfdX2UKGgGR0BdM/yLAHmjaAdNLgNoCEdAWVLiGWUr1HV9lChoBkdAVzWUHIIWxmgHTacDaAhHQFnVwK0D2al1fZQoaAZHQCpjiCJ40MxoB03oA2gIR0BanSvLX+VDdX2UKGgGR0BNXhzmwJPZaAdNFQFoCEdAWsKv6j323HV9lChoBkfAXpeaYu01ImgHTV8DaAhHQFuDfEGZ/kN1fZQoaAZHwDlWDcuanaZoB03gAWgIR0BbxCmdiDujdX2UKGgGR8BIxm/WUbDNaAdNGgJoCEdAXAM7zTWoWHV9lChoBkdAbY2jmjj7ymgHTa8BaAhHQFw3A5q/M4d1fZQoaAZHQGdkW0AtFrloB01pAmgIR0BclYsyzolldX2UKGgGR0Bp2k1Gb1AaaAdNjQJoCEdAXPauxKQJX3V9lChoBkdAaXf0JWvKU2gHTU0CaAhHQF1BKMefZmJ1fZQoaAZHwEuAQRPGhmJoB03oA2gIR0Bd/BAbADaHdX2UKGgGR0BX0Yxk/bCaaAdN1gNoCEdAXrkSeyzHCHV9lChoBkdAZaFQUpNKy2gHTZACaAhHQF8cjbBXS0B1fZQoaAZHwEDuquKXOW1oB01LAmgIR0Bfcc+JP69CdX2UKGgGR0Bv+O6ClJpWaAdN6gFoCEdAX6yBRQ79ynV9lChoBkdAZbiqebutwWgHTeECaAhHQGALZqVQhwF1fZQoaAZHQFyzggow22poB03mA2gIR0BgaxYgaFVUdX2UKGgGR8BkEM2R7qptaAdNZQNoCEdAYMtbt7a7E3V9lChoBkdAWmU7IT4+KWgHTegDaAhHQGEjwu/UONJ1fZQoaAZHQGhN3AuZkTZoB01aAmgIR0BhTQIIF/x2dX2UKGgGR8BG5nc1wYLtaAdNOAJoCEdAYXWggX/HYHV9lChoBkdAbCjERaouPGgHTZACaAhHQGGjZ8a4tpV1fZQoaAZHQGQm75Ec81ZoB02GAmgIR0Bh2MrmQr+YdX2UKGgGR0BLzm4I8hcJaAdL6GgIR0Bh5SHKwIMSdX2UKGgGR8A8SF7D2rXEaAdNGQJoCEdAYgqQiA2AG3V9lChoBkdAa31EJjUd72gHTRUCaAhHQGI4K814xDd1fZQoaAZHQGM4qOtGNJhoB01TAmgIR0BiZ2gpSaVldX2UKGgGR0AzLk9U0elsaAdN6ANoCEdAYsSKWszVMHV9lChoBkfASTHkBCD28WgHTW4CaAhHQGLxlMqSX+l1fZQoaAZHQBcAvUSZjQRoB02jAWgIR0BjCh15jYqYdX2UKGgGR0BraHJq7AclaAdNgwJoCEdAYz+fI0ZWJnV9lChoBke/1Z2ll9SdfGgHTaoBaAhHQGNbvNmlImR1fZQoaAZHQGaHeJ53TuxoB02GAmgIR0BjipfShJyydX2UKGgGR0BA2z06HTJAaAdNTAFoCEdAY6LPWxyGSXV9lChoBkdAZmRrl/6O52gHTaACaAhHQGPiSrYGt6p1fZQoaAZHwFCi8WbgCOpoB03xAWgIR0BkAg60Y0l7dWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 12500, "buffer_size": 1000000, "batch_size": 32, "learning_starts": 50000, "tau": 1.0, "gamma": 0.99, "gradient_steps": 1, "optimize_memory_usage": false, "replay_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==", "__module__": "stable_baselines3.common.buffers", "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ", "__init__": "<function ReplayBuffer.__init__ at 0x7c9c9f3ea560>", "add": "<function ReplayBuffer.add at 0x7c9c9f3ea5f0>", "sample": "<function ReplayBuffer.sample at 0x7c9c9f3ea680>", "_get_samples": "<function ReplayBuffer._get_samples at 0x7c9c9f3ea710>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7c9c9f3e7700>"}, "replay_buffer_kwargs": {}, "train_freq": {":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>", ":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLBGgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"}, "use_sde_at_warmup": false, "exploration_initial_eps": 1.0, "exploration_final_eps": 0.1, "exploration_fraction": 0.1, "target_update_interval": 250, "_n_calls": 100000, "max_grad_norm": 10, "exploration_rate": 0.1, "observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVdgIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAABAQEBAQEBAZRoCIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksIhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoESiWCAAAAAAAAAABAQEBAQEBAZRoFUsIhZRoGXSUUpSMBl9zaGFwZZRLCIWUjANsb3eUaBEoliAAAAAAAAAAAAC0wgAAtMIAAKDAAACgwNsPScAAAKDAAAAAgAAAAICUaAtLCIWUaBl0lFKUjARoaWdolGgRKJYgAAAAAAAAAAAAtEIAALRCAACgQAAAoEDbD0lAAACgQAAAgD8AAIA/lGgLSwiFlGgZdJRSlIwIbG93X3JlcHKUjFtbLTkwLiAgICAgICAgLTkwLiAgICAgICAgIC01LiAgICAgICAgIC01LiAgICAgICAgIC0zLjE0MTU5MjcgIC01LgogIC0wLiAgICAgICAgIC0wLiAgICAgICBdlIwJaGlnaF9yZXBylIxTWzkwLiAgICAgICAgOTAuICAgICAgICAgNS4gICAgICAgICA1LiAgICAgICAgIDMuMTQxNTkyNyAgNS4KICAxLiAgICAgICAgIDEuICAgICAgIF2UjApfbnBfcmFuZG9tlE51Yi4=", "dtype": "float32", "bounded_below": "[ True True True True True True True True]", "bounded_above": "[ True True True True True True True True]", "_shape": [8], "low": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "low_repr": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high_repr": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWVrAEAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIBAAAAAAAAACUhpRSlIwFc3RhcnSUaAhoDkMIAAAAAAAAAACUhpRSlIwGX3NoYXBllCmMBWR0eXBllGgLjAJpOJSJiIeUUpQoSwNoD05OTkr/////Sv////9LAHSUYowKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwQX19nZW5lcmF0b3JfY3RvcpSTlIwFUENHNjSUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpSMBVBDRzY0lIwFc3RhdGWUfZQoaCiKEZRswTyXRwFg/Yo+I4K4zOcAjANpbmOUihFhFx5GbbzJx730zXeOKlXuAHWMCmhhc191aW50MzKUSwGMCHVpbnRlZ2VylIoFj4r/5gB1YnViLg==", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": "Generator(PCG64)"}, "n_envs": 1, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuDQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8aNuLrHEMthZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"}, "batch_norm_stats": [], "batch_norm_stats_target": [], "exploration_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVZQMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLBEsTQyhkAXwAGACIAWsEcgiIAFMAiAJkAXwAGACIAIgCGAAUAIgBGwAXAFMAlE5LAYaUKYwScHJvZ3Jlc3NfcmVtYWluaW5nlIWUjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLcUMGDAEEARgClIwDZW5klIwMZW5kX2ZyYWN0aW9ulIwFc3RhcnSUh5QpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxJL3Vzci9sb2NhbC9saWIvcHl0aG9uMy4xMC9kaXN0LXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlGgdKVKUaB0pUpSHlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoI32UfZQoaBhoDYwMX19xdWFsbmFtZV9flIwbZ2V0X2xpbmVhcl9mbi48bG9jYWxzPi5mdW5jlIwPX19hbm5vdGF0aW9uc19flH2UKGgKjAhidWlsdGluc5SMBWZsb2F0lJOUjAZyZXR1cm6UaC91jA5fX2t3ZGVmYXVsdHNfX5ROjAxfX2RlZmF1bHRzX1+UTowKX19tb2R1bGVfX5RoGYwHX19kb2NfX5ROjAtfX2Nsb3N1cmVfX5RoAIwKX21ha2VfY2VsbJSTlEc/uZmZmZmZmoWUUpRoN0c/uZmZmZmZmoWUUpRoN0c/8AAAAAAAAIWUUpSHlIwXX2Nsb3VkcGlja2xlX3N1Ym1vZHVsZXOUXZSMC19fZ2xvYmFsc19flH2UdYaUhlIwLg=="}, "system_info": {"OS": "Linux-5.15.109+-x86_64-with-glibc2.31 # 1 SMP Fri Jun 9 10:57:30 UTC 2023", "Python": "3.10.12", "Stable-Baselines3": "2.0.0", "PyTorch": "2.0.1+cu118", "GPU Enabled": "True", "Numpy": "1.22.4", "Cloudpickle": "2.2.1", "Gymnasium": "0.28.1", "OpenAI Gym": "0.25.2"}}
|
|
|
1 |
+
{"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmRxbi5wb2xpY2llc5SMCURRTlBvbGljeZSTlC4=", "__module__": "stable_baselines3.dqn.policies", "__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}", "__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ", "__init__": "<function DQNPolicy.__init__ at 0x79b04580df30>", "_build": "<function DQNPolicy._build at 0x79b04580dfc0>", "make_q_net": "<function DQNPolicy.make_q_net at 0x79b04580e050>", "forward": "<function DQNPolicy.forward at 0x79b04580e0e0>", "_predict": "<function DQNPolicy._predict at 0x79b04580e170>", "_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x79b04580e200>", "set_training_mode": "<function DQNPolicy.set_training_mode at 0x79b04580e290>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x79b0458193c0>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 100000, "_total_timesteps": 100000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1689233132123819670, "learning_rate": 0.0001, "tensorboard_log": null, "_last_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAOY44L1XPw08DfINvtVJdr61cs68bpCUvAAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdAAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYBAAAAAAAAAAGUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwGFlIwBQ5R0lFKULg=="}, "_last_original_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAFpd3b3X52U8SgEMvvS6i74UBce8prlovAAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_episode_num": 643, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": 0.0, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVHAwAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwGCvuA7PppyMAWyUS1SMAXSUR0A9OOCoS+QEdX2UKGgGR8CBAt+nZTQ3aAdLTWgIR0A9UQfp2U0OdX2UKGgGR8CGP3NBWxQjaAdLV2gIR0A9b2a2F36idX2UKGgGR8CKgA3mV7hOaAdLWmgIR0A9ktrbg0j1dX2UKGgGR8CINZdMTN+taAdLW2gIR0A9tN7SiM5wdX2UKGgGR8B72EqPOpsHaAdLTGgIR0A90N2ki2UjdX2UKGgGR8CCraVgQYk3aAdLR2gIR0A96yuZCv5hdX2UKGgGR8CFzNwYLsrvaAdLUGgIR0A+CN21UlzEdX2UKGgGR8B6BcrsjVx0aAdLYWgIR0A+L0SAYpDvdX2UKGgGR8CBLbpFCswMaAdLUGgIR0A+THerMkhSdX2UKGgGR8B8EN3pwCKaaAdLUWgIR0A+diCrcTJydX2UKGgGR8CC7RBInSfEaAdLUGgIR0A+n2RaHKwIdX2UKGgGR8CAlVK5kK/maAdLVWgIR0A+yErXlKbsdX2UKGgGR8B8ydFtsN2DaAdLUWgIR0A+9gR9PUKBdX2UKGgGR8CA2HuYx+KCaAdLh2gIR0A/OtfXwsoVdX2UKGgGR8CBnX/PPcBVaAdLXGgIR0A/Z/3nIQvpdX2UKGgGR8CIBgZeAuqWaAdLiGgIR0A/rKEnLJS0dX2UKGgGR8B8X60QbuMNaAdLTWgIR0A/02pyZKFqdX2UKGgGR8CAuffAsTWYaAdLZ2gIR0BAA7ngYP5IdX2UKGgGR8CGQ3yvLX+VaAdLcGgIR0BAIKWTot+TdX2UKGgGR8CAz35B1LamaAdLUmgIR0BAN8iwB5oodX2UKGgGR8Cgjv2xhUiqaAdNEwFoCEdAQISpBHCoCXV9lChoBkfAhQtT5O8CgmgHS29oCEdAQJlcKPXCj3V9lChoBkfAiIsDslb/wWgHS3xoCEdAQLIacZtNz3V9lChoBkfAhmPNPHktE2gHS3xoCEdAQMiUcGTs6nV9lChoBkfAfUbV0cOsk2gHS01oCEdAQNZf+jua4XV9lChoBkfAeml5vLowEmgHS09oCEdAQOYwj+rEL3V9lChoBkfAgOWft6X0G2gHS1loCEdAQPd7OVxCIHV9lChoBkfAdXimzSkTH2gHS25oCEdAQQ29alk6LnV9lChoBkfAiEnRbr1M/WgHS4xoCEdAQSfjOs1baHV9lChoBkfAgcHxLCemN2gHS01oCEdAQTh0GNaQm3V9lChoBkfAggUy13MY/GgHS1VoCEdAQUeKyfL9uXV9lChoBkfAgeU+bExZdWgHS2ZoCEdAQVlcjZ+QVHV9lChoBkfAh9BI0qH45GgHS2loCEdAQWwsunMt9XV9lChoBkfAdmOPqLS/kGgHS1JoCEdAQX18eCCjDnV9lChoBkfAk3l/ysjmjmgHS91oCEdAQavBvaURnXV9lChoBkfAgtGAQQL/j2gHS3VoCEdAQcL6YVqN63V9lChoBkfAhzkTshPj42gHS5doCEdAQd+CROk+HXV9lChoBkfAgaQDw6QvH2gHS7toCEdAQgU56t1ZDHV9lChoBkfAiUm8EvCdjGgHTWsBaAhHQEJe3c580DV1fZQoaAZHwHdNcNx2jfxoB02eAWgIR0BCyoLG7z06dX2UKGgGR8BmxdZxJd0JaAdLxWgIR0BC8bBfrrxBdX2UKGgGR8A9U85CF9KFaAdN6ANoCEdARBeO4oZydXV9lChoBkfAS5mSKWLP2WgHTegDaAhHQEWe4yXUpd91fZQoaAZHwFYEpztCzC1oB03oA2gIR0BHQ90A93bFdX2UKGgGR8BUODpHI6sAaAdN6ANoCEdASLOiDdxhlXV9lChoBkfARuvLcKw6hmgHTegDaAhHQEpZTF2mpER1fZQoaAZHwFkm19v0h/1oB03oA2gIR0BMCys8xKxtdX2UKGgGR8BJwtgBtDUmaAdN6ANoCEdATZa+zt1IRXV9lChoBkfAVUVSaVlf7mgHTegDaAhHQE7avduYQat1fZQoaAZHwGdiH003wTdoB03oA2gIR0BQD9As052hdX2UKGgGR8BkWWeYlY2baAdN6ANoCEdAUOZNIsiB5HV9lChoBkfAacP/G2kSEmgHTegDaAhHQFHRM3ZPEbZ1fZQoaAZHwGgtr7wazeJoB03oA2gIR0BSnBDb8FY/dX2UKGgGR8Bqb2D8LrooaAdN6ANoCEdAU1olWwNb1XV9lChoBkfAZEx2Qnx8UmgHTegDaAhHQFQGE+xGDth1fZQoaAZHwGAphSk0rLBoB03oA2gIR0BU6DTz/ZM+dX2UKGgGR8BlXDLQokRjaAdN6ANoCEdAVY+/dqL0jHV9lChoBkfAUdu3KB/ZumgHTegDaAhHQFY259E1EVp1fZQoaAZHwE94EBbOeJ5oB03oA2gIR0BXBFX3g1m8dX2UKGgGR8BPYDst03fiaAdN6ANoCEdAV9AGA08/2XV9lChoBkdAa6JGecx0uGgHTZgCaAhHQFhGIWgvlEJ1fZQoaAZHwFQ5CPZIxxloB035AmgIR0BYxxp5/smfdX2UKGgGR0BdM/yLAHmjaAdNLgNoCEdAWVLiGWUr1HV9lChoBkdAVzWUHIIWxmgHTacDaAhHQFnVwK0D2al1fZQoaAZHQCpjiCJ40MxoB03oA2gIR0BanSvLX+VDdX2UKGgGR0BNXhzmwJPZaAdNFQFoCEdAWsKv6j323HV9lChoBkfAXpeaYu01ImgHTV8DaAhHQFuDfEGZ/kN1fZQoaAZHwDlWDcuanaZoB03gAWgIR0BbxCmdiDujdX2UKGgGR8BIxm/WUbDNaAdNGgJoCEdAXAM7zTWoWHV9lChoBkdAbY2jmjj7ymgHTa8BaAhHQFw3A5q/M4d1fZQoaAZHQGdkW0AtFrloB01pAmgIR0BclYsyzolldX2UKGgGR0Bp2k1Gb1AaaAdNjQJoCEdAXPauxKQJX3V9lChoBkdAaXf0JWvKU2gHTU0CaAhHQF1BKMefZmJ1fZQoaAZHwEuAQRPGhmJoB03oA2gIR0Bd/BAbADaHdX2UKGgGR0BX0Yxk/bCaaAdN1gNoCEdAXrkSeyzHCHV9lChoBkdAZaFQUpNKy2gHTZACaAhHQF8cjbBXS0B1fZQoaAZHwEDuquKXOW1oB01LAmgIR0Bfcc+JP69CdX2UKGgGR0Bv+O6ClJpWaAdN6gFoCEdAX6yBRQ79ynV9lChoBkdAZbiqebutwWgHTeECaAhHQGALZqVQhwF1fZQoaAZHQFyzggow22poB03mA2gIR0BgaxYgaFVUdX2UKGgGR8BkEM2R7qptaAdNZQNoCEdAYMtbt7a7E3V9lChoBkdAWmU7IT4+KWgHTegDaAhHQGEjwu/UONJ1fZQoaAZHQGhN3AuZkTZoB01aAmgIR0BhTQIIF/x2dX2UKGgGR8BG5nc1wYLtaAdNOAJoCEdAYXWggX/HYHV9lChoBkdAbCjERaouPGgHTZACaAhHQGGjZ8a4tpV1fZQoaAZHQGQm75Ec81ZoB02GAmgIR0Bh2MrmQr+YdX2UKGgGR0BLzm4I8hcJaAdL6GgIR0Bh5SHKwIMSdX2UKGgGR8A8SF7D2rXEaAdNGQJoCEdAYgqQiA2AG3V9lChoBkdAa31EJjUd72gHTRUCaAhHQGI4K814xDd1fZQoaAZHQGM4qOtGNJhoB01TAmgIR0BiZ2gpSaVldX2UKGgGR0AzLk9U0elsaAdN6ANoCEdAYsSKWszVMHV9lChoBkfASTHkBCD28WgHTW4CaAhHQGLxlMqSX+l1fZQoaAZHQBcAvUSZjQRoB02jAWgIR0BjCh15jYqYdX2UKGgGR0BraHJq7AclaAdNgwJoCEdAYz+fI0ZWJnV9lChoBke/1Z2ll9SdfGgHTaoBaAhHQGNbvNmlImR1fZQoaAZHQGaHeJ53TuxoB02GAmgIR0BjipfShJyydX2UKGgGR0BA2z06HTJAaAdNTAFoCEdAY6LPWxyGSXV9lChoBkdAZmRrl/6O52gHTaACaAhHQGPiSrYGt6p1fZQoaAZHwFCi8WbgCOpoB03xAWgIR0BkAg60Y0l7dWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 12500, "buffer_size": 1000000, "batch_size": 32, "learning_starts": 50000, "tau": 1.0, "gamma": 0.99, "gradient_steps": 1, "optimize_memory_usage": false, "replay_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==", "__module__": "stable_baselines3.common.buffers", "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ", "__init__": "<function ReplayBuffer.__init__ at 0x79b0457e23b0>", "add": "<function ReplayBuffer.add at 0x79b0457e2440>", "sample": "<function ReplayBuffer.sample at 0x79b0457e24d0>", "_get_samples": "<function ReplayBuffer._get_samples at 0x79b0457e2560>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x79b0457e6f80>"}, "replay_buffer_kwargs": {}, "train_freq": {":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>", ":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLBGgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"}, "use_sde_at_warmup": false, "exploration_initial_eps": 1.0, "exploration_final_eps": 0.1, "exploration_fraction": 0.1, "target_update_interval": 250, "_n_calls": 100000, "max_grad_norm": 10, "exploration_rate": 0.1, "observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVdgIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAABAQEBAQEBAZRoCIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksIhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoESiWCAAAAAAAAAABAQEBAQEBAZRoFUsIhZRoGXSUUpSMBl9zaGFwZZRLCIWUjANsb3eUaBEoliAAAAAAAAAAAAC0wgAAtMIAAKDAAACgwNsPScAAAKDAAAAAgAAAAICUaAtLCIWUaBl0lFKUjARoaWdolGgRKJYgAAAAAAAAAAAAtEIAALRCAACgQAAAoEDbD0lAAACgQAAAgD8AAIA/lGgLSwiFlGgZdJRSlIwIbG93X3JlcHKUjFtbLTkwLiAgICAgICAgLTkwLiAgICAgICAgIC01LiAgICAgICAgIC01LiAgICAgICAgIC0zLjE0MTU5MjcgIC01LgogIC0wLiAgICAgICAgIC0wLiAgICAgICBdlIwJaGlnaF9yZXBylIxTWzkwLiAgICAgICAgOTAuICAgICAgICAgNS4gICAgICAgICA1LiAgICAgICAgIDMuMTQxNTkyNyAgNS4KICAxLiAgICAgICAgIDEuICAgICAgIF2UjApfbnBfcmFuZG9tlE51Yi4=", "dtype": "float32", "bounded_below": "[ True True True True True True True True]", "bounded_above": "[ True True True True True True True True]", "_shape": [8], "low": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "low_repr": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high_repr": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWVqQEAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIBAAAAAAAAACUhpRSlIwFc3RhcnSUaAhoDkMIAAAAAAAAAACUhpRSlIwGX3NoYXBllCmMBWR0eXBllGgLjAJpOJSJiIeUUpQoSwNoD05OTkr/////Sv////9LAHSUYowKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwQX19nZW5lcmF0b3JfY3RvcpSTlIwFUENHNjSUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpSMBVBDRzY0lIwFc3RhdGWUfZQoaCiKEK6dJzVn7nkZMx+gnvcFvFaMA2luY5SKEWEXHkZtvMnHvfTNd44qVe4AdYwKaGFzX3VpbnQzMpRLAIwIdWludGVnZXKUSn7qFDx1YnViLg==", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": "Generator(PCG64)"}, "n_envs": 1, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuDQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8aNuLrHEMthZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"}, "batch_norm_stats": [], "batch_norm_stats_target": [], "exploration_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVZQMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLBEsTQyhkAXwAGACIAWsEcgiIAFMAiAJkAXwAGACIAIgCGAAUAIgBGwAXAFMAlE5LAYaUKYwScHJvZ3Jlc3NfcmVtYWluaW5nlIWUjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLcUMGDAEEARgClIwDZW5klIwMZW5kX2ZyYWN0aW9ulIwFc3RhcnSUh5QpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxJL3Vzci9sb2NhbC9saWIvcHl0aG9uMy4xMC9kaXN0LXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlGgdKVKUaB0pUpSHlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoI32UfZQoaBhoDYwMX19xdWFsbmFtZV9flIwbZ2V0X2xpbmVhcl9mbi48bG9jYWxzPi5mdW5jlIwPX19hbm5vdGF0aW9uc19flH2UKGgKjAhidWlsdGluc5SMBWZsb2F0lJOUjAZyZXR1cm6UaC91jA5fX2t3ZGVmYXVsdHNfX5ROjAxfX2RlZmF1bHRzX1+UTowKX19tb2R1bGVfX5RoGYwHX19kb2NfX5ROjAtfX2Nsb3N1cmVfX5RoAIwKX21ha2VfY2VsbJSTlEc/uZmZmZmZmoWUUpRoN0c/uZmZmZmZmoWUUpRoN0c/8AAAAAAAAIWUUpSHlIwXX2Nsb3VkcGlja2xlX3N1Ym1vZHVsZXOUXZSMC19fZ2xvYmFsc19flH2UdYaUhlIwLg=="}, "system_info": {"OS": "Linux-5.15.109+-x86_64-with-glibc2.31 # 1 SMP Fri Jun 9 10:57:30 UTC 2023", "Python": "3.10.12", "Stable-Baselines3": "2.0.0", "PyTorch": "2.0.1+cu118", "GPU Enabled": "True", "Numpy": "1.22.4", "Cloudpickle": "2.2.1", "Gymnasium": "0.28.1", "OpenAI Gym": "0.25.2"}}
|
dqn-LunarLander-v2.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92abeaaf6ce5d80e151fcd5bcd13d3018f916817a9acf50d51ef30d5ac9331dd
|
3 |
+
size 105421
|
dqn-LunarLander-v2/data
CHANGED
@@ -5,15 +5,15 @@
|
|
5 |
"__module__": "stable_baselines3.dqn.policies",
|
6 |
"__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
|
7 |
"__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
8 |
-
"__init__": "<function DQNPolicy.__init__ at
|
9 |
-
"_build": "<function DQNPolicy._build at
|
10 |
-
"make_q_net": "<function DQNPolicy.make_q_net at
|
11 |
-
"forward": "<function DQNPolicy.forward at
|
12 |
-
"_predict": "<function DQNPolicy._predict at
|
13 |
-
"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at
|
14 |
-
"set_training_mode": "<function DQNPolicy.set_training_mode at
|
15 |
"__abstractmethods__": "frozenset()",
|
16 |
-
"_abc_impl": "<_abc._abc_data object at
|
17 |
},
|
18 |
"verbose": 1,
|
19 |
"policy_kwargs": {},
|
@@ -63,12 +63,12 @@
|
|
63 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
64 |
"__module__": "stable_baselines3.common.buffers",
|
65 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
66 |
-
"__init__": "<function ReplayBuffer.__init__ at
|
67 |
-
"add": "<function ReplayBuffer.add at
|
68 |
-
"sample": "<function ReplayBuffer.sample at
|
69 |
-
"_get_samples": "<function ReplayBuffer._get_samples at
|
70 |
"__abstractmethods__": "frozenset()",
|
71 |
-
"_abc_impl": "<_abc._abc_data object at
|
72 |
},
|
73 |
"replay_buffer_kwargs": {},
|
74 |
"train_freq": {
|
@@ -100,7 +100,7 @@
|
|
100 |
},
|
101 |
"action_space": {
|
102 |
":type:": "<class 'gymnasium.spaces.discrete.Discrete'>",
|
103 |
-
":serialized:": "
|
104 |
"n": "4",
|
105 |
"start": "0",
|
106 |
"_shape": [],
|
|
|
5 |
"__module__": "stable_baselines3.dqn.policies",
|
6 |
"__annotations__": "{'q_net': <class 'stable_baselines3.dqn.policies.QNetwork'>, 'q_net_target': <class 'stable_baselines3.dqn.policies.QNetwork'>}",
|
7 |
"__doc__": "\n Policy class with Q-Value Net and target net for DQN\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
8 |
+
"__init__": "<function DQNPolicy.__init__ at 0x79b04580df30>",
|
9 |
+
"_build": "<function DQNPolicy._build at 0x79b04580dfc0>",
|
10 |
+
"make_q_net": "<function DQNPolicy.make_q_net at 0x79b04580e050>",
|
11 |
+
"forward": "<function DQNPolicy.forward at 0x79b04580e0e0>",
|
12 |
+
"_predict": "<function DQNPolicy._predict at 0x79b04580e170>",
|
13 |
+
"_get_constructor_parameters": "<function DQNPolicy._get_constructor_parameters at 0x79b04580e200>",
|
14 |
+
"set_training_mode": "<function DQNPolicy.set_training_mode at 0x79b04580e290>",
|
15 |
"__abstractmethods__": "frozenset()",
|
16 |
+
"_abc_impl": "<_abc._abc_data object at 0x79b0458193c0>"
|
17 |
},
|
18 |
"verbose": 1,
|
19 |
"policy_kwargs": {},
|
|
|
63 |
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
|
64 |
"__module__": "stable_baselines3.common.buffers",
|
65 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
66 |
+
"__init__": "<function ReplayBuffer.__init__ at 0x79b0457e23b0>",
|
67 |
+
"add": "<function ReplayBuffer.add at 0x79b0457e2440>",
|
68 |
+
"sample": "<function ReplayBuffer.sample at 0x79b0457e24d0>",
|
69 |
+
"_get_samples": "<function ReplayBuffer._get_samples at 0x79b0457e2560>",
|
70 |
"__abstractmethods__": "frozenset()",
|
71 |
+
"_abc_impl": "<_abc._abc_data object at 0x79b0457e6f80>"
|
72 |
},
|
73 |
"replay_buffer_kwargs": {},
|
74 |
"train_freq": {
|
|
|
100 |
},
|
101 |
"action_space": {
|
102 |
":type:": "<class 'gymnasium.spaces.discrete.Discrete'>",
|
103 |
+
":serialized:": "gAWVqQEAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIBAAAAAAAAACUhpRSlIwFc3RhcnSUaAhoDkMIAAAAAAAAAACUhpRSlIwGX3NoYXBllCmMBWR0eXBllGgLjAJpOJSJiIeUUpQoSwNoD05OTkr/////Sv////9LAHSUYowKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwQX19nZW5lcmF0b3JfY3RvcpSTlIwFUENHNjSUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpSMBVBDRzY0lIwFc3RhdGWUfZQoaCiKEK6dJzVn7nkZMx+gnvcFvFaMA2luY5SKEWEXHkZtvMnHvfTNd44qVe4AdYwKaGFzX3VpbnQzMpRLAIwIdWludGVnZXKUSn7qFDx1YnViLg==",
|
104 |
"n": "4",
|
105 |
"start": "0",
|
106 |
"_shape": [],
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:557e8397900d0c3b4ebfd34ef90cd513229f6ff90e24a08e9c5f727b0c23d99f
|
3 |
+
size 146324
|
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"mean_reward":
|
|
|
1 |
+
{"mean_reward": 73.76781820459291, "std_reward": 85.41611459008848, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2023-07-13T07:37:13.238226"}
|