Upload PPO LunarLander-v2 trained agent

Files changed (8) hide show

README.md CHANGED Viewed

@@ -10,7 +10,7 @@ model-index:
   results:
   - metrics:
     - type: mean_reward
-      value: -133.88 +/- 35.82
       name: mean_reward
     task:
       type: reinforcement-learning

   results:
   - metrics:
     - type: mean_reward
+      value: -204.84 +/- 56.87
       name: mean_reward
     task:
       type: reinforcement-learning

config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

ppo-LunarLander-v2.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6fcb9c0af646e2d52eb962c93edfaf51cd86f5591107bdc3c1f5e71bc26f251
-size 188335

 version https://git-lfs.github.com/spec/v1
+oid sha256:b56966499411987d4e2159023440e948a7daff012be9277d1ce2da908fc1b55e
+size 188334

ppo-LunarLander-v2/data CHANGED Viewed

The diff for this file is too large to render. See raw diff

ppo-LunarLander-v2/policy.optimizer.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c1aa49ac9fc778783d5c7512fc3b5c156c1aa025e8b1a4592126264bc8dda75
 size 84893

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c485128cd1e6ce0c90039958eaec9c65f14a96806c671564162c469b40f473d
 size 84893

ppo-LunarLander-v2/policy.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:597071cac589811e0e150df78f50f5a6586b3139e1e8c1b1fc2f9c1729305287
 size 43201

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a6a337206b25e3116ec836ff7cf30dd91c54db046078f30e5214e07f9ed95d1
 size 43201

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba3bae5bd96f99a33745da32d827d8d625a66c4bf8d4db4301f797cc4907de23
-size 240951

 version https://git-lfs.github.com/spec/v1
+oid sha256:450b1e58e40bf74297a26580840083fa14167b07a47d9ea2a8e46736f8f44c86
+size 247626

results.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"mean_reward": -133.8756309641176, "std_reward": 35.82154651344581, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-05T12:46:51.793940"}


1	+ {"mean_reward": -204.84463142059175, "std_reward": 56.87323539444302, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-05T13:50:21.022000"}