Giallar committed on
Commit
83baf4a
1 Parent(s): 12ddb7c

Upload PPO LunarLander-v2 trained agent

Browse files
README.md CHANGED
@@ -10,7 +10,7 @@ model-index:
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
- value: -133.88 +/- 35.82
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
 
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
+ value: -204.84 +/- 56.87
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
config.json CHANGED
The diff for this file is too large to render. See raw diff
 
ppo-LunarLander-v2.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6fcb9c0af646e2d52eb962c93edfaf51cd86f5591107bdc3c1f5e71bc26f251
3
- size 188335
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b56966499411987d4e2159023440e948a7daff012be9277d1ce2da908fc1b55e
3
+ size 188334
ppo-LunarLander-v2/data CHANGED
The diff for this file is too large to render. See raw diff
 
ppo-LunarLander-v2/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c1aa49ac9fc778783d5c7512fc3b5c156c1aa025e8b1a4592126264bc8dda75
3
  size 84893
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c485128cd1e6ce0c90039958eaec9c65f14a96806c671564162c469b40f473d
3
  size 84893
ppo-LunarLander-v2/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:597071cac589811e0e150df78f50f5a6586b3139e1e8c1b1fc2f9c1729305287
3
  size 43201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a6a337206b25e3116ec836ff7cf30dd91c54db046078f30e5214e07f9ed95d1
3
  size 43201
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba3bae5bd96f99a33745da32d827d8d625a66c4bf8d4db4301f797cc4907de23
3
- size 240951
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:450b1e58e40bf74297a26580840083fa14167b07a47d9ea2a8e46736f8f44c86
3
+ size 247626
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": -133.8756309641176, "std_reward": 35.82154651344581, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-05T12:46:51.793940"}
 
1
+ {"mean_reward": -204.84463142059175, "std_reward": 56.87323539444302, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-05T13:50:21.022000"}