D3MI4N committed on
Commit
084675d
1 Parent(s): 39f43cb

Initial commit

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  type: SpaceInvadersNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
- value: 29.00 +/- 64.30
20
  name: mean_reward
21
  verified: false
22
  ---
@@ -62,7 +62,7 @@ python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f lo
62
 
63
  ## Hyperparameters
64
  ```python
65
- OrderedDict([('batch_size', 16),
66
  ('buffer_size', 10000),
67
  ('env_wrapper',
68
  ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
@@ -70,7 +70,7 @@ OrderedDict([('batch_size', 16),
70
  ('exploration_fraction', 0.1),
71
  ('frame_stack', 4),
72
  ('gradient_steps', 1),
73
- ('learning_rate', 0.07782651702448214),
74
  ('learning_starts', 100000),
75
  ('n_timesteps', 1000000.0),
76
  ('optimize_memory_usage', False),
 
16
  type: SpaceInvadersNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
+ value: 494.50 +/- 181.50
20
  name: mean_reward
21
  verified: false
22
  ---
 
62
 
63
  ## Hyperparameters
64
  ```python
65
+ OrderedDict([('batch_size', 32),
66
  ('buffer_size', 10000),
67
  ('env_wrapper',
68
  ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
 
70
  ('exploration_fraction', 0.1),
71
  ('frame_stack', 4),
72
  ('gradient_steps', 1),
73
+ ('learning_rate', 0.0001),
74
  ('learning_starts', 100000),
75
  ('n_timesteps', 1000000.0),
76
  ('optimize_memory_usage', False),
args.yml CHANGED
@@ -2,7 +2,7 @@
2
  - - - algo
3
  - dqn
4
  - - conf_file
5
- - best_dqn.yml
6
  - - device
7
  - auto
8
  - - env
@@ -56,7 +56,7 @@
56
  - - save_replay_buffer
57
  - false
58
  - - seed
59
- - 3641132205
60
  - - storage
61
  - null
62
  - - study_name
 
2
  - - - algo
3
  - dqn
4
  - - conf_file
5
+ - dqn.yml
6
  - - device
7
  - auto
8
  - - env
 
56
  - - save_replay_buffer
57
  - false
58
  - - seed
59
+ - 1866637971
60
  - - storage
61
  - null
62
  - - study_name
config.yml CHANGED
@@ -1,6 +1,6 @@
1
  !!python/object/apply:collections.OrderedDict
2
  - - - batch_size
3
- - 16
4
  - - buffer_size
5
  - 10000
6
  - - env_wrapper
@@ -14,7 +14,7 @@
14
  - - gradient_steps
15
  - 1
16
  - - learning_rate
17
- - 0.07782651702448214
18
  - - learning_starts
19
  - 100000
20
  - - n_timesteps
 
1
  !!python/object/apply:collections.OrderedDict
2
  - - - batch_size
3
+ - 32
4
  - - buffer_size
5
  - 10000
6
  - - env_wrapper
 
14
  - - gradient_steps
15
  - 1
16
  - - learning_rate
17
+ - 0.0001
18
  - - learning_starts
19
  - 100000
20
  - - n_timesteps
dqn-SpaceInvadersNoFrameskip-v4.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05f89ad4df8366a191d2e8cb82ca8c9796d7c32ab11c9cb13f60c8f447288cd7
3
  size 27220728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c377129ece06f8c368061c9f8faa9e92e15fc2f847c8c679ee3f11af68ba574
3
  size 27220728
dqn-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version CHANGED
@@ -1 +1 @@
1
- 2.4.0a6
 
1
+ 2.4.0a7
dqn-SpaceInvadersNoFrameskip-v4/data CHANGED
The diff for this file is too large to render. See raw diff
 
dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c547718e27c23725c4eded2c1d45246dcdc134c248687ccef616c2a10fea79c
3
  size 13506172
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449eefa05daf35f09473da4d7be8183626fb62b8720d50ec420803f0d15ed07e
3
  size 13506172
dqn-SpaceInvadersNoFrameskip-v4/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0eff5af7af57b09e0c58bd349f8706af9465381de5eaa35e74e7b658e3382a06
3
  size 13505370
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d1bb1bbeea7ae31006d6e34789a70d240e3379701f71b52d658eec1888598c7
3
  size 13505370
dqn-SpaceInvadersNoFrameskip-v4/system_info.txt CHANGED
@@ -1,6 +1,6 @@
1
  - OS: Linux-6.1.85+-x86_64-with-glibc2.35 # 1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
2
  - Python: 3.10.12
3
- - Stable-Baselines3: 2.4.0a6
4
  - PyTorch: 2.3.1+cu121
5
  - GPU Enabled: True
6
  - Numpy: 1.25.2
 
1
  - OS: Linux-6.1.85+-x86_64-with-glibc2.35 # 1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
2
  - Python: 3.10.12
3
+ - Stable-Baselines3: 2.4.0a7
4
  - PyTorch: 2.3.1+cu121
5
  - GPU Enabled: True
6
  - Numpy: 1.25.2
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6332f13a1224ba686797bd770b10af974d632c130bd192c46a69bdf79b1811c4
3
- size 294871
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dce7a56d47515ec9f7f74b0942737184333ece914161bea32fcc021d579186f
3
+ size 261459
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 29.0, "std_reward": 64.29618962271404, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-07-25T12:56:27.903756"}
 
1
+ {"mean_reward": 494.5, "std_reward": 181.5, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-07-28T17:17:24.572210"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12e82bb984355dca84ae738eb9e097feacbc33bdb04b50d0c36052fe25fd5d74
3
- size 41833
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b79cd96db5118183679479b99d33224e08c4c283eb5cd6ad902d970fcb690862
3
+ size 43140