dixedus committed on
Commit
4c93c1e
·
verified ·
1 Parent(s): e427fdd

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd11218273ee3053d63be73aa5789d16270e4374dc614764efe8b1b016a86559
3
  size 103716100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ced0aae4575f32da894f069a0689f8adfc695305a4161c3476b41484cbac0743
3
  size 103716100
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48f332baa2d72fc2a56d128c8481794ccb6d37b1ba96d757403e140a254bc155
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4874bfff8f48f58dbeacd6424c17544ca4074af0f4864ca33e34f39221c537ef
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0051c53bcb92b7c913136d782f625b409707ede35cdcc9bbc83a63d788098e04
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d10d0fa96665f6b4af4824faec3d1d9f4e8b4343723a14d86cab932da6ce3225
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": NaN,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.036081544290095614,
5
  "eval_steps": 100,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -93,6 +93,84 @@
93
  "eval_samples_per_second": 23.662,
94
  "eval_steps_per_second": 5.915,
95
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "logging_steps": 10,
@@ -107,7 +185,7 @@
107
  "early_stopping_threshold": 0.0
108
  },
109
  "attributes": {
110
- "early_stopping_patience_counter": 0
111
  }
112
  },
113
  "TrainerControl": {
@@ -121,7 +199,7 @@
121
  "attributes": {}
122
  }
123
  },
124
- "total_flos": 6.6353734090752e+16,
125
  "train_batch_size": 8,
126
  "trial_name": null,
127
  "trial_params": null
 
1
  {
2
  "best_metric": NaN,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.07216308858019123,
5
  "eval_steps": 100,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
93
  "eval_samples_per_second": 23.662,
94
  "eval_steps_per_second": 5.915,
95
  "step": 100
96
+ },
97
+ {
98
+ "epoch": 0.03968969871910518,
99
+ "grad_norm": 0.0,
100
+ "learning_rate": 0.0001861554081393806,
101
+ "loss": 0.0,
102
+ "step": 110
103
+ },
104
+ {
105
+ "epoch": 0.04329785314811474,
106
+ "grad_norm": 0.0,
107
+ "learning_rate": 0.0001833313919082515,
108
+ "loss": 0.0,
109
+ "step": 120
110
+ },
111
+ {
112
+ "epoch": 0.0469060075771243,
113
+ "grad_norm": 0.0,
114
+ "learning_rate": 0.00018027116379309638,
115
+ "loss": 0.0,
116
+ "step": 130
117
+ },
118
+ {
119
+ "epoch": 0.050514162006133866,
120
+ "grad_norm": 0.0,
121
+ "learning_rate": 0.00017698339834299061,
122
+ "loss": 0.0,
123
+ "step": 140
124
+ },
125
+ {
126
+ "epoch": 0.054122316435143425,
127
+ "grad_norm": 0.0,
128
+ "learning_rate": 0.00017347741508630672,
129
+ "loss": 0.0,
130
+ "step": 150
131
+ },
132
+ {
133
+ "epoch": 0.057730470864152984,
134
+ "grad_norm": 0.0,
135
+ "learning_rate": 0.0001697631521134985,
136
+ "loss": 0.0,
137
+ "step": 160
138
+ },
139
+ {
140
+ "epoch": 0.06133862529316255,
141
+ "grad_norm": 0.0,
142
+ "learning_rate": 0.00016585113790650388,
143
+ "loss": 0.0,
144
+ "step": 170
145
+ },
146
+ {
147
+ "epoch": 0.06494677972217211,
148
+ "grad_norm": 0.0,
149
+ "learning_rate": 0.0001617524614946192,
150
+ "loss": 0.0,
151
+ "step": 180
152
+ },
153
+ {
154
+ "epoch": 0.06855493415118168,
155
+ "grad_norm": 0.0,
156
+ "learning_rate": 0.0001574787410214407,
157
+ "loss": 0.0,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 0.07216308858019123,
162
+ "grad_norm": 0.0,
163
+ "learning_rate": 0.00015304209081197425,
164
+ "loss": 0.0,
165
+ "step": 200
166
+ },
167
+ {
168
+ "epoch": 0.07216308858019123,
169
+ "eval_loss": NaN,
170
+ "eval_runtime": 197.0795,
171
+ "eval_samples_per_second": 23.686,
172
+ "eval_steps_per_second": 5.921,
173
+ "step": 200
174
  }
175
  ],
176
  "logging_steps": 10,
 
185
  "early_stopping_threshold": 0.0
186
  },
187
  "attributes": {
188
+ "early_stopping_patience_counter": 1
189
  }
190
  },
191
  "TrainerControl": {
 
199
  "attributes": {}
200
  }
201
  },
202
+ "total_flos": 1.32707468181504e+17,
203
  "train_batch_size": 8,
204
  "trial_name": null,
205
  "trial_params": null