pilotj committed on
Commit
f4779fd
1 Parent(s): 3804b77

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f79e495154da67b877bea3f63a4a67b50b5bd6dc9f526699f399304facad62e
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f4ac6e2904e38a59ae6cd088c42b2010f3ce171b7c42f73e10f89189928510
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d90d453de31c2419e4e349d4a4f435ab05d64a46c66c09d9b10ac7145a578d1f
3
  size 876185978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2a67e114a6e0d790034fcdf83357a636db00c48bbbee80bf67f812e4454a73
3
  size 876185978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18250a969c7e8b744e0a81968b85799869d2e007063948d0e72541065db3bc16
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0905be27a588e9c110d86ca5c7f44884f2cff6848f90f999b9096bd6ca85f4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bdb930721cbebdf735b7bba6276bd5739b55ef721382393eeafb8577701beb4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b04a1fc0ffeaa6e3a4893e319ebc13a4fb773c4615723d80b4c92ffb7c98f7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.3715578615665436,
3
- "best_model_checkpoint": "results/checkpoint-4000",
4
- "epoch": 0.3820804279300793,
5
  "eval_steps": 500,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -127,6 +127,36 @@
127
  "eval_samples_per_second": 235.496,
128
  "eval_steps_per_second": 3.683,
129
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  }
131
  ],
132
  "logging_steps": 500,
@@ -146,7 +176,7 @@
146
  "attributes": {}
147
  }
148
  },
149
- "total_flos": 6.7370944561152e+16,
150
  "train_batch_size": 64,
151
  "trial_name": null,
152
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3664040267467499,
3
+ "best_model_checkpoint": "results/checkpoint-5000",
4
+ "epoch": 0.47760053491259913,
5
  "eval_steps": 500,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
127
  "eval_samples_per_second": 235.496,
128
  "eval_steps_per_second": 3.683,
129
  "step": 4000
130
+ },
131
+ {
132
+ "epoch": 0.4298404814213392,
133
+ "grad_norm": 7.005139350891113,
134
+ "learning_rate": 3.925398796446652e-05,
135
+ "loss": 0.4135,
136
+ "step": 4500
137
+ },
138
+ {
139
+ "epoch": 0.4298404814213392,
140
+ "eval_loss": 0.3852500319480896,
141
+ "eval_runtime": 110.8005,
142
+ "eval_samples_per_second": 236.019,
143
+ "eval_steps_per_second": 3.691,
144
+ "step": 4500
145
+ },
146
+ {
147
+ "epoch": 0.47760053491259913,
148
+ "grad_norm": 7.208944797515869,
149
+ "learning_rate": 3.8059986627185024e-05,
150
+ "loss": 0.4172,
151
+ "step": 5000
152
+ },
153
+ {
154
+ "epoch": 0.47760053491259913,
155
+ "eval_loss": 0.3664040267467499,
156
+ "eval_runtime": 110.6088,
157
+ "eval_samples_per_second": 236.428,
158
+ "eval_steps_per_second": 3.698,
159
+ "step": 5000
160
  }
161
  ],
162
  "logging_steps": 500,
 
176
  "attributes": {}
177
  }
178
  },
179
+ "total_flos": 8.421368070144e+16,
180
  "train_batch_size": 64,
181
  "trial_name": null,
182
  "trial_params": null