besimray committed on
Commit 918b11a
1 Parent(s): 0f27dba

Training in progress, step 370, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3da1bc8e78d62a43f5fc08c54b7da66788aab90b181d1cce0dc0e44a0c7ead64
+oid sha256:0aa6ce92e30f158e6ce454a1b7abd904940338fe632efee699ee6f467e3113e3
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa9f5ab26f7ae0259dbc2872ec134c30afd17bf2cb254c9c4159232de93a6a4c
+oid sha256:d94ce2715dfd2b055f70ce7e6e940b21b7bfc9169091edc0a1e70a4b18f16713
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63df2ca6f0708e85ea13f7c75a1ee020a6142b982449338d6d6d3a95d80f4533
+oid sha256:af46fe813686634b4c33424def7b0536c07d9ea633d05e1dc01d052114a4c31c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89431c5d37cecda04572e69a3e5ff0abc04241564413b5510718a518a469399c
+oid sha256:190428be784d5032d58b9493c0079ee4726eb5d4046a38ab5f17d6b642026e37
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.018574714660645,
-  "best_model_checkpoint": "miner_id_24/checkpoint-360",
-  "epoch": 0.01627081873855958,
+  "best_metric": 11.018465042114258,
+  "best_model_checkpoint": "miner_id_24/checkpoint-370",
+  "epoch": 0.016722785925741793,
   "eval_steps": 5,
-  "global_step": 360,
+  "global_step": 370,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3111,6 +3111,92 @@
       "eval_samples_per_second": 52.838,
       "eval_steps_per_second": 26.422,
       "step": 360
+    },
+    {
+      "epoch": 0.016316015457277802,
+      "grad_norm": 0.47562116384506226,
+      "learning_rate": 3.7151040211187635e-05,
+      "loss": 44.0571,
+      "step": 361
+    },
+    {
+      "epoch": 0.016361212175996023,
+      "grad_norm": 0.439248651266098,
+      "learning_rate": 3.665364408282305e-05,
+      "loss": 44.0292,
+      "step": 362
+    },
+    {
+      "epoch": 0.016406408894714244,
+      "grad_norm": 0.5355764031410217,
+      "learning_rate": 3.615885187503946e-05,
+      "loss": 44.1601,
+      "step": 363
+    },
+    {
+      "epoch": 0.016451605613432466,
+      "grad_norm": 0.5143962502479553,
+      "learning_rate": 3.566668392680662e-05,
+      "loss": 44.0829,
+      "step": 364
+    },
+    {
+      "epoch": 0.016496802332150687,
+      "grad_norm": 0.5054187178611755,
+      "learning_rate": 3.517716046922118e-05,
+      "loss": 44.144,
+      "step": 365
+    },
+    {
+      "epoch": 0.016496802332150687,
+      "eval_loss": 11.018515586853027,
+      "eval_runtime": 176.1346,
+      "eval_samples_per_second": 52.897,
+      "eval_steps_per_second": 26.451,
+      "step": 365
+    },
+    {
+      "epoch": 0.016541999050868908,
+      "grad_norm": 0.44439616799354553,
+      "learning_rate": 3.469030162467513e-05,
+      "loss": 44.0321,
+      "step": 366
+    },
+    {
+      "epoch": 0.01658719576958713,
+      "grad_norm": 0.5372561812400818,
+      "learning_rate": 3.4206127406028745e-05,
+      "loss": 44.0923,
+      "step": 367
+    },
+    {
+      "epoch": 0.01663239248830535,
+      "grad_norm": 0.48407748341560364,
+      "learning_rate": 3.372465771578771e-05,
+      "loss": 44.1126,
+      "step": 368
+    },
+    {
+      "epoch": 0.01667758920702357,
+      "grad_norm": 0.4682793915271759,
+      "learning_rate": 3.32459123452852e-05,
+      "loss": 44.0227,
+      "step": 369
+    },
+    {
+      "epoch": 0.016722785925741793,
+      "grad_norm": 0.4110027551651001,
+      "learning_rate": 3.276991097386831e-05,
+      "loss": 44.0354,
+      "step": 370
+    },
+    {
+      "epoch": 0.016722785925741793,
+      "eval_loss": 11.018465042114258,
+      "eval_runtime": 176.3082,
+      "eval_samples_per_second": 52.845,
+      "eval_steps_per_second": 26.425,
+      "step": 370
     }
   ],
   "logging_steps": 1,
@@ -3139,7 +3225,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3782423347200.0,
+  "total_flos": 3887490662400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null