MohamedAhmedAE commited on
Commit
34c855c
·
verified ·
1 Parent(s): 753e10e

Training in progress, step 610400

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb488d07180dda4d36dc8c04e6962a0296f67d8778b893ecac1f7e5d993b765
3
  size 1715561468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86877e63b9882f80f5de38767511c0218b9d7d8ad2e970018cd432ec4f883f73
3
  size 1715561468
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8797c75efc47e59ca589d0274b2c8ecd06e6d51e1b0e7370194d01a342ade252
3
  size 3431474364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9be5c4741d46132f9e2d10fdf6df2d024627198757a09826b5c62403ce4a76d
3
  size 3431474364
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55bc85299f5f6627f236f7c8b72ae391f14d02a771d86cdf81791100be66164c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be9944304efdaf5a928fd38668b62ff08647c29d13187681fe2f6268779000cd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:657663cc54c163af99554667642ae2a96b3249ce9d1e18733019516ba49032ee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:138e4c2f7aac4cc090560045c10c0e4885cc9c862db32dfb26b47ddb16407009
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 45.690752527143395,
5
  "eval_steps": 1000,
6
- "global_step": 610200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -25957,293 +25957,6 @@
25957
  "learning_rate": 2.8758473550836634e-05,
25958
  "loss": 0.4025,
25959
  "step": 603600
25960
- },
25961
- {
25962
- "epoch": 45.21153126169974,
25963
- "grad_norm": 5.78782320022583,
25964
- "learning_rate": 2.8746844911583486e-05,
25965
- "loss": 0.3698,
25966
- "step": 603800
25967
- },
25968
- {
25969
- "epoch": 45.22650692624485,
25970
- "grad_norm": 3.0406911373138428,
25971
- "learning_rate": 2.87352154429745e-05,
25972
- "loss": 0.3934,
25973
- "step": 604000
25974
- },
25975
- {
25976
- "epoch": 45.22650692624485,
25977
- "eval_loss": 1.5707191228866577,
25978
- "eval_runtime": 1178.78,
25979
- "eval_samples_per_second": 8.402,
25980
- "eval_steps_per_second": 0.421,
25981
- "step": 604000
25982
- },
25983
- {
25984
- "epoch": 45.241482590789964,
25985
- "grad_norm": 18.79865264892578,
25986
- "learning_rate": 2.872358514758381e-05,
25987
- "loss": 0.4355,
25988
- "step": 604200
25989
- },
25990
- {
25991
- "epoch": 45.25645825533508,
25992
- "grad_norm": 5.7239508628845215,
25993
- "learning_rate": 2.8711954027985765e-05,
25994
- "loss": 0.3789,
25995
- "step": 604400
25996
- },
25997
- {
25998
- "epoch": 45.271433919880195,
25999
- "grad_norm": 5.77394437789917,
26000
- "learning_rate": 2.8700322086754894e-05,
26001
- "loss": 0.4129,
26002
- "step": 604600
26003
- },
26004
- {
26005
- "epoch": 45.28640958442531,
26006
- "grad_norm": 8.501045227050781,
26007
- "learning_rate": 2.868868932646589e-05,
26008
- "loss": 0.4196,
26009
- "step": 604800
26010
- },
26011
- {
26012
- "epoch": 45.301385248970426,
26013
- "grad_norm": 1.9924376010894775,
26014
- "learning_rate": 2.867705574969365e-05,
26015
- "loss": 0.404,
26016
- "step": 605000
26017
- },
26018
- {
26019
- "epoch": 45.301385248970426,
26020
- "eval_loss": 1.5671298503875732,
26021
- "eval_runtime": 1178.5157,
26022
- "eval_samples_per_second": 8.404,
26023
- "eval_steps_per_second": 0.421,
26024
- "step": 605000
26025
- },
26026
- {
26027
- "epoch": 45.31636091351554,
26028
- "grad_norm": 11.104948997497559,
26029
- "learning_rate": 2.8665421359013233e-05,
26030
- "loss": 0.4253,
26031
- "step": 605200
26032
- },
26033
- {
26034
- "epoch": 45.33133657806065,
26035
- "grad_norm": 5.054950714111328,
26036
- "learning_rate": 2.865378615699989e-05,
26037
- "loss": 0.4109,
26038
- "step": 605400
26039
- },
26040
- {
26041
- "epoch": 45.34631224260576,
26042
- "grad_norm": 5.942670822143555,
26043
- "learning_rate": 2.8642150146229042e-05,
26044
- "loss": 0.395,
26045
- "step": 605600
26046
- },
26047
- {
26048
- "epoch": 45.36128790715088,
26049
- "grad_norm": 1.7649027109146118,
26050
- "learning_rate": 2.8630513329276298e-05,
26051
- "loss": 0.42,
26052
- "step": 605800
26053
- },
26054
- {
26055
- "epoch": 45.37626357169599,
26056
- "grad_norm": 4.954268932342529,
26057
- "learning_rate": 2.861887570871744e-05,
26058
- "loss": 0.4292,
26059
- "step": 606000
26060
- },
26061
- {
26062
- "epoch": 45.37626357169599,
26063
- "eval_loss": 1.5571595430374146,
26064
- "eval_runtime": 1177.9473,
26065
- "eval_samples_per_second": 8.408,
26066
- "eval_steps_per_second": 0.421,
26067
- "step": 606000
26068
- },
26069
- {
26070
- "epoch": 45.391239236241105,
26071
- "grad_norm": 8.101126670837402,
26072
- "learning_rate": 2.8607237287128442e-05,
26073
- "loss": 0.3947,
26074
- "step": 606200
26075
- },
26076
- {
26077
- "epoch": 45.406214900786225,
26078
- "grad_norm": 19.263370513916016,
26079
- "learning_rate": 2.8595598067085422e-05,
26080
- "loss": 0.42,
26081
- "step": 606400
26082
- },
26083
- {
26084
- "epoch": 45.42119056533134,
26085
- "grad_norm": 20.436559677124023,
26086
- "learning_rate": 2.8583958051164705e-05,
26087
- "loss": 0.41,
26088
- "step": 606600
26089
- },
26090
- {
26091
- "epoch": 45.43616622987645,
26092
- "grad_norm": 5.639106273651123,
26093
- "learning_rate": 2.8572317241942792e-05,
26094
- "loss": 0.4125,
26095
- "step": 606800
26096
- },
26097
- {
26098
- "epoch": 45.45114189442157,
26099
- "grad_norm": 4.174552917480469,
26100
- "learning_rate": 2.8560675641996338e-05,
26101
- "loss": 0.4398,
26102
- "step": 607000
26103
- },
26104
- {
26105
- "epoch": 45.45114189442157,
26106
- "eval_loss": 1.550969123840332,
26107
- "eval_runtime": 1178.1228,
26108
- "eval_samples_per_second": 8.407,
26109
- "eval_steps_per_second": 0.421,
26110
- "step": 607000
26111
- },
26112
- {
26113
- "epoch": 45.46611755896668,
26114
- "grad_norm": 15.794562339782715,
26115
- "learning_rate": 2.854903325390218e-05,
26116
- "loss": 0.4158,
26117
- "step": 607200
26118
- },
26119
- {
26120
- "epoch": 45.48109322351179,
26121
- "grad_norm": 3.670137882232666,
26122
- "learning_rate": 2.853739008023736e-05,
26123
- "loss": 0.4066,
26124
- "step": 607400
26125
- },
26126
- {
26127
- "epoch": 45.49606888805691,
26128
- "grad_norm": 4.4699506759643555,
26129
- "learning_rate": 2.852574612357904e-05,
26130
- "loss": 0.435,
26131
- "step": 607600
26132
- },
26133
- {
26134
- "epoch": 45.51104455260202,
26135
- "grad_norm": 9.282175064086914,
26136
- "learning_rate": 2.8514101386504605e-05,
26137
- "loss": 0.4065,
26138
- "step": 607800
26139
- },
26140
- {
26141
- "epoch": 45.526020217147135,
26142
- "grad_norm": 8.399334907531738,
26143
- "learning_rate": 2.8502455871591577e-05,
26144
- "loss": 0.4054,
26145
- "step": 608000
26146
- },
26147
- {
26148
- "epoch": 45.526020217147135,
26149
- "eval_loss": 1.5575517416000366,
26150
- "eval_runtime": 1177.6967,
26151
- "eval_samples_per_second": 8.41,
26152
- "eval_steps_per_second": 0.421,
26153
- "step": 608000
26154
- },
26155
- {
26156
- "epoch": 45.54099588169225,
26157
- "grad_norm": 5.749093532562256,
26158
- "learning_rate": 2.8490809581417675e-05,
26159
- "loss": 0.3893,
26160
- "step": 608200
26161
- },
26162
- {
26163
- "epoch": 45.555971546237366,
26164
- "grad_norm": 3.4878060817718506,
26165
- "learning_rate": 2.847916251856078e-05,
26166
- "loss": 0.4196,
26167
- "step": 608400
26168
- },
26169
- {
26170
- "epoch": 45.57094721078248,
26171
- "grad_norm": 5.982976913452148,
26172
- "learning_rate": 2.846751468559894e-05,
26173
- "loss": 0.4163,
26174
- "step": 608600
26175
- },
26176
- {
26177
- "epoch": 45.58592287532759,
26178
- "grad_norm": 9.301414489746094,
26179
- "learning_rate": 2.845586608511038e-05,
26180
- "loss": 0.4154,
26181
- "step": 608800
26182
- },
26183
- {
26184
- "epoch": 45.60089853987271,
26185
- "grad_norm": 14.666509628295898,
26186
- "learning_rate": 2.8444216719673478e-05,
26187
- "loss": 0.4265,
26188
- "step": 609000
26189
- },
26190
- {
26191
- "epoch": 45.60089853987271,
26192
- "eval_loss": 1.547120213508606,
26193
- "eval_runtime": 1178.0235,
26194
- "eval_samples_per_second": 8.407,
26195
- "eval_steps_per_second": 0.421,
26196
- "step": 609000
26197
- },
26198
- {
26199
- "epoch": 45.61587420441782,
26200
- "grad_norm": 9.147184371948242,
26201
- "learning_rate": 2.8432566591866823e-05,
26202
- "loss": 0.4117,
26203
- "step": 609200
26204
- },
26205
- {
26206
- "epoch": 45.63084986896293,
26207
- "grad_norm": 9.692912101745605,
26208
- "learning_rate": 2.8420915704269114e-05,
26209
- "loss": 0.406,
26210
- "step": 609400
26211
- },
26212
- {
26213
- "epoch": 45.64582553350805,
26214
- "grad_norm": 8.107662200927734,
26215
- "learning_rate": 2.8409264059459274e-05,
26216
- "loss": 0.4404,
26217
- "step": 609600
26218
- },
26219
- {
26220
- "epoch": 45.660801198053164,
26221
- "grad_norm": 3.5461621284484863,
26222
- "learning_rate": 2.839761166001635e-05,
26223
- "loss": 0.4198,
26224
- "step": 609800
26225
- },
26226
- {
26227
- "epoch": 45.675776862598276,
26228
- "grad_norm": 10.266241073608398,
26229
- "learning_rate": 2.8385958508519588e-05,
26230
- "loss": 0.3968,
26231
- "step": 610000
26232
- },
26233
- {
26234
- "epoch": 45.675776862598276,
26235
- "eval_loss": 1.5469167232513428,
26236
- "eval_runtime": 1178.5726,
26237
- "eval_samples_per_second": 8.403,
26238
- "eval_steps_per_second": 0.421,
26239
- "step": 610000
26240
- },
26241
- {
26242
- "epoch": 45.690752527143395,
26243
- "grad_norm": 11.695104598999023,
26244
- "learning_rate": 2.8374304607548386e-05,
26245
- "loss": 0.4112,
26246
- "step": 610200
26247
  }
26248
  ],
26249
  "logging_steps": 200,
@@ -26263,7 +25976,7 @@
26263
  "attributes": {}
26264
  }
26265
  },
26266
- "total_flos": 9.791518820806656e+18,
26267
  "train_batch_size": 10,
26268
  "trial_name": null,
26269
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 45.19655559715462,
5
  "eval_steps": 1000,
6
+ "global_step": 603600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
25957
  "learning_rate": 2.8758473550836634e-05,
25958
  "loss": 0.4025,
25959
  "step": 603600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25960
  }
25961
  ],
25962
  "logging_steps": 200,
 
25976
  "attributes": {}
25977
  }
25978
  },
25979
+ "total_flos": 9.633822324424704e+18,
25980
  "train_batch_size": 10,
25981
  "trial_name": null,
25982
  "trial_params": null
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb488d07180dda4d36dc8c04e6962a0296f67d8778b893ecac1f7e5d993b765
3
  size 1715561468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c45dfc162774ddbec26ef765bf4bd84758c88e2ec06c30ee07bdd1b393ba98c
3
  size 1715561468