MohamedAhmedAE committed on
Commit
73a1910
·
verified ·
1 Parent(s): c0e3782

Training in progress, step 349600, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,12 +23,12 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "q_proj",
27
- "down_proj",
28
- "v_proj",
29
- "k_proj",
30
  "gate_proj",
 
31
  "o_proj",
 
 
 
32
  "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
 
26
  "gate_proj",
27
+ "v_proj",
28
  "o_proj",
29
+ "q_proj",
30
+ "k_proj",
31
+ "down_proj",
32
  "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
3
  size 1342238560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cdea3181ff20981cb33a156aeb80b8e43818a8f05858b95c059340e21aa8733
3
  size 1342238560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:781887d172314801ab8802842158c08145ef998a6a80b07686139a50d9285ded
3
  size 683268498
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cd506869b94b5be970fca409bc95398841b5ffd9a592fb2c061dc58227eceba
3
  size 683268498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e85f0257d01a91ff4050d39219e8dd384bbb4cfdc5b2e0fb4fabf6b2fe3b33e2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3650e87ca836906ee8691b7d5d7b6eea6cead08041d7a7bfe35d5aa4494083ae
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:840aa8e3a2615e43038d3be582aa3892a5d4ec1157dbf18b35d8a9ff2904fee4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d7ddbf1d6d4ba083fc85b63bf4848a26d38ae4d64ba73f9a2514a5c0d3a9d8c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.23855044785586269,
5
  "eval_steps": 500,
6
- "global_step": 343000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -12012,6 +12012,237 @@
12012
  "learning_rate": 1.9514285936093064e-05,
12013
  "loss": 1.6889,
12014
  "step": 343000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12015
  }
12016
  ],
12017
  "logging_steps": 200,
@@ -12031,7 +12262,7 @@
12031
  "attributes": {}
12032
  }
12033
  },
12034
- "total_flos": 4.567214300600918e+18,
12035
  "train_batch_size": 1,
12036
  "trial_name": null,
12037
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.24314063140061104,
5
  "eval_steps": 500,
6
+ "global_step": 349600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
12012
  "learning_rate": 1.9514285936093064e-05,
12013
  "loss": 1.6889,
12014
  "step": 343000
12015
+ },
12016
+ {
12017
+ "epoch": 0.23868954432691566,
12018
+ "grad_norm": 3.7890496253967285,
12019
+ "learning_rate": 1.951372599351318e-05,
12020
+ "loss": 1.6764,
12021
+ "step": 343200
12022
+ },
12023
+ {
12024
+ "epoch": 0.23882864079796864,
12025
+ "grad_norm": 4.050221920013428,
12026
+ "learning_rate": 1.9513165739975493e-05,
12027
+ "loss": 1.6499,
12028
+ "step": 343400
12029
+ },
12030
+ {
12031
+ "epoch": 0.23896773726902162,
12032
+ "grad_norm": 7.2723388671875,
12033
+ "learning_rate": 1.951260517550675e-05,
12034
+ "loss": 1.6283,
12035
+ "step": 343600
12036
+ },
12037
+ {
12038
+ "epoch": 0.2391068337400746,
12039
+ "grad_norm": 4.328615665435791,
12040
+ "learning_rate": 1.951204430013371e-05,
12041
+ "loss": 1.6806,
12042
+ "step": 343800
12043
+ },
12044
+ {
12045
+ "epoch": 0.23924593021112758,
12046
+ "grad_norm": 6.319999694824219,
12047
+ "learning_rate": 1.9511483113883144e-05,
12048
+ "loss": 1.6528,
12049
+ "step": 344000
12050
+ },
12051
+ {
12052
+ "epoch": 0.23938502668218056,
12053
+ "grad_norm": 3.773545265197754,
12054
+ "learning_rate": 1.9510921616781844e-05,
12055
+ "loss": 1.643,
12056
+ "step": 344200
12057
+ },
12058
+ {
12059
+ "epoch": 0.23952412315323354,
12060
+ "grad_norm": 4.095102787017822,
12061
+ "learning_rate": 1.9510359808856623e-05,
12062
+ "loss": 1.6922,
12063
+ "step": 344400
12064
+ },
12065
+ {
12066
+ "epoch": 0.23966321962428652,
12067
+ "grad_norm": 5.804976463317871,
12068
+ "learning_rate": 1.950979769013429e-05,
12069
+ "loss": 1.6055,
12070
+ "step": 344600
12071
+ },
12072
+ {
12073
+ "epoch": 0.2398023160953395,
12074
+ "grad_norm": 9.323091506958008,
12075
+ "learning_rate": 1.9509235260641682e-05,
12076
+ "loss": 1.6792,
12077
+ "step": 344800
12078
+ },
12079
+ {
12080
+ "epoch": 0.23994141256639248,
12081
+ "grad_norm": 3.1342084407806396,
12082
+ "learning_rate": 1.950867252040566e-05,
12083
+ "loss": 1.6612,
12084
+ "step": 345000
12085
+ },
12086
+ {
12087
+ "epoch": 0.24008050903744546,
12088
+ "grad_norm": 5.803501605987549,
12089
+ "learning_rate": 1.9508109469453075e-05,
12090
+ "loss": 1.6417,
12091
+ "step": 345200
12092
+ },
12093
+ {
12094
+ "epoch": 0.24021960550849844,
12095
+ "grad_norm": 2.7702744007110596,
12096
+ "learning_rate": 1.9507546107810813e-05,
12097
+ "loss": 1.6397,
12098
+ "step": 345400
12099
+ },
12100
+ {
12101
+ "epoch": 0.24035870197955142,
12102
+ "grad_norm": 6.073428153991699,
12103
+ "learning_rate": 1.9506982435505766e-05,
12104
+ "loss": 1.6321,
12105
+ "step": 345600
12106
+ },
12107
+ {
12108
+ "epoch": 0.2404977984506044,
12109
+ "grad_norm": 4.152920246124268,
12110
+ "learning_rate": 1.9506418452564844e-05,
12111
+ "loss": 1.6407,
12112
+ "step": 345800
12113
+ },
12114
+ {
12115
+ "epoch": 0.24063689492165738,
12116
+ "grad_norm": 3.3686416149139404,
12117
+ "learning_rate": 1.9505854159014972e-05,
12118
+ "loss": 1.7123,
12119
+ "step": 346000
12120
+ },
12121
+ {
12122
+ "epoch": 0.24077599139271036,
12123
+ "grad_norm": 4.510013103485107,
12124
+ "learning_rate": 1.950528955488309e-05,
12125
+ "loss": 1.6391,
12126
+ "step": 346200
12127
+ },
12128
+ {
12129
+ "epoch": 0.24091508786376334,
12130
+ "grad_norm": 4.174516201019287,
12131
+ "learning_rate": 1.9504724640196143e-05,
12132
+ "loss": 1.669,
12133
+ "step": 346400
12134
+ },
12135
+ {
12136
+ "epoch": 0.24105418433481635,
12137
+ "grad_norm": 4.815258979797363,
12138
+ "learning_rate": 1.9504159414981112e-05,
12139
+ "loss": 1.6308,
12140
+ "step": 346600
12141
+ },
12142
+ {
12143
+ "epoch": 0.24119328080586933,
12144
+ "grad_norm": 3.8510098457336426,
12145
+ "learning_rate": 1.950359387926497e-05,
12146
+ "loss": 1.6688,
12147
+ "step": 346800
12148
+ },
12149
+ {
12150
+ "epoch": 0.2413323772769223,
12151
+ "grad_norm": 6.116521835327148,
12152
+ "learning_rate": 1.950302803307472e-05,
12153
+ "loss": 1.6655,
12154
+ "step": 347000
12155
+ },
12156
+ {
12157
+ "epoch": 0.24147147374797528,
12158
+ "grad_norm": 4.095193862915039,
12159
+ "learning_rate": 1.9502461876437376e-05,
12160
+ "loss": 1.6587,
12161
+ "step": 347200
12162
+ },
12163
+ {
12164
+ "epoch": 0.24161057021902826,
12165
+ "grad_norm": 3.3929877281188965,
12166
+ "learning_rate": 1.9501895409379958e-05,
12167
+ "loss": 1.5897,
12168
+ "step": 347400
12169
+ },
12170
+ {
12171
+ "epoch": 0.24174966669008124,
12172
+ "grad_norm": 4.79518461227417,
12173
+ "learning_rate": 1.9501328631929515e-05,
12174
+ "loss": 1.6804,
12175
+ "step": 347600
12176
+ },
12177
+ {
12178
+ "epoch": 0.24188876316113422,
12179
+ "grad_norm": 4.848894119262695,
12180
+ "learning_rate": 1.9500761544113106e-05,
12181
+ "loss": 1.6742,
12182
+ "step": 347800
12183
+ },
12184
+ {
12185
+ "epoch": 0.2420278596321872,
12186
+ "grad_norm": 4.406215667724609,
12187
+ "learning_rate": 1.9500194145957797e-05,
12188
+ "loss": 1.711,
12189
+ "step": 348000
12190
+ },
12191
+ {
12192
+ "epoch": 0.24216695610324018,
12193
+ "grad_norm": 7.045769214630127,
12194
+ "learning_rate": 1.949962643749068e-05,
12195
+ "loss": 1.591,
12196
+ "step": 348200
12197
+ },
12198
+ {
12199
+ "epoch": 0.24230605257429316,
12200
+ "grad_norm": 5.135491847991943,
12201
+ "learning_rate": 1.9499058418738855e-05,
12202
+ "loss": 1.6447,
12203
+ "step": 348400
12204
+ },
12205
+ {
12206
+ "epoch": 0.24244514904534614,
12207
+ "grad_norm": 4.513916492462158,
12208
+ "learning_rate": 1.9498490089729438e-05,
12209
+ "loss": 1.6319,
12210
+ "step": 348600
12211
+ },
12212
+ {
12213
+ "epoch": 0.24258424551639912,
12214
+ "grad_norm": 3.753251791000366,
12215
+ "learning_rate": 1.949792145048956e-05,
12216
+ "loss": 1.6632,
12217
+ "step": 348800
12218
+ },
12219
+ {
12220
+ "epoch": 0.2427233419874521,
12221
+ "grad_norm": 3.935469150543213,
12222
+ "learning_rate": 1.949735250104637e-05,
12223
+ "loss": 1.678,
12224
+ "step": 349000
12225
+ },
12226
+ {
12227
+ "epoch": 0.24286243845850508,
12228
+ "grad_norm": 5.35392951965332,
12229
+ "learning_rate": 1.9496783241427026e-05,
12230
+ "loss": 1.6673,
12231
+ "step": 349200
12232
+ },
12233
+ {
12234
+ "epoch": 0.24300153492955806,
12235
+ "grad_norm": 4.7084879875183105,
12236
+ "learning_rate": 1.9496213671658703e-05,
12237
+ "loss": 1.6702,
12238
+ "step": 349400
12239
+ },
12240
+ {
12241
+ "epoch": 0.24314063140061104,
12242
+ "grad_norm": 4.929116249084473,
12243
+ "learning_rate": 1.94956437917686e-05,
12244
+ "loss": 1.6643,
12245
+ "step": 349600
12246
  }
12247
  ],
12248
  "logging_steps": 200,
 
12262
  "attributes": {}
12263
  }
12264
  },
12265
+ "total_flos": 4.654650097453105e+18,
12266
  "train_batch_size": 1,
12267
  "trial_name": null,
12268
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f194f0afbf00cd135f18b6f6e0dc2d489f2d84487accfafc9254221384d4d16
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078136780f82e4c02daf15d387ce5ace039d2fead953e9b2034a974f0b6417e9
3
  size 6840