MohamedAhmedAE committed (verified)
Commit c0e3782 · 1 parent: cd8ca16

Training in progress, step 349600
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a48e214829f3637e9f723c3d00cbe7d0201697caff8aaa56d68c10a31b8bf3be
+oid sha256:4cdea3181ff20981cb33a156aeb80b8e43818a8f05858b95c059340e21aa8733
 size 1342238560
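For context: each .safetensors / .pt / .pth / .bin entry in this diff is a Git LFS pointer file (version, oid, size), not the binary itself, so only the sha256 oid changes when the tracked file is replaced. A minimal sketch of checking a pointer against a locally pulled copy, assuming the file has been fetched with `git lfs pull`:

import hashlib
from pathlib import Path

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Compute the sha256 digest that Git LFS stores as the pointer's oid."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

local_file = Path("adapter_model.safetensors")  # assumes a local checkout at this commit
if local_file.exists():
    digest = lfs_sha256(str(local_file))
    # Should equal the "+" oid above if the checkout matches this commit.
    print(digest == "4cdea3181ff20981cb33a156aeb80b8e43818a8f05858b95c059340e21aa8733")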
last-checkpoint/adapter_config.json CHANGED
@@ -23,12 +23,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "v_proj",
-    "o_proj",
     "q_proj",
-    "k_proj",
     "down_proj",
+    "v_proj",
+    "k_proj",
+    "gate_proj",
+    "o_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a48e214829f3637e9f723c3d00cbe7d0201697caff8aaa56d68c10a31b8bf3be
+oid sha256:558f0558dbe2ed2fed185bbe33a32e697578eb37a71364f4ae39a77ac585d1c8
 size 1342238560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eadd45627c8ca1194f6799e6a9101451f15e5a8d56693bcdd7dcb2483df02796
+oid sha256:781887d172314801ab8802842158c08145ef998a6a80b07686139a50d9285ded
 size 683268498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb1195b42eac89ca32807fbd177cb327563eb691518ecba53b3eff5532117160
+oid sha256:e85f0257d01a91ff4050d39219e8dd384bbb4cfdc5b2e0fb4fabf6b2fe3b33e2
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13dcd1e5d0637b6d75833d77a011a083cb0e718f6b0a9d87d999b175835a71d2
+oid sha256:840aa8e3a2615e43038d3be582aa3892a5d4ec1157dbf18b35d8a9ff2904fee4
 size 1064
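optimizer.pt, scheduler.pt and rng_state.pth, together with trainer_state.json below, are what allow the Hugging Face Trainer to resume this run exactly (optimizer moments, LR-schedule position and RNG state) via trainer.train(resume_from_checkpoint="last-checkpoint"). A minimal inspection sketch, assuming the files have been pulled locally; the exact keys inside rng_state.pth are an assumption based on what Trainer typically saves:

import torch

# Hypothetical local paths inside a clone with `git lfs pull` already run.
rng_state = torch.load("last-checkpoint/rng_state.pth", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", weights_only=False)

print(sorted(rng_state.keys()))  # typically python / numpy / cpu (and cuda) RNG states
print(scheduler_state)           # the LR scheduler's state_dict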
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.24300153492955806,
+  "epoch": 0.23855044785586269,
   "eval_steps": 500,
-  "global_step": 349400,
+  "global_step": 343000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12012,230 +12012,6 @@
       "learning_rate": 1.9514285936093064e-05,
       "loss": 1.6889,
       "step": 343000
-    },
-    {
-      "epoch": 0.23868954432691566,
-      "grad_norm": 3.7890496253967285,
-      "learning_rate": 1.951372599351318e-05,
-      "loss": 1.6764,
-      "step": 343200
-    },
-    {
-      "epoch": 0.23882864079796864,
-      "grad_norm": 4.050221920013428,
-      "learning_rate": 1.9513165739975493e-05,
-      "loss": 1.6499,
-      "step": 343400
-    },
-    {
-      "epoch": 0.23896773726902162,
-      "grad_norm": 7.2723388671875,
-      "learning_rate": 1.951260517550675e-05,
-      "loss": 1.6283,
-      "step": 343600
-    },
-    {
-      "epoch": 0.2391068337400746,
-      "grad_norm": 4.328615665435791,
-      "learning_rate": 1.951204430013371e-05,
-      "loss": 1.6806,
-      "step": 343800
-    },
-    {
-      "epoch": 0.23924593021112758,
-      "grad_norm": 6.319999694824219,
-      "learning_rate": 1.9511483113883144e-05,
-      "loss": 1.6528,
-      "step": 344000
-    },
-    {
-      "epoch": 0.23938502668218056,
-      "grad_norm": 3.773545265197754,
-      "learning_rate": 1.9510921616781844e-05,
-      "loss": 1.643,
-      "step": 344200
-    },
-    {
-      "epoch": 0.23952412315323354,
-      "grad_norm": 4.095102787017822,
-      "learning_rate": 1.9510359808856623e-05,
-      "loss": 1.6922,
-      "step": 344400
-    },
-    {
-      "epoch": 0.23966321962428652,
-      "grad_norm": 5.804976463317871,
-      "learning_rate": 1.950979769013429e-05,
-      "loss": 1.6055,
-      "step": 344600
-    },
-    {
-      "epoch": 0.2398023160953395,
-      "grad_norm": 9.323091506958008,
-      "learning_rate": 1.9509235260641682e-05,
-      "loss": 1.6792,
-      "step": 344800
-    },
-    {
-      "epoch": 0.23994141256639248,
-      "grad_norm": 3.1342084407806396,
-      "learning_rate": 1.950867252040566e-05,
-      "loss": 1.6612,
-      "step": 345000
-    },
-    {
-      "epoch": 0.24008050903744546,
-      "grad_norm": 5.803501605987549,
-      "learning_rate": 1.9508109469453075e-05,
-      "loss": 1.6417,
-      "step": 345200
-    },
-    {
-      "epoch": 0.24021960550849844,
-      "grad_norm": 2.7702744007110596,
-      "learning_rate": 1.9507546107810813e-05,
-      "loss": 1.6397,
-      "step": 345400
-    },
-    {
-      "epoch": 0.24035870197955142,
-      "grad_norm": 6.073428153991699,
-      "learning_rate": 1.9506982435505766e-05,
-      "loss": 1.6321,
-      "step": 345600
-    },
-    {
-      "epoch": 0.2404977984506044,
-      "grad_norm": 4.152920246124268,
-      "learning_rate": 1.9506418452564844e-05,
-      "loss": 1.6407,
-      "step": 345800
-    },
-    {
-      "epoch": 0.24063689492165738,
-      "grad_norm": 3.3686416149139404,
-      "learning_rate": 1.9505854159014972e-05,
-      "loss": 1.7123,
-      "step": 346000
-    },
-    {
-      "epoch": 0.24077599139271036,
-      "grad_norm": 4.510013103485107,
-      "learning_rate": 1.950528955488309e-05,
-      "loss": 1.6391,
-      "step": 346200
-    },
-    {
-      "epoch": 0.24091508786376334,
-      "grad_norm": 4.174516201019287,
-      "learning_rate": 1.9504724640196143e-05,
-      "loss": 1.669,
-      "step": 346400
-    },
-    {
-      "epoch": 0.24105418433481635,
-      "grad_norm": 4.815258979797363,
-      "learning_rate": 1.9504159414981112e-05,
-      "loss": 1.6308,
-      "step": 346600
-    },
-    {
-      "epoch": 0.24119328080586933,
-      "grad_norm": 3.8510098457336426,
-      "learning_rate": 1.950359387926497e-05,
-      "loss": 1.6688,
-      "step": 346800
-    },
-    {
-      "epoch": 0.2413323772769223,
-      "grad_norm": 6.116521835327148,
-      "learning_rate": 1.950302803307472e-05,
-      "loss": 1.6655,
-      "step": 347000
-    },
-    {
-      "epoch": 0.24147147374797528,
-      "grad_norm": 4.095193862915039,
-      "learning_rate": 1.9502461876437376e-05,
-      "loss": 1.6587,
-      "step": 347200
-    },
-    {
-      "epoch": 0.24161057021902826,
-      "grad_norm": 3.3929877281188965,
-      "learning_rate": 1.9501895409379958e-05,
-      "loss": 1.5897,
-      "step": 347400
-    },
-    {
-      "epoch": 0.24174966669008124,
-      "grad_norm": 4.79518461227417,
-      "learning_rate": 1.9501328631929515e-05,
-      "loss": 1.6804,
-      "step": 347600
-    },
-    {
-      "epoch": 0.24188876316113422,
-      "grad_norm": 4.848894119262695,
-      "learning_rate": 1.9500761544113106e-05,
-      "loss": 1.6742,
-      "step": 347800
-    },
-    {
-      "epoch": 0.2420278596321872,
-      "grad_norm": 4.406215667724609,
-      "learning_rate": 1.9500194145957797e-05,
-      "loss": 1.711,
-      "step": 348000
-    },
-    {
-      "epoch": 0.24216695610324018,
-      "grad_norm": 7.045769214630127,
-      "learning_rate": 1.949962643749068e-05,
-      "loss": 1.591,
-      "step": 348200
-    },
-    {
-      "epoch": 0.24230605257429316,
-      "grad_norm": 5.135491847991943,
-      "learning_rate": 1.9499058418738855e-05,
-      "loss": 1.6447,
-      "step": 348400
-    },
-    {
-      "epoch": 0.24244514904534614,
-      "grad_norm": 4.513916492462158,
-      "learning_rate": 1.9498490089729438e-05,
-      "loss": 1.6319,
-      "step": 348600
-    },
-    {
-      "epoch": 0.24258424551639912,
-      "grad_norm": 3.753251791000366,
-      "learning_rate": 1.949792145048956e-05,
-      "loss": 1.6632,
-      "step": 348800
-    },
-    {
-      "epoch": 0.2427233419874521,
-      "grad_norm": 3.935469150543213,
-      "learning_rate": 1.949735250104637e-05,
-      "loss": 1.678,
-      "step": 349000
-    },
-    {
-      "epoch": 0.24286243845850508,
-      "grad_norm": 5.35392951965332,
-      "learning_rate": 1.9496783241427026e-05,
-      "loss": 1.6673,
-      "step": 349200
-    },
-    {
-      "epoch": 0.24300153492955806,
-      "grad_norm": 4.7084879875183105,
-      "learning_rate": 1.9496213671658703e-05,
-      "loss": 1.6702,
-      "step": 349400
     }
   ],
   "logging_steps": 200,
@@ -12255,7 +12031,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.652032117813862e+18,
+  "total_flos": 4.567214300600918e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:078136780f82e4c02daf15d387ce5ace039d2fead953e9b2034a974f0b6417e9
+oid sha256:2f194f0afbf00cd135f18b6f6e0dc2d489f2d84487accfafc9254221384d4d16
 size 6840
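training_args.bin is the pickled TrainingArguments object for the run; its oid changes here while the size stays 6840 bytes, so something in the serialized arguments differs between the two checkpoints. A minimal inspection sketch, assuming a transformers install compatible with the one that wrote the file (recent torch versions default to weights_only=True, which must be disabled for pickled objects):

import torch

# Hypothetical local path inside a checkout with `git lfs pull` already run.
args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(args.per_device_train_batch_size, args.learning_rate, args.logging_steps)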