MohamedAhmedAE committed (verified)
Commit aac52d8 · 1 Parent(s): 8b39bc1

Training in progress, step 87200, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "q_proj",
     "up_proj",
-    "gate_proj",
-    "v_proj",
+    "q_proj",
+    "o_proj",
+    "down_proj",
     "k_proj",
-    "o_proj"
+    "gate_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c827c6acb286eef9eb5d9fab2316b7545ab03a9b49ef673a99c9760af01f486c
+oid sha256:e2266f5450f07ca58be26969588d2309a083856c6f1fbfcfef2944823461d4b8
 size 2684416208
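
The checkpoint blobs themselves live in Git LFS; only the pointer (oid and size) changes in the repository. A minimal sketch, assuming the file has been pulled locally, for checking the downloaded weights against the new oid recorded above:

# Verify a downloaded checkpoint file against the sha256 oid in its LFS pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB checkpoints do not need to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "e2266f5450f07ca58be26969588d2309a083856c6f1fbfcfef2944823461d4b8"  # new oid above
assert sha256_of("last-checkpoint/adapter_model.safetensors") == expected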
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed24af72109d31f584e84102cf570b3ecb488d7c1a351bcbcfffde2791f83406
+oid sha256:fbb1a1ecaf99901331980a6092485606f5fe6b39f40f14c4d977abd6611b621b
 size 1364844242
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f17c9ef1f7995726c517252ce76596fff06bfd0bd04d841db28af93fbf681c2
+oid sha256:150cb34b7b1a58c082469350f25504af368f8c5c46b3fccd8d280708ff720ac8
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a35fbe204f24b2fb43e35237525d951bf4c389930c0542629031c4bddc16ea54
+oid sha256:2e6917f22460b17aaa8e706e55d947d0135dd26e75e383913c8231aebf75deef
 size 1064
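
optimizer.pt, scheduler.pt, rng_state.pth and the trainer_state.json below are the files transformers.Trainer reloads when training is resumed. A minimal sketch, assuming an already-constructed Trainer instance named trainer:

# Resuming from this checkpoint directory restores the optimizer, LR scheduler,
# RNG state and global_step before training continues.
# `trainer` is assumed to be an already-constructed transformers.Trainer.
def resume_training(trainer, checkpoint_dir: str = "last-checkpoint"):
    return trainer.train(resume_from_checkpoint=checkpoint_dir)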
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1187883036640767,
+  "epoch": 0.12129203840172702,
   "eval_steps": 200,
-  "global_step": 85400,
+  "global_step": 87200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2380,6 +2380,69 @@
       "learning_rate": 1.9311780891052998e-05,
       "loss": 1.6567,
       "step": 85400
+    },
+    {
+      "epoch": 0.1190664964127045,
+      "grad_norm": 0.23755620419979095,
+      "learning_rate": 1.930859113373952e-05,
+      "loss": 1.7054,
+      "step": 85600
+    },
+    {
+      "epoch": 0.11934468916133233,
+      "grad_norm": 0.29518914222717285,
+      "learning_rate": 1.9305394266234104e-05,
+      "loss": 1.6406,
+      "step": 85800
+    },
+    {
+      "epoch": 0.11962288190996014,
+      "grad_norm": 0.5197004675865173,
+      "learning_rate": 1.9302190290978622e-05,
+      "loss": 1.6807,
+      "step": 86000
+    },
+    {
+      "epoch": 0.11990107465858794,
+      "grad_norm": 0.2740679979324341,
+      "learning_rate": 1.929897921042036e-05,
+      "loss": 1.6977,
+      "step": 86200
+    },
+    {
+      "epoch": 0.12017926740721577,
+      "grad_norm": 0.33021771907806396,
+      "learning_rate": 1.9295761027012046e-05,
+      "loss": 1.6943,
+      "step": 86400
+    },
+    {
+      "epoch": 0.12045746015584358,
+      "grad_norm": 0.32778891921043396,
+      "learning_rate": 1.929253574321183e-05,
+      "loss": 1.6941,
+      "step": 86600
+    },
+    {
+      "epoch": 0.12073565290447139,
+      "grad_norm": 0.3531610369682312,
+      "learning_rate": 1.9289303361483284e-05,
+      "loss": 1.7031,
+      "step": 86800
+    },
+    {
+      "epoch": 0.12101384565309921,
+      "grad_norm": 0.4716193377971649,
+      "learning_rate": 1.9286063884295397e-05,
+      "loss": 1.668,
+      "step": 87000
+    },
+    {
+      "epoch": 0.12129203840172702,
+      "grad_norm": 0.35484832525253296,
+      "learning_rate": 1.928281731412259e-05,
+      "loss": 1.7128,
+      "step": 87200
     }
   ],
   "logging_steps": 200,
@@ -2399,7 +2462,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.663223037366141e+18,
+  "total_flos": 1.7288134689589985e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null