ben81828 commited on
Commit
37fdae2
·
verified ·
1 Parent(s): f1f0c4d

Training in progress, step 1700

Browse files
adapter_config.json CHANGED
@@ -19,7 +19,7 @@
19
  "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
- "target_modules": "^(?!.*patch_embed).*(?:fc1|proj|down_proj|fc2|qkv|o_proj|q_proj|k_proj|up_proj|gate_proj|v_proj).*",
23
  "task_type": "CAUSAL_LM",
24
  "use_dora": false,
25
  "use_rslora": false
 
19
  "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
+ "target_modules": "^(?!.*patch_embed).*(?:q_proj|qkv|gate_proj|fc1|up_proj|k_proj|o_proj|fc2|down_proj|v_proj|proj).*",
23
  "task_type": "CAUSAL_LM",
24
  "use_dora": false,
25
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a1114471ad61d66a88788a2005b7be221cdbe9e2fd3f43e9a2185e89b6fad6f
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2509fd541c293692844871c3c3ef3462376501768f133b9acc8bdb3a34cf0369
3
  size 29034840
trainer_log.jsonl CHANGED
@@ -362,3 +362,25 @@
362
  {"current_steps": 1645, "total_steps": 6770, "loss": 0.326, "lr": 9.016469573730869e-05, "epoch": 0.4859675036927622, "percentage": 24.3, "elapsed_time": "3:14:27", "remaining_time": "10:05:49", "throughput": 1463.76, "total_tokens": 17077904}
363
  {"current_steps": 1650, "total_steps": 6770, "loss": 0.2698, "lr": 9.009183930952836e-05, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:15:53", "remaining_time": "10:07:50", "throughput": 1457.55, "total_tokens": 17130896}
364
  {"current_steps": 1650, "total_steps": 6770, "eval_loss": 0.40201568603515625, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:16:12", "remaining_time": "10:08:51", "throughput": 1455.14, "total_tokens": 17130896}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  {"current_steps": 1645, "total_steps": 6770, "loss": 0.326, "lr": 9.016469573730869e-05, "epoch": 0.4859675036927622, "percentage": 24.3, "elapsed_time": "3:14:27", "remaining_time": "10:05:49", "throughput": 1463.76, "total_tokens": 17077904}
363
  {"current_steps": 1650, "total_steps": 6770, "loss": 0.2698, "lr": 9.009183930952836e-05, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:15:53", "remaining_time": "10:07:50", "throughput": 1457.55, "total_tokens": 17130896}
364
  {"current_steps": 1650, "total_steps": 6770, "eval_loss": 0.40201568603515625, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:16:12", "remaining_time": "10:08:51", "throughput": 1455.14, "total_tokens": 17130896}
365
+ {"current_steps": 1655, "total_steps": 6770, "loss": 0.2956, "lr": 9.00187436942368e-05, "epoch": 0.48892171344165436, "percentage": 24.45, "elapsed_time": "3:17:46", "remaining_time": "10:11:15", "throughput": 1448.0, "total_tokens": 17182896}
366
+ {"current_steps": 1660, "total_steps": 6770, "loss": 0.3027, "lr": 8.994540932752167e-05, "epoch": 0.49039881831610044, "percentage": 24.52, "elapsed_time": "3:19:14", "remaining_time": "10:13:19", "throughput": 1441.79, "total_tokens": 17235552}
367
+ {"current_steps": 1665, "total_steps": 6770, "loss": 0.3295, "lr": 8.987183664689511e-05, "epoch": 0.4918759231905465, "percentage": 24.59, "elapsed_time": "3:20:43", "remaining_time": "10:15:25", "throughput": 1435.39, "total_tokens": 17286816}
368
+ {"current_steps": 1670, "total_steps": 6770, "loss": 0.3201, "lr": 8.9798026091291e-05, "epoch": 0.4933530280649926, "percentage": 24.67, "elapsed_time": "3:22:11", "remaining_time": "10:17:28", "throughput": 1429.24, "total_tokens": 17339072}
369
+ {"current_steps": 1675, "total_steps": 6770, "loss": 0.3044, "lr": 8.972397810106235e-05, "epoch": 0.4948301329394387, "percentage": 24.74, "elapsed_time": "3:23:40", "remaining_time": "10:19:31", "throughput": 1423.15, "total_tokens": 17391288}
370
+ {"current_steps": 1680, "total_steps": 6770, "loss": 0.2781, "lr": 8.964969311797871e-05, "epoch": 0.4963072378138848, "percentage": 24.82, "elapsed_time": "3:25:07", "remaining_time": "10:21:28", "throughput": 1417.32, "total_tokens": 17443456}
371
+ {"current_steps": 1685, "total_steps": 6770, "loss": 0.423, "lr": 8.957517158522359e-05, "epoch": 0.4977843426883309, "percentage": 24.89, "elapsed_time": "3:26:34", "remaining_time": "10:23:23", "throughput": 1411.54, "total_tokens": 17494832}
372
+ {"current_steps": 1690, "total_steps": 6770, "loss": 0.2747, "lr": 8.950041394739168e-05, "epoch": 0.49926144756277696, "percentage": 24.96, "elapsed_time": "3:28:01", "remaining_time": "10:25:19", "throughput": 1405.84, "total_tokens": 17547384}
373
+ {"current_steps": 1695, "total_steps": 6770, "loss": 0.3162, "lr": 8.942542065048632e-05, "epoch": 0.5007385524372231, "percentage": 25.04, "elapsed_time": "3:29:30", "remaining_time": "10:27:15", "throughput": 1400.09, "total_tokens": 17599120}
374
+ {"current_steps": 1700, "total_steps": 6770, "loss": 0.3904, "lr": 8.935019214191672e-05, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "3:30:56", "remaining_time": "10:29:06", "throughput": 1394.6, "total_tokens": 17650984}
375
+ {"current_steps": 1700, "total_steps": 6770, "eval_loss": 0.3297054171562195, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "3:31:15", "remaining_time": "10:30:04", "throughput": 1392.48, "total_tokens": 17650984}
376
+ {"current_steps": 1655, "total_steps": 6770, "loss": 0.2956, "lr": 9.00187436942368e-05, "epoch": 0.48892171344165436, "percentage": 24.45, "elapsed_time": "0:02:46", "remaining_time": "0:08:35", "throughput": 103093.65, "total_tokens": 17182896}
377
+ {"current_steps": 1660, "total_steps": 6770, "loss": 0.3027, "lr": 8.994540932752167e-05, "epoch": 0.49039881831610044, "percentage": 24.52, "elapsed_time": "0:04:14", "remaining_time": "0:13:02", "throughput": 67768.5, "total_tokens": 17235552}
378
+ {"current_steps": 1665, "total_steps": 6770, "loss": 0.3295, "lr": 8.987183664689511e-05, "epoch": 0.4918759231905465, "percentage": 24.59, "elapsed_time": "0:05:40", "remaining_time": "0:17:24", "throughput": 50751.01, "total_tokens": 17286816}
379
+ {"current_steps": 1670, "total_steps": 6770, "loss": 0.3201, "lr": 8.9798026091291e-05, "epoch": 0.4933530280649926, "percentage": 24.67, "elapsed_time": "0:07:08", "remaining_time": "0:21:47", "throughput": 40490.2, "total_tokens": 17339072}
380
+ {"current_steps": 1675, "total_steps": 6770, "loss": 0.3044, "lr": 8.972397810106235e-05, "epoch": 0.4948301329394387, "percentage": 24.74, "elapsed_time": "0:08:34", "remaining_time": "0:26:04", "throughput": 33807.84, "total_tokens": 17391288}
381
+ {"current_steps": 1680, "total_steps": 6770, "loss": 0.2781, "lr": 8.964969311797871e-05, "epoch": 0.4963072378138848, "percentage": 24.82, "elapsed_time": "0:10:01", "remaining_time": "0:30:21", "throughput": 29020.42, "total_tokens": 17443456}
382
+ {"current_steps": 1685, "total_steps": 6770, "loss": 0.423, "lr": 8.957517158522359e-05, "epoch": 0.4977843426883309, "percentage": 24.89, "elapsed_time": "0:11:26", "remaining_time": "0:34:31", "throughput": 25485.2, "total_tokens": 17494832}
383
+ {"current_steps": 1690, "total_steps": 6770, "loss": 0.2747, "lr": 8.950041394739168e-05, "epoch": 0.49926144756277696, "percentage": 24.96, "elapsed_time": "0:12:53", "remaining_time": "0:38:45", "throughput": 22678.11, "total_tokens": 17547384}
384
+ {"current_steps": 1695, "total_steps": 6770, "loss": 0.3162, "lr": 8.942542065048632e-05, "epoch": 0.5007385524372231, "percentage": 25.04, "elapsed_time": "0:14:20", "remaining_time": "0:42:55", "throughput": 20457.93, "total_tokens": 17599120}
385
+ {"current_steps": 1700, "total_steps": 6770, "loss": 0.3904, "lr": 8.935019214191672e-05, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "0:15:49", "remaining_time": "0:47:11", "throughput": 18591.57, "total_tokens": 17650984}
386
+ {"current_steps": 1700, "total_steps": 6770, "eval_loss": 0.3297054171562195, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "0:16:37", "remaining_time": "0:49:34", "throughput": 17697.21, "total_tokens": 17650984}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfb4b4312b0dec56488d15e8a46a372ea451bb29daae5dc2f31b7c95a1a9b038
3
  size 7480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157f40a64e5df8b268b803366ba3fce3e3b5e6e2f20d7c81bfc44726f0bee4e8
3
  size 7480