ben81828 committed on
Commit
cd5eda0
·
verified ·
1 Parent(s): 2b7564d

Training in progress, step 1201

Browse files
adapter_config.json CHANGED
@@ -19,7 +19,7 @@
19
  "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
- "target_modules": "^(?!.*patch_embed).*(?:fc1|fc2|down_proj|proj|qkv|o_proj|q_proj|gate_proj|v_proj|k_proj|up_proj).*",
23
  "task_type": "CAUSAL_LM",
24
  "use_dora": false,
25
  "use_rslora": false
 
19
  "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
+ "target_modules": "^(?!.*patch_embed).*(?:gate_proj|qkv|q_proj|fc1|fc2|up_proj|v_proj|down_proj|k_proj|proj|o_proj).*",
23
  "task_type": "CAUSAL_LM",
24
  "use_dora": false,
25
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:419197c7b895b88c7afacbe056bbf0e1d22ab3d4ba5caafffba674acfa991533
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ee2634589442880539cea4215aeae5e195f22dea93b6441497472981b36691
3
  size 29034840
trainer_log.jsonl CHANGED
@@ -262,3 +262,8 @@
262
  {"current_steps": 1195, "total_steps": 3400, "loss": 0.0007, "lr": 7.714352216101055e-05, "epoch": 0.6155034766932784, "percentage": 35.15, "elapsed_time": "5:54:05", "remaining_time": "10:53:21", "throughput": 561.58, "total_tokens": 11930880}
263
  {"current_steps": 1200, "total_steps": 3400, "loss": 0.0038, "lr": 7.693899446759727e-05, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:30", "remaining_time": "10:51:46", "throughput": 561.67, "total_tokens": 11980800}
264
  {"current_steps": 1200, "total_steps": 3400, "eval_loss": 0.0022160401567816734, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:49", "remaining_time": "10:52:20", "throughput": 561.17, "total_tokens": 11980800}
 
 
 
 
 
 
262
  {"current_steps": 1195, "total_steps": 3400, "loss": 0.0007, "lr": 7.714352216101055e-05, "epoch": 0.6155034766932784, "percentage": 35.15, "elapsed_time": "5:54:05", "remaining_time": "10:53:21", "throughput": 561.58, "total_tokens": 11930880}
263
  {"current_steps": 1200, "total_steps": 3400, "loss": 0.0038, "lr": 7.693899446759727e-05, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:30", "remaining_time": "10:51:46", "throughput": 561.67, "total_tokens": 11980800}
264
  {"current_steps": 1200, "total_steps": 3400, "eval_loss": 0.0022160401567816734, "epoch": 0.6180788050476436, "percentage": 35.29, "elapsed_time": "5:55:49", "remaining_time": "10:52:20", "throughput": 561.17, "total_tokens": 11980800}
265
+ {"current_steps": 1205, "total_steps": 3400, "loss": 0.0002, "lr": 7.673382966299163e-05, "epoch": 0.6206541334020087, "percentage": 35.44, "elapsed_time": "5:57:20", "remaining_time": "10:50:55", "throughput": 561.12, "total_tokens": 12030720}
266
+ {"current_steps": 1210, "total_steps": 3400, "loss": 0.0262, "lr": 7.65280325993715e-05, "epoch": 0.623229461756374, "percentage": 35.59, "elapsed_time": "5:58:47", "remaining_time": "10:49:22", "throughput": 561.18, "total_tokens": 12080640}
267
+ {"current_steps": 1215, "total_steps": 3400, "loss": 0.0403, "lr": 7.63216081438678e-05, "epoch": 0.6258047901107391, "percentage": 35.74, "elapsed_time": "6:00:13", "remaining_time": "10:47:48", "throughput": 561.25, "total_tokens": 12130560}
268
+ {"current_steps": 1220, "total_steps": 3400, "loss": 0.002, "lr": 7.611456117844934e-05, "epoch": 0.6283801184651043, "percentage": 35.88, "elapsed_time": "6:01:39", "remaining_time": "10:46:14", "throughput": 561.32, "total_tokens": 12180480}
269
+ {"current_steps": 1201, "total_steps": 1200, "epoch": 0.6185938707185166, "percentage": 100.08, "elapsed_time": "0:01:42", "remaining_time": "0:00:00", "throughput": 117150.6, "total_tokens": 11990784}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dd22ff474a0bb22d03021b1610996d5087b23b263bba9bcd763538291569b0e
3
- size 7352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d805df10b9aa9257d71d396f9c297d6e4b0be2e8da8eecef0b7efe313a60a238
3
+ size 7416