iamnguyen committed on
Commit
08ec9d8
·
verified ·
1 Parent(s): 61073e4

Training in progress, step 2480, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "down_proj",
24
  "q_proj",
25
  "up_proj",
26
- "v_proj",
27
  "k_proj",
28
- "gate_proj",
29
- "o_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "down_proj",
24
  "q_proj",
25
  "up_proj",
 
26
  "k_proj",
27
+ "o_proj",
28
+ "v_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e296b9bb163171f7d3292e7f975cd8851abb7f06f131d59f3fc27ddc0971e89
3
  size 147770496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d2c33d436bbae09929b10f741d360ab726e726a94e1c65395877fddd4f9e454
3
  size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2409c85204b25b0c68db7e75cff61155bb985b9ed5e78015f6ae41839de13b08
3
  size 75455810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5011f00bab7f837ab30278bc7c4e1335b56b3448a40f1f394154b2d06a99d72e
3
  size 75455810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:477febd7eab5cb9a16378af748559d02a17dca77e420484aefc4f371d4457c5a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b6619ef6832a5ad150938e82c728b260a07dd3b3ae733f6bc9ea6d005dd4b4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c588c84c1abcb13cdaadd37cc81933f6bbcbd611f3b25ba28f6d4d519a813632
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fc524921ad40def3d582e1dca3b700998a5e2a41bf8de3461800594bda41107
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9593863375960551,
5
  "eval_steps": 500,
6
- "global_step": 2476,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17339,6 +17339,34 @@
17339
  "learning_rate": 4.170111648909736e-08,
17340
  "loss": 1.3903,
17341
  "step": 2476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17342
  }
17343
  ],
17344
  "logging_steps": 1.0,
@@ -17358,7 +17386,7 @@
17358
  "attributes": {}
17359
  }
17360
  },
17361
- "total_flos": 2.448841409909757e+18,
17362
  "train_batch_size": 1,
17363
  "trial_name": null,
17364
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9609362347488758,
5
  "eval_steps": 500,
6
+ "global_step": 2480,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17339
  "learning_rate": 4.170111648909736e-08,
17340
  "loss": 1.3903,
17341
  "step": 2476
17342
+ },
17343
+ {
17344
+ "epoch": 0.9597738118842603,
17345
+ "grad_norm": 0.20792566239833832,
17346
+ "learning_rate": 4.090411741682565e-08,
17347
+ "loss": 1.3801,
17348
+ "step": 2477
17349
+ },
17350
+ {
17351
+ "epoch": 0.9601612861724654,
17352
+ "grad_norm": 0.19542264938354492,
17353
+ "learning_rate": 4.0114776921067465e-08,
17354
+ "loss": 1.4094,
17355
+ "step": 2478
17356
+ },
17357
+ {
17358
+ "epoch": 0.9605487604606706,
17359
+ "grad_norm": 0.21897444128990173,
17360
+ "learning_rate": 3.933309622084103e-08,
17361
+ "loss": 1.4193,
17362
+ "step": 2479
17363
+ },
17364
+ {
17365
+ "epoch": 0.9609362347488758,
17366
+ "grad_norm": 0.20047014951705933,
17367
+ "learning_rate": 3.855907652333402e-08,
17368
+ "loss": 1.3831,
17369
+ "step": 2480
17370
  }
17371
  ],
17372
  "logging_steps": 1.0,
 
17386
  "attributes": {}
17387
  }
17388
  },
17389
+ "total_flos": 2.452775401006673e+18,
17390
  "train_batch_size": 1,
17391
  "trial_name": null,
17392
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76c56280507313f331f5ebcc5db348eedc6cc0a045fc5f4c2b79d99b9c533ba6
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9396e82ae7c78d167148ae12181b033e8cac56f246093dcb3425d034c307f798
3
  size 5560