ncbateman committed on
Commit
cc2a9a6
1 Parent(s): 5bd6e9a

Training in progress, step 5, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "k_proj",
25
- "o_proj",
26
  "v_proj",
27
  "down_proj",
28
- "q_proj",
29
- "up_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "q_proj",
 
 
24
  "v_proj",
25
  "down_proj",
26
+ "gate_proj",
27
+ "k_proj",
28
+ "up_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df3c4f1ca07d9487eeae4cdd4d179772c9f28050b7c284fecb0261c95d85250f
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d847474f09d76040ea9a945e5aa072ca85a0825970adc2b5fdcc4e94a928b950
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21e4bb0c28036ed1d2f3636f890837574d0b6055c949c9013e872fe6d9858bca
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bc832e1d6d58a296221fd7d502797f542b8623e1ae088266b38b7a5c1d67b17
3
  size 23159290
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a33506d13deb15f4b59817cb1048b61613518f1ef0883689e9f4785b23e81b15
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69dcafec2603b1c1ced4bbd4d60d2848e0b6db84973c4da004925986c6bce1d9
3
  size 14512
last-checkpoint/trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.010309278350515464,
13
- "grad_norm": 0.7261636853218079,
14
  "learning_rate": 2.0000000000000003e-06,
15
  "loss": 0.9943,
16
  "step": 1
@@ -18,69 +18,69 @@
18
  {
19
  "epoch": 0.010309278350515464,
20
  "eval_loss": 0.9863536357879639,
21
- "eval_runtime": 11.7218,
22
- "eval_samples_per_second": 13.991,
23
- "eval_steps_per_second": 1.792,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.020618556701030927,
28
- "grad_norm": 0.6982033252716064,
29
  "learning_rate": 4.000000000000001e-06,
30
  "loss": 0.9017,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.020618556701030927,
35
- "eval_loss": 0.9875333905220032,
36
- "eval_runtime": 11.8029,
37
- "eval_samples_per_second": 13.895,
38
- "eval_steps_per_second": 1.779,
39
  "step": 2
40
  },
41
  {
42
  "epoch": 0.030927835051546393,
43
- "grad_norm": 1.0823779106140137,
44
  "learning_rate": 6e-06,
45
- "loss": 1.0974,
46
  "step": 3
47
  },
48
  {
49
  "epoch": 0.030927835051546393,
50
- "eval_loss": 0.9870172142982483,
51
- "eval_runtime": 11.8323,
52
- "eval_samples_per_second": 13.86,
53
- "eval_steps_per_second": 1.775,
54
  "step": 3
55
  },
56
  {
57
  "epoch": 0.041237113402061855,
58
- "grad_norm": 0.6564100980758667,
59
  "learning_rate": 8.000000000000001e-06,
60
- "loss": 0.813,
61
  "step": 4
62
  },
63
  {
64
  "epoch": 0.041237113402061855,
65
- "eval_loss": 0.9877360463142395,
66
- "eval_runtime": 11.8504,
67
- "eval_samples_per_second": 13.839,
68
- "eval_steps_per_second": 1.772,
69
  "step": 4
70
  },
71
  {
72
  "epoch": 0.05154639175257732,
73
- "grad_norm": 0.8452497720718384,
74
  "learning_rate": 1e-05,
75
- "loss": 0.9212,
76
  "step": 5
77
  },
78
  {
79
  "epoch": 0.05154639175257732,
80
- "eval_loss": 0.9856494665145874,
81
- "eval_runtime": 11.8911,
82
- "eval_samples_per_second": 13.792,
83
- "eval_steps_per_second": 1.766,
84
  "step": 5
85
  }
86
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.010309278350515464,
13
+ "grad_norm": 0.7860156893730164,
14
  "learning_rate": 2.0000000000000003e-06,
15
  "loss": 0.9943,
16
  "step": 1
 
18
  {
19
  "epoch": 0.010309278350515464,
20
  "eval_loss": 0.9863536357879639,
21
+ "eval_runtime": 11.8025,
22
+ "eval_samples_per_second": 13.895,
23
+ "eval_steps_per_second": 1.779,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.020618556701030927,
28
+ "grad_norm": 0.7550910115242004,
29
  "learning_rate": 4.000000000000001e-06,
30
  "loss": 0.9017,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.020618556701030927,
35
+ "eval_loss": 0.9886725544929504,
36
+ "eval_runtime": 11.9125,
37
+ "eval_samples_per_second": 13.767,
38
+ "eval_steps_per_second": 1.763,
39
  "step": 2
40
  },
41
  {
42
  "epoch": 0.030927835051546393,
43
+ "grad_norm": 1.1944996118545532,
44
  "learning_rate": 6e-06,
45
+ "loss": 1.1019,
46
  "step": 3
47
  },
48
  {
49
  "epoch": 0.030927835051546393,
50
+ "eval_loss": 0.9872242212295532,
51
+ "eval_runtime": 11.8973,
52
+ "eval_samples_per_second": 13.785,
53
+ "eval_steps_per_second": 1.765,
54
  "step": 3
55
  },
56
  {
57
  "epoch": 0.041237113402061855,
58
+ "grad_norm": 0.7197827696800232,
59
  "learning_rate": 8.000000000000001e-06,
60
+ "loss": 0.8137,
61
  "step": 4
62
  },
63
  {
64
  "epoch": 0.041237113402061855,
65
+ "eval_loss": 0.9864019751548767,
66
+ "eval_runtime": 11.966,
67
+ "eval_samples_per_second": 13.705,
68
+ "eval_steps_per_second": 1.755,
69
  "step": 4
70
  },
71
  {
72
  "epoch": 0.05154639175257732,
73
+ "grad_norm": 0.9193218946456909,
74
  "learning_rate": 1e-05,
75
+ "loss": 0.9198,
76
  "step": 5
77
  },
78
  {
79
  "epoch": 0.05154639175257732,
80
+ "eval_loss": 0.984832227230072,
81
+ "eval_runtime": 11.8502,
82
+ "eval_samples_per_second": 13.839,
83
+ "eval_steps_per_second": 1.772,
84
  "step": 5
85
  }
86
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7aab7569b3377c35eb78eba184a0601d7c745483ceeee6fb9509a33c18bb0950
3
  size 6712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d97c4fa40f12b62671931897af867d0478f2095eee9e2b8b99df811f0daadbfa
3
  size 6712