beamaia committed on
Commit c7be3b7 · verified · 1 Parent(s): f2e305b

Training in progress, step 100, checkpoint

checkpoint-100/adapter_config.json CHANGED
@@ -20,10 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
     "q_proj",
+    "k_proj",
     "v_proj",
-    "k_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e3a00b2723924481e99ca7420904a3063b73b668f14806f7769bf6279ec38f7
+oid sha256:d1d26bc61e21d0b93fd9b446c555c662c0dced2ed04a45c6e79b26eeb3ee52e9
 size 6849416
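
The adapter weights themselves live in Git LFS, so only the pointer file changes here: a new sha256 oid with an unchanged size of 6849416 bytes. A minimal sketch of verifying a downloaded blob against such a pointer (the local path is illustrative):

# Sketch: check a downloaded LFS object against the oid/size from its pointer file.
import hashlib
import os

def verify_lfs_object(blob_path, expected_oid, expected_size):
    # Compare the on-disk size first, then the streamed SHA-256 digest.
    if os.path.getsize(blob_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Usage with the new pointer values from this commit:
verify_lfs_object(
    "checkpoint-100/adapter_model.safetensors",
    "d1d26bc61e21d0b93fd9b446c555c662c0dced2ed04a45c6e79b26eeb3ee52e9",
    6849416,
)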
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:101b9b6f741ef23eec4c6ec86585ed72f0eb201356b3c71f133178a65ad02838
+oid sha256:9a3d921e02a11aa9aed101e1624589a67a8ae88f444b6f2746ea10128ca55dc8
 size 13846266
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:227afea00680bdcdcf19d54b572f61ab2e563bd954561db8d7fee74cde40c145
+oid sha256:363bcb1976d3d8f69d575a3bb74fad2f79e9d75da57793c889da5e2ae17ef801
 size 1064
checkpoint-100/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.47333332896232605,
+  "best_metric": 0.3468475937843323,
   "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.18-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
   "epoch": 0.684931506849315,
   "eval_steps": 50,
@@ -10,95 +10,95 @@
   "log_history": [
     {
       "epoch": 0.14,
-      "grad_norm": 1.8155961925003794e-06,
-      "learning_rate": 0.00018,
-      "loss": 0.4616,
+      "grad_norm": 3.3949286937713623,
+      "learning_rate": 0.00018142857142857142,
+      "loss": 0.4993,
       "step": 20,
-      "train/kl": 0.0,
-      "train/logps/chosen": -384.02016395246477,
-      "train/logps/rejected": -362.7105863764045,
-      "train/rewards/chosen": -9.318169392330546,
-      "train/rewards/margins": -0.6738980560858128,
-      "train/rewards/rejected": -8.644271336244733
+      "train/kl": 2.0773630142211914,
+      "train/logps/chosen": -280.7225560897436,
+      "train/logps/rejected": -280.629668445122,
+      "train/rewards/chosen": -0.4658302894005409,
+      "train/rewards/margins": 0.008156592134686835,
+      "train/rewards/rejected": -0.4739868815352277
     },
     {
       "epoch": 0.27,
-      "grad_norm": 0.0,
-      "learning_rate": 0.00015142857142857143,
-      "loss": 0.4875,
+      "grad_norm": 2.1768083572387695,
+      "learning_rate": 0.00015285714285714287,
+      "loss": 0.412,
       "step": 40,
-      "train/kl": 0.0,
-      "train/logps/chosen": -517.3250200320513,
-      "train/logps/rejected": -543.1232374237804,
-      "train/rewards/chosen": -23.67786583533654,
-      "train/rewards/margins": 1.115628412472148,
-      "train/rewards/rejected": -24.793494247808688
+      "train/kl": 11.56242561340332,
+      "train/logps/chosen": -279.2820556640625,
+      "train/logps/rejected": -285.233056640625,
+      "train/rewards/chosen": 1.6047859191894531,
+      "train/rewards/margins": 1.591878777742386,
+      "train/rewards/rejected": 0.01290714144706726
     },
     {
       "epoch": 0.34,
-      "eval/kl": 0.0,
-      "eval/logps/chosen": -559.2342099471831,
-      "eval/logps/rejected": -549.5850474683544,
-      "eval/rewards/chosen": -27.557097958846832,
-      "eval/rewards/margins": 0.9651281091911414,
-      "eval/rewards/rejected": -28.522226068037973,
-      "eval_loss": 0.47333332896232605,
-      "eval_runtime": 140.5206,
-      "eval_samples_per_second": 2.135,
-      "eval_steps_per_second": 0.534,
+      "eval/kl": 34.189842224121094,
+      "eval/logps/chosen": -240.1980633802817,
+      "eval/logps/rejected": -241.65251681170886,
+      "eval/rewards/chosen": 4.346213757152289,
+      "eval/rewards/margins": 2.0745202015376725,
+      "eval/rewards/rejected": 2.2716935556146165,
+      "eval_loss": 0.3987465500831604,
+      "eval_runtime": 139.8252,
+      "eval_samples_per_second": 2.146,
+      "eval_steps_per_second": 0.536,
       "step": 50
     },
     {
       "epoch": 0.41,
-      "grad_norm": 0.0,
-      "learning_rate": 0.00012285714285714287,
-      "loss": 0.4531,
+      "grad_norm": 3.7959017753601074,
+      "learning_rate": 0.00012428571428571428,
+      "loss": 0.409,
       "step": 60,
-      "train/kl": 0.0,
-      "train/logps/chosen": -566.4386314655172,
-      "train/logps/rejected": -569.3920535714286,
-      "train/rewards/chosen": -27.128678946659484,
-      "train/rewards/margins": 0.9857937095905172,
-      "train/rewards/rejected": -28.11447265625
+      "train/kl": 25.960163116455078,
+      "train/logps/chosen": -228.58444552951389,
+      "train/logps/rejected": -259.24209872159093,
+      "train/rewards/chosen": 3.7380718655056424,
+      "train/rewards/margins": 2.2719342395512747,
+      "train/rewards/rejected": 1.466137625954368
     },
     {
       "epoch": 0.55,
-      "grad_norm": 0.0,
-      "learning_rate": 9.428571428571429e-05,
-      "loss": 0.475,
+      "grad_norm": 5.033076763153076,
+      "learning_rate": 9.571428571428573e-05,
+      "loss": 0.3576,
       "step": 80,
-      "train/kl": 0.0,
-      "train/logps/chosen": -543.1592824835526,
-      "train/logps/rejected": -573.1459728422619,
-      "train/rewards/chosen": -26.98720189144737,
-      "train/rewards/margins": 1.380697685375548,
-      "train/rewards/rejected": -28.367899576822918
+      "train/kl": 5.953394412994385,
+      "train/logps/chosen": -279.7611678685897,
+      "train/logps/rejected": -306.2880144817073,
+      "train/rewards/chosen": 1.7610896183894231,
+      "train/rewards/margins": 3.6376326169126703,
+      "train/rewards/rejected": -1.876542998523247
     },
     {
       "epoch": 0.68,
-      "grad_norm": 0.0,
-      "learning_rate": 6.571428571428571e-05,
-      "loss": 0.4562,
+      "grad_norm": 6.184478759765625,
+      "learning_rate": 6.714285714285714e-05,
+      "loss": 0.3304,
       "step": 100,
-      "train/kl": 0.0,
-      "train/logps/chosen": -549.855522260274,
-      "train/logps/rejected": -568.3944863505748,
-      "train/rewards/chosen": -27.00509685359589,
-      "train/rewards/margins": 2.2099236878911768,
-      "train/rewards/rejected": -29.215020541487068
+      "train/kl": 0.5991002321243286,
+      "train/logps/chosen": -307.0083233173077,
+      "train/logps/rejected": -345.8705592105263,
+      "train/rewards/chosen": -1.1401430570162259,
+      "train/rewards/margins": 4.161937278098906,
+      "train/rewards/rejected": -5.302080335115131
     },
     {
       "epoch": 0.68,
-      "eval/kl": 0.0,
-      "eval/logps/chosen": -562.8151408450705,
-      "eval/logps/rejected": -553.574119857595,
-      "eval/rewards/chosen": -27.915193799515844,
-      "eval/rewards/margins": 1.0059395074461825,
-      "eval/rewards/rejected": -28.921133306962027,
-      "eval_loss": 0.47333332896232605,
-      "eval_runtime": 140.4904,
-      "eval_samples_per_second": 2.135,
-      "eval_steps_per_second": 0.534,
+      "eval/kl": 3.7238929271698,
+      "eval/logps/chosen": -265.10247029049293,
+      "eval/logps/rejected": -277.03555181962025,
+      "eval/rewards/chosen": 1.855774355606294,
+      "eval/rewards/margins": 3.1223836440754393,
+      "eval/rewards/rejected": -1.2666092884691456,
+      "eval_loss": 0.3468475937843323,
+      "eval_runtime": 139.8048,
+      "eval_samples_per_second": 2.146,
+      "eval_steps_per_second": 0.536,
       "step": 100
     }
   ],
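
In the updated log_history, each "rewards/margins" value is consistent with the chosen reward minus the rejected reward, which is how TRL-style preference trainers typically report the margin. A quick sketch checking that against the step-100 training entry, with values copied from the hunk above:

# Sketch: the logged margin at step 100 equals chosen minus rejected rewards.
chosen = -1.1401430570162259    # "train/rewards/chosen" at step 100
rejected = -5.302080335115131   # "train/rewards/rejected" at step 100
margin = 4.161937278098906      # "train/rewards/margins" at step 100

assert abs((chosen - rejected) - margin) < 1e-9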
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7226114c5e2b217740bdcf70fa54f9c5ff75648b1c63ef0cf288d14e4f73b13c
+oid sha256:38ae2e87f29eb2d5b20b8a1633c876370c1a196bac5b23847645ad024eea5fb3
 size 5688