just1nseo committed · verified
Commit 804d272 · 1 Parent(s): 267f62a

Model save
README.md ADDED
@@ -0,0 +1,77 @@
---
library_name: peft
tags:
- trl
- dpo
- generated_from_trainer
base_model: allenai/tulu-2-7b
model-index:
- name: tulu2-7b-cost-UI-both-5e-7
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# tulu2-7b-cost-UI-both-5e-7

This model is a fine-tuned version of [allenai/tulu-2-7b](https://huggingface.co/allenai/tulu-2-7b) on the None dataset.
It achieves the following results on the evaluation set:
- Loss: 0.6928
- Rewards/chosen: -0.0002
- Rewards/rejected: -0.0001
- Rewards/accuracies: 0.5159
- Rewards/margins: -0.0001
- Rewards/margins Max: 0.0190
- Rewards/margins Min: -0.0190
- Rewards/margins Std: 0.0125
- Logps/rejected: -338.8891
- Logps/chosen: -345.6230
- Logits/rejected: 0.8420
- Logits/chosen: 0.7124

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-07
- train_batch_size: 2
- eval_batch_size: 8
- seed: 42
- distributed_type: multi-GPU
- num_devices: 8
- gradient_accumulation_steps: 4
- total_train_batch_size: 64
- total_eval_batch_size: 64
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 1

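The training script itself is not part of this commit, so the snippet below is only a hypothetical sketch of how the hyperparameters listed above could be expressed as `transformers.TrainingArguments`; the `output_dir` name is a placeholder, the Adam betas/epsilon above are the library defaults, and the logging/eval/save intervals are taken from the `trainer_state.json` added later in this commit.

```python
from transformers import TrainingArguments

# Hypothetical sketch only: this commit does not include the training script.
# Values mirror the hyperparameter list above; output_dir is a placeholder.
training_args = TrainingArguments(
    output_dir="tulu2-7b-cost-UI-both-5e-7",  # placeholder name
    learning_rate=5e-7,
    per_device_train_batch_size=2,    # "train_batch_size" above
    per_device_eval_batch_size=8,     # "eval_batch_size" above
    gradient_accumulation_steps=4,    # 2 per device x 8 GPUs x 4 = 64 effective
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
    evaluation_strategy="steps",
    eval_steps=100,                   # from trainer_state.json below
    save_steps=100,
    logging_steps=10,
)
```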
### Training results

| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Rewards/margins Max | Rewards/margins Min | Rewards/margins Std | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:-------------------:|:-------------------:|:-------------------:|:--------------:|:------------:|:---------------:|:-------------:|
| 0.6764 | 1.0 | 289 | 0.6928 | -0.0002 | -0.0001 | 0.5159 | -0.0001 | 0.0190 | -0.0190 | 0.0125 | -338.8891 | -345.6230 | 0.8420 | 0.7124 |

### Framework versions

- PEFT 0.7.1
- Transformers 4.39.0.dev0
- Pytorch 2.1.2+cu121
- Datasets 2.14.6
- Tokenizers 0.15.2
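Because this repository stores a PEFT adapter (`adapter_model.safetensors`) rather than full model weights, it would normally be loaded on top of the base model. A minimal usage sketch, assuming the adapter is published under the committer's namespace (`just1nseo/tulu2-7b-cost-UI-both-5e-7` is inferred from this commit, not stated in the card) and that the base model's Tulu-style chat format applies:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "allenai/tulu-2-7b"
adapter_id = "just1nseo/tulu2-7b-cost-UI-both-5e-7"  # assumed Hub id for this adapter

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto", device_map="auto")
model = PeftModel.from_pretrained(base_model, adapter_id)  # attach the DPO-trained adapter

# Tulu-style prompt format (assumption based on the base model family)
prompt = "<|user|>\nExplain direct preference optimization in one sentence.\n<|assistant|>\n"
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```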
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:51745a2e0ce4dd3e2b9e87dfbad689d8102b51e429b81ecd8335090d5e1808de
+ oid sha256:86739c0322f6b1ea5b274578df67afec4d4aa82c2927f6a3add22ee23935c015
  size 639692768
all_results.json ADDED
@@ -0,0 +1,8 @@
{
    "epoch": 1.0,
    "train_loss": 0.6824996727148142,
    "train_runtime": 3006.8067,
    "train_samples": 18509,
    "train_samples_per_second": 6.156,
    "train_steps_per_second": 0.096
}
train_results.json ADDED
@@ -0,0 +1,8 @@
{
    "epoch": 1.0,
    "train_loss": 0.6824996727148142,
    "train_runtime": 3006.8067,
    "train_samples": 18509,
    "train_samples_per_second": 6.156,
    "train_steps_per_second": 0.096
}
trainer_state.json ADDED
@@ -0,0 +1,571 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9991356957649092,
  "eval_steps": 100,
  "global_step": 289,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.2373046875,
      "learning_rate": 1.7241379310344825e-08,
      "logits/chosen": -0.1198696494102478,
      "logits/rejected": 0.2811677157878876,
      "logps/chosen": -217.16847229003906,
      "logps/rejected": -155.17271423339844,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/margins_max": 0.0,
      "rewards/margins_min": 0.0,
      "rewards/margins_std": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.248046875,
      "learning_rate": 1.7241379310344828e-07,
      "logits/chosen": -0.13907119631767273,
      "logits/rejected": 0.10448222607374191,
      "logps/chosen": -224.61839294433594,
      "logps/rejected": -217.18283081054688,
      "loss": 0.6932,
      "rewards/accuracies": 0.4027777910232544,
      "rewards/chosen": -0.00020672479877248406,
      "rewards/margins": -0.0004321257583796978,
      "rewards/margins_max": 0.0018353578634560108,
      "rewards/margins_min": -0.0026996093802154064,
      "rewards/margins_std": 0.0032067059073597193,
      "rewards/rejected": 0.00022540091595146805,
      "step": 10
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2353515625,
      "learning_rate": 3.4482758620689656e-07,
      "logits/chosen": -0.19345124065876007,
      "logits/rejected": 0.05696944519877434,
      "logps/chosen": -191.2423553466797,
      "logps/rejected": -176.5889434814453,
      "loss": 0.6929,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -2.7165409846929833e-05,
      "rewards/margins": 0.0008886352297849953,
      "rewards/margins_max": 0.002847478026524186,
      "rewards/margins_min": -0.0010702075669541955,
      "rewards/margins_std": 0.0027702220249921083,
      "rewards/rejected": -0.0009158005705103278,
      "step": 20
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.265625,
      "learning_rate": 4.999817502139027e-07,
      "logits/chosen": -0.19917765259742737,
      "logits/rejected": 0.08285371214151382,
      "logps/chosen": -247.91159057617188,
      "logps/rejected": -218.81246948242188,
      "loss": 0.6922,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.0009048490901477635,
      "rewards/margins": 0.0024331805761903524,
      "rewards/margins_max": 0.005544544197618961,
      "rewards/margins_min": -0.0006781836273148656,
      "rewards/margins_std": 0.004400133155286312,
      "rewards/rejected": -0.0015283313114196062,
      "step": 30
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.267578125,
      "learning_rate": 4.977949980164773e-07,
      "logits/chosen": -0.05700277164578438,
      "logits/rejected": 0.11779887974262238,
      "logps/chosen": -193.61355590820312,
      "logps/rejected": -203.84695434570312,
      "loss": 0.6914,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.0017391880974173546,
      "rewards/margins": 0.004240007139742374,
      "rewards/margins_max": 0.008039236068725586,
      "rewards/margins_min": 0.0004407777450978756,
      "rewards/margins_std": 0.005372921004891396,
      "rewards/rejected": -0.0025008185766637325,
      "step": 40
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2314453125,
      "learning_rate": 4.919948367622307e-07,
      "logits/chosen": -0.1814556121826172,
      "logits/rejected": 0.02914687618613243,
      "logps/chosen": -220.93115234375,
      "logps/rejected": -223.7554168701172,
      "loss": 0.6902,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": 0.0025483686476945877,
      "rewards/margins": 0.0058803861029446125,
      "rewards/margins_max": 0.009798353537917137,
      "rewards/margins_min": 0.0019624175038188696,
      "rewards/margins_std": 0.005540843587368727,
      "rewards/rejected": -0.003332017455250025,
      "step": 50
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2275390625,
      "learning_rate": 4.826658458630755e-07,
      "logits/chosen": -0.17724668979644775,
      "logits/rejected": 0.1624217927455902,
      "logps/chosen": -226.7654571533203,
      "logps/rejected": -178.1088104248047,
      "loss": 0.6892,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.004519807640463114,
      "rewards/margins": 0.008648158982396126,
      "rewards/margins_max": 0.013417336158454418,
      "rewards/margins_min": 0.0038789804093539715,
      "rewards/margins_std": 0.006744635757058859,
      "rewards/rejected": -0.004128350876271725,
      "step": 60
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.248046875,
      "learning_rate": 4.699440630133794e-07,
      "logits/chosen": -0.15689274668693542,
      "logits/rejected": 0.0655246376991272,
      "logps/chosen": -205.931884765625,
      "logps/rejected": -213.77328491210938,
      "loss": 0.6879,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 0.004349945578724146,
      "rewards/margins": 0.010797133669257164,
      "rewards/margins_max": 0.016608919948339462,
      "rewards/margins_min": 0.004985347390174866,
      "rewards/margins_std": 0.008219106122851372,
      "rewards/rejected": -0.006447188556194305,
      "step": 70
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.267578125,
      "learning_rate": 4.5401500045405117e-07,
      "logits/chosen": -0.13238832354545593,
      "logits/rejected": 0.1644008904695511,
      "logps/chosen": -232.31631469726562,
      "logps/rejected": -199.2275390625,
      "loss": 0.6865,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.004281439818441868,
      "rewards/margins": 0.012644032016396523,
      "rewards/margins_max": 0.01821981742978096,
      "rewards/margins_min": 0.007068246603012085,
      "rewards/margins_std": 0.007885349914431572,
      "rewards/rejected": -0.00836259126663208,
      "step": 80
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.244140625,
      "learning_rate": 4.3511093978633086e-07,
      "logits/chosen": -0.2108915150165558,
      "logits/rejected": -0.023733098059892654,
      "logps/chosen": -223.54336547851562,
      "logps/rejected": -231.7197265625,
      "loss": 0.6859,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.006567983888089657,
      "rewards/margins": 0.016123134642839432,
      "rewards/margins_max": 0.02399933896958828,
      "rewards/margins_min": 0.008246931247413158,
      "rewards/margins_std": 0.011138634756207466,
      "rewards/rejected": -0.009555150754749775,
      "step": 90
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2451171875,
      "learning_rate": 4.135075447829911e-07,
      "logits/chosen": -0.13402007520198822,
      "logits/rejected": 0.23311862349510193,
      "logps/chosen": -242.7605438232422,
      "logps/rejected": -215.2969970703125,
      "loss": 0.6846,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 0.006707096938043833,
      "rewards/margins": 0.020004788413643837,
      "rewards/margins_max": 0.03050144948065281,
      "rewards/margins_min": 0.009508123621344566,
      "rewards/margins_std": 0.01484452374279499,
      "rewards/rejected": -0.013297691941261292,
      "step": 100
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.240234375,
      "learning_rate": 3.895198415897896e-07,
      "logits/chosen": -0.10960109531879425,
      "logits/rejected": 0.09310563653707504,
      "logps/chosen": -215.31460571289062,
      "logps/rejected": -208.97097778320312,
      "loss": 0.684,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.007017076015472412,
      "rewards/margins": 0.01683773659169674,
      "rewards/margins_max": 0.026084523648023605,
      "rewards/margins_min": 0.007590950932353735,
      "rewards/margins_std": 0.01307692937552929,
      "rewards/rejected": -0.009820659644901752,
      "step": 110
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2451171875,
      "learning_rate": 3.634976249348867e-07,
      "logits/chosen": -0.13226808607578278,
      "logits/rejected": 0.15200194716453552,
      "logps/chosen": -242.24966430664062,
      "logps/rejected": -212.1212921142578,
      "loss": 0.6826,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.006220725364983082,
      "rewards/margins": 0.01998782530426979,
      "rewards/margins_max": 0.03047138825058937,
      "rewards/margins_min": 0.009504261426627636,
      "rewards/margins_std": 0.014825996942818165,
      "rewards/rejected": -0.013767099007964134,
      "step": 120
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2353515625,
      "learning_rate": 3.358203573340396e-07,
      "logits/chosen": -0.15161535143852234,
      "logits/rejected": 0.06725052744150162,
      "logps/chosen": -220.3600311279297,
      "logps/rejected": -238.15267944335938,
      "loss": 0.6817,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.00594148738309741,
      "rewards/margins": 0.02438289485871792,
      "rewards/margins_max": 0.03674236685037613,
      "rewards/margins_min": 0.012023425661027431,
      "rewards/margins_std": 0.017478929832577705,
      "rewards/rejected": -0.01844140887260437,
      "step": 130
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2373046875,
      "learning_rate": 3.0689163567264746e-07,
      "logits/chosen": -0.1485133320093155,
      "logits/rejected": 0.12535016238689423,
      "logps/chosen": -246.51065063476562,
      "logps/rejected": -233.1022186279297,
      "loss": 0.6808,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.009529241360723972,
      "rewards/margins": 0.021075651049613953,
      "rewards/margins_max": 0.03406635671854019,
      "rewards/margins_min": 0.008084945380687714,
      "rewards/margins_std": 0.01837163418531418,
      "rewards/rejected": -0.011546410620212555,
      "step": 140
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2470703125,
      "learning_rate": 2.771333058543416e-07,
      "logits/chosen": -0.0800982341170311,
      "logits/rejected": 0.1670171320438385,
      "logps/chosen": -229.39120483398438,
      "logps/rejected": -211.8321990966797,
      "loss": 0.6804,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.010588416829705238,
      "rewards/margins": 0.026508808135986328,
      "rewards/margins_max": 0.043856311589479446,
      "rewards/margins_min": 0.009161303751170635,
      "rewards/margins_std": 0.024533074349164963,
      "rewards/rejected": -0.01592039130628109,
      "step": 150
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.482421875,
      "learning_rate": 2.469793113377957e-07,
      "logits/chosen": -0.12969347834587097,
      "logits/rejected": 0.2130148708820343,
      "logps/chosen": -207.0413818359375,
      "logps/rejected": -174.50192260742188,
      "loss": 0.6789,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.009559371508657932,
      "rewards/margins": 0.02586180530488491,
      "rewards/margins_max": 0.04229948669672012,
      "rewards/margins_min": 0.009424128569662571,
      "rewards/margins_std": 0.023246387019753456,
      "rewards/rejected": -0.016302434727549553,
      "step": 160
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2265625,
      "learning_rate": 2.1686936526394318e-07,
      "logits/chosen": -0.1643756926059723,
      "logits/rejected": 0.09980125725269318,
      "logps/chosen": -232.63479614257812,
      "logps/rejected": -222.15707397460938,
      "loss": 0.6785,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.009239903651177883,
      "rewards/margins": 0.031400762498378754,
      "rewards/margins_max": 0.04692839831113815,
      "rewards/margins_min": 0.015873130410909653,
      "rewards/margins_std": 0.021959390491247177,
      "rewards/rejected": -0.022160857915878296,
      "step": 170
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.306640625,
      "learning_rate": 1.8724253844823456e-07,
      "logits/chosen": -0.10444211959838867,
      "logits/rejected": 0.11585960537195206,
      "logps/chosen": -217.49295043945312,
      "logps/rejected": -236.1336212158203,
      "loss": 0.6789,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.009765163995325565,
      "rewards/margins": 0.02891402505338192,
      "rewards/margins_max": 0.04084194451570511,
      "rewards/margins_min": 0.016986116766929626,
      "rewards/margins_std": 0.01686861552298069,
      "rewards/rejected": -0.019148865714669228,
      "step": 180
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2451171875,
      "learning_rate": 1.5853085673944694e-07,
      "logits/chosen": -0.21767011284828186,
      "logits/rejected": 0.03656459227204323,
      "logps/chosen": -205.89724731445312,
      "logps/rejected": -195.54986572265625,
      "loss": 0.677,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.009529804810881615,
      "rewards/margins": 0.034868255257606506,
      "rewards/margins_max": 0.05301555246114731,
      "rewards/margins_min": 0.016720956191420555,
      "rewards/margins_std": 0.025664156302809715,
      "rewards/rejected": -0.02533845044672489,
      "step": 190
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2412109375,
      "learning_rate": 1.3115300110997096e-07,
      "logits/chosen": -0.12012083828449249,
      "logits/rejected": 0.10849084705114365,
      "logps/chosen": -219.48818969726562,
      "logps/rejected": -228.03585815429688,
      "loss": 0.6777,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 0.007625420577824116,
      "rewards/margins": 0.030659427866339684,
      "rewards/margins_max": 0.044585634022951126,
      "rewards/margins_min": 0.01673322357237339,
      "rewards/margins_std": 0.019694630056619644,
      "rewards/rejected": -0.023034008219838142,
      "step": 200
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2236328125,
      "learning_rate": 1.0550820234444626e-07,
      "logits/chosen": -0.13823586702346802,
      "logits/rejected": 0.07014124095439911,
      "logps/chosen": -209.46163940429688,
      "logps/rejected": -220.68325805664062,
      "loss": 0.6771,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 0.010470154695212841,
      "rewards/margins": 0.03657643496990204,
      "rewards/margins_max": 0.050303250551223755,
      "rewards/margins_min": 0.02284962125122547,
      "rewards/margins_std": 0.019412647932767868,
      "rewards/rejected": -0.026106279343366623,
      "step": 210
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.244140625,
      "learning_rate": 8.197041935593179e-08,
      "logits/chosen": -0.15651056170463562,
      "logits/rejected": 0.06445064395666122,
      "logps/chosen": -212.4806671142578,
      "logps/rejected": -226.1749267578125,
      "loss": 0.6765,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.00890127569437027,
      "rewards/margins": 0.035758793354034424,
      "rewards/margins_max": 0.053012482821941376,
      "rewards/margins_min": 0.01850510574877262,
      "rewards/margins_std": 0.02440040186047554,
      "rewards/rejected": -0.026857519522309303,
      "step": 220
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2333984375,
      "learning_rate": 6.088288602287158e-08,
      "logits/chosen": -0.19696348905563354,
      "logits/rejected": -0.03903040662407875,
      "logps/chosen": -213.6901397705078,
      "logps/rejected": -221.56021118164062,
      "loss": 0.6783,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.009939353913068771,
      "rewards/margins": 0.033906787633895874,
      "rewards/margins_max": 0.05103258416056633,
      "rewards/margins_min": 0.016780991107225418,
      "rewards/margins_std": 0.024219539016485214,
      "rewards/rejected": -0.023967433720827103,
      "step": 230
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.2392578125,
      "learning_rate": 4.255310606625123e-08,
      "logits/chosen": -0.18151769042015076,
      "logits/rejected": 0.08627013117074966,
      "logps/chosen": -232.16629028320312,
      "logps/rejected": -224.4107208251953,
      "loss": 0.6781,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.012780356220901012,
      "rewards/margins": 0.03826562687754631,
      "rewards/margins_max": 0.055571459233760834,
      "rewards/margins_min": 0.020959796383976936,
      "rewards/margins_std": 0.024474143981933594,
      "rewards/rejected": -0.025485267862677574,
      "step": 240
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2451171875,
      "learning_rate": 2.724836895290805e-08,
      "logits/chosen": -0.1553465723991394,
      "logits/rejected": 0.18157121539115906,
      "logps/chosen": -247.65811157226562,
      "logps/rejected": -212.87515258789062,
      "loss": 0.6772,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 0.011063109152019024,
      "rewards/margins": 0.030740728601813316,
      "rewards/margins_max": 0.044029705226421356,
      "rewards/margins_min": 0.017451755702495575,
      "rewards/margins_std": 0.018793445080518723,
      "rewards/rejected": -0.019677620381116867,
      "step": 250
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.2578125,
      "learning_rate": 1.5191852213221196e-08,
      "logits/chosen": -0.13707074522972107,
      "logits/rejected": 0.0773903951048851,
      "logps/chosen": -215.07275390625,
      "logps/rejected": -215.782958984375,
      "loss": 0.6772,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 0.002797811757773161,
      "rewards/margins": 0.031788431107997894,
      "rewards/margins_max": 0.05227842181921005,
      "rewards/margins_min": 0.01129843108355999,
      "rewards/margins_std": 0.028977233916521072,
      "rewards/rejected": -0.028990617021918297,
      "step": 260
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2373046875,
      "learning_rate": 6.559367010166628e-09,
      "logits/chosen": -0.16215373575687408,
      "logits/rejected": 0.14343757927417755,
      "logps/chosen": -248.9391632080078,
      "logps/rejected": -220.1829833984375,
      "loss": 0.6783,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.009577736258506775,
      "rewards/margins": 0.028629934415221214,
      "rewards/margins_max": 0.045094698667526245,
      "rewards/margins_min": 0.012165175750851631,
      "rewards/margins_std": 0.023284688591957092,
      "rewards/rejected": -0.01905220001935959,
      "step": 270
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2490234375,
      "learning_rate": 1.476794416668703e-09,
      "logits/chosen": -0.1460862010717392,
      "logits/rejected": 0.10489644855260849,
      "logps/chosen": -217.80380249023438,
      "logps/rejected": -210.3385009765625,
      "loss": 0.6764,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.011174037121236324,
      "rewards/margins": 0.03286902233958244,
      "rewards/margins_max": 0.050892699509859085,
      "rewards/margins_min": 0.014845346100628376,
      "rewards/margins_std": 0.025489334017038345,
      "rewards/rejected": -0.021694988012313843,
      "step": 280
    },
    {
      "epoch": 1.0,
      "eval_logits/chosen": 0.7124304175376892,
      "eval_logits/rejected": 0.8419629335403442,
      "eval_logps/chosen": -345.623046875,
      "eval_logps/rejected": -338.8891296386719,
      "eval_loss": 0.6928284764289856,
      "eval_rewards/accuracies": 0.5158730149269104,
      "eval_rewards/chosen": -0.00016850981046445668,
      "eval_rewards/margins": -0.00010753136302810162,
      "eval_rewards/margins_max": 0.019001232460141182,
      "eval_rewards/margins_min": -0.01899593509733677,
      "eval_rewards/margins_std": 0.012460649013519287,
      "eval_rewards/rejected": -6.097855293774046e-05,
      "eval_runtime": 224.8356,
      "eval_samples_per_second": 17.791,
      "eval_steps_per_second": 0.28,
      "step": 289
    },
    {
      "epoch": 1.0,
      "step": 289,
      "total_flos": 0.0,
      "train_loss": 0.6824996727148142,
      "train_runtime": 3006.8067,
      "train_samples_per_second": 6.156,
      "train_steps_per_second": 0.096
    }
  ],
  "logging_steps": 10,
  "max_steps": 289,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
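For quick inspection, the `log_history` entries in the `trainer_state.json` shown above can be summarized with a few lines of Python; a minimal sketch, assuming the file has been downloaded locally under the same name:

```python
import json

# Summarize the DPO training log recorded in trainer_state.json (shown above).
with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # per-step training logs (written every logging_steps=10 steps)
        print(f"step {entry['step']:>3}  loss {entry['loss']:.4f}  "
              f"reward margin {entry['rewards/margins']:.5f}  "
              f"acc {entry['rewards/accuracies']:.3f}")
    elif "eval_loss" in entry:  # end-of-epoch evaluation entry
        print(f"eval @ step {entry['step']}: loss {entry['eval_loss']:.4f}, "
              f"accuracy {entry['eval_rewards/accuracies']:.4f}")
```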