NicholasCorrado committed on
Commit
0525ea8
1 Parent(s): c3abef6

Model save

Browse files
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
5
+ tags:
6
+ - trl
7
+ - dpo
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: tinyllama-1.1b-chat-v1.0-hh-dpo
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # tinyllama-1.1b-chat-v1.0-hh-dpo
18
+
19
+ This model is a version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) fine-tuned with DPO (using TRL); the training dataset was not recorded in the card metadata.
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 5e-07
39
+ - train_batch_size: 16
40
+ - eval_batch_size: 16
41
+ - seed: 42
42
+ - distributed_type: multi-GPU
43
+ - num_devices: 4
44
+ - gradient_accumulation_steps: 8
45
+ - total_train_batch_size: 512
46
+ - total_eval_batch_size: 64
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: cosine
49
+ - lr_scheduler_warmup_ratio: 0.1
50
+ - num_epochs: 1
51
+
52
+ ### Training results
53
+
54
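+ Final training loss: 0.6789 after 314 steps (about 1 epoch); no evaluation metrics were logged during this run.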
55
+
56
+ ### Framework versions
57
+
58
+ - Transformers 4.44.1
59
+ - Pytorch 2.1.2+cu121
60
+ - Datasets 2.21.0
61
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9996020692399522,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.678918091354856,
5
+ "train_runtime": 7383.0158,
6
+ "train_samples": 160800,
7
+ "train_samples_per_second": 21.78,
8
+ "train_steps_per_second": 0.043
9
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": 2,
4
+ "max_length": 2048,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.44.1"
7
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9996020692399522,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.678918091354856,
5
+ "train_runtime": 7383.0158,
6
+ "train_samples": 160800,
7
+ "train_samples_per_second": 21.78,
8
+ "train_steps_per_second": 0.043
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9996020692399522,
5
+ "eval_steps": 1000,
6
+ "global_step": 314,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0031834460803820135,
13
+ "grad_norm": 0.8913188458215807,
14
+ "learning_rate": 1.5625e-08,
15
+ "logits/chosen": -2.332231044769287,
16
+ "logits/rejected": -2.3125171661376953,
17
+ "logps/chosen": -178.02963256835938,
18
+ "logps/rejected": -150.3365478515625,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.03183446080382014,
28
+ "grad_norm": 0.7954378994525454,
29
+ "learning_rate": 1.5624999999999999e-07,
30
+ "logits/chosen": -2.299248456954956,
31
+ "logits/rejected": -2.2925186157226562,
32
+ "logps/chosen": -160.2787322998047,
33
+ "logps/rejected": -158.51319885253906,
34
+ "loss": 0.6931,
35
+ "rewards/accuracies": 0.4322916567325592,
36
+ "rewards/chosen": -3.1086481612874195e-05,
37
+ "rewards/margins": -0.00011221379099879414,
38
+ "rewards/rejected": 8.112730574794114e-05,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.06366892160764027,
43
+ "grad_norm": 0.856612493875923,
44
+ "learning_rate": 3.1249999999999997e-07,
45
+ "logits/chosen": -2.2908735275268555,
46
+ "logits/rejected": -2.294975519180298,
47
+ "logps/chosen": -156.20362854003906,
48
+ "logps/rejected": -160.60763549804688,
49
+ "loss": 0.6932,
50
+ "rewards/accuracies": 0.48828125,
51
+ "rewards/chosen": -9.316079376731068e-05,
52
+ "rewards/margins": 9.743528607941698e-06,
53
+ "rewards/rejected": -0.00010290431964676827,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.0955033824114604,
58
+ "grad_norm": 0.8601850412460429,
59
+ "learning_rate": 4.6874999999999996e-07,
60
+ "logits/chosen": -2.2940285205841064,
61
+ "logits/rejected": -2.2888407707214355,
62
+ "logps/chosen": -159.58680725097656,
63
+ "logps/rejected": -162.42189025878906,
64
+ "loss": 0.693,
65
+ "rewards/accuracies": 0.52734375,
66
+ "rewards/chosen": -0.0016975710168480873,
67
+ "rewards/margins": 0.00033986102789640427,
68
+ "rewards/rejected": -0.0020374320447444916,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.12733784321528055,
73
+ "grad_norm": 0.8744440488120392,
74
+ "learning_rate": 4.990077890125363e-07,
75
+ "logits/chosen": -2.301192045211792,
76
+ "logits/rejected": -2.2977750301361084,
77
+ "logps/chosen": -151.9224395751953,
78
+ "logps/rejected": -152.43655395507812,
79
+ "loss": 0.6925,
80
+ "rewards/accuracies": 0.555468738079071,
81
+ "rewards/chosen": -0.005973272956907749,
82
+ "rewards/margins": 0.0012832467909902334,
83
+ "rewards/rejected": -0.007256519980728626,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.15917230401910068,
88
+ "grad_norm": 0.95273331836508,
89
+ "learning_rate": 4.949904262591467e-07,
90
+ "logits/chosen": -2.3083178997039795,
91
+ "logits/rejected": -2.30576753616333,
92
+ "logps/chosen": -161.62750244140625,
93
+ "logps/rejected": -159.9287109375,
94
+ "loss": 0.6914,
95
+ "rewards/accuracies": 0.534375011920929,
96
+ "rewards/chosen": -0.016185810789465904,
97
+ "rewards/margins": 0.003111806232482195,
98
+ "rewards/rejected": -0.019297616556286812,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.1910067648229208,
103
+ "grad_norm": 0.9051345997712666,
104
+ "learning_rate": 4.879356673988089e-07,
105
+ "logits/chosen": -2.3459715843200684,
106
+ "logits/rejected": -2.340350389480591,
107
+ "logps/chosen": -159.88613891601562,
108
+ "logps/rejected": -162.18612670898438,
109
+ "loss": 0.6903,
110
+ "rewards/accuracies": 0.555468738079071,
111
+ "rewards/chosen": -0.029835861176252365,
112
+ "rewards/margins": 0.005405827891081572,
113
+ "rewards/rejected": -0.03524169698357582,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.22284122562674094,
118
+ "grad_norm": 0.9611988462128772,
119
+ "learning_rate": 4.779309774701573e-07,
120
+ "logits/chosen": -2.3523900508880615,
121
+ "logits/rejected": -2.348705768585205,
122
+ "logps/chosen": -166.91146850585938,
123
+ "logps/rejected": -164.3793182373047,
124
+ "loss": 0.6883,
125
+ "rewards/accuracies": 0.5609375238418579,
126
+ "rewards/chosen": -0.05225539207458496,
127
+ "rewards/margins": 0.009335539303719997,
128
+ "rewards/rejected": -0.06159093230962753,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.2546756864305611,
133
+ "grad_norm": 0.9747994880646563,
134
+ "learning_rate": 4.6510039481503485e-07,
135
+ "logits/chosen": -2.367791175842285,
136
+ "logits/rejected": -2.3638641834259033,
137
+ "logps/chosen": -166.5675811767578,
138
+ "logps/rejected": -163.23748779296875,
139
+ "loss": 0.6866,
140
+ "rewards/accuracies": 0.56640625,
141
+ "rewards/chosen": -0.0814916118979454,
142
+ "rewards/margins": 0.01518010813742876,
143
+ "rewards/rejected": -0.09667172282934189,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.28651014723438123,
148
+ "grad_norm": 1.0299054182265805,
149
+ "learning_rate": 4.4960299324869857e-07,
150
+ "logits/chosen": -2.3870062828063965,
151
+ "logits/rejected": -2.382286787033081,
152
+ "logps/chosen": -168.0864715576172,
153
+ "logps/rejected": -170.67047119140625,
154
+ "loss": 0.6857,
155
+ "rewards/accuracies": 0.5453125238418579,
156
+ "rewards/chosen": -0.10201652348041534,
157
+ "rewards/margins": 0.017032291740179062,
158
+ "rewards/rejected": -0.11904881149530411,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.31834460803820136,
163
+ "grad_norm": 1.0287716469740125,
164
+ "learning_rate": 4.3163090985954074e-07,
165
+ "logits/chosen": -2.3779287338256836,
166
+ "logits/rejected": -2.369999408721924,
167
+ "logps/chosen": -170.18251037597656,
168
+ "logps/rejected": -172.33718872070312,
169
+ "loss": 0.6838,
170
+ "rewards/accuracies": 0.5835937261581421,
171
+ "rewards/chosen": -0.12887540459632874,
172
+ "rewards/margins": 0.023395564407110214,
173
+ "rewards/rejected": -0.15227097272872925,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.3501790688420215,
178
+ "grad_norm": 1.0989482799498032,
179
+ "learning_rate": 4.114069628897006e-07,
180
+ "logits/chosen": -2.358673572540283,
181
+ "logits/rejected": -2.3572804927825928,
182
+ "logps/chosen": -173.09437561035156,
183
+ "logps/rejected": -176.59164428710938,
184
+ "loss": 0.681,
185
+ "rewards/accuracies": 0.5703125,
186
+ "rewards/chosen": -0.14945295453071594,
187
+ "rewards/margins": 0.03385554999113083,
188
+ "rewards/rejected": -0.18330851197242737,
189
+ "step": 110
190
+ },
191
+ {
192
+ "epoch": 0.3820135296458416,
193
+ "grad_norm": 1.215724859704609,
194
+ "learning_rate": 3.891818892301304e-07,
195
+ "logits/chosen": -2.3581154346466064,
196
+ "logits/rejected": -2.3629350662231445,
197
+ "logps/chosen": -176.79432678222656,
198
+ "logps/rejected": -180.97039794921875,
199
+ "loss": 0.6816,
200
+ "rewards/accuracies": 0.563281238079071,
201
+ "rewards/chosen": -0.2229323834180832,
202
+ "rewards/margins": 0.02610206976532936,
203
+ "rewards/rejected": -0.24903444945812225,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 0.41384799044966175,
208
+ "grad_norm": 1.276372767011757,
209
+ "learning_rate": 3.6523123577970693e-07,
210
+ "logits/chosen": -2.362705707550049,
211
+ "logits/rejected": -2.3545660972595215,
212
+ "logps/chosen": -187.2711944580078,
213
+ "logps/rejected": -192.3023681640625,
214
+ "loss": 0.6774,
215
+ "rewards/accuracies": 0.577343761920929,
216
+ "rewards/chosen": -0.2734658420085907,
217
+ "rewards/margins": 0.04484738036990166,
218
+ "rewards/rejected": -0.31831321120262146,
219
+ "step": 130
220
+ },
221
+ {
222
+ "epoch": 0.4456824512534819,
223
+ "grad_norm": 1.3874387536648047,
224
+ "learning_rate": 3.3985194320937815e-07,
225
+ "logits/chosen": -2.3698925971984863,
226
+ "logits/rejected": -2.375953435897827,
227
+ "logps/chosen": -189.7623748779297,
228
+ "logps/rejected": -198.46641540527344,
229
+ "loss": 0.6765,
230
+ "rewards/accuracies": 0.577343761920929,
231
+ "rewards/chosen": -0.28896641731262207,
232
+ "rewards/margins": 0.0472131185233593,
233
+ "rewards/rejected": -0.3361795246601105,
234
+ "step": 140
235
+ },
236
+ {
237
+ "epoch": 0.477516912057302,
238
+ "grad_norm": 1.4443860790246479,
239
+ "learning_rate": 3.133586644859039e-07,
240
+ "logits/chosen": -2.3575439453125,
241
+ "logits/rejected": -2.3553812503814697,
242
+ "logps/chosen": -191.13748168945312,
243
+ "logps/rejected": -193.09817504882812,
244
+ "loss": 0.6777,
245
+ "rewards/accuracies": 0.5640624761581421,
246
+ "rewards/chosen": -0.3255929946899414,
247
+ "rewards/margins": 0.048027556389570236,
248
+ "rewards/rejected": -0.37362051010131836,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.5093513728611222,
253
+ "grad_norm": 1.6146506390035185,
254
+ "learning_rate": 2.8607986379820664e-07,
255
+ "logits/chosen": -2.3630471229553223,
256
+ "logits/rejected": -2.3578333854675293,
257
+ "logps/chosen": -203.55029296875,
258
+ "logps/rejected": -198.3260955810547,
259
+ "loss": 0.6749,
260
+ "rewards/accuracies": 0.543749988079071,
261
+ "rewards/chosen": -0.36638548970222473,
262
+ "rewards/margins": 0.03375838324427605,
263
+ "rewards/rejected": -0.4001438617706299,
264
+ "step": 160
265
+ },
266
+ {
267
+ "epoch": 0.5411858336649423,
268
+ "grad_norm": 1.442156527952266,
269
+ "learning_rate": 2.583537442519186e-07,
270
+ "logits/chosen": -2.37186861038208,
271
+ "logits/rejected": -2.37400484085083,
272
+ "logps/chosen": -196.1448211669922,
273
+ "logps/rejected": -203.14988708496094,
274
+ "loss": 0.6733,
275
+ "rewards/accuracies": 0.571093738079071,
276
+ "rewards/chosen": -0.39402055740356445,
277
+ "rewards/margins": 0.05580953508615494,
278
+ "rewards/rejected": -0.4498301148414612,
279
+ "step": 170
280
+ },
281
+ {
282
+ "epoch": 0.5730202944687625,
283
+ "grad_norm": 1.6421108977251095,
284
+ "learning_rate": 2.3052405482064919e-07,
285
+ "logits/chosen": -2.375570297241211,
286
+ "logits/rejected": -2.3756964206695557,
287
+ "logps/chosen": -210.69284057617188,
288
+ "logps/rejected": -210.54672241210938,
289
+ "loss": 0.6783,
290
+ "rewards/accuracies": 0.550000011920929,
291
+ "rewards/chosen": -0.4329681396484375,
292
+ "rewards/margins": 0.0399855375289917,
293
+ "rewards/rejected": -0.4729536473751068,
294
+ "step": 180
295
+ },
296
+ {
297
+ "epoch": 0.6048547552725826,
298
+ "grad_norm": 1.8616819792733938,
299
+ "learning_rate": 2.029358285394716e-07,
300
+ "logits/chosen": -2.375413179397583,
301
+ "logits/rejected": -2.3708198070526123,
302
+ "logps/chosen": -206.62564086914062,
303
+ "logps/rejected": -206.83370971679688,
304
+ "loss": 0.6694,
305
+ "rewards/accuracies": 0.609375,
306
+ "rewards/chosen": -0.41667842864990234,
307
+ "rewards/margins": 0.06583230942487717,
308
+ "rewards/rejected": -0.4825107455253601,
309
+ "step": 190
310
+ },
311
+ {
312
+ "epoch": 0.6366892160764027,
313
+ "grad_norm": 2.107824221269726,
314
+ "learning_rate": 1.7593110477859152e-07,
315
+ "logits/chosen": -2.3875057697296143,
316
+ "logits/rejected": -2.387460470199585,
317
+ "logps/chosen": -201.22967529296875,
318
+ "logps/rejected": -205.489501953125,
319
+ "loss": 0.6713,
320
+ "rewards/accuracies": 0.5757812261581421,
321
+ "rewards/chosen": -0.42719680070877075,
322
+ "rewards/margins": 0.06031234189867973,
323
+ "rewards/rejected": -0.487509161233902,
324
+ "step": 200
325
+ },
326
+ {
327
+ "epoch": 0.6685236768802229,
328
+ "grad_norm": 2.4249761136425048,
329
+ "learning_rate": 1.4984468863253007e-07,
330
+ "logits/chosen": -2.394925594329834,
331
+ "logits/rejected": -2.386389970779419,
332
+ "logps/chosen": -215.95462036132812,
333
+ "logps/rejected": -215.75820922851562,
334
+ "loss": 0.6748,
335
+ "rewards/accuracies": 0.569531261920929,
336
+ "rewards/chosen": -0.4547385275363922,
337
+ "rewards/margins": 0.05981076508760452,
338
+ "rewards/rejected": -0.5145493149757385,
339
+ "step": 210
340
+ },
341
+ {
342
+ "epoch": 0.700358137684043,
343
+ "grad_norm": 1.8116066543619882,
344
+ "learning_rate": 1.2500000000000005e-07,
345
+ "logits/chosen": -2.3963189125061035,
346
+ "logits/rejected": -2.3958048820495605,
347
+ "logps/chosen": -212.20693969726562,
348
+ "logps/rejected": -216.2605743408203,
349
+ "loss": 0.6721,
350
+ "rewards/accuracies": 0.5757812261581421,
351
+ "rewards/chosen": -0.4895060658454895,
352
+ "rewards/margins": 0.06058992072939873,
353
+ "rewards/rejected": -0.5500959157943726,
354
+ "step": 220
355
+ },
356
+ {
357
+ "epoch": 0.7321925984878631,
358
+ "grad_norm": 1.8113200913829595,
359
+ "learning_rate": 1.0170506381766119e-07,
360
+ "logits/chosen": -2.387866497039795,
361
+ "logits/rejected": -2.388319730758667,
362
+ "logps/chosen": -207.97604370117188,
363
+ "logps/rejected": -218.859130859375,
364
+ "loss": 0.6722,
365
+ "rewards/accuracies": 0.586718738079071,
366
+ "rewards/chosen": -0.49019813537597656,
367
+ "rewards/margins": 0.07117091119289398,
368
+ "rewards/rejected": -0.5613690614700317,
369
+ "step": 230
370
+ },
371
+ {
372
+ "epoch": 0.7640270592916832,
373
+ "grad_norm": 2.1202663184992563,
374
+ "learning_rate": 8.024869116091879e-08,
375
+ "logits/chosen": -2.3979618549346924,
376
+ "logits/rejected": -2.406341075897217,
377
+ "logps/chosen": -219.69271850585938,
378
+ "logps/rejected": -221.71139526367188,
379
+ "loss": 0.6734,
380
+ "rewards/accuracies": 0.5687500238418579,
381
+ "rewards/chosen": -0.4954513907432556,
382
+ "rewards/margins": 0.05036097764968872,
383
+ "rewards/rejected": -0.5458123683929443,
384
+ "step": 240
385
+ },
386
+ {
387
+ "epoch": 0.7958615200955034,
388
+ "grad_norm": 1.8622859998372623,
389
+ "learning_rate": 6.089689855854869e-08,
390
+ "logits/chosen": -2.3931431770324707,
391
+ "logits/rejected": -2.4037561416625977,
392
+ "logps/chosen": -207.83798217773438,
393
+ "logps/rejected": -216.9298858642578,
394
+ "loss": 0.6728,
395
+ "rewards/accuracies": 0.5835937261581421,
396
+ "rewards/chosen": -0.4849010407924652,
397
+ "rewards/margins": 0.0694160908460617,
398
+ "rewards/rejected": -0.5543171167373657,
399
+ "step": 250
400
+ },
401
+ {
402
+ "epoch": 0.8276959808993235,
403
+ "grad_norm": 1.8122633866114284,
404
+ "learning_rate": 4.388960991455998e-08,
405
+ "logits/chosen": -2.3899784088134766,
406
+ "logits/rejected": -2.395357847213745,
407
+ "logps/chosen": -205.0048828125,
408
+ "logps/rejected": -213.7442626953125,
409
+ "loss": 0.6718,
410
+ "rewards/accuracies": 0.5882812738418579,
411
+ "rewards/chosen": -0.4997900128364563,
412
+ "rewards/margins": 0.0625927522778511,
413
+ "rewards/rejected": -0.5623827576637268,
414
+ "step": 260
415
+ },
416
+ {
417
+ "epoch": 0.8595304417031436,
418
+ "grad_norm": 2.0654372503377014,
419
+ "learning_rate": 2.943768192692958e-08,
420
+ "logits/chosen": -2.400757074356079,
421
+ "logits/rejected": -2.4067251682281494,
422
+ "logps/chosen": -205.0299072265625,
423
+ "logps/rejected": -216.3090362548828,
424
+ "loss": 0.6731,
425
+ "rewards/accuracies": 0.5640624761581421,
426
+ "rewards/chosen": -0.4922141134738922,
427
+ "rewards/margins": 0.06625890731811523,
428
+ "rewards/rejected": -0.5584729313850403,
429
+ "step": 270
430
+ },
431
+ {
432
+ "epoch": 0.8913649025069638,
433
+ "grad_norm": 2.2123553368333644,
434
+ "learning_rate": 1.7720289882128092e-08,
435
+ "logits/chosen": -2.3984594345092773,
436
+ "logits/rejected": -2.4013664722442627,
437
+ "logps/chosen": -204.21585083007812,
438
+ "logps/rejected": -210.4906768798828,
439
+ "loss": 0.6745,
440
+ "rewards/accuracies": 0.5453125238418579,
441
+ "rewards/chosen": -0.497935950756073,
442
+ "rewards/margins": 0.04604557901620865,
443
+ "rewards/rejected": -0.5439816117286682,
444
+ "step": 280
445
+ },
446
+ {
447
+ "epoch": 0.9231993633107839,
448
+ "grad_norm": 2.8178850748361763,
449
+ "learning_rate": 8.882706236405885e-09,
450
+ "logits/chosen": -2.3987581729888916,
451
+ "logits/rejected": -2.398507833480835,
452
+ "logps/chosen": -209.7099609375,
453
+ "logps/rejected": -216.7168426513672,
454
+ "loss": 0.6672,
455
+ "rewards/accuracies": 0.6031249761581421,
456
+ "rewards/chosen": -0.49557456374168396,
457
+ "rewards/margins": 0.07922448217868805,
458
+ "rewards/rejected": -0.5747990608215332,
459
+ "step": 290
460
+ },
461
+ {
462
+ "epoch": 0.955033824114604,
463
+ "grad_norm": 2.094778053644434,
464
+ "learning_rate": 3.0344995250326245e-09,
465
+ "logits/chosen": -2.4099345207214355,
466
+ "logits/rejected": -2.406144380569458,
467
+ "logps/chosen": -210.4052734375,
468
+ "logps/rejected": -220.04794311523438,
469
+ "loss": 0.6676,
470
+ "rewards/accuracies": 0.5921875238418579,
471
+ "rewards/chosen": -0.5059640407562256,
472
+ "rewards/margins": 0.0776122659444809,
473
+ "rewards/rejected": -0.5835763216018677,
474
+ "step": 300
475
+ },
476
+ {
477
+ "epoch": 0.9868682849184242,
478
+ "grad_norm": 2.5156498514265735,
479
+ "learning_rate": 2.481759294498398e-10,
480
+ "logits/chosen": -2.3863139152526855,
481
+ "logits/rejected": -2.3877015113830566,
482
+ "logps/chosen": -207.8264923095703,
483
+ "logps/rejected": -215.2146453857422,
484
+ "loss": 0.6713,
485
+ "rewards/accuracies": 0.5679687261581421,
486
+ "rewards/chosen": -0.5232841372489929,
487
+ "rewards/margins": 0.059422146528959274,
488
+ "rewards/rejected": -0.5827063322067261,
489
+ "step": 310
490
+ },
491
+ {
492
+ "epoch": 0.9996020692399522,
493
+ "step": 314,
494
+ "total_flos": 0.0,
495
+ "train_loss": 0.678918091354856,
496
+ "train_runtime": 7383.0158,
497
+ "train_samples_per_second": 21.78,
498
+ "train_steps_per_second": 0.043
499
+ }
500
+ ],
501
+ "logging_steps": 10,
502
+ "max_steps": 314,
503
+ "num_input_tokens_seen": 0,
504
+ "num_train_epochs": 1,
505
+ "save_steps": 100,
506
+ "stateful_callbacks": {
507
+ "TrainerControl": {
508
+ "args": {
509
+ "should_epoch_stop": false,
510
+ "should_evaluate": false,
511
+ "should_log": false,
512
+ "should_save": true,
513
+ "should_training_stop": true
514
+ },
515
+ "attributes": {}
516
+ }
517
+ },
518
+ "total_flos": 0.0,
519
+ "train_batch_size": 16,
520
+ "trial_name": null,
521
+ "trial_params": null
522
+ }