File size: 31,706 Bytes
c645722
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1807991321641656,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0036159826432833123,
      "grad_norm": 0.3235728144645691,
      "learning_rate": 1.0000000000000002e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.043861389160156,
      "logits/rejected": -18.043861389160156,
      "logps/chosen": -0.6726381778717041,
      "logps/rejected": -0.6726381778717041,
      "loss": 6.9675,
      "nll_loss": 1.6725690364837646,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06726382672786713,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06726382672786713,
      "step": 10
    },
    {
      "epoch": 0.0072319652865666245,
      "grad_norm": 0.3509086072444916,
      "learning_rate": 2.0000000000000003e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.013948440551758,
      "logits/rejected": -18.013948440551758,
      "logps/chosen": -0.6865767240524292,
      "logps/rejected": -0.6865767240524292,
      "loss": 6.7227,
      "nll_loss": 1.6113628149032593,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06865767389535904,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06865767389535904,
      "step": 20
    },
    {
      "epoch": 0.010847947929849937,
      "grad_norm": 0.38406211137771606,
      "learning_rate": 3e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.367061614990234,
      "logits/rejected": -18.367061614990234,
      "logps/chosen": -0.6837120056152344,
      "logps/rejected": -0.6837120056152344,
      "loss": 6.3986,
      "nll_loss": 1.5303384065628052,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06837119162082672,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06837119162082672,
      "step": 30
    },
    {
      "epoch": 0.014463930573133249,
      "grad_norm": 0.4101921319961548,
      "learning_rate": 4.000000000000001e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -17.99128532409668,
      "logits/rejected": -17.99128532409668,
      "logps/chosen": -0.6463029980659485,
      "logps/rejected": -0.6463029980659485,
      "loss": 7.0291,
      "nll_loss": 1.6879545450210571,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06463029980659485,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06463029980659485,
      "step": 40
    },
    {
      "epoch": 0.01807991321641656,
      "grad_norm": 0.444273978471756,
      "learning_rate": 5e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.08310890197754,
      "logits/rejected": -18.08310890197754,
      "logps/chosen": -0.6336182355880737,
      "logps/rejected": -0.6336182355880737,
      "loss": 6.5453,
      "nll_loss": 1.5670195817947388,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06336182355880737,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06336182355880737,
      "step": 50
    },
    {
      "epoch": 0.021695895859699874,
      "grad_norm": 0.67641681432724,
      "learning_rate": 6e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.269500732421875,
      "logits/rejected": -18.269500732421875,
      "logps/chosen": -0.6496673822402954,
      "logps/rejected": -0.6496673822402954,
      "loss": 6.733,
      "nll_loss": 1.6139262914657593,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06496674567461014,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06496674567461014,
      "step": 60
    },
    {
      "epoch": 0.025311878502983186,
      "grad_norm": 0.39636465907096863,
      "learning_rate": 7e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.115657806396484,
      "logits/rejected": -18.115657806396484,
      "logps/chosen": -0.6373459696769714,
      "logps/rejected": -0.6373459696769714,
      "loss": 6.8825,
      "nll_loss": 1.651307463645935,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06373460590839386,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06373460590839386,
      "step": 70
    },
    {
      "epoch": 0.028927861146266498,
      "grad_norm": 0.5290302634239197,
      "learning_rate": 8.000000000000001e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.138675689697266,
      "logits/rejected": -18.138675689697266,
      "logps/chosen": -0.6702518463134766,
      "logps/rejected": -0.6702518463134766,
      "loss": 6.4063,
      "nll_loss": 1.5322693586349487,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06702519208192825,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06702519208192825,
      "step": 80
    },
    {
      "epoch": 0.03254384378954981,
      "grad_norm": 0.2837754189968109,
      "learning_rate": 9e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.285091400146484,
      "logits/rejected": -18.285091400146484,
      "logps/chosen": -0.5695949196815491,
      "logps/rejected": -0.5695949196815491,
      "loss": 6.3599,
      "nll_loss": 1.520659327507019,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.056959498673677444,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.056959498673677444,
      "step": 90
    },
    {
      "epoch": 0.03615982643283312,
      "grad_norm": 0.38816723227500916,
      "learning_rate": 1e-05,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.38321304321289,
      "logits/rejected": -18.38321304321289,
      "logps/chosen": -0.6220490336418152,
      "logps/rejected": -0.6220490336418152,
      "loss": 6.3579,
      "nll_loss": 1.5201700925827026,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.062204908579587936,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.062204908579587936,
      "step": 100
    },
    {
      "epoch": 0.039775809076116435,
      "grad_norm": 0.485514760017395,
      "learning_rate": 9.88888888888889e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.24630355834961,
      "logits/rejected": -18.24630355834961,
      "logps/chosen": -0.58094322681427,
      "logps/rejected": -0.58094322681427,
      "loss": 6.8083,
      "nll_loss": 1.6327617168426514,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.058094322681427,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.058094322681427,
      "step": 110
    },
    {
      "epoch": 0.04339179171939975,
      "grad_norm": 0.7200180292129517,
      "learning_rate": 9.777777777777779e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.222978591918945,
      "logits/rejected": -18.222978591918945,
      "logps/chosen": -0.6140703558921814,
      "logps/rejected": -0.6140703558921814,
      "loss": 7.0165,
      "nll_loss": 1.6848167181015015,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06140704080462456,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06140704080462456,
      "step": 120
    },
    {
      "epoch": 0.04700777436268306,
      "grad_norm": 0.5130624175071716,
      "learning_rate": 9.666666666666667e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -17.98419189453125,
      "logits/rejected": -17.98419189453125,
      "logps/chosen": -0.6171376705169678,
      "logps/rejected": -0.6171376705169678,
      "loss": 6.9743,
      "nll_loss": 1.6742585897445679,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.06171376630663872,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.06171376630663872,
      "step": 130
    },
    {
      "epoch": 0.05062375700596637,
      "grad_norm": 0.6743359565734863,
      "learning_rate": 9.555555555555556e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.43941879272461,
      "logits/rejected": -18.43941879272461,
      "logps/chosen": -0.5736243724822998,
      "logps/rejected": -0.5736243724822998,
      "loss": 6.4179,
      "nll_loss": 1.5351518392562866,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.057362429797649384,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.057362429797649384,
      "step": 140
    },
    {
      "epoch": 0.054239739649249684,
      "grad_norm": 0.7007283568382263,
      "learning_rate": 9.444444444444445e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.523662567138672,
      "logits/rejected": -18.523662567138672,
      "logps/chosen": -0.5047087073326111,
      "logps/rejected": -0.5047087073326111,
      "loss": 6.4734,
      "nll_loss": 1.549041986465454,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.050470877438783646,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.050470877438783646,
      "step": 150
    },
    {
      "epoch": 0.057855722292532996,
      "grad_norm": 0.8055678009986877,
      "learning_rate": 9.333333333333334e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.57369041442871,
      "logits/rejected": -18.57369041442871,
      "logps/chosen": -0.48697715997695923,
      "logps/rejected": -0.48697715997695923,
      "loss": 6.1125,
      "nll_loss": 1.4588209390640259,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.04869771748781204,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.04869771748781204,
      "step": 160
    },
    {
      "epoch": 0.06147170493581631,
      "grad_norm": 0.5678962469100952,
      "learning_rate": 9.222222222222224e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.546266555786133,
      "logits/rejected": -18.546266555786133,
      "logps/chosen": -0.4713471531867981,
      "logps/rejected": -0.4713471531867981,
      "loss": 6.3675,
      "nll_loss": 1.5225670337677002,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.04713470861315727,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.04713470861315727,
      "step": 170
    },
    {
      "epoch": 0.06508768757909962,
      "grad_norm": 0.9227916598320007,
      "learning_rate": 9.111111111111112e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.552295684814453,
      "logits/rejected": -18.552295684814453,
      "logps/chosen": -0.5135122537612915,
      "logps/rejected": -0.5135122537612915,
      "loss": 6.3925,
      "nll_loss": 1.5288182497024536,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.05135122686624527,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.05135122686624527,
      "step": 180
    },
    {
      "epoch": 0.06870367022238293,
      "grad_norm": 1.3030140399932861,
      "learning_rate": 9e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.453067779541016,
      "logits/rejected": -18.453067779541016,
      "logps/chosen": -0.4881868362426758,
      "logps/rejected": -0.4881868362426758,
      "loss": 6.5908,
      "nll_loss": 1.5783830881118774,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.0488186851143837,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.0488186851143837,
      "step": 190
    },
    {
      "epoch": 0.07231965286566625,
      "grad_norm": 1.0002509355545044,
      "learning_rate": 8.888888888888888e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.477296829223633,
      "logits/rejected": -18.477296829223633,
      "logps/chosen": -0.46165475249290466,
      "logps/rejected": -0.46165475249290466,
      "loss": 6.6221,
      "nll_loss": 1.5862023830413818,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.046165481209754944,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.046165481209754944,
      "step": 200
    },
    {
      "epoch": 0.07593563550894955,
      "grad_norm": 0.7885683178901672,
      "learning_rate": 8.777777777777778e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.564895629882812,
      "logits/rejected": -18.564895629882812,
      "logps/chosen": -0.41595011949539185,
      "logps/rejected": -0.41595011949539185,
      "loss": 6.2789,
      "nll_loss": 1.500412940979004,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.04159501940011978,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.04159501940011978,
      "step": 210
    },
    {
      "epoch": 0.07955161815223287,
      "grad_norm": 0.6379426717758179,
      "learning_rate": 8.666666666666668e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.577289581298828,
      "logits/rejected": -18.577289581298828,
      "logps/chosen": -0.43426617980003357,
      "logps/rejected": -0.43426617980003357,
      "loss": 6.4009,
      "nll_loss": 1.5309035778045654,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.043426621705293655,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.043426621705293655,
      "step": 220
    },
    {
      "epoch": 0.08316760079551618,
      "grad_norm": 0.5890640020370483,
      "learning_rate": 8.555555555555556e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.817012786865234,
      "logits/rejected": -18.817012786865234,
      "logps/chosen": -0.38255172967910767,
      "logps/rejected": -0.38255172967910767,
      "loss": 6.021,
      "nll_loss": 1.4359278678894043,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.038255173712968826,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.038255173712968826,
      "step": 230
    },
    {
      "epoch": 0.0867835834387995,
      "grad_norm": 0.5675965547561646,
      "learning_rate": 8.444444444444446e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.88204002380371,
      "logits/rejected": -18.88204002380371,
      "logps/chosen": -0.357022225856781,
      "logps/rejected": -0.357022225856781,
      "loss": 6.3796,
      "nll_loss": 1.5255934000015259,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.03570222482085228,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.03570222482085228,
      "step": 240
    },
    {
      "epoch": 0.0903995660820828,
      "grad_norm": 0.284084677696228,
      "learning_rate": 8.344444444444445e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.659196853637695,
      "logits/rejected": -18.659196853637695,
      "logps/chosen": -0.35400137305259705,
      "logps/rejected": -0.35400137305259705,
      "loss": 6.1544,
      "nll_loss": 1.4692747592926025,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.03540014103055,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.03540014103055,
      "step": 250
    },
    {
      "epoch": 0.09401554872536612,
      "grad_norm": 0.6424693465232849,
      "learning_rate": 8.233333333333335e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.762996673583984,
      "logits/rejected": -18.762996673583984,
      "logps/chosen": -0.40258026123046875,
      "logps/rejected": -0.40258026123046875,
      "loss": 6.3857,
      "nll_loss": 1.5271098613739014,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.040258027613162994,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.040258027613162994,
      "step": 260
    },
    {
      "epoch": 0.09763153136864942,
      "grad_norm": 0.6682632565498352,
      "learning_rate": 8.122222222222223e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.719924926757812,
      "logits/rejected": -18.719924926757812,
      "logps/chosen": -0.3123939633369446,
      "logps/rejected": -0.3123939633369446,
      "loss": 5.7456,
      "nll_loss": 1.36708664894104,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.031239395961165428,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.031239395961165428,
      "step": 270
    },
    {
      "epoch": 0.10124751401193274,
      "grad_norm": 0.9669603109359741,
      "learning_rate": 8.011111111111113e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.799579620361328,
      "logits/rejected": -18.799579620361328,
      "logps/chosen": -0.3817462623119354,
      "logps/rejected": -0.3817462623119354,
      "loss": 6.2838,
      "nll_loss": 1.5016276836395264,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.03817462921142578,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.03817462921142578,
      "step": 280
    },
    {
      "epoch": 0.10486349665521605,
      "grad_norm": 0.7264003157615662,
      "learning_rate": 7.9e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.090185165405273,
      "logits/rejected": -19.090185165405273,
      "logps/chosen": -0.3294925093650818,
      "logps/rejected": -0.3294925093650818,
      "loss": 5.6699,
      "nll_loss": 1.3481695652008057,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.03294925019145012,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.03294925019145012,
      "step": 290
    },
    {
      "epoch": 0.10847947929849937,
      "grad_norm": 0.564611554145813,
      "learning_rate": 7.788888888888889e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.767759323120117,
      "logits/rejected": -18.767759323120117,
      "logps/chosen": -0.26777949929237366,
      "logps/rejected": -0.26777949929237366,
      "loss": 6.5104,
      "nll_loss": 1.5582915544509888,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.026777952909469604,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.026777952909469604,
      "step": 300
    },
    {
      "epoch": 0.11209546194178267,
      "grad_norm": 0.6952475905418396,
      "learning_rate": 7.677777777777778e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.896703720092773,
      "logits/rejected": -18.896703720092773,
      "logps/chosen": -0.24664482474327087,
      "logps/rejected": -0.24664482474327087,
      "loss": 5.9777,
      "nll_loss": 1.4251067638397217,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.02466448023915291,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.02466448023915291,
      "step": 310
    },
    {
      "epoch": 0.11571144458506599,
      "grad_norm": 0.7094094753265381,
      "learning_rate": 7.566666666666667e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.993297576904297,
      "logits/rejected": -18.993297576904297,
      "logps/chosen": -0.27139216661453247,
      "logps/rejected": -0.27139216661453247,
      "loss": 5.848,
      "nll_loss": 1.3926928043365479,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.027139216661453247,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.027139216661453247,
      "step": 320
    },
    {
      "epoch": 0.1193274272283493,
      "grad_norm": 0.8386672735214233,
      "learning_rate": 7.455555555555556e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.818639755249023,
      "logits/rejected": -18.818639755249023,
      "logps/chosen": -0.2643309533596039,
      "logps/rejected": -0.2643309533596039,
      "loss": 6.2247,
      "nll_loss": 1.4868563413619995,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.026433095335960388,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.026433095335960388,
      "step": 330
    },
    {
      "epoch": 0.12294340987163262,
      "grad_norm": 0.6337667107582092,
      "learning_rate": 7.344444444444445e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.07388687133789,
      "logits/rejected": -19.07388687133789,
      "logps/chosen": -0.23179857432842255,
      "logps/rejected": -0.23179857432842255,
      "loss": 5.7871,
      "nll_loss": 1.3774592876434326,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.023179858922958374,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.023179858922958374,
      "step": 340
    },
    {
      "epoch": 0.12655939251491594,
      "grad_norm": 1.056504726409912,
      "learning_rate": 7.233333333333334e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.8520450592041,
      "logits/rejected": -18.8520450592041,
      "logps/chosen": -0.24602051079273224,
      "logps/rejected": -0.24602051079273224,
      "loss": 5.8268,
      "nll_loss": 1.3873755931854248,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.024602051824331284,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.024602051824331284,
      "step": 350
    },
    {
      "epoch": 0.13017537515819924,
      "grad_norm": 0.4462037682533264,
      "learning_rate": 7.122222222222222e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.04658317565918,
      "logits/rejected": -19.04658317565918,
      "logps/chosen": -0.2291136234998703,
      "logps/rejected": -0.2291136234998703,
      "loss": 5.8142,
      "nll_loss": 1.384232521057129,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.02291136048734188,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.02291136048734188,
      "step": 360
    },
    {
      "epoch": 0.13379135780148255,
      "grad_norm": 0.5579663515090942,
      "learning_rate": 7.011111111111112e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -18.912765502929688,
      "logits/rejected": -18.912765502929688,
      "logps/chosen": -0.26528915762901306,
      "logps/rejected": -0.26528915762901306,
      "loss": 6.0316,
      "nll_loss": 1.4385900497436523,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.026528915390372276,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.026528915390372276,
      "step": 370
    },
    {
      "epoch": 0.13740734044476585,
      "grad_norm": 1.2433960437774658,
      "learning_rate": 6.9e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.217700958251953,
      "logits/rejected": -19.217700958251953,
      "logps/chosen": -0.21606405079364777,
      "logps/rejected": -0.21606405079364777,
      "loss": 5.7944,
      "nll_loss": 1.3792803287506104,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.021606406196951866,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.021606406196951866,
      "step": 380
    },
    {
      "epoch": 0.14102332308804919,
      "grad_norm": 0.7657280564308167,
      "learning_rate": 6.788888888888889e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.175457000732422,
      "logits/rejected": -19.175457000732422,
      "logps/chosen": -0.2413448989391327,
      "logps/rejected": -0.2413448989391327,
      "loss": 5.7839,
      "nll_loss": 1.3766554594039917,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.02413449063897133,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.02413449063897133,
      "step": 390
    },
    {
      "epoch": 0.1446393057313325,
      "grad_norm": 0.7070457935333252,
      "learning_rate": 6.677777777777779e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.15035629272461,
      "logits/rejected": -19.15035629272461,
      "logps/chosen": -0.25096631050109863,
      "logps/rejected": -0.25096631050109863,
      "loss": 6.2307,
      "nll_loss": 1.4883568286895752,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.025096634402871132,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.025096634402871132,
      "step": 400
    },
    {
      "epoch": 0.1482552883746158,
      "grad_norm": 0.43025216460227966,
      "learning_rate": 6.566666666666667e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.02223014831543,
      "logits/rejected": -19.02223014831543,
      "logps/chosen": -0.24732474982738495,
      "logps/rejected": -0.24732474982738495,
      "loss": 6.2865,
      "nll_loss": 1.5023012161254883,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.024732474237680435,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.024732474237680435,
      "step": 410
    },
    {
      "epoch": 0.1518712710178991,
      "grad_norm": 1.3701528310775757,
      "learning_rate": 6.455555555555556e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.089460372924805,
      "logits/rejected": -19.089460372924805,
      "logps/chosen": -0.23749932646751404,
      "logps/rejected": -0.23749932646751404,
      "loss": 6.2238,
      "nll_loss": 1.4866377115249634,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.023749932646751404,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.023749932646751404,
      "step": 420
    },
    {
      "epoch": 0.15548725366118243,
      "grad_norm": 0.9421939253807068,
      "learning_rate": 6.3444444444444454e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.231048583984375,
      "logits/rejected": -19.231048583984375,
      "logps/chosen": -0.19140982627868652,
      "logps/rejected": -0.19140982627868652,
      "loss": 6.0814,
      "nll_loss": 1.451047420501709,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.01914098486304283,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.01914098486304283,
      "step": 430
    },
    {
      "epoch": 0.15910323630446574,
      "grad_norm": 0.9060840010643005,
      "learning_rate": 6.2333333333333335e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.107379913330078,
      "logits/rejected": -19.107379913330078,
      "logps/chosen": -0.2809773087501526,
      "logps/rejected": -0.2809773087501526,
      "loss": 6.3671,
      "nll_loss": 1.522457480430603,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.0280977301299572,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.0280977301299572,
      "step": 440
    },
    {
      "epoch": 0.16271921894774904,
      "grad_norm": 0.5965518355369568,
      "learning_rate": 6.1222222222222224e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.34296417236328,
      "logits/rejected": -19.34296417236328,
      "logps/chosen": -0.19397786259651184,
      "logps/rejected": -0.19397786259651184,
      "loss": 5.7975,
      "nll_loss": 1.3800629377365112,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.019397784024477005,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.019397784024477005,
      "step": 450
    },
    {
      "epoch": 0.16633520159103235,
      "grad_norm": 0.9895502328872681,
      "learning_rate": 6.011111111111112e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.35286521911621,
      "logits/rejected": -19.35286521911621,
      "logps/chosen": -0.20646443963050842,
      "logps/rejected": -0.20646443963050842,
      "loss": 5.8574,
      "nll_loss": 1.3950278759002686,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.02064644545316696,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.02064644545316696,
      "step": 460
    },
    {
      "epoch": 0.16995118423431568,
      "grad_norm": 1.6251282691955566,
      "learning_rate": 5.9e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.29703140258789,
      "logits/rejected": -19.29703140258789,
      "logps/chosen": -0.23044386506080627,
      "logps/rejected": -0.23044386506080627,
      "loss": 6.0743,
      "nll_loss": 1.4492676258087158,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.023044386878609657,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.023044386878609657,
      "step": 470
    },
    {
      "epoch": 0.173567166877599,
      "grad_norm": 0.7959076762199402,
      "learning_rate": 5.788888888888889e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.089004516601562,
      "logits/rejected": -19.089004516601562,
      "logps/chosen": -0.1713530272245407,
      "logps/rejected": -0.1713530272245407,
      "loss": 6.0034,
      "nll_loss": 1.4315412044525146,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.01713530346751213,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.01713530346751213,
      "step": 480
    },
    {
      "epoch": 0.1771831495208823,
      "grad_norm": 0.6285834908485413,
      "learning_rate": 5.677777777777779e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.485876083374023,
      "logits/rejected": -19.485876083374023,
      "logps/chosen": -0.18361307680606842,
      "logps/rejected": -0.18361307680606842,
      "loss": 5.6858,
      "nll_loss": 1.3521363735198975,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.018361307680606842,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.018361307680606842,
      "step": 490
    },
    {
      "epoch": 0.1807991321641656,
      "grad_norm": 0.9556539058685303,
      "learning_rate": 5.566666666666667e-06,
      "log_odds_chosen": 0.0,
      "log_odds_ratio": -0.6931472420692444,
      "logits/chosen": -19.300710678100586,
      "logits/rejected": -19.300710678100586,
      "logps/chosen": -0.17280462384223938,
      "logps/rejected": -0.17280462384223938,
      "loss": 6.0335,
      "nll_loss": 1.439051866531372,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -0.01728046126663685,
      "rewards/margins": 0.0,
      "rewards/rejected": -0.01728046126663685,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}