{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.99581589958159, "eval_steps": 500, "global_step": 119, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.166666666666666e-08, "logits/chosen": -2.4389588832855225, "logits/rejected": -2.3863701820373535, "logps/chosen": -220.68759155273438, "logps/rejected": -373.33087158203125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.3459177017211914, "logits/rejected": -2.334230422973633, "logps/chosen": -247.85926818847656, "logps/rejected": -363.747802734375, "loss": 0.6811, "rewards/accuracies": 0.5555555820465088, "rewards/chosen": -0.021831953898072243, "rewards/margins": 0.03538733720779419, "rewards/rejected": -0.05721929296851158, "step": 10 }, { "epoch": 0.17, "learning_rate": 4.931352528237397e-07, "logits/chosen": -2.238157272338867, "logits/rejected": -2.2056198120117188, "logps/chosen": -283.0589294433594, "logps/rejected": -371.25146484375, "loss": 0.6646, "rewards/accuracies": 0.625, "rewards/chosen": -0.3717283308506012, "rewards/margins": 0.2823185622692108, "rewards/rejected": -0.654046893119812, "step": 20 }, { "epoch": 0.25, "learning_rate": 4.658920803689553e-07, "logits/chosen": -2.2126076221466064, "logits/rejected": -2.165159225463867, "logps/chosen": -284.18988037109375, "logps/rejected": -394.07562255859375, "loss": 0.6218, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2739036977291107, "rewards/margins": 0.25531530380249023, "rewards/rejected": -0.5292190313339233, "step": 30 }, { "epoch": 0.33, "learning_rate": 4.201712553872657e-07, "logits/chosen": -2.2903313636779785, "logits/rejected": -2.253678798675537, "logps/chosen": -268.6778259277344, "logps/rejected": -406.4809875488281, "loss": 0.6207, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3238675594329834, "rewards/margins": 0.3434136211872101, "rewards/rejected": -0.6672812104225159, "step": 40 }, { "epoch": 0.42, "learning_rate": 3.598859066780754e-07, "logits/chosen": -2.216749668121338, "logits/rejected": -2.1586577892303467, "logps/chosen": -270.4320373535156, "logps/rejected": -431.34161376953125, "loss": 0.6423, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3394399583339691, "rewards/margins": 0.34264153242111206, "rewards/rejected": -0.6820814609527588, "step": 50 }, { "epoch": 0.5, "learning_rate": 2.9019570347986706e-07, "logits/chosen": -2.3308839797973633, "logits/rejected": -2.299959659576416, "logps/chosen": -260.9419860839844, "logps/rejected": -367.1455383300781, "loss": 0.6307, "rewards/accuracies": 0.59375, "rewards/chosen": -0.20153513550758362, "rewards/margins": 0.20082291960716248, "rewards/rejected": -0.4023580551147461, "step": 60 }, { "epoch": 0.59, "learning_rate": 2.1706525253979534e-07, "logits/chosen": -2.3276865482330322, "logits/rejected": -2.293214797973633, "logps/chosen": -288.04248046875, "logps/rejected": -417.49383544921875, "loss": 0.6296, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.24966976046562195, "rewards/margins": 0.32144370675086975, "rewards/rejected": -0.5711134076118469, "step": 70 }, { "epoch": 0.67, "learning_rate": 1.4675360263490295e-07, "logits/chosen": -2.3125128746032715, "logits/rejected": -2.2743871212005615, "logps/chosen": -275.89019775390625, "logps/rejected": -385.26055908203125, "loss": 0.6214, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.2819868326187134, "rewards/margins": 0.24118752777576447, "rewards/rejected": -0.5231744050979614, "step": 80 }, { "epoch": 0.75, "learning_rate": 8.527854855097224e-08, "logits/chosen": -2.294402837753296, "logits/rejected": -2.2615461349487305, "logps/chosen": -298.8416748046875, "logps/rejected": -409.93212890625, "loss": 0.6106, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.285415917634964, "rewards/margins": 0.26987579464912415, "rewards/rejected": -0.5552917718887329, "step": 90 }, { "epoch": 0.84, "learning_rate": 3.790158337517127e-08, "logits/chosen": -2.341721534729004, "logits/rejected": -2.3034510612487793, "logps/chosen": -271.6078186035156, "logps/rejected": -407.69989013671875, "loss": 0.606, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.26334214210510254, "rewards/margins": 0.33658233284950256, "rewards/rejected": -0.5999244451522827, "step": 100 }, { "epoch": 0.92, "learning_rate": 8.677580722139671e-09, "logits/chosen": -2.2649519443511963, "logits/rejected": -2.2306463718414307, "logps/chosen": -257.1134338378906, "logps/rejected": -387.23760986328125, "loss": 0.6153, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.28197985887527466, "rewards/margins": 0.3022700846195221, "rewards/rejected": -0.5842499732971191, "step": 110 }, { "epoch": 1.0, "step": 119, "total_flos": 0.0, "train_loss": 0.6313913529660521, "train_runtime": 1987.426, "train_samples_per_second": 7.69, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 119, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }