|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 2942, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.694915254237288e-09, |
|
"logits/chosen": -1.5211243629455566, |
|
"logits/rejected": -0.9348576664924622, |
|
"logps/chosen": -412.05706787109375, |
|
"logps/rejected": -913.2714233398438, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6949152542372882e-08, |
|
"logits/chosen": -1.4827719926834106, |
|
"logits/rejected": -1.226508378982544, |
|
"logps/chosen": -679.3842163085938, |
|
"logps/rejected": -639.005126953125, |
|
"loss": 0.8262, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.20207053422927856, |
|
"rewards/margins": 0.28480756282806396, |
|
"rewards/rejected": -0.0827370211482048, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3898305084745764e-08, |
|
"logits/chosen": -1.4881559610366821, |
|
"logits/rejected": -1.2070544958114624, |
|
"logps/chosen": -392.80548095703125, |
|
"logps/rejected": -549.167724609375, |
|
"loss": 0.8207, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.09954075515270233, |
|
"rewards/margins": -0.08116824924945831, |
|
"rewards/rejected": 0.18070900440216064, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.0847457627118645e-08, |
|
"logits/chosen": -1.455928921699524, |
|
"logits/rejected": -1.218510389328003, |
|
"logps/chosen": -549.7676391601562, |
|
"logps/rejected": -525.0243530273438, |
|
"loss": 0.8307, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.05704854801297188, |
|
"rewards/margins": 0.16263702511787415, |
|
"rewards/rejected": -0.10558845847845078, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.779661016949153e-08, |
|
"logits/chosen": -1.4766838550567627, |
|
"logits/rejected": -1.218590259552002, |
|
"logps/chosen": -411.13653564453125, |
|
"logps/rejected": -574.4963989257812, |
|
"loss": 0.7857, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.10409893095493317, |
|
"rewards/margins": -0.21830201148986816, |
|
"rewards/rejected": 0.11420309543609619, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.47457627118644e-08, |
|
"logits/chosen": -1.5140564441680908, |
|
"logits/rejected": -1.1615564823150635, |
|
"logps/chosen": -362.17059326171875, |
|
"logps/rejected": -673.89013671875, |
|
"loss": 0.8045, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.031153270974755287, |
|
"rewards/margins": 0.03609558939933777, |
|
"rewards/rejected": -0.00494231004267931, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0169491525423729e-07, |
|
"logits/chosen": -1.498203992843628, |
|
"logits/rejected": -1.232889175415039, |
|
"logps/chosen": -459.11163330078125, |
|
"logps/rejected": -447.8902282714844, |
|
"loss": 0.7617, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3170378804206848, |
|
"rewards/margins": 0.3453710079193115, |
|
"rewards/rejected": -0.028333133086562157, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1864406779661017e-07, |
|
"logits/chosen": -1.495025396347046, |
|
"logits/rejected": -1.215308427810669, |
|
"logps/chosen": -423.4064025878906, |
|
"logps/rejected": -605.0032958984375, |
|
"loss": 0.7105, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.3061259090900421, |
|
"rewards/margins": 0.37270691990852356, |
|
"rewards/rejected": -0.06658102571964264, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3559322033898305e-07, |
|
"logits/chosen": -1.475776195526123, |
|
"logits/rejected": -1.1816449165344238, |
|
"logps/chosen": -586.6575927734375, |
|
"logps/rejected": -481.2361755371094, |
|
"loss": 0.706, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3719860911369324, |
|
"rewards/margins": 0.38908010721206665, |
|
"rewards/rejected": -0.017094042152166367, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5254237288135593e-07, |
|
"logits/chosen": -1.5008628368377686, |
|
"logits/rejected": -1.2657488584518433, |
|
"logps/chosen": -372.3196105957031, |
|
"logps/rejected": -367.21673583984375, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.44139355421066284, |
|
"rewards/margins": 0.43834584951400757, |
|
"rewards/rejected": 0.0030477314721792936, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.694915254237288e-07, |
|
"logits/chosen": -1.5043809413909912, |
|
"logits/rejected": -1.163338303565979, |
|
"logps/chosen": -347.18408203125, |
|
"logps/rejected": -516.4083862304688, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.5171124935150146, |
|
"rewards/margins": 0.7477121353149414, |
|
"rewards/rejected": -0.23059968650341034, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -1.4878125190734863, |
|
"eval_logits/rejected": -1.1894134283065796, |
|
"eval_logps/chosen": -412.9344482421875, |
|
"eval_logps/rejected": -560.655029296875, |
|
"eval_loss": 0.5642263293266296, |
|
"eval_rewards/accuracies": 0.7424242496490479, |
|
"eval_rewards/chosen": 0.69883131980896, |
|
"eval_rewards/margins": 0.8126964569091797, |
|
"eval_rewards/rejected": -0.11386506259441376, |
|
"eval_runtime": 556.7476, |
|
"eval_samples_per_second": 17.063, |
|
"eval_steps_per_second": 0.533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8644067796610168e-07, |
|
"logits/chosen": -1.4966198205947876, |
|
"logits/rejected": -1.1994943618774414, |
|
"logps/chosen": -360.8127746582031, |
|
"logps/rejected": -802.7747802734375, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.7751646637916565, |
|
"rewards/margins": 1.0216423273086548, |
|
"rewards/rejected": -0.24647776782512665, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0338983050847458e-07, |
|
"logits/chosen": -1.5263328552246094, |
|
"logits/rejected": -1.2719924449920654, |
|
"logps/chosen": -380.39715576171875, |
|
"logps/rejected": -544.8963012695312, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.0511916875839233, |
|
"rewards/margins": 0.912127673625946, |
|
"rewards/rejected": 0.13906405866146088, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2033898305084743e-07, |
|
"logits/chosen": -1.4926766157150269, |
|
"logits/rejected": -1.205890417098999, |
|
"logps/chosen": -433.191650390625, |
|
"logps/rejected": -580.9930419921875, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.003864049911499, |
|
"rewards/margins": 1.033372402191162, |
|
"rewards/rejected": -0.029508382081985474, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3728813559322033e-07, |
|
"logits/chosen": -1.4977641105651855, |
|
"logits/rejected": -1.2685011625289917, |
|
"logps/chosen": -319.09954833984375, |
|
"logps/rejected": -613.55859375, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.8334357142448425, |
|
"rewards/margins": 0.8497712016105652, |
|
"rewards/rejected": -0.01633552275598049, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.542372881355932e-07, |
|
"logits/chosen": -1.49599289894104, |
|
"logits/rejected": -1.2160688638687134, |
|
"logps/chosen": -361.3035583496094, |
|
"logps/rejected": -552.3671264648438, |
|
"loss": 0.4224, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.0968669652938843, |
|
"rewards/margins": 1.1340056657791138, |
|
"rewards/rejected": -0.037138573825359344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.711864406779661e-07, |
|
"logits/chosen": -1.4980968236923218, |
|
"logits/rejected": -1.2006438970565796, |
|
"logps/chosen": -340.8217468261719, |
|
"logps/rejected": -510.5523376464844, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.201171875, |
|
"rewards/margins": 1.245226502418518, |
|
"rewards/rejected": -0.04405476525425911, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.88135593220339e-07, |
|
"logits/chosen": -1.485167145729065, |
|
"logits/rejected": -1.1941057443618774, |
|
"logps/chosen": -447.4808654785156, |
|
"logps/rejected": -482.01336669921875, |
|
"loss": 0.3674, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.3460757732391357, |
|
"rewards/margins": 1.4081037044525146, |
|
"rewards/rejected": -0.06202799081802368, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0508474576271186e-07, |
|
"logits/chosen": -1.4714066982269287, |
|
"logits/rejected": -1.2212668657302856, |
|
"logps/chosen": -495.3789978027344, |
|
"logps/rejected": -627.9542236328125, |
|
"loss": 0.3855, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.6778628826141357, |
|
"rewards/margins": 1.837794303894043, |
|
"rewards/rejected": -0.15993157029151917, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.220338983050847e-07, |
|
"logits/chosen": -1.4885241985321045, |
|
"logits/rejected": -1.1983642578125, |
|
"logps/chosen": -357.9476623535156, |
|
"logps/rejected": -569.2054443359375, |
|
"loss": 0.3162, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.981848955154419, |
|
"rewards/margins": 2.1885552406311035, |
|
"rewards/rejected": -0.2067060023546219, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.389830508474576e-07, |
|
"logits/chosen": -1.4891068935394287, |
|
"logits/rejected": -1.140967607498169, |
|
"logps/chosen": -420.4295349121094, |
|
"logps/rejected": -437.64874267578125, |
|
"loss": 0.3539, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.6021934747695923, |
|
"rewards/margins": 1.6449912786483765, |
|
"rewards/rejected": -0.04279797524213791, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_logits/chosen": -1.4799621105194092, |
|
"eval_logits/rejected": -1.1625027656555176, |
|
"eval_logps/chosen": -400.7641296386719, |
|
"eval_logps/rejected": -562.246337890625, |
|
"eval_loss": 0.31968235969543457, |
|
"eval_rewards/accuracies": 0.8846801519393921, |
|
"eval_rewards/chosen": 1.915861964225769, |
|
"eval_rewards/margins": 2.1888532638549805, |
|
"eval_rewards/rejected": -0.27299147844314575, |
|
"eval_runtime": 558.7533, |
|
"eval_samples_per_second": 17.002, |
|
"eval_steps_per_second": 0.532, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.559322033898305e-07, |
|
"logits/chosen": -1.4898474216461182, |
|
"logits/rejected": -1.2634966373443604, |
|
"logps/chosen": -344.88134765625, |
|
"logps/rejected": -730.7076416015625, |
|
"loss": 0.3019, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.2538204193115234, |
|
"rewards/margins": 2.555640697479248, |
|
"rewards/rejected": -0.3018200993537903, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7288135593220336e-07, |
|
"logits/chosen": -1.4739089012145996, |
|
"logits/rejected": -1.2359154224395752, |
|
"logps/chosen": -474.7027282714844, |
|
"logps/rejected": -482.19598388671875, |
|
"loss": 0.2677, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.262795925140381, |
|
"rewards/margins": 2.4807448387145996, |
|
"rewards/rejected": -0.21794895827770233, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.898305084745763e-07, |
|
"logits/chosen": -1.4778010845184326, |
|
"logits/rejected": -1.2181063890457153, |
|
"logps/chosen": -419.628662109375, |
|
"logps/rejected": -598.6785278320312, |
|
"loss": 0.267, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.3457484245300293, |
|
"rewards/margins": 2.492673397064209, |
|
"rewards/rejected": -0.1469249576330185, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0677966101694916e-07, |
|
"logits/chosen": -1.4769701957702637, |
|
"logits/rejected": -1.1472581624984741, |
|
"logps/chosen": -401.890625, |
|
"logps/rejected": -701.8416748046875, |
|
"loss": 0.3046, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.2063190937042236, |
|
"rewards/margins": 2.501216173171997, |
|
"rewards/rejected": -0.2948969304561615, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.23728813559322e-07, |
|
"logits/chosen": -1.4752933979034424, |
|
"logits/rejected": -1.1277306079864502, |
|
"logps/chosen": -344.5944519042969, |
|
"logps/rejected": -534.5394287109375, |
|
"loss": 0.2745, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.5138680934906006, |
|
"rewards/margins": 3.0067882537841797, |
|
"rewards/rejected": -0.4929198622703552, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4067796610169486e-07, |
|
"logits/chosen": -1.4643208980560303, |
|
"logits/rejected": -1.2705574035644531, |
|
"logps/chosen": -415.1036071777344, |
|
"logps/rejected": -591.1699829101562, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.2184274196624756, |
|
"rewards/margins": 2.6140334606170654, |
|
"rewards/rejected": -0.395606130361557, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.576271186440678e-07, |
|
"logits/chosen": -1.4880424737930298, |
|
"logits/rejected": -1.1456931829452515, |
|
"logps/chosen": -367.011962890625, |
|
"logps/rejected": -408.5341491699219, |
|
"loss": 0.2313, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.4607489109039307, |
|
"rewards/margins": 2.9449193477630615, |
|
"rewards/rejected": -0.48417049646377563, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7457627118644066e-07, |
|
"logits/chosen": -1.4503757953643799, |
|
"logits/rejected": -1.0820204019546509, |
|
"logps/chosen": -341.2666320800781, |
|
"logps/rejected": -390.6230163574219, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.5250704288482666, |
|
"rewards/margins": 3.119588851928711, |
|
"rewards/rejected": -0.59451824426651, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.915254237288136e-07, |
|
"logits/chosen": -1.4802117347717285, |
|
"logits/rejected": -1.1157623529434204, |
|
"logps/chosen": -311.9570007324219, |
|
"logps/rejected": -566.5151977539062, |
|
"loss": 0.2405, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.3611626625061035, |
|
"rewards/margins": 3.0045018196105957, |
|
"rewards/rejected": -0.6433390378952026, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.990555345674349e-07, |
|
"logits/chosen": -1.483705997467041, |
|
"logits/rejected": -1.069526195526123, |
|
"logps/chosen": -341.3657531738281, |
|
"logps/rejected": -632.6183471679688, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 2.7662487030029297, |
|
"rewards/margins": 3.5426669120788574, |
|
"rewards/rejected": -0.7764180302619934, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.464929223060608, |
|
"eval_logits/rejected": -1.1360561847686768, |
|
"eval_logps/chosen": -391.8653564453125, |
|
"eval_logps/rejected": -565.0551147460938, |
|
"eval_loss": 0.2127748280763626, |
|
"eval_rewards/accuracies": 0.9200336933135986, |
|
"eval_rewards/chosen": 2.805741310119629, |
|
"eval_rewards/margins": 3.3596181869506836, |
|
"eval_rewards/rejected": -0.5538769960403442, |
|
"eval_runtime": 557.7091, |
|
"eval_samples_per_second": 17.034, |
|
"eval_steps_per_second": 0.533, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.971666037023044e-07, |
|
"logits/chosen": -1.478566288948059, |
|
"logits/rejected": -1.18798828125, |
|
"logps/chosen": -352.68511962890625, |
|
"logps/rejected": -547.9373168945312, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.948876142501831, |
|
"rewards/margins": 3.459970474243164, |
|
"rewards/rejected": -0.5110937356948853, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.952776728371742e-07, |
|
"logits/chosen": -1.441450834274292, |
|
"logits/rejected": -1.1746580600738525, |
|
"logps/chosen": -548.0260009765625, |
|
"logps/rejected": -451.2164611816406, |
|
"loss": 0.2126, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 3.0513620376586914, |
|
"rewards/margins": 3.686058759689331, |
|
"rewards/rejected": -0.6346968412399292, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.933887419720438e-07, |
|
"logits/chosen": -1.4612153768539429, |
|
"logits/rejected": -1.1157002449035645, |
|
"logps/chosen": -409.2095642089844, |
|
"logps/rejected": -628.3384399414062, |
|
"loss": 0.2312, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.231168031692505, |
|
"rewards/margins": 4.0572967529296875, |
|
"rewards/rejected": -0.826129138469696, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.914998111069135e-07, |
|
"logits/chosen": -1.471062183380127, |
|
"logits/rejected": -1.1619117259979248, |
|
"logps/chosen": -319.6500549316406, |
|
"logps/rejected": -560.5769653320312, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.2418594360351562, |
|
"rewards/margins": 3.9750003814697266, |
|
"rewards/rejected": -0.733141303062439, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.896108802417831e-07, |
|
"logits/chosen": -1.4680635929107666, |
|
"logits/rejected": -1.2122979164123535, |
|
"logps/chosen": -383.1250305175781, |
|
"logps/rejected": -619.9765625, |
|
"loss": 0.2053, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.1170341968536377, |
|
"rewards/margins": 3.632521152496338, |
|
"rewards/rejected": -0.5154868364334106, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.877219493766528e-07, |
|
"logits/chosen": -1.480360746383667, |
|
"logits/rejected": -1.1829755306243896, |
|
"logps/chosen": -315.16925048828125, |
|
"logps/rejected": -440.88409423828125, |
|
"loss": 0.1615, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.1895642280578613, |
|
"rewards/margins": 4.29224967956543, |
|
"rewards/rejected": -1.1026861667633057, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.858330185115224e-07, |
|
"logits/chosen": -1.4765106439590454, |
|
"logits/rejected": -1.180673360824585, |
|
"logps/chosen": -322.9911193847656, |
|
"logps/rejected": -827.7867431640625, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 3.0941760540008545, |
|
"rewards/margins": 3.847609043121338, |
|
"rewards/rejected": -0.7534326910972595, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.839440876463921e-07, |
|
"logits/chosen": -1.4910002946853638, |
|
"logits/rejected": -1.1475781202316284, |
|
"logps/chosen": -359.06103515625, |
|
"logps/rejected": -703.1707153320312, |
|
"loss": 0.1842, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.6445469856262207, |
|
"rewards/margins": 4.668353080749512, |
|
"rewards/rejected": -1.023805856704712, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.820551567812618e-07, |
|
"logits/chosen": -1.4512460231781006, |
|
"logits/rejected": -1.1814398765563965, |
|
"logps/chosen": -393.1922302246094, |
|
"logps/rejected": -443.9873962402344, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 3.31535267829895, |
|
"rewards/margins": 3.9874045848846436, |
|
"rewards/rejected": -0.6720519065856934, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.801662259161314e-07, |
|
"logits/chosen": -1.4928423166275024, |
|
"logits/rejected": -1.0846529006958008, |
|
"logps/chosen": -340.31768798828125, |
|
"logps/rejected": -675.9088745117188, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.019392967224121, |
|
"rewards/margins": 5.392711639404297, |
|
"rewards/rejected": -1.3733187913894653, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_logits/chosen": -1.462174892425537, |
|
"eval_logits/rejected": -1.1299500465393066, |
|
"eval_logps/chosen": -385.3669738769531, |
|
"eval_logps/rejected": -569.8557739257812, |
|
"eval_loss": 0.1673159897327423, |
|
"eval_rewards/accuracies": 0.932659924030304, |
|
"eval_rewards/chosen": 3.4555790424346924, |
|
"eval_rewards/margins": 4.489521503448486, |
|
"eval_rewards/rejected": -1.0339421033859253, |
|
"eval_runtime": 557.8843, |
|
"eval_samples_per_second": 17.029, |
|
"eval_steps_per_second": 0.532, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.782772950510011e-07, |
|
"logits/chosen": -1.4920897483825684, |
|
"logits/rejected": -1.212968111038208, |
|
"logps/chosen": -316.6252746582031, |
|
"logps/rejected": -558.7969360351562, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.692905902862549, |
|
"rewards/margins": 4.354551315307617, |
|
"rewards/rejected": -0.6616458296775818, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7638836418587073e-07, |
|
"logits/chosen": -1.468379259109497, |
|
"logits/rejected": -1.1990084648132324, |
|
"logps/chosen": -325.2828369140625, |
|
"logps/rejected": -746.1368408203125, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.6396260261535645, |
|
"rewards/margins": 4.8296284675598145, |
|
"rewards/rejected": -1.19000244140625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7449943332074044e-07, |
|
"logits/chosen": -1.4748234748840332, |
|
"logits/rejected": -1.181004285812378, |
|
"logps/chosen": -308.9472961425781, |
|
"logps/rejected": -665.6039428710938, |
|
"loss": 0.1592, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.706333875656128, |
|
"rewards/margins": 4.874017715454102, |
|
"rewards/rejected": -1.1676843166351318, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7261050245561014e-07, |
|
"logits/chosen": -1.4771819114685059, |
|
"logits/rejected": -1.1283105611801147, |
|
"logps/chosen": -328.56915283203125, |
|
"logps/rejected": -495.9109802246094, |
|
"loss": 0.1475, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 3.4302115440368652, |
|
"rewards/margins": 4.693282127380371, |
|
"rewards/rejected": -1.2630702257156372, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7072157159047975e-07, |
|
"logits/chosen": -1.4672292470932007, |
|
"logits/rejected": -1.0770254135131836, |
|
"logps/chosen": -366.31182861328125, |
|
"logps/rejected": -418.83758544921875, |
|
"loss": 0.1714, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.2115979194641113, |
|
"rewards/margins": 4.826067924499512, |
|
"rewards/rejected": -1.6144702434539795, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6883264072534946e-07, |
|
"logits/chosen": -1.4420884847640991, |
|
"logits/rejected": -1.096064567565918, |
|
"logps/chosen": -405.8441467285156, |
|
"logps/rejected": -466.4434509277344, |
|
"loss": 0.1394, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.97516131401062, |
|
"rewards/margins": 5.2190327644348145, |
|
"rewards/rejected": -1.2438714504241943, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6694370986021906e-07, |
|
"logits/chosen": -1.4477102756500244, |
|
"logits/rejected": -1.1750242710113525, |
|
"logps/chosen": -496.8306579589844, |
|
"logps/rejected": -317.3291015625, |
|
"loss": 0.1444, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.208278656005859, |
|
"rewards/margins": 5.423037052154541, |
|
"rewards/rejected": -1.2147585153579712, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6505477899508877e-07, |
|
"logits/chosen": -1.4701149463653564, |
|
"logits/rejected": -1.249976396560669, |
|
"logps/chosen": -317.600830078125, |
|
"logps/rejected": -649.2884521484375, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 3.9683494567871094, |
|
"rewards/margins": 5.167794227600098, |
|
"rewards/rejected": -1.199444055557251, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.631658481299584e-07, |
|
"logits/chosen": -1.4915847778320312, |
|
"logits/rejected": -1.1582549810409546, |
|
"logps/chosen": -343.9900207519531, |
|
"logps/rejected": -545.9863891601562, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.775851726531982, |
|
"rewards/margins": 6.124849796295166, |
|
"rewards/rejected": -1.3489978313446045, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.612769172648281e-07, |
|
"logits/chosen": -1.4516441822052002, |
|
"logits/rejected": -1.1763832569122314, |
|
"logps/chosen": -479.01776123046875, |
|
"logps/rejected": -355.7424011230469, |
|
"loss": 0.1599, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.103041172027588, |
|
"rewards/margins": 5.349932670593262, |
|
"rewards/rejected": -1.2468923330307007, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_logits/chosen": -1.4607428312301636, |
|
"eval_logits/rejected": -1.1274610757827759, |
|
"eval_logps/chosen": -382.4375915527344, |
|
"eval_logps/rejected": -572.8546142578125, |
|
"eval_loss": 0.13974203169345856, |
|
"eval_rewards/accuracies": 0.9461279511451721, |
|
"eval_rewards/chosen": 3.7485170364379883, |
|
"eval_rewards/margins": 5.082335472106934, |
|
"eval_rewards/rejected": -1.3338183164596558, |
|
"eval_runtime": 557.5578, |
|
"eval_samples_per_second": 17.039, |
|
"eval_steps_per_second": 0.533, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5938798639969773e-07, |
|
"logits/chosen": -1.4614421129226685, |
|
"logits/rejected": -1.1661673784255981, |
|
"logps/chosen": -432.869384765625, |
|
"logps/rejected": -702.9627685546875, |
|
"loss": 0.1517, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 3.9662163257598877, |
|
"rewards/margins": 5.194244384765625, |
|
"rewards/rejected": -1.228027105331421, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.574990555345674e-07, |
|
"logits/chosen": -1.4722058773040771, |
|
"logits/rejected": -1.1686432361602783, |
|
"logps/chosen": -335.47344970703125, |
|
"logps/rejected": -595.4827880859375, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.8043179512023926, |
|
"rewards/margins": 5.093755722045898, |
|
"rewards/rejected": -1.2894370555877686, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.556101246694371e-07, |
|
"logits/chosen": -1.4674203395843506, |
|
"logits/rejected": -1.1525086164474487, |
|
"logps/chosen": -329.02264404296875, |
|
"logps/rejected": -652.1644287109375, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.099880218505859, |
|
"rewards/margins": 5.421080589294434, |
|
"rewards/rejected": -1.3212003707885742, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5372119380430675e-07, |
|
"logits/chosen": -1.4733096361160278, |
|
"logits/rejected": -1.134479284286499, |
|
"logps/chosen": -315.3797912597656, |
|
"logps/rejected": -457.8125915527344, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.9675514698028564, |
|
"rewards/margins": 5.392933368682861, |
|
"rewards/rejected": -1.4253814220428467, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.518322629391764e-07, |
|
"logits/chosen": -1.465785264968872, |
|
"logits/rejected": -1.1767680644989014, |
|
"logps/chosen": -327.9288635253906, |
|
"logps/rejected": -518.0633544921875, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.936115264892578, |
|
"rewards/margins": 5.388223171234131, |
|
"rewards/rejected": -1.45210862159729, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.4994333207404607e-07, |
|
"logits/chosen": -1.4351527690887451, |
|
"logits/rejected": -1.1543748378753662, |
|
"logps/chosen": -454.43292236328125, |
|
"logps/rejected": -475.8153381347656, |
|
"loss": 0.1295, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.992440700531006, |
|
"rewards/margins": 5.424699306488037, |
|
"rewards/rejected": -1.4322583675384521, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.480544012089157e-07, |
|
"logits/chosen": -1.4429913759231567, |
|
"logits/rejected": -1.201302409172058, |
|
"logps/chosen": -496.41790771484375, |
|
"logps/rejected": -365.816650390625, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.107169151306152, |
|
"rewards/margins": 5.378964424133301, |
|
"rewards/rejected": -1.2717949151992798, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.461654703437854e-07, |
|
"logits/chosen": -1.4437055587768555, |
|
"logits/rejected": -1.1555430889129639, |
|
"logps/chosen": -460.52252197265625, |
|
"logps/rejected": -543.8465576171875, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.182587623596191, |
|
"rewards/margins": 5.458142280578613, |
|
"rewards/rejected": -1.2755542993545532, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.442765394786551e-07, |
|
"logits/chosen": -1.4703487157821655, |
|
"logits/rejected": -1.1514756679534912, |
|
"logps/chosen": -335.76141357421875, |
|
"logps/rejected": -425.265380859375, |
|
"loss": 0.1179, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.938772678375244, |
|
"rewards/margins": 5.324645042419434, |
|
"rewards/rejected": -1.3858733177185059, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.423876086135247e-07, |
|
"logits/chosen": -1.4418364763259888, |
|
"logits/rejected": -1.1505249738693237, |
|
"logps/chosen": -448.2904357910156, |
|
"logps/rejected": -721.1622314453125, |
|
"loss": 0.1389, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.9244837760925293, |
|
"rewards/margins": 5.320973873138428, |
|
"rewards/rejected": -1.3964899778366089, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": -1.4519003629684448, |
|
"eval_logits/rejected": -1.1194298267364502, |
|
"eval_logps/chosen": -380.6632995605469, |
|
"eval_logps/rejected": -574.6277465820312, |
|
"eval_loss": 0.12727472186088562, |
|
"eval_rewards/accuracies": 0.9528619647026062, |
|
"eval_rewards/chosen": 3.9259443283081055, |
|
"eval_rewards/margins": 5.437079906463623, |
|
"eval_rewards/rejected": -1.5111361742019653, |
|
"eval_runtime": 557.5407, |
|
"eval_samples_per_second": 17.039, |
|
"eval_steps_per_second": 0.533, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.404986777483944e-07, |
|
"logits/chosen": -1.462982416152954, |
|
"logits/rejected": -1.1688556671142578, |
|
"logps/chosen": -368.68914794921875, |
|
"logps/rejected": -497.86566162109375, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.3148436546325684, |
|
"rewards/margins": 4.890419006347656, |
|
"rewards/rejected": -1.5755746364593506, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3860974688326405e-07, |
|
"logits/chosen": -1.4497849941253662, |
|
"logits/rejected": -1.1572027206420898, |
|
"logps/chosen": -465.70391845703125, |
|
"logps/rejected": -631.2728881835938, |
|
"loss": 0.1163, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.193670272827148, |
|
"rewards/margins": 5.658702850341797, |
|
"rewards/rejected": -1.465032935142517, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.367208160181337e-07, |
|
"logits/chosen": -1.4319039583206177, |
|
"logits/rejected": -1.1632667779922485, |
|
"logps/chosen": -421.3758239746094, |
|
"logps/rejected": -333.9304504394531, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.695667266845703, |
|
"rewards/margins": 5.386081218719482, |
|
"rewards/rejected": -1.6904138326644897, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.348318851530034e-07, |
|
"logits/chosen": -1.4637318849563599, |
|
"logits/rejected": -1.095100998878479, |
|
"logps/chosen": -396.5002136230469, |
|
"logps/rejected": -622.4762573242188, |
|
"loss": 0.09, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.092984199523926, |
|
"rewards/margins": 6.124913215637207, |
|
"rewards/rejected": -2.0319290161132812, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.32942954287873e-07, |
|
"logits/chosen": -1.4509179592132568, |
|
"logits/rejected": -1.1328189373016357, |
|
"logps/chosen": -371.68572998046875, |
|
"logps/rejected": -406.5970458984375, |
|
"loss": 0.1163, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 4.031527996063232, |
|
"rewards/margins": 5.5390424728393555, |
|
"rewards/rejected": -1.5075138807296753, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3105402342274273e-07, |
|
"logits/chosen": -1.4884783029556274, |
|
"logits/rejected": -1.1401994228363037, |
|
"logps/chosen": -321.93023681640625, |
|
"logps/rejected": -570.5530395507812, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.893491744995117, |
|
"rewards/margins": 5.886297702789307, |
|
"rewards/rejected": -1.9928067922592163, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2916509255761233e-07, |
|
"logits/chosen": -1.45878005027771, |
|
"logits/rejected": -1.0974493026733398, |
|
"logps/chosen": -378.56256103515625, |
|
"logps/rejected": -671.4094848632812, |
|
"loss": 0.1419, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.538317918777466, |
|
"rewards/margins": 5.423024654388428, |
|
"rewards/rejected": -1.8847074508666992, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2727616169248204e-07, |
|
"logits/chosen": -1.4535772800445557, |
|
"logits/rejected": -1.0777978897094727, |
|
"logps/chosen": -347.06304931640625, |
|
"logps/rejected": -607.7723999023438, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.180381774902344, |
|
"rewards/margins": 6.305299282073975, |
|
"rewards/rejected": -2.12491774559021, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.253872308273517e-07, |
|
"logits/chosen": -1.4577261209487915, |
|
"logits/rejected": -1.1172749996185303, |
|
"logps/chosen": -428.004150390625, |
|
"logps/rejected": -609.6082763671875, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.4771833419799805, |
|
"rewards/margins": 6.247294902801514, |
|
"rewards/rejected": -1.770111083984375, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2349829996222135e-07, |
|
"logits/chosen": -1.459695816040039, |
|
"logits/rejected": -1.1408016681671143, |
|
"logps/chosen": -375.980224609375, |
|
"logps/rejected": -637.662109375, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.746489524841309, |
|
"rewards/margins": 6.767748832702637, |
|
"rewards/rejected": -2.0212595462799072, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -1.4541884660720825, |
|
"eval_logits/rejected": -1.1302434206008911, |
|
"eval_logps/chosen": -379.2232971191406, |
|
"eval_logps/rejected": -578.0139770507812, |
|
"eval_loss": 0.1122458353638649, |
|
"eval_rewards/accuracies": 0.9612794518470764, |
|
"eval_rewards/chosen": 4.069947719573975, |
|
"eval_rewards/margins": 5.919719219207764, |
|
"eval_rewards/rejected": -1.84977126121521, |
|
"eval_runtime": 559.0869, |
|
"eval_samples_per_second": 16.992, |
|
"eval_steps_per_second": 0.531, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2160936909709106e-07, |
|
"logits/chosen": -1.4686052799224854, |
|
"logits/rejected": -1.17806077003479, |
|
"logps/chosen": -370.8101806640625, |
|
"logps/rejected": -445.7115783691406, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.275851249694824, |
|
"rewards/margins": 5.940474033355713, |
|
"rewards/rejected": -1.6646230220794678, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.1972043823196066e-07, |
|
"logits/chosen": -1.4710712432861328, |
|
"logits/rejected": -1.2118116617202759, |
|
"logps/chosen": -349.842041015625, |
|
"logps/rejected": -497.0186462402344, |
|
"loss": 0.1405, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.897695541381836, |
|
"rewards/margins": 6.115738391876221, |
|
"rewards/rejected": -2.2180426120758057, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1783150736683037e-07, |
|
"logits/chosen": -1.4504650831222534, |
|
"logits/rejected": -1.1481643915176392, |
|
"logps/chosen": -421.00927734375, |
|
"logps/rejected": -541.6673583984375, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.073805809020996, |
|
"rewards/margins": 5.906230926513672, |
|
"rewards/rejected": -1.8324254751205444, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1594257650170003e-07, |
|
"logits/chosen": -1.475731611251831, |
|
"logits/rejected": -1.1985948085784912, |
|
"logps/chosen": -405.5483703613281, |
|
"logps/rejected": -488.00091552734375, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 3.7700603008270264, |
|
"rewards/margins": 5.521186351776123, |
|
"rewards/rejected": -1.7511262893676758, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.140536456365697e-07, |
|
"logits/chosen": -1.4649735689163208, |
|
"logits/rejected": -1.115206241607666, |
|
"logps/chosen": -314.864501953125, |
|
"logps/rejected": -583.0494995117188, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.480077266693115, |
|
"rewards/margins": 6.317180633544922, |
|
"rewards/rejected": -1.837104082107544, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1216471477143934e-07, |
|
"logits/chosen": -1.4586890935897827, |
|
"logits/rejected": -1.1937768459320068, |
|
"logps/chosen": -318.05615234375, |
|
"logps/rejected": -491.633056640625, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.9885802268981934, |
|
"rewards/margins": 5.613485813140869, |
|
"rewards/rejected": -1.6249048709869385, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.10275783906309e-07, |
|
"logits/chosen": -1.474686861038208, |
|
"logits/rejected": -1.2240248918533325, |
|
"logps/chosen": -448.8330993652344, |
|
"logps/rejected": -574.8685302734375, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.1468000411987305, |
|
"rewards/margins": 6.423197269439697, |
|
"rewards/rejected": -2.276397228240967, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0838685304117865e-07, |
|
"logits/chosen": -1.4716846942901611, |
|
"logits/rejected": -1.1474727392196655, |
|
"logps/chosen": -458.2037658691406, |
|
"logps/rejected": -677.0916137695312, |
|
"loss": 0.103, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.230536460876465, |
|
"rewards/margins": 6.255263805389404, |
|
"rewards/rejected": -2.0247273445129395, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0649792217604836e-07, |
|
"logits/chosen": -1.4893128871917725, |
|
"logits/rejected": -1.1543127298355103, |
|
"logps/chosen": -368.73876953125, |
|
"logps/rejected": -385.9571533203125, |
|
"loss": 0.0759, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.367506504058838, |
|
"rewards/margins": 6.192745208740234, |
|
"rewards/rejected": -1.825238823890686, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.04608991310918e-07, |
|
"logits/chosen": -1.4659887552261353, |
|
"logits/rejected": -1.2101812362670898, |
|
"logps/chosen": -387.55120849609375, |
|
"logps/rejected": -527.9453735351562, |
|
"loss": 0.0993, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.042902946472168, |
|
"rewards/margins": 5.99516487121582, |
|
"rewards/rejected": -1.9522621631622314, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_logits/chosen": -1.46894109249115, |
|
"eval_logits/rejected": -1.142426609992981, |
|
"eval_logps/chosen": -377.5001220703125, |
|
"eval_logps/rejected": -579.4506225585938, |
|
"eval_loss": 0.09749113768339157, |
|
"eval_rewards/accuracies": 0.9663299918174744, |
|
"eval_rewards/chosen": 4.2422590255737305, |
|
"eval_rewards/margins": 6.235683441162109, |
|
"eval_rewards/rejected": -1.9934238195419312, |
|
"eval_runtime": 558.57, |
|
"eval_samples_per_second": 17.008, |
|
"eval_steps_per_second": 0.532, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0272006044578767e-07, |
|
"logits/chosen": -1.4774185419082642, |
|
"logits/rejected": -1.185450553894043, |
|
"logps/chosen": -389.61981201171875, |
|
"logps/rejected": -652.4323120117188, |
|
"loss": 0.1036, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.061360836029053, |
|
"rewards/margins": 6.323044776916504, |
|
"rewards/rejected": -2.261683940887451, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0083112958065733e-07, |
|
"logits/chosen": -1.4516403675079346, |
|
"logits/rejected": -1.1874592304229736, |
|
"logps/chosen": -475.3050231933594, |
|
"logps/rejected": -444.3898010253906, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.8749263286590576, |
|
"rewards/margins": 5.786238670349121, |
|
"rewards/rejected": -1.9113123416900635, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.98942198715527e-07, |
|
"logits/chosen": -1.4862116575241089, |
|
"logits/rejected": -1.2050374746322632, |
|
"logps/chosen": -303.4962463378906, |
|
"logps/rejected": -611.756591796875, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.315167427062988, |
|
"rewards/margins": 6.290981769561768, |
|
"rewards/rejected": -1.9758144617080688, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.970532678503967e-07, |
|
"logits/chosen": -1.4739606380462646, |
|
"logits/rejected": -1.2146607637405396, |
|
"logps/chosen": -395.7440490722656, |
|
"logps/rejected": -519.9666748046875, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.144864559173584, |
|
"rewards/margins": 7.278559684753418, |
|
"rewards/rejected": -2.133694648742676, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.951643369852663e-07, |
|
"logits/chosen": -1.476678490638733, |
|
"logits/rejected": -1.206061601638794, |
|
"logps/chosen": -404.0805358886719, |
|
"logps/rejected": -790.9165649414062, |
|
"loss": 0.0917, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.473787307739258, |
|
"rewards/margins": 6.4759931564331055, |
|
"rewards/rejected": -2.002206325531006, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.93275406120136e-07, |
|
"logits/chosen": -1.471995234489441, |
|
"logits/rejected": -1.2011922597885132, |
|
"logps/chosen": -373.6271057128906, |
|
"logps/rejected": -589.4290771484375, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.3988142013549805, |
|
"rewards/margins": 6.434880256652832, |
|
"rewards/rejected": -2.036065101623535, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.913864752550056e-07, |
|
"logits/chosen": -1.4729435443878174, |
|
"logits/rejected": -1.2467955350875854, |
|
"logps/chosen": -479.82470703125, |
|
"logps/rejected": -649.1353759765625, |
|
"loss": 0.1121, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 5.313334941864014, |
|
"rewards/margins": 7.132607936859131, |
|
"rewards/rejected": -1.8192729949951172, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.894975443898753e-07, |
|
"logits/chosen": -1.4922538995742798, |
|
"logits/rejected": -1.1949760913848877, |
|
"logps/chosen": -295.2701721191406, |
|
"logps/rejected": -510.95001220703125, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.697510719299316, |
|
"rewards/margins": 6.993855953216553, |
|
"rewards/rejected": -2.2963459491729736, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.87608613524745e-07, |
|
"logits/chosen": -1.4829437732696533, |
|
"logits/rejected": -1.2089799642562866, |
|
"logps/chosen": -367.78387451171875, |
|
"logps/rejected": -401.001953125, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.9919090270996094, |
|
"rewards/margins": 5.9659037590026855, |
|
"rewards/rejected": -1.973995566368103, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.857196826596146e-07, |
|
"logits/chosen": -1.5004401206970215, |
|
"logits/rejected": -1.0854889154434204, |
|
"logps/chosen": -337.4493408203125, |
|
"logps/rejected": -477.30963134765625, |
|
"loss": 0.111, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 4.4730706214904785, |
|
"rewards/margins": 6.716238498687744, |
|
"rewards/rejected": -2.2431674003601074, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.4820351600646973, |
|
"eval_logits/rejected": -1.1542390584945679, |
|
"eval_logps/chosen": -376.70477294921875, |
|
"eval_logps/rejected": -582.0501098632812, |
|
"eval_loss": 0.09071440994739532, |
|
"eval_rewards/accuracies": 0.9696969985961914, |
|
"eval_rewards/chosen": 4.32179594039917, |
|
"eval_rewards/margins": 6.575175762176514, |
|
"eval_rewards/rejected": -2.253380537033081, |
|
"eval_runtime": 559.8508, |
|
"eval_samples_per_second": 16.969, |
|
"eval_steps_per_second": 0.53, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8383075179448433e-07, |
|
"logits/chosen": -1.48300302028656, |
|
"logits/rejected": -1.1969270706176758, |
|
"logps/chosen": -444.98114013671875, |
|
"logps/rejected": -399.57403564453125, |
|
"loss": 0.0892, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.185998439788818, |
|
"rewards/margins": 6.078363418579102, |
|
"rewards/rejected": -1.8923648595809937, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8194182092935394e-07, |
|
"logits/chosen": -1.4898041486740112, |
|
"logits/rejected": -1.1662390232086182, |
|
"logps/chosen": -322.0852355957031, |
|
"logps/rejected": -505.6220703125, |
|
"loss": 0.0793, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.525553226470947, |
|
"rewards/margins": 6.857700347900391, |
|
"rewards/rejected": -2.3321471214294434, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.8005289006422365e-07, |
|
"logits/chosen": -1.4851583242416382, |
|
"logits/rejected": -1.1979453563690186, |
|
"logps/chosen": -358.98101806640625, |
|
"logps/rejected": -621.0003051757812, |
|
"loss": 0.0882, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.939785957336426, |
|
"rewards/margins": 7.3684186935424805, |
|
"rewards/rejected": -2.4286324977874756, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7816395919909325e-07, |
|
"logits/chosen": -1.49127197265625, |
|
"logits/rejected": -1.239793062210083, |
|
"logps/chosen": -314.9703674316406, |
|
"logps/rejected": -568.3617553710938, |
|
"loss": 0.0664, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.589601993560791, |
|
"rewards/margins": 6.650811672210693, |
|
"rewards/rejected": -2.0612106323242188, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7627502833396296e-07, |
|
"logits/chosen": -1.4954484701156616, |
|
"logits/rejected": -1.247184157371521, |
|
"logps/chosen": -381.68499755859375, |
|
"logps/rejected": -518.2406005859375, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.539010047912598, |
|
"rewards/margins": 6.654293060302734, |
|
"rewards/rejected": -2.115283489227295, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7438609746883267e-07, |
|
"logits/chosen": -1.475711464881897, |
|
"logits/rejected": -1.2076390981674194, |
|
"logps/chosen": -448.14556884765625, |
|
"logps/rejected": -554.5950927734375, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 5.037951946258545, |
|
"rewards/margins": 7.1611647605896, |
|
"rewards/rejected": -2.1232128143310547, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7249716660370227e-07, |
|
"logits/chosen": -1.4880720376968384, |
|
"logits/rejected": -1.206027865409851, |
|
"logps/chosen": -303.1774597167969, |
|
"logps/rejected": -724.1024169921875, |
|
"loss": 0.0858, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.292189121246338, |
|
"rewards/margins": 6.413214683532715, |
|
"rewards/rejected": -2.1210262775421143, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.70608235738572e-07, |
|
"logits/chosen": -1.4684410095214844, |
|
"logits/rejected": -1.15514075756073, |
|
"logps/chosen": -447.8116760253906, |
|
"logps/rejected": -666.262939453125, |
|
"loss": 0.1078, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.545645236968994, |
|
"rewards/margins": 7.141517639160156, |
|
"rewards/rejected": -2.595871925354004, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.687193048734416e-07, |
|
"logits/chosen": -1.4797093868255615, |
|
"logits/rejected": -1.1443145275115967, |
|
"logps/chosen": -302.0823059082031, |
|
"logps/rejected": -653.0443115234375, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.940204620361328, |
|
"rewards/margins": 7.737614631652832, |
|
"rewards/rejected": -2.797410011291504, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.668303740083113e-07, |
|
"logits/chosen": -1.483666181564331, |
|
"logits/rejected": -1.183774709701538, |
|
"logps/chosen": -401.68359375, |
|
"logps/rejected": -488.19451904296875, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.864621639251709, |
|
"rewards/margins": 7.051810264587402, |
|
"rewards/rejected": -2.187187671661377, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -1.469427227973938, |
|
"eval_logits/rejected": -1.14968740940094, |
|
"eval_logps/chosen": -376.04510498046875, |
|
"eval_logps/rejected": -582.1046752929688, |
|
"eval_loss": 0.0881563276052475, |
|
"eval_rewards/accuracies": 0.9663299918174744, |
|
"eval_rewards/chosen": 4.387765407562256, |
|
"eval_rewards/margins": 6.646595001220703, |
|
"eval_rewards/rejected": -2.2588300704956055, |
|
"eval_runtime": 559.0589, |
|
"eval_samples_per_second": 16.993, |
|
"eval_steps_per_second": 0.531, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6494144314318094e-07, |
|
"logits/chosen": -1.4677711725234985, |
|
"logits/rejected": -1.22615647315979, |
|
"logps/chosen": -408.8759765625, |
|
"logps/rejected": -469.7860412597656, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.661646842956543, |
|
"rewards/margins": 6.4328932762146, |
|
"rewards/rejected": -1.7712465524673462, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.630525122780506e-07, |
|
"logits/chosen": -1.497859239578247, |
|
"logits/rejected": -1.1968727111816406, |
|
"logps/chosen": -288.6357116699219, |
|
"logps/rejected": -523.6803588867188, |
|
"loss": 0.103, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.572832107543945, |
|
"rewards/margins": 6.243821620941162, |
|
"rewards/rejected": -1.670989990234375, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6116358141292026e-07, |
|
"logits/chosen": -1.4879519939422607, |
|
"logits/rejected": -1.2336044311523438, |
|
"logps/chosen": -303.6993103027344, |
|
"logps/rejected": -508.20123291015625, |
|
"loss": 0.128, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.822319030761719, |
|
"rewards/margins": 6.814971923828125, |
|
"rewards/rejected": -1.9926522970199585, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.592746505477899e-07, |
|
"logits/chosen": -1.4755656719207764, |
|
"logits/rejected": -1.2433079481124878, |
|
"logps/chosen": -495.46337890625, |
|
"logps/rejected": -697.58740234375, |
|
"loss": 0.0889, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.0532355308532715, |
|
"rewards/margins": 6.689316749572754, |
|
"rewards/rejected": -2.636080503463745, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.573857196826596e-07, |
|
"logits/chosen": -1.512407660484314, |
|
"logits/rejected": -1.203151822090149, |
|
"logps/chosen": -480.6717224121094, |
|
"logps/rejected": -505.75830078125, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.397000312805176, |
|
"rewards/margins": 6.988126277923584, |
|
"rewards/rejected": -2.591125726699829, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.554967888175293e-07, |
|
"logits/chosen": -1.5000110864639282, |
|
"logits/rejected": -1.1538686752319336, |
|
"logps/chosen": -380.41741943359375, |
|
"logps/rejected": -551.3828735351562, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.297232151031494, |
|
"rewards/margins": 6.739757537841797, |
|
"rewards/rejected": -2.4425251483917236, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5360785795239893e-07, |
|
"logits/chosen": -1.4952175617218018, |
|
"logits/rejected": -1.2412437200546265, |
|
"logps/chosen": -433.78424072265625, |
|
"logps/rejected": -458.8251953125, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.398755073547363, |
|
"rewards/margins": 6.876921653747559, |
|
"rewards/rejected": -2.478165864944458, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.517189270872686e-07, |
|
"logits/chosen": -1.503846526145935, |
|
"logits/rejected": -1.2310945987701416, |
|
"logps/chosen": -378.23150634765625, |
|
"logps/rejected": -460.6910705566406, |
|
"loss": 0.0755, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.319530487060547, |
|
"rewards/margins": 6.819169521331787, |
|
"rewards/rejected": -2.4996395111083984, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4982999622213824e-07, |
|
"logits/chosen": -1.489429235458374, |
|
"logits/rejected": -1.194059133529663, |
|
"logps/chosen": -378.36224365234375, |
|
"logps/rejected": -339.7525329589844, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.176763534545898, |
|
"rewards/margins": 6.276023864746094, |
|
"rewards/rejected": -2.099259853363037, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.479410653570079e-07, |
|
"logits/chosen": -1.4815757274627686, |
|
"logits/rejected": -1.1958659887313843, |
|
"logps/chosen": -382.04718017578125, |
|
"logps/rejected": -590.7028198242188, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.512447834014893, |
|
"rewards/margins": 7.3736677169799805, |
|
"rewards/rejected": -2.861220121383667, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -1.4807450771331787, |
|
"eval_logits/rejected": -1.153213620185852, |
|
"eval_logps/chosen": -375.21636962890625, |
|
"eval_logps/rejected": -582.6480712890625, |
|
"eval_loss": 0.08400283753871918, |
|
"eval_rewards/accuracies": 0.9688552021980286, |
|
"eval_rewards/chosen": 4.470638751983643, |
|
"eval_rewards/margins": 6.783812046051025, |
|
"eval_rewards/rejected": -2.313173294067383, |
|
"eval_runtime": 560.0503, |
|
"eval_samples_per_second": 16.963, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.460521344918776e-07, |
|
"logits/chosen": -1.5088837146759033, |
|
"logits/rejected": -1.1168277263641357, |
|
"logps/chosen": -275.2812194824219, |
|
"logps/rejected": -645.93701171875, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.146356105804443, |
|
"rewards/margins": 6.36793327331543, |
|
"rewards/rejected": -2.2215771675109863, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.441632036267472e-07, |
|
"logits/chosen": -1.50923752784729, |
|
"logits/rejected": -1.1946974992752075, |
|
"logps/chosen": -330.641357421875, |
|
"logps/rejected": -577.8738403320312, |
|
"loss": 0.0676, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.274444580078125, |
|
"rewards/margins": 6.753907680511475, |
|
"rewards/rejected": -2.4794628620147705, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.422742727616169e-07, |
|
"logits/chosen": -1.4846798181533813, |
|
"logits/rejected": -1.1590081453323364, |
|
"logps/chosen": -407.13201904296875, |
|
"logps/rejected": -393.16961669921875, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.086771011352539, |
|
"rewards/margins": 6.169893741607666, |
|
"rewards/rejected": -2.0831220149993896, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.403853418964866e-07, |
|
"logits/chosen": -1.4728076457977295, |
|
"logits/rejected": -1.1583011150360107, |
|
"logps/chosen": -379.17791748046875, |
|
"logps/rejected": -501.658935546875, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.588146209716797, |
|
"rewards/margins": 7.1952104568481445, |
|
"rewards/rejected": -2.6070632934570312, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3849641103135623e-07, |
|
"logits/chosen": -1.4747841358184814, |
|
"logits/rejected": -1.1721833944320679, |
|
"logps/chosen": -417.3246154785156, |
|
"logps/rejected": -365.9868469238281, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 5.068565845489502, |
|
"rewards/margins": 7.486462593078613, |
|
"rewards/rejected": -2.4178969860076904, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3660748016622594e-07, |
|
"logits/chosen": -1.4949634075164795, |
|
"logits/rejected": -1.1580262184143066, |
|
"logps/chosen": -463.01165771484375, |
|
"logps/rejected": -490.30926513671875, |
|
"loss": 0.0793, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.0085272789001465, |
|
"rewards/margins": 6.497877597808838, |
|
"rewards/rejected": -2.489349603652954, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3471854930109554e-07, |
|
"logits/chosen": -1.4875307083129883, |
|
"logits/rejected": -1.1987477540969849, |
|
"logps/chosen": -300.71185302734375, |
|
"logps/rejected": -572.1688232421875, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.1474151611328125, |
|
"rewards/margins": 6.675169467926025, |
|
"rewards/rejected": -2.527754306793213, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3282961843596525e-07, |
|
"logits/chosen": -1.4851741790771484, |
|
"logits/rejected": -1.1590709686279297, |
|
"logps/chosen": -357.76080322265625, |
|
"logps/rejected": -723.2069091796875, |
|
"loss": 0.0862, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.243594169616699, |
|
"rewards/margins": 6.315056800842285, |
|
"rewards/rejected": -2.071462392807007, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3094068757083485e-07, |
|
"logits/chosen": -1.4589731693267822, |
|
"logits/rejected": -1.124894142150879, |
|
"logps/chosen": -395.55999755859375, |
|
"logps/rejected": -413.896240234375, |
|
"loss": 0.095, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.135136127471924, |
|
"rewards/margins": 6.630227565765381, |
|
"rewards/rejected": -2.495091676712036, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2905175670570456e-07, |
|
"logits/chosen": -1.494888186454773, |
|
"logits/rejected": -1.1850342750549316, |
|
"logps/chosen": -464.22430419921875, |
|
"logps/rejected": -514.73486328125, |
|
"loss": 0.0706, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.990201234817505, |
|
"rewards/margins": 6.323441028594971, |
|
"rewards/rejected": -2.333240032196045, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -1.4885011911392212, |
|
"eval_logits/rejected": -1.1666902303695679, |
|
"eval_logps/chosen": -375.603759765625, |
|
"eval_logps/rejected": -586.021728515625, |
|
"eval_loss": 0.07206810265779495, |
|
"eval_rewards/accuracies": 0.9722222089767456, |
|
"eval_rewards/chosen": 4.431900501251221, |
|
"eval_rewards/margins": 7.082433223724365, |
|
"eval_rewards/rejected": -2.6505327224731445, |
|
"eval_runtime": 560.4254, |
|
"eval_samples_per_second": 16.951, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.271628258405742e-07, |
|
"logits/chosen": -1.5175247192382812, |
|
"logits/rejected": -1.1132011413574219, |
|
"logps/chosen": -382.6025085449219, |
|
"logps/rejected": -333.1087646484375, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.4511542320251465, |
|
"rewards/margins": 6.978426456451416, |
|
"rewards/rejected": -2.5272724628448486, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.252738949754439e-07, |
|
"logits/chosen": -1.4874627590179443, |
|
"logits/rejected": -1.1758732795715332, |
|
"logps/chosen": -365.48291015625, |
|
"logps/rejected": -641.5902099609375, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.6857669353485107, |
|
"rewards/margins": 6.348451614379883, |
|
"rewards/rejected": -2.662684679031372, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.233849641103136e-07, |
|
"logits/chosen": -1.4980213642120361, |
|
"logits/rejected": -1.2111032009124756, |
|
"logps/chosen": -301.4989318847656, |
|
"logps/rejected": -837.3327026367188, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.749375820159912, |
|
"rewards/margins": 7.8252434730529785, |
|
"rewards/rejected": -3.0758676528930664, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.214960332451832e-07, |
|
"logits/chosen": -1.4798014163970947, |
|
"logits/rejected": -1.184417963027954, |
|
"logps/chosen": -396.10888671875, |
|
"logps/rejected": -611.9696044921875, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.219520568847656, |
|
"rewards/margins": 7.1679277420043945, |
|
"rewards/rejected": -2.9484081268310547, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.196071023800529e-07, |
|
"logits/chosen": -1.5121700763702393, |
|
"logits/rejected": -1.2249577045440674, |
|
"logps/chosen": -387.7380065917969, |
|
"logps/rejected": -794.2557373046875, |
|
"loss": 0.0843, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.096358776092529, |
|
"rewards/margins": 6.268472194671631, |
|
"rewards/rejected": -2.1721131801605225, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1771817151492255e-07, |
|
"logits/chosen": -1.4872616529464722, |
|
"logits/rejected": -1.2021340131759644, |
|
"logps/chosen": -396.11920166015625, |
|
"logps/rejected": -725.071533203125, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.439419269561768, |
|
"rewards/margins": 7.2906174659729, |
|
"rewards/rejected": -2.851198673248291, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.158292406497922e-07, |
|
"logits/chosen": -1.4843103885650635, |
|
"logits/rejected": -1.1521885395050049, |
|
"logps/chosen": -430.59686279296875, |
|
"logps/rejected": -623.4092407226562, |
|
"loss": 0.065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.504824638366699, |
|
"rewards/margins": 7.555941581726074, |
|
"rewards/rejected": -3.0511183738708496, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1394030978466186e-07, |
|
"logits/chosen": -1.5158779621124268, |
|
"logits/rejected": -1.161768913269043, |
|
"logps/chosen": -340.71282958984375, |
|
"logps/rejected": -436.19873046875, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.091620445251465, |
|
"rewards/margins": 7.653326511383057, |
|
"rewards/rejected": -2.5617051124572754, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.120513789195315e-07, |
|
"logits/chosen": -1.5111273527145386, |
|
"logits/rejected": -1.1369271278381348, |
|
"logps/chosen": -313.18426513671875, |
|
"logps/rejected": -592.9158935546875, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.0614423751831055, |
|
"rewards/margins": 7.267691612243652, |
|
"rewards/rejected": -3.2062485218048096, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1016244805440117e-07, |
|
"logits/chosen": -1.478244423866272, |
|
"logits/rejected": -1.2829620838165283, |
|
"logps/chosen": -377.1938781738281, |
|
"logps/rejected": -613.537841796875, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.685913562774658, |
|
"rewards/margins": 6.318792819976807, |
|
"rewards/rejected": -2.6328797340393066, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -1.5001074075698853, |
|
"eval_logits/rejected": -1.181748628616333, |
|
"eval_logps/chosen": -376.17987060546875, |
|
"eval_logps/rejected": -588.2330322265625, |
|
"eval_loss": 0.07252340018749237, |
|
"eval_rewards/accuracies": 0.9739057421684265, |
|
"eval_rewards/chosen": 4.374290466308594, |
|
"eval_rewards/margins": 7.245957374572754, |
|
"eval_rewards/rejected": -2.87166690826416, |
|
"eval_runtime": 559.1781, |
|
"eval_samples_per_second": 16.989, |
|
"eval_steps_per_second": 0.531, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.082735171892709e-07, |
|
"logits/chosen": -1.5192620754241943, |
|
"logits/rejected": -1.2069748640060425, |
|
"logps/chosen": -312.7062072753906, |
|
"logps/rejected": -482.1133728027344, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.136082649230957, |
|
"rewards/margins": 6.253493309020996, |
|
"rewards/rejected": -2.117410182952881, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0638458632414054e-07, |
|
"logits/chosen": -1.51072096824646, |
|
"logits/rejected": -1.2908846139907837, |
|
"logps/chosen": -370.267333984375, |
|
"logps/rejected": -700.0077514648438, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.160948276519775, |
|
"rewards/margins": 7.014911651611328, |
|
"rewards/rejected": -2.8539633750915527, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.044956554590102e-07, |
|
"logits/chosen": -1.4973653554916382, |
|
"logits/rejected": -1.1667084693908691, |
|
"logps/chosen": -367.26861572265625, |
|
"logps/rejected": -421.83868408203125, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.954542875289917, |
|
"rewards/margins": 6.594731330871582, |
|
"rewards/rejected": -2.6401877403259277, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0260672459387985e-07, |
|
"logits/chosen": -1.5250272750854492, |
|
"logits/rejected": -1.1743382215499878, |
|
"logps/chosen": -298.74359130859375, |
|
"logps/rejected": -514.9177856445312, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.985522270202637, |
|
"rewards/margins": 7.792318820953369, |
|
"rewards/rejected": -2.806795597076416, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.007177937287495e-07, |
|
"logits/chosen": -1.509218454360962, |
|
"logits/rejected": -1.1687209606170654, |
|
"logps/chosen": -313.0664978027344, |
|
"logps/rejected": -652.6644897460938, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.642588138580322, |
|
"rewards/margins": 7.6289262771606445, |
|
"rewards/rejected": -2.9863381385803223, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.988288628636192e-07, |
|
"logits/chosen": -1.5087230205535889, |
|
"logits/rejected": -1.2443337440490723, |
|
"logps/chosen": -378.0022888183594, |
|
"logps/rejected": -524.3590087890625, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.988725662231445, |
|
"rewards/margins": 7.9680657386779785, |
|
"rewards/rejected": -2.979340076446533, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.969399319984888e-07, |
|
"logits/chosen": -1.4828989505767822, |
|
"logits/rejected": -1.1986699104309082, |
|
"logps/chosen": -329.68743896484375, |
|
"logps/rejected": -764.1326293945312, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.3440890312194824, |
|
"rewards/margins": 6.329494476318359, |
|
"rewards/rejected": -2.985405445098877, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.950510011333585e-07, |
|
"logits/chosen": -1.5152653455734253, |
|
"logits/rejected": -1.1745421886444092, |
|
"logps/chosen": -352.6541748046875, |
|
"logps/rejected": -632.8907470703125, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.7109479904174805, |
|
"rewards/margins": 7.576220512390137, |
|
"rewards/rejected": -2.8652729988098145, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9316207026822813e-07, |
|
"logits/chosen": -1.523559808731079, |
|
"logits/rejected": -1.219855546951294, |
|
"logps/chosen": -350.2402648925781, |
|
"logps/rejected": -644.3220825195312, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.640657901763916, |
|
"rewards/margins": 7.175803184509277, |
|
"rewards/rejected": -2.5351455211639404, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.9127313940309784e-07, |
|
"logits/chosen": -1.5056852102279663, |
|
"logits/rejected": -1.171008586883545, |
|
"logps/chosen": -290.78802490234375, |
|
"logps/rejected": -539.4659423828125, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.571245193481445, |
|
"rewards/margins": 8.044679641723633, |
|
"rewards/rejected": -3.4734344482421875, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_logits/chosen": -1.5018603801727295, |
|
"eval_logits/rejected": -1.1789315938949585, |
|
"eval_logps/chosen": -376.07598876953125, |
|
"eval_logps/rejected": -589.1927490234375, |
|
"eval_loss": 0.06479610502719879, |
|
"eval_rewards/accuracies": 0.9755892157554626, |
|
"eval_rewards/chosen": 4.384680271148682, |
|
"eval_rewards/margins": 7.3523173332214355, |
|
"eval_rewards/rejected": -2.967637062072754, |
|
"eval_runtime": 560.4699, |
|
"eval_samples_per_second": 16.95, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8938420853796754e-07, |
|
"logits/chosen": -1.5160057544708252, |
|
"logits/rejected": -1.1476496458053589, |
|
"logps/chosen": -411.9048767089844, |
|
"logps/rejected": -589.949951171875, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.407751560211182, |
|
"rewards/margins": 7.413491725921631, |
|
"rewards/rejected": -3.0057406425476074, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8749527767283715e-07, |
|
"logits/chosen": -1.4947352409362793, |
|
"logits/rejected": -1.2628874778747559, |
|
"logps/chosen": -390.4944152832031, |
|
"logps/rejected": -644.2883911132812, |
|
"loss": 0.0619, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.163218021392822, |
|
"rewards/margins": 7.030184268951416, |
|
"rewards/rejected": -2.866966485977173, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8560634680770686e-07, |
|
"logits/chosen": -1.4881634712219238, |
|
"logits/rejected": -1.2398184537887573, |
|
"logps/chosen": -369.6822814941406, |
|
"logps/rejected": -579.9475708007812, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.736443996429443, |
|
"rewards/margins": 7.590858459472656, |
|
"rewards/rejected": -2.854414463043213, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8371741594257646e-07, |
|
"logits/chosen": -1.4975007772445679, |
|
"logits/rejected": -1.2246668338775635, |
|
"logps/chosen": -476.669677734375, |
|
"logps/rejected": -479.30108642578125, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.004213809967041, |
|
"rewards/margins": 6.466128349304199, |
|
"rewards/rejected": -2.4619147777557373, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8182848507744617e-07, |
|
"logits/chosen": -1.516287088394165, |
|
"logits/rejected": -1.202371597290039, |
|
"logps/chosen": -321.565673828125, |
|
"logps/rejected": -408.3916015625, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.261721611022949, |
|
"rewards/margins": 7.2457404136657715, |
|
"rewards/rejected": -2.9840192794799805, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.799395542123158e-07, |
|
"logits/chosen": -1.4922573566436768, |
|
"logits/rejected": -1.2690476179122925, |
|
"logps/chosen": -453.80413818359375, |
|
"logps/rejected": -637.1600952148438, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.037074089050293, |
|
"rewards/margins": 6.629528045654297, |
|
"rewards/rejected": -2.592454433441162, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.780506233471855e-07, |
|
"logits/chosen": -1.5398370027542114, |
|
"logits/rejected": -1.190582513809204, |
|
"logps/chosen": -303.58465576171875, |
|
"logps/rejected": -457.27142333984375, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.979724884033203, |
|
"rewards/margins": 6.8525190353393555, |
|
"rewards/rejected": -2.8727943897247314, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7616169248205513e-07, |
|
"logits/chosen": -1.480398178100586, |
|
"logits/rejected": -1.1202542781829834, |
|
"logps/chosen": -291.8365478515625, |
|
"logps/rejected": -525.9415893554688, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.7548556327819824, |
|
"rewards/margins": 6.797545433044434, |
|
"rewards/rejected": -3.042689800262451, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.742727616169248e-07, |
|
"logits/chosen": -1.5058258771896362, |
|
"logits/rejected": -1.2445354461669922, |
|
"logps/chosen": -349.40081787109375, |
|
"logps/rejected": -353.338623046875, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.481741428375244, |
|
"rewards/margins": 7.538400173187256, |
|
"rewards/rejected": -3.056657552719116, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.723838307517945e-07, |
|
"logits/chosen": -1.5099804401397705, |
|
"logits/rejected": -1.227176308631897, |
|
"logps/chosen": -449.3966369628906, |
|
"logps/rejected": -564.8681030273438, |
|
"loss": 0.0483, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.375594615936279, |
|
"rewards/margins": 7.313169956207275, |
|
"rewards/rejected": -2.937574863433838, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -1.5114119052886963, |
|
"eval_logits/rejected": -1.1923363208770752, |
|
"eval_logps/chosen": -376.16131591796875, |
|
"eval_logps/rejected": -591.8114013671875, |
|
"eval_loss": 0.060400474816560745, |
|
"eval_rewards/accuracies": 0.9797979593276978, |
|
"eval_rewards/chosen": 4.3761420249938965, |
|
"eval_rewards/margins": 7.605640411376953, |
|
"eval_rewards/rejected": -3.2294986248016357, |
|
"eval_runtime": 560.6153, |
|
"eval_samples_per_second": 16.946, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7049489988666416e-07, |
|
"logits/chosen": -1.5114130973815918, |
|
"logits/rejected": -1.2126189470291138, |
|
"logps/chosen": -430.208984375, |
|
"logps/rejected": -501.6602478027344, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.029915809631348, |
|
"rewards/margins": 7.003431797027588, |
|
"rewards/rejected": -2.973515748977661, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.686059690215338e-07, |
|
"logits/chosen": -1.4975926876068115, |
|
"logits/rejected": -1.231730580329895, |
|
"logps/chosen": -384.80133056640625, |
|
"logps/rejected": -646.3175048828125, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.063393592834473, |
|
"rewards/margins": 7.050488471984863, |
|
"rewards/rejected": -2.9870944023132324, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6671703815640347e-07, |
|
"logits/chosen": -1.5093035697937012, |
|
"logits/rejected": -1.1693612337112427, |
|
"logps/chosen": -357.9332580566406, |
|
"logps/rejected": -419.8724060058594, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.968228816986084, |
|
"rewards/margins": 7.172101020812988, |
|
"rewards/rejected": -3.2038722038269043, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.648281072912731e-07, |
|
"logits/chosen": -1.5199733972549438, |
|
"logits/rejected": -1.2490711212158203, |
|
"logps/chosen": -319.37335205078125, |
|
"logps/rejected": -703.9043579101562, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.484343528747559, |
|
"rewards/margins": 6.947661399841309, |
|
"rewards/rejected": -2.46331787109375, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.629391764261428e-07, |
|
"logits/chosen": -1.4934265613555908, |
|
"logits/rejected": -1.2080678939819336, |
|
"logps/chosen": -397.90325927734375, |
|
"logps/rejected": -477.54681396484375, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.9346039295196533, |
|
"rewards/margins": 7.143618583679199, |
|
"rewards/rejected": -3.2090160846710205, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.610502455610125e-07, |
|
"logits/chosen": -1.4791805744171143, |
|
"logits/rejected": -1.201578140258789, |
|
"logps/chosen": -478.21563720703125, |
|
"logps/rejected": -653.1609497070312, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.886442184448242, |
|
"rewards/margins": 6.926022529602051, |
|
"rewards/rejected": -3.0395796298980713, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.591613146958821e-07, |
|
"logits/chosen": -1.5138906240463257, |
|
"logits/rejected": -1.243032455444336, |
|
"logps/chosen": -350.7288818359375, |
|
"logps/rejected": -627.7431640625, |
|
"loss": 0.0433, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.634993076324463, |
|
"rewards/margins": 7.957524299621582, |
|
"rewards/rejected": -3.3225319385528564, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.572723838307518e-07, |
|
"logits/chosen": -1.5238749980926514, |
|
"logits/rejected": -1.1785722970962524, |
|
"logps/chosen": -377.65045166015625, |
|
"logps/rejected": -531.2706298828125, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.06870698928833, |
|
"rewards/margins": 7.24503231048584, |
|
"rewards/rejected": -3.176325798034668, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5538345296562145e-07, |
|
"logits/chosen": -1.5166254043579102, |
|
"logits/rejected": -1.1256914138793945, |
|
"logps/chosen": -375.782470703125, |
|
"logps/rejected": -465.26287841796875, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.288039684295654, |
|
"rewards/margins": 7.8462958335876465, |
|
"rewards/rejected": -3.5582566261291504, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.534945221004911e-07, |
|
"logits/chosen": -1.4953352212905884, |
|
"logits/rejected": -1.2030283212661743, |
|
"logps/chosen": -529.4041748046875, |
|
"logps/rejected": -491.87384033203125, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.736111164093018, |
|
"rewards/margins": 7.682862281799316, |
|
"rewards/rejected": -2.946751356124878, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_logits/chosen": -1.504213571548462, |
|
"eval_logits/rejected": -1.185482144355774, |
|
"eval_logps/chosen": -376.66448974609375, |
|
"eval_logps/rejected": -592.157470703125, |
|
"eval_loss": 0.05805225297808647, |
|
"eval_rewards/accuracies": 0.9772727489471436, |
|
"eval_rewards/chosen": 4.325828552246094, |
|
"eval_rewards/margins": 7.589939117431641, |
|
"eval_rewards/rejected": -3.2641103267669678, |
|
"eval_runtime": 560.9875, |
|
"eval_samples_per_second": 16.934, |
|
"eval_steps_per_second": 0.529, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.516055912353608e-07, |
|
"logits/chosen": -1.5023882389068604, |
|
"logits/rejected": -1.2200844287872314, |
|
"logps/chosen": -385.99200439453125, |
|
"logps/rejected": -679.5035400390625, |
|
"loss": 0.0551, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.191043376922607, |
|
"rewards/margins": 7.510348320007324, |
|
"rewards/rejected": -3.3193047046661377, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.497166603702304e-07, |
|
"logits/chosen": -1.5069820880889893, |
|
"logits/rejected": -1.2421448230743408, |
|
"logps/chosen": -367.49481201171875, |
|
"logps/rejected": -656.8528442382812, |
|
"loss": 0.0836, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.26437520980835, |
|
"rewards/margins": 7.1600213050842285, |
|
"rewards/rejected": -2.895646572113037, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4782772950510013e-07, |
|
"logits/chosen": -1.516225814819336, |
|
"logits/rejected": -1.1817419528961182, |
|
"logps/chosen": -397.768798828125, |
|
"logps/rejected": -495.23443603515625, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.2399187088012695, |
|
"rewards/margins": 7.541648864746094, |
|
"rewards/rejected": -3.3017311096191406, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.459387986399698e-07, |
|
"logits/chosen": -1.517730474472046, |
|
"logits/rejected": -1.2045361995697021, |
|
"logps/chosen": -356.7253112792969, |
|
"logps/rejected": -508.641357421875, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.046034812927246, |
|
"rewards/margins": 7.333725929260254, |
|
"rewards/rejected": -3.2876906394958496, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4404986777483944e-07, |
|
"logits/chosen": -1.497016191482544, |
|
"logits/rejected": -1.227853536605835, |
|
"logps/chosen": -446.447265625, |
|
"logps/rejected": -458.6922912597656, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.6903839111328125, |
|
"rewards/margins": 6.824693202972412, |
|
"rewards/rejected": -3.1343090534210205, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.421609369097091e-07, |
|
"logits/chosen": -1.514736533164978, |
|
"logits/rejected": -1.1953274011611938, |
|
"logps/chosen": -395.9438781738281, |
|
"logps/rejected": -519.7674560546875, |
|
"loss": 0.0504, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.116658687591553, |
|
"rewards/margins": 7.169915199279785, |
|
"rewards/rejected": -3.0532562732696533, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4027200604457875e-07, |
|
"logits/chosen": -1.5058282613754272, |
|
"logits/rejected": -1.2581437826156616, |
|
"logps/chosen": -366.08233642578125, |
|
"logps/rejected": -546.3473510742188, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.698902130126953, |
|
"rewards/margins": 7.672143459320068, |
|
"rewards/rejected": -2.973240852355957, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.383830751794484e-07, |
|
"logits/chosen": -1.4994776248931885, |
|
"logits/rejected": -1.184206485748291, |
|
"logps/chosen": -481.41680908203125, |
|
"logps/rejected": -767.9908447265625, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.650491714477539, |
|
"rewards/margins": 8.127163887023926, |
|
"rewards/rejected": -3.4766716957092285, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.364941443143181e-07, |
|
"logits/chosen": -1.531582236289978, |
|
"logits/rejected": -1.2031329870224, |
|
"logps/chosen": -321.4193420410156, |
|
"logps/rejected": -499.52520751953125, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.110111236572266, |
|
"rewards/margins": 7.465426445007324, |
|
"rewards/rejected": -3.355315685272217, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3460521344918775e-07, |
|
"logits/chosen": -1.5217511653900146, |
|
"logits/rejected": -1.2462084293365479, |
|
"logps/chosen": -344.8479309082031, |
|
"logps/rejected": -528.7171020507812, |
|
"loss": 0.066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.58670711517334, |
|
"rewards/margins": 7.7972092628479, |
|
"rewards/rejected": -3.2105019092559814, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.5110249519348145, |
|
"eval_logits/rejected": -1.1886183023452759, |
|
"eval_logps/chosen": -376.65234375, |
|
"eval_logps/rejected": -593.3289184570312, |
|
"eval_loss": 0.05385367199778557, |
|
"eval_rewards/accuracies": 0.9814814925193787, |
|
"eval_rewards/chosen": 4.327041149139404, |
|
"eval_rewards/margins": 7.708298683166504, |
|
"eval_rewards/rejected": -3.3812568187713623, |
|
"eval_runtime": 560.6648, |
|
"eval_samples_per_second": 16.944, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.327162825840574e-07, |
|
"logits/chosen": -1.5017929077148438, |
|
"logits/rejected": -1.2263991832733154, |
|
"logps/chosen": -440.38330078125, |
|
"logps/rejected": -768.6172485351562, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.483176231384277, |
|
"rewards/margins": 7.676694393157959, |
|
"rewards/rejected": -3.1935179233551025, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3082735171892708e-07, |
|
"logits/chosen": -1.4979521036148071, |
|
"logits/rejected": -1.2336069345474243, |
|
"logps/chosen": -368.3187255859375, |
|
"logps/rejected": -962.7932739257812, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.179459095001221, |
|
"rewards/margins": 7.952836036682129, |
|
"rewards/rejected": -3.7733776569366455, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2893842085379674e-07, |
|
"logits/chosen": -1.527874231338501, |
|
"logits/rejected": -1.1270580291748047, |
|
"logps/chosen": -333.58258056640625, |
|
"logps/rejected": -464.1814880371094, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.677160263061523, |
|
"rewards/margins": 8.32009220123291, |
|
"rewards/rejected": -3.642932176589966, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2704948998866642e-07, |
|
"logits/chosen": -1.4980236291885376, |
|
"logits/rejected": -1.2070553302764893, |
|
"logps/chosen": -518.45703125, |
|
"logps/rejected": -483.01422119140625, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.008620262145996, |
|
"rewards/margins": 7.4014739990234375, |
|
"rewards/rejected": -3.392852783203125, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2516055912353608e-07, |
|
"logits/chosen": -1.5251991748809814, |
|
"logits/rejected": -1.212727665901184, |
|
"logps/chosen": -445.04931640625, |
|
"logps/rejected": -477.67364501953125, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.699405193328857, |
|
"rewards/margins": 8.411505699157715, |
|
"rewards/rejected": -3.7121009826660156, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2327162825840573e-07, |
|
"logits/chosen": -1.5143253803253174, |
|
"logits/rejected": -1.2615511417388916, |
|
"logps/chosen": -432.19561767578125, |
|
"logps/rejected": -536.9600219726562, |
|
"loss": 0.058, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.530027866363525, |
|
"rewards/margins": 7.708025932312012, |
|
"rewards/rejected": -3.1779980659484863, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.213826973932754e-07, |
|
"logits/chosen": -1.5171505212783813, |
|
"logits/rejected": -1.230185866355896, |
|
"logps/chosen": -315.69781494140625, |
|
"logps/rejected": -726.6375122070312, |
|
"loss": 0.0496, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.840248107910156, |
|
"rewards/margins": 8.333008766174316, |
|
"rewards/rejected": -3.4927608966827393, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1949376652814505e-07, |
|
"logits/chosen": -1.4918253421783447, |
|
"logits/rejected": -1.2145707607269287, |
|
"logps/chosen": -400.26776123046875, |
|
"logps/rejected": -509.4452209472656, |
|
"loss": 0.071, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.016195774078369, |
|
"rewards/margins": 7.594870567321777, |
|
"rewards/rejected": -3.5786757469177246, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.176048356630147e-07, |
|
"logits/chosen": -1.5160037279129028, |
|
"logits/rejected": -1.2147983312606812, |
|
"logps/chosen": -521.50537109375, |
|
"logps/rejected": -494.4219665527344, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.44297981262207, |
|
"rewards/margins": 7.988565921783447, |
|
"rewards/rejected": -3.5455868244171143, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.157159047978844e-07, |
|
"logits/chosen": -1.5425684452056885, |
|
"logits/rejected": -1.1855405569076538, |
|
"logps/chosen": -327.6552429199219, |
|
"logps/rejected": -658.273193359375, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.459707736968994, |
|
"rewards/margins": 7.496172904968262, |
|
"rewards/rejected": -3.0364651679992676, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_logits/chosen": -1.5143883228302002, |
|
"eval_logits/rejected": -1.194756269454956, |
|
"eval_logps/chosen": -376.0636291503906, |
|
"eval_logps/rejected": -593.4963989257812, |
|
"eval_loss": 0.05014927685260773, |
|
"eval_rewards/accuracies": 0.9797979593276978, |
|
"eval_rewards/chosen": 4.385910511016846, |
|
"eval_rewards/margins": 7.783912658691406, |
|
"eval_rewards/rejected": -3.3980023860931396, |
|
"eval_runtime": 560.6319, |
|
"eval_samples_per_second": 16.945, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1382697393275407e-07, |
|
"logits/chosen": -1.526610016822815, |
|
"logits/rejected": -1.1567124128341675, |
|
"logps/chosen": -454.001220703125, |
|
"logps/rejected": -625.8849487304688, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.282632827758789, |
|
"rewards/margins": 8.028142929077148, |
|
"rewards/rejected": -3.7455101013183594, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1193804306762372e-07, |
|
"logits/chosen": -1.514725923538208, |
|
"logits/rejected": -1.2673990726470947, |
|
"logps/chosen": -387.0127258300781, |
|
"logps/rejected": -848.1672973632812, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.279064655303955, |
|
"rewards/margins": 7.790387153625488, |
|
"rewards/rejected": -3.5113232135772705, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1004911220249338e-07, |
|
"logits/chosen": -1.5263025760650635, |
|
"logits/rejected": -1.2004420757293701, |
|
"logps/chosen": -390.23028564453125, |
|
"logps/rejected": -547.3260498046875, |
|
"loss": 0.0551, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.023037433624268, |
|
"rewards/margins": 6.7541937828063965, |
|
"rewards/rejected": -2.731156349182129, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0816018133736303e-07, |
|
"logits/chosen": -1.5079574584960938, |
|
"logits/rejected": -1.1305427551269531, |
|
"logps/chosen": -463.9093322753906, |
|
"logps/rejected": -681.10205078125, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.483296871185303, |
|
"rewards/margins": 8.173705101013184, |
|
"rewards/rejected": -3.690408229827881, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0627125047223271e-07, |
|
"logits/chosen": -1.5206629037857056, |
|
"logits/rejected": -1.2181518077850342, |
|
"logps/chosen": -397.5008850097656, |
|
"logps/rejected": -562.5797729492188, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.292169094085693, |
|
"rewards/margins": 7.382157325744629, |
|
"rewards/rejected": -3.089987277984619, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0438231960710237e-07, |
|
"logits/chosen": -1.5032273530960083, |
|
"logits/rejected": -1.276940107345581, |
|
"logps/chosen": -366.43060302734375, |
|
"logps/rejected": -677.36962890625, |
|
"loss": 0.062, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.3607306480407715, |
|
"rewards/margins": 7.476487636566162, |
|
"rewards/rejected": -3.115757465362549, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0249338874197203e-07, |
|
"logits/chosen": -1.5287476778030396, |
|
"logits/rejected": -1.2484480142593384, |
|
"logps/chosen": -378.16302490234375, |
|
"logps/rejected": -514.4780883789062, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.000302314758301, |
|
"rewards/margins": 7.317461967468262, |
|
"rewards/rejected": -3.317160129547119, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0060445787684168e-07, |
|
"logits/chosen": -1.5523929595947266, |
|
"logits/rejected": -1.2034788131713867, |
|
"logps/chosen": -319.57110595703125, |
|
"logps/rejected": -540.62158203125, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.091488361358643, |
|
"rewards/margins": 7.53751277923584, |
|
"rewards/rejected": -3.4460244178771973, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9871552701171136e-07, |
|
"logits/chosen": -1.5344616174697876, |
|
"logits/rejected": -1.2502692937850952, |
|
"logps/chosen": -417.1963806152344, |
|
"logps/rejected": -557.7677001953125, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.207846641540527, |
|
"rewards/margins": 7.392093658447266, |
|
"rewards/rejected": -3.184246778488159, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9682659614658105e-07, |
|
"logits/chosen": -1.5210683345794678, |
|
"logits/rejected": -1.2344892024993896, |
|
"logps/chosen": -468.1206970214844, |
|
"logps/rejected": -585.029541015625, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.628952980041504, |
|
"rewards/margins": 7.9791693687438965, |
|
"rewards/rejected": -3.35021710395813, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -1.5147186517715454, |
|
"eval_logits/rejected": -1.203603982925415, |
|
"eval_logps/chosen": -375.7136535644531, |
|
"eval_logps/rejected": -593.9944458007812, |
|
"eval_loss": 0.050368715077638626, |
|
"eval_rewards/accuracies": 0.9814814925193787, |
|
"eval_rewards/chosen": 4.420912742614746, |
|
"eval_rewards/margins": 7.868711471557617, |
|
"eval_rewards/rejected": -3.447798728942871, |
|
"eval_runtime": 559.9302, |
|
"eval_samples_per_second": 16.966, |
|
"eval_steps_per_second": 0.53, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.949376652814507e-07, |
|
"logits/chosen": -1.5237205028533936, |
|
"logits/rejected": -1.2196900844573975, |
|
"logps/chosen": -353.64813232421875, |
|
"logps/rejected": -567.98486328125, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.203667640686035, |
|
"rewards/margins": 7.699929237365723, |
|
"rewards/rejected": -3.4962615966796875, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9304873441632036e-07, |
|
"logits/chosen": -1.5383799076080322, |
|
"logits/rejected": -1.1897004842758179, |
|
"logps/chosen": -304.3857116699219, |
|
"logps/rejected": -420.22833251953125, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.345728874206543, |
|
"rewards/margins": 7.751856327056885, |
|
"rewards/rejected": -3.4061267375946045, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.9115980355119001e-07, |
|
"logits/chosen": -1.527374029159546, |
|
"logits/rejected": -1.2602983713150024, |
|
"logps/chosen": -329.572998046875, |
|
"logps/rejected": -735.498046875, |
|
"loss": 0.0391, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.703977108001709, |
|
"rewards/margins": 7.709604740142822, |
|
"rewards/rejected": -3.005627393722534, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8927087268605967e-07, |
|
"logits/chosen": -1.5346765518188477, |
|
"logits/rejected": -1.2059959173202515, |
|
"logps/chosen": -328.3225402832031, |
|
"logps/rejected": -767.6409912109375, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.100872993469238, |
|
"rewards/margins": 8.146936416625977, |
|
"rewards/rejected": -4.04606294631958, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8738194182092935e-07, |
|
"logits/chosen": -1.494866132736206, |
|
"logits/rejected": -1.2239644527435303, |
|
"logps/chosen": -555.472900390625, |
|
"logps/rejected": -520.2547607421875, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.442248344421387, |
|
"rewards/margins": 7.3626532554626465, |
|
"rewards/rejected": -2.920405626296997, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.85493010955799e-07, |
|
"logits/chosen": -1.519852876663208, |
|
"logits/rejected": -1.212501883506775, |
|
"logps/chosen": -389.075927734375, |
|
"logps/rejected": -503.87640380859375, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.409701347351074, |
|
"rewards/margins": 8.056486129760742, |
|
"rewards/rejected": -3.6467843055725098, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8360408009066866e-07, |
|
"logits/chosen": -1.5152437686920166, |
|
"logits/rejected": -1.2427327632904053, |
|
"logps/chosen": -401.62127685546875, |
|
"logps/rejected": -749.9725341796875, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.197791576385498, |
|
"rewards/margins": 7.349157810211182, |
|
"rewards/rejected": -3.1513662338256836, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8171514922553835e-07, |
|
"logits/chosen": -1.531203269958496, |
|
"logits/rejected": -1.2765506505966187, |
|
"logps/chosen": -406.9158630371094, |
|
"logps/rejected": -546.1353759765625, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.106224536895752, |
|
"rewards/margins": 7.5454840660095215, |
|
"rewards/rejected": -3.439260482788086, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.79826218360408e-07, |
|
"logits/chosen": -1.4749855995178223, |
|
"logits/rejected": -1.2416235208511353, |
|
"logps/chosen": -434.5870056152344, |
|
"logps/rejected": -412.8590393066406, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.6538729667663574, |
|
"rewards/margins": 7.308139801025391, |
|
"rewards/rejected": -3.654266357421875, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7793728749527768e-07, |
|
"logits/chosen": -1.5064570903778076, |
|
"logits/rejected": -1.2394144535064697, |
|
"logps/chosen": -378.8180236816406, |
|
"logps/rejected": -484.3456115722656, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 3.8156559467315674, |
|
"rewards/margins": 7.063841342926025, |
|
"rewards/rejected": -3.2481846809387207, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.5070686340332031, |
|
"eval_logits/rejected": -1.1925033330917358, |
|
"eval_logps/chosen": -376.0872802734375, |
|
"eval_logps/rejected": -595.3203125, |
|
"eval_loss": 0.04720592126250267, |
|
"eval_rewards/accuracies": 0.9831649661064148, |
|
"eval_rewards/chosen": 4.383547306060791, |
|
"eval_rewards/margins": 7.9639458656311035, |
|
"eval_rewards/rejected": -3.5803987979888916, |
|
"eval_runtime": 558.9461, |
|
"eval_samples_per_second": 16.996, |
|
"eval_steps_per_second": 0.531, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7604835663014734e-07, |
|
"logits/chosen": -1.5241343975067139, |
|
"logits/rejected": -1.2008111476898193, |
|
"logps/chosen": -319.7298278808594, |
|
"logps/rejected": -589.1061401367188, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.302298545837402, |
|
"rewards/margins": 7.667372703552246, |
|
"rewards/rejected": -3.3650736808776855, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.74159425765017e-07, |
|
"logits/chosen": -1.5107629299163818, |
|
"logits/rejected": -1.1774795055389404, |
|
"logps/chosen": -325.34588623046875, |
|
"logps/rejected": -675.2240600585938, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.494423866271973, |
|
"rewards/margins": 8.346731185913086, |
|
"rewards/rejected": -3.852306365966797, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7227049489988665e-07, |
|
"logits/chosen": -1.4806641340255737, |
|
"logits/rejected": -1.22501540184021, |
|
"logps/chosen": -429.6233825683594, |
|
"logps/rejected": -475.925537109375, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.175747871398926, |
|
"rewards/margins": 7.295570373535156, |
|
"rewards/rejected": -3.1198229789733887, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.703815640347563e-07, |
|
"logits/chosen": -1.5204100608825684, |
|
"logits/rejected": -1.2161033153533936, |
|
"logps/chosen": -313.9083557128906, |
|
"logps/rejected": -539.1317749023438, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.7474541664123535, |
|
"rewards/margins": 8.653862953186035, |
|
"rewards/rejected": -3.9064087867736816, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6849263316962596e-07, |
|
"logits/chosen": -1.5086395740509033, |
|
"logits/rejected": -1.1813517808914185, |
|
"logps/chosen": -395.3387756347656, |
|
"logps/rejected": -496.54052734375, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.5052483081817627, |
|
"rewards/margins": 7.271603584289551, |
|
"rewards/rejected": -3.766355514526367, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6660370230449564e-07, |
|
"logits/chosen": -1.5112879276275635, |
|
"logits/rejected": -1.1831653118133545, |
|
"logps/chosen": -406.0916442871094, |
|
"logps/rejected": -510.435302734375, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.234797477722168, |
|
"rewards/margins": 7.962366580963135, |
|
"rewards/rejected": -3.727570056915283, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6471477143936533e-07, |
|
"logits/chosen": -1.5286850929260254, |
|
"logits/rejected": -1.2524337768554688, |
|
"logps/chosen": -375.4138488769531, |
|
"logps/rejected": -542.0020751953125, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.921536922454834, |
|
"rewards/margins": 8.480701446533203, |
|
"rewards/rejected": -3.559164047241211, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6282584057423498e-07, |
|
"logits/chosen": -1.5124969482421875, |
|
"logits/rejected": -1.151609182357788, |
|
"logps/chosen": -297.8030700683594, |
|
"logps/rejected": -420.9918518066406, |
|
"loss": 0.0415, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.653942584991455, |
|
"rewards/margins": 8.311192512512207, |
|
"rewards/rejected": -3.657250165939331, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6093690970910464e-07, |
|
"logits/chosen": -1.5191973447799683, |
|
"logits/rejected": -1.2087528705596924, |
|
"logps/chosen": -406.9920654296875, |
|
"logps/rejected": -566.786376953125, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.626988887786865, |
|
"rewards/margins": 8.500511169433594, |
|
"rewards/rejected": -3.873521327972412, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.590479788439743e-07, |
|
"logits/chosen": -1.5033385753631592, |
|
"logits/rejected": -1.2532203197479248, |
|
"logps/chosen": -429.1227111816406, |
|
"logps/rejected": -824.8059692382812, |
|
"loss": 0.0374, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.9824090003967285, |
|
"rewards/margins": 7.593686103820801, |
|
"rewards/rejected": -3.611276149749756, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -1.5165996551513672, |
|
"eval_logits/rejected": -1.2020140886306763, |
|
"eval_logps/chosen": -376.9510498046875, |
|
"eval_logps/rejected": -597.5147094726562, |
|
"eval_loss": 0.044869087636470795, |
|
"eval_rewards/accuracies": 0.9840067625045776, |
|
"eval_rewards/chosen": 4.297166347503662, |
|
"eval_rewards/margins": 8.097002983093262, |
|
"eval_rewards/rejected": -3.7998366355895996, |
|
"eval_runtime": 560.374, |
|
"eval_samples_per_second": 16.953, |
|
"eval_steps_per_second": 0.53, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5715904797884398e-07, |
|
"logits/chosen": -1.5076260566711426, |
|
"logits/rejected": -1.236230492591858, |
|
"logps/chosen": -348.450439453125, |
|
"logps/rejected": -499.9619140625, |
|
"loss": 0.0848, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.0965800285339355, |
|
"rewards/margins": 7.605328559875488, |
|
"rewards/rejected": -3.5087478160858154, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5527011711371363e-07, |
|
"logits/chosen": -1.529317855834961, |
|
"logits/rejected": -1.2510004043579102, |
|
"logps/chosen": -339.26165771484375, |
|
"logps/rejected": -693.9097290039062, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.7485551834106445, |
|
"rewards/margins": 8.082775115966797, |
|
"rewards/rejected": -3.3342204093933105, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.533811862485833e-07, |
|
"logits/chosen": -1.5148240327835083, |
|
"logits/rejected": -1.199103593826294, |
|
"logps/chosen": -385.8512878417969, |
|
"logps/rejected": -639.3033447265625, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.181856155395508, |
|
"rewards/margins": 7.878443717956543, |
|
"rewards/rejected": -3.6965866088867188, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5149225538345294e-07, |
|
"logits/chosen": -1.5198280811309814, |
|
"logits/rejected": -1.1935245990753174, |
|
"logps/chosen": -348.9425964355469, |
|
"logps/rejected": -459.17041015625, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.525935173034668, |
|
"rewards/margins": 7.647967338562012, |
|
"rewards/rejected": -3.122032403945923, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.496033245183226e-07, |
|
"logits/chosen": -1.5460079908370972, |
|
"logits/rejected": -1.1968626976013184, |
|
"logps/chosen": -328.58251953125, |
|
"logps/rejected": -633.262451171875, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.540610313415527, |
|
"rewards/margins": 8.390274047851562, |
|
"rewards/rejected": -3.849663496017456, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.477143936531923e-07, |
|
"logits/chosen": -1.5178780555725098, |
|
"logits/rejected": -1.214658498764038, |
|
"logps/chosen": -476.41876220703125, |
|
"logps/rejected": -339.6917419433594, |
|
"loss": 0.0619, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.9794323444366455, |
|
"rewards/margins": 7.556540489196777, |
|
"rewards/rejected": -3.5771079063415527, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4582546278806196e-07, |
|
"logits/chosen": -1.520281195640564, |
|
"logits/rejected": -1.2420076131820679, |
|
"logps/chosen": -379.0602111816406, |
|
"logps/rejected": -473.5738830566406, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.220221042633057, |
|
"rewards/margins": 7.9052581787109375, |
|
"rewards/rejected": -3.685037612915039, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4393653192293162e-07, |
|
"logits/chosen": -1.5194097757339478, |
|
"logits/rejected": -1.2116343975067139, |
|
"logps/chosen": -409.2084655761719, |
|
"logps/rejected": -558.417236328125, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.273309230804443, |
|
"rewards/margins": 7.955672264099121, |
|
"rewards/rejected": -3.6823630332946777, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4204760105780127e-07, |
|
"logits/chosen": -1.5045769214630127, |
|
"logits/rejected": -1.2302758693695068, |
|
"logps/chosen": -392.59149169921875, |
|
"logps/rejected": -547.0234985351562, |
|
"loss": 0.046, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.600020408630371, |
|
"rewards/margins": 8.4217529296875, |
|
"rewards/rejected": -3.8217320442199707, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4015867019267093e-07, |
|
"logits/chosen": -1.4690983295440674, |
|
"logits/rejected": -1.182051658630371, |
|
"logps/chosen": -650.6025390625, |
|
"logps/rejected": -511.93035888671875, |
|
"loss": 0.0475, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.5337417125701904, |
|
"rewards/margins": 7.234931945800781, |
|
"rewards/rejected": -3.7011895179748535, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -1.517708420753479, |
|
"eval_logits/rejected": -1.1991840600967407, |
|
"eval_logps/chosen": -376.849365234375, |
|
"eval_logps/rejected": -596.0023803710938, |
|
"eval_loss": 0.04422454535961151, |
|
"eval_rewards/accuracies": 0.9840067625045776, |
|
"eval_rewards/chosen": 4.307338714599609, |
|
"eval_rewards/margins": 7.955935478210449, |
|
"eval_rewards/rejected": -3.648597002029419, |
|
"eval_runtime": 561.4605, |
|
"eval_samples_per_second": 16.92, |
|
"eval_steps_per_second": 0.529, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.382697393275406e-07, |
|
"logits/chosen": -1.4882314205169678, |
|
"logits/rejected": -1.2919832468032837, |
|
"logps/chosen": -555.08984375, |
|
"logps/rejected": -622.5364379882812, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.7566609382629395, |
|
"rewards/margins": 7.16671895980835, |
|
"rewards/rejected": -3.4100584983825684, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3638080846241027e-07, |
|
"logits/chosen": -1.5286386013031006, |
|
"logits/rejected": -1.2888312339782715, |
|
"logps/chosen": -284.7803039550781, |
|
"logps/rejected": -523.2872314453125, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.4519944190979, |
|
"rewards/margins": 7.86702823638916, |
|
"rewards/rejected": -3.4150338172912598, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3449187759727992e-07, |
|
"logits/chosen": -1.527421236038208, |
|
"logits/rejected": -1.2638188600540161, |
|
"logps/chosen": -299.5697937011719, |
|
"logps/rejected": -468.88641357421875, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.214964389801025, |
|
"rewards/margins": 7.762864589691162, |
|
"rewards/rejected": -3.547900676727295, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3260294673214958e-07, |
|
"logits/chosen": -1.5077273845672607, |
|
"logits/rejected": -1.2305810451507568, |
|
"logps/chosen": -315.0970153808594, |
|
"logps/rejected": -543.9883422851562, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.3515095710754395, |
|
"rewards/margins": 8.111806869506836, |
|
"rewards/rejected": -3.760296583175659, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3071401586701926e-07, |
|
"logits/chosen": -1.5324013233184814, |
|
"logits/rejected": -1.176598310470581, |
|
"logps/chosen": -350.73590087890625, |
|
"logps/rejected": -407.3160705566406, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.385611534118652, |
|
"rewards/margins": 8.232316970825195, |
|
"rewards/rejected": -3.8467063903808594, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2882508500188894e-07, |
|
"logits/chosen": -1.53853440284729, |
|
"logits/rejected": -1.1605119705200195, |
|
"logps/chosen": -322.6458740234375, |
|
"logps/rejected": -577.635009765625, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.1477580070495605, |
|
"rewards/margins": 8.158384323120117, |
|
"rewards/rejected": -4.010627746582031, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.269361541367586e-07, |
|
"logits/chosen": -1.4969431161880493, |
|
"logits/rejected": -1.2394483089447021, |
|
"logps/chosen": -429.3595275878906, |
|
"logps/rejected": -534.6132202148438, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.626293659210205, |
|
"rewards/margins": 8.510354995727539, |
|
"rewards/rejected": -3.8840622901916504, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2504722327162826e-07, |
|
"logits/chosen": -1.5438110828399658, |
|
"logits/rejected": -1.3265063762664795, |
|
"logps/chosen": -336.172607421875, |
|
"logps/rejected": -555.7020263671875, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.796633243560791, |
|
"rewards/margins": 8.188325881958008, |
|
"rewards/rejected": -3.391692638397217, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.231582924064979e-07, |
|
"logits/chosen": -1.534425973892212, |
|
"logits/rejected": -1.2709665298461914, |
|
"logps/chosen": -331.15771484375, |
|
"logps/rejected": -599.520751953125, |
|
"loss": 0.0743, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.660555362701416, |
|
"rewards/margins": 7.926393985748291, |
|
"rewards/rejected": -3.265838146209717, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2126936154136757e-07, |
|
"logits/chosen": -1.5267812013626099, |
|
"logits/rejected": -1.2841061353683472, |
|
"logps/chosen": -363.42718505859375, |
|
"logps/rejected": -614.720947265625, |
|
"loss": 0.0407, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.113651752471924, |
|
"rewards/margins": 7.806565284729004, |
|
"rewards/rejected": -3.69291353225708, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -1.5241639614105225, |
|
"eval_logits/rejected": -1.207804799079895, |
|
"eval_logps/chosen": -376.9122009277344, |
|
"eval_logps/rejected": -597.497802734375, |
|
"eval_loss": 0.04077613726258278, |
|
"eval_rewards/accuracies": 0.9882155060768127, |
|
"eval_rewards/chosen": 4.301055908203125, |
|
"eval_rewards/margins": 8.099197387695312, |
|
"eval_rewards/rejected": -3.798142194747925, |
|
"eval_runtime": 561.0154, |
|
"eval_samples_per_second": 16.934, |
|
"eval_steps_per_second": 0.529, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1938043067623725e-07, |
|
"logits/chosen": -1.5032380819320679, |
|
"logits/rejected": -1.2603862285614014, |
|
"logps/chosen": -468.38519287109375, |
|
"logps/rejected": -603.046142578125, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.9953246116638184, |
|
"rewards/margins": 7.673506259918213, |
|
"rewards/rejected": -3.6781811714172363, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1749149981110692e-07, |
|
"logits/chosen": -1.5227621793746948, |
|
"logits/rejected": -1.294641137123108, |
|
"logps/chosen": -368.20526123046875, |
|
"logps/rejected": -515.7191162109375, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 4.12636661529541, |
|
"rewards/margins": 7.043761253356934, |
|
"rewards/rejected": -2.9173948764801025, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1560256894597657e-07, |
|
"logits/chosen": -1.4953995943069458, |
|
"logits/rejected": -1.1584880352020264, |
|
"logps/chosen": -586.2529296875, |
|
"logps/rejected": -516.81591796875, |
|
"loss": 0.0496, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.4468929767608643, |
|
"rewards/margins": 7.300196647644043, |
|
"rewards/rejected": -3.853304386138916, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1371363808084623e-07, |
|
"logits/chosen": -1.511156439781189, |
|
"logits/rejected": -1.1865122318267822, |
|
"logps/chosen": -372.8666076660156, |
|
"logps/rejected": -906.1705932617188, |
|
"loss": 0.032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.395539283752441, |
|
"rewards/margins": 8.54565715789795, |
|
"rewards/rejected": -4.150118827819824, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.118247072157159e-07, |
|
"logits/chosen": -1.5099351406097412, |
|
"logits/rejected": -1.203018307685852, |
|
"logps/chosen": -348.9670715332031, |
|
"logps/rejected": -429.0040588378906, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.664444923400879, |
|
"rewards/margins": 8.25013542175293, |
|
"rewards/rejected": -3.5856919288635254, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0993577635058557e-07, |
|
"logits/chosen": -1.5239416360855103, |
|
"logits/rejected": -1.1614112854003906, |
|
"logps/chosen": -306.3650817871094, |
|
"logps/rejected": -499.98553466796875, |
|
"loss": 0.0506, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.466089725494385, |
|
"rewards/margins": 8.765016555786133, |
|
"rewards/rejected": -4.298927307128906, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0804684548545522e-07, |
|
"logits/chosen": -1.4973338842391968, |
|
"logits/rejected": -1.2353532314300537, |
|
"logps/chosen": -363.0428771972656, |
|
"logps/rejected": -688.9337158203125, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.117824554443359, |
|
"rewards/margins": 7.465353488922119, |
|
"rewards/rejected": -3.3475289344787598, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0615791462032489e-07, |
|
"logits/chosen": -1.4992105960845947, |
|
"logits/rejected": -1.1850025653839111, |
|
"logps/chosen": -390.51898193359375, |
|
"logps/rejected": -471.59246826171875, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.290133953094482, |
|
"rewards/margins": 8.444581985473633, |
|
"rewards/rejected": -4.154448509216309, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0426898375519455e-07, |
|
"logits/chosen": -1.5164746046066284, |
|
"logits/rejected": -1.2176775932312012, |
|
"logps/chosen": -386.88580322265625, |
|
"logps/rejected": -695.19873046875, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.442442893981934, |
|
"rewards/margins": 8.452461242675781, |
|
"rewards/rejected": -4.010018348693848, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0238005289006423e-07, |
|
"logits/chosen": -1.5462114810943604, |
|
"logits/rejected": -1.2368415594100952, |
|
"logps/chosen": -341.13232421875, |
|
"logps/rejected": -573.03857421875, |
|
"loss": 0.0386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.52898645401001, |
|
"rewards/margins": 7.922593593597412, |
|
"rewards/rejected": -3.3936073780059814, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/chosen": -1.5132849216461182, |
|
"eval_logits/rejected": -1.2028939723968506, |
|
"eval_logps/chosen": -376.4996337890625, |
|
"eval_logps/rejected": -596.8302001953125, |
|
"eval_loss": 0.03966302424669266, |
|
"eval_rewards/accuracies": 0.9882155060768127, |
|
"eval_rewards/chosen": 4.342313289642334, |
|
"eval_rewards/margins": 8.073698997497559, |
|
"eval_rewards/rejected": -3.731386184692383, |
|
"eval_runtime": 559.9916, |
|
"eval_samples_per_second": 16.965, |
|
"eval_steps_per_second": 0.53, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0049112202493389e-07, |
|
"logits/chosen": -1.5179309844970703, |
|
"logits/rejected": -1.190763235092163, |
|
"logps/chosen": -344.33892822265625, |
|
"logps/rejected": -483.5990295410156, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 4.271202564239502, |
|
"rewards/margins": 7.67493200302124, |
|
"rewards/rejected": -3.4037303924560547, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.860219115980354e-08, |
|
"logits/chosen": -1.501859426498413, |
|
"logits/rejected": -1.2754067182540894, |
|
"logps/chosen": -424.81298828125, |
|
"logps/rejected": -500.36175537109375, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.149248123168945, |
|
"rewards/margins": 7.505955696105957, |
|
"rewards/rejected": -3.3567073345184326, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.671326029467321e-08, |
|
"logits/chosen": -1.5037426948547363, |
|
"logits/rejected": -1.284251093864441, |
|
"logps/chosen": -380.0538024902344, |
|
"logps/rejected": -624.1697387695312, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.587218284606934, |
|
"rewards/margins": 8.128026008605957, |
|
"rewards/rejected": -3.5408072471618652, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.482432942954287e-08, |
|
"logits/chosen": -1.5006518363952637, |
|
"logits/rejected": -1.248807668685913, |
|
"logps/chosen": -396.10888671875, |
|
"logps/rejected": -717.84033203125, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.7980237007141113, |
|
"rewards/margins": 7.6149582862854, |
|
"rewards/rejected": -3.816934108734131, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.293539856441255e-08, |
|
"logits/chosen": -1.5265161991119385, |
|
"logits/rejected": -1.211663007736206, |
|
"logps/chosen": -327.98809814453125, |
|
"logps/rejected": -630.8543701171875, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.494044303894043, |
|
"rewards/margins": 7.565443515777588, |
|
"rewards/rejected": -3.0713984966278076, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.10464676992822e-08, |
|
"logits/chosen": -1.5001169443130493, |
|
"logits/rejected": -1.2648394107818604, |
|
"logps/chosen": -304.59185791015625, |
|
"logps/rejected": -596.6958618164062, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.436002254486084, |
|
"rewards/margins": 7.953620910644531, |
|
"rewards/rejected": -3.517618179321289, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.915753683415186e-08, |
|
"logits/chosen": -1.5231083631515503, |
|
"logits/rejected": -1.221145749092102, |
|
"logps/chosen": -391.0690612792969, |
|
"logps/rejected": -552.14208984375, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.607051849365234, |
|
"rewards/margins": 8.291250228881836, |
|
"rewards/rejected": -3.6841976642608643, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.726860596902153e-08, |
|
"logits/chosen": -1.4747313261032104, |
|
"logits/rejected": -1.266494631767273, |
|
"logps/chosen": -392.5005798339844, |
|
"logps/rejected": -545.3479614257812, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.316119194030762, |
|
"rewards/margins": 7.8105034828186035, |
|
"rewards/rejected": -3.494384288787842, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.53796751038912e-08, |
|
"logits/chosen": -1.5253236293792725, |
|
"logits/rejected": -1.2925410270690918, |
|
"logps/chosen": -378.5972900390625, |
|
"logps/rejected": -446.5828552246094, |
|
"loss": 0.04, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.2583818435668945, |
|
"rewards/margins": 7.742008209228516, |
|
"rewards/rejected": -3.4836268424987793, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.349074423876085e-08, |
|
"logits/chosen": -1.4970636367797852, |
|
"logits/rejected": -1.173380970954895, |
|
"logps/chosen": -453.74249267578125, |
|
"logps/rejected": -443.0990295410156, |
|
"loss": 0.0504, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.253788948059082, |
|
"rewards/margins": 8.546039581298828, |
|
"rewards/rejected": -4.292250156402588, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -1.5187809467315674, |
|
"eval_logits/rejected": -1.2023788690567017, |
|
"eval_logps/chosen": -376.1911926269531, |
|
"eval_logps/rejected": -597.20654296875, |
|
"eval_loss": 0.03895895555615425, |
|
"eval_rewards/accuracies": 0.9856902360916138, |
|
"eval_rewards/chosen": 4.3731584548950195, |
|
"eval_rewards/margins": 8.142176628112793, |
|
"eval_rewards/rejected": -3.7690184116363525, |
|
"eval_runtime": 560.5484, |
|
"eval_samples_per_second": 16.948, |
|
"eval_steps_per_second": 0.53, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.160181337363052e-08, |
|
"logits/chosen": -1.5079090595245361, |
|
"logits/rejected": -1.218942403793335, |
|
"logps/chosen": -400.8089904785156, |
|
"logps/rejected": -563.3532104492188, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.257572650909424, |
|
"rewards/margins": 7.628092288970947, |
|
"rewards/rejected": -3.3705201148986816, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.971288250850018e-08, |
|
"logits/chosen": -1.5301258563995361, |
|
"logits/rejected": -1.2807587385177612, |
|
"logps/chosen": -315.27337646484375, |
|
"logps/rejected": -816.6290893554688, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.818511009216309, |
|
"rewards/margins": 8.743834495544434, |
|
"rewards/rejected": -3.925323486328125, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.782395164336985e-08, |
|
"logits/chosen": -1.5073165893554688, |
|
"logits/rejected": -1.2790597677230835, |
|
"logps/chosen": -434.49267578125, |
|
"logps/rejected": -593.9933471679688, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.8378801345825195, |
|
"rewards/margins": 7.976640224456787, |
|
"rewards/rejected": -3.1387598514556885, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.593502077823952e-08, |
|
"logits/chosen": -1.503222942352295, |
|
"logits/rejected": -1.2196729183197021, |
|
"logps/chosen": -313.89654541015625, |
|
"logps/rejected": -462.62078857421875, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.41795015335083, |
|
"rewards/margins": 7.8886284828186035, |
|
"rewards/rejected": -3.4706790447235107, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.404608991310917e-08, |
|
"logits/chosen": -1.5052754878997803, |
|
"logits/rejected": -1.2386561632156372, |
|
"logps/chosen": -372.96209716796875, |
|
"logps/rejected": -585.10791015625, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.49515438079834, |
|
"rewards/margins": 8.383849143981934, |
|
"rewards/rejected": -3.888695478439331, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.215715904797884e-08, |
|
"logits/chosen": -1.525687336921692, |
|
"logits/rejected": -1.1762199401855469, |
|
"logps/chosen": -317.4388122558594, |
|
"logps/rejected": -654.0875854492188, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.393351078033447, |
|
"rewards/margins": 8.096270561218262, |
|
"rewards/rejected": -3.7029201984405518, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.02682281828485e-08, |
|
"logits/chosen": -1.520235300064087, |
|
"logits/rejected": -1.1894917488098145, |
|
"logps/chosen": -396.32781982421875, |
|
"logps/rejected": -622.02783203125, |
|
"loss": 0.061, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.850032329559326, |
|
"rewards/margins": 7.790719032287598, |
|
"rewards/rejected": -3.940687656402588, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.837929731771818e-08, |
|
"logits/chosen": -1.5212862491607666, |
|
"logits/rejected": -1.2499208450317383, |
|
"logps/chosen": -376.70684814453125, |
|
"logps/rejected": -389.7237854003906, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.127648830413818, |
|
"rewards/margins": 7.433091640472412, |
|
"rewards/rejected": -3.305443286895752, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.649036645258783e-08, |
|
"logits/chosen": -1.517173171043396, |
|
"logits/rejected": -1.1914104223251343, |
|
"logps/chosen": -444.6543884277344, |
|
"logps/rejected": -365.63848876953125, |
|
"loss": 0.047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.557802677154541, |
|
"rewards/margins": 8.270492553710938, |
|
"rewards/rejected": -3.7126896381378174, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.460143558745749e-08, |
|
"logits/chosen": -1.5273593664169312, |
|
"logits/rejected": -1.1451303958892822, |
|
"logps/chosen": -325.8979797363281, |
|
"logps/rejected": -582.1531982421875, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.247693061828613, |
|
"rewards/margins": 8.258806228637695, |
|
"rewards/rejected": -4.011113166809082, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -1.5157567262649536, |
|
"eval_logits/rejected": -1.1977304220199585, |
|
"eval_logps/chosen": -376.56494140625, |
|
"eval_logps/rejected": -597.8150024414062, |
|
"eval_loss": 0.037716832011938095, |
|
"eval_rewards/accuracies": 0.9865319728851318, |
|
"eval_rewards/chosen": 4.33577823638916, |
|
"eval_rewards/margins": 8.16563892364502, |
|
"eval_rewards/rejected": -3.8298606872558594, |
|
"eval_runtime": 560.9406, |
|
"eval_samples_per_second": 16.936, |
|
"eval_steps_per_second": 0.529, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.271250472232716e-08, |
|
"logits/chosen": -1.5143485069274902, |
|
"logits/rejected": -1.3070073127746582, |
|
"logps/chosen": -389.19464111328125, |
|
"logps/rejected": -658.4906616210938, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.09715461730957, |
|
"rewards/margins": 7.326831817626953, |
|
"rewards/rejected": -3.229677200317383, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.082357385719683e-08, |
|
"logits/chosen": -1.5194426774978638, |
|
"logits/rejected": -1.338354229927063, |
|
"logps/chosen": -472.4857482910156, |
|
"logps/rejected": -520.1239013671875, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.095303535461426, |
|
"rewards/margins": 7.620616912841797, |
|
"rewards/rejected": -3.525313138961792, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.893464299206649e-08, |
|
"logits/chosen": -1.5334171056747437, |
|
"logits/rejected": -1.2070086002349854, |
|
"logps/chosen": -345.03106689453125, |
|
"logps/rejected": -601.37060546875, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.165444374084473, |
|
"rewards/margins": 7.924208641052246, |
|
"rewards/rejected": -3.7587637901306152, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.704571212693615e-08, |
|
"logits/chosen": -1.5042918920516968, |
|
"logits/rejected": -1.2301785945892334, |
|
"logps/chosen": -357.8497314453125, |
|
"logps/rejected": -494.36944580078125, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.3990867137908936, |
|
"rewards/margins": 6.8430280685424805, |
|
"rewards/rejected": -3.443941593170166, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.5156781261805816e-08, |
|
"logits/chosen": -1.5347890853881836, |
|
"logits/rejected": -1.2450568675994873, |
|
"logps/chosen": -290.27862548828125, |
|
"logps/rejected": -452.07562255859375, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.202007293701172, |
|
"rewards/margins": 7.855565547943115, |
|
"rewards/rejected": -3.6535582542419434, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.326785039667548e-08, |
|
"logits/chosen": -1.5297716856002808, |
|
"logits/rejected": -1.1971065998077393, |
|
"logps/chosen": -395.7029724121094, |
|
"logps/rejected": -416.86810302734375, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.438989639282227, |
|
"rewards/margins": 8.2936372756958, |
|
"rewards/rejected": -3.854647159576416, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.137891953154514e-08, |
|
"logits/chosen": -1.5386561155319214, |
|
"logits/rejected": -1.268654704093933, |
|
"logps/chosen": -334.1921691894531, |
|
"logps/rejected": -703.80908203125, |
|
"loss": 0.0391, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 5.104978084564209, |
|
"rewards/margins": 8.777002334594727, |
|
"rewards/rejected": -3.6720242500305176, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.948998866641481e-08, |
|
"logits/chosen": -1.5136685371398926, |
|
"logits/rejected": -1.2055470943450928, |
|
"logps/chosen": -393.09869384765625, |
|
"logps/rejected": -756.4305419921875, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.0205078125, |
|
"rewards/margins": 8.307108879089355, |
|
"rewards/rejected": -4.2866010665893555, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.760105780128447e-08, |
|
"logits/chosen": -1.5168330669403076, |
|
"logits/rejected": -1.1547820568084717, |
|
"logps/chosen": -425.5591735839844, |
|
"logps/rejected": -774.383544921875, |
|
"loss": 0.0347, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.134261131286621, |
|
"rewards/margins": 8.559054374694824, |
|
"rewards/rejected": -4.424793720245361, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.5712126936154134e-08, |
|
"logits/chosen": -1.5314137935638428, |
|
"logits/rejected": -1.2539647817611694, |
|
"logps/chosen": -260.22467041015625, |
|
"logps/rejected": -489.43182373046875, |
|
"loss": 0.038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.627935409545898, |
|
"rewards/margins": 8.63275146484375, |
|
"rewards/rejected": -4.00481653213501, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.5139025449752808, |
|
"eval_logits/rejected": -1.2032972574234009, |
|
"eval_logps/chosen": -376.6385803222656, |
|
"eval_logps/rejected": -597.8989868164062, |
|
"eval_loss": 0.039693351835012436, |
|
"eval_rewards/accuracies": 0.9890572428703308, |
|
"eval_rewards/chosen": 4.328419208526611, |
|
"eval_rewards/margins": 8.166685104370117, |
|
"eval_rewards/rejected": -3.838265895843506, |
|
"eval_runtime": 558.6431, |
|
"eval_samples_per_second": 17.005, |
|
"eval_steps_per_second": 0.532, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3823196071023796e-08, |
|
"logits/chosen": -1.5291705131530762, |
|
"logits/rejected": -1.2298305034637451, |
|
"logps/chosen": -323.2745056152344, |
|
"logps/rejected": -548.8238525390625, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.2751970291137695, |
|
"rewards/margins": 8.485635757446289, |
|
"rewards/rejected": -4.2104387283325195, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.1934265205893465e-08, |
|
"logits/chosen": -1.503124475479126, |
|
"logits/rejected": -1.2478026151657104, |
|
"logps/chosen": -480.8046875, |
|
"logps/rejected": -691.4312133789062, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.418412208557129, |
|
"rewards/margins": 8.399371147155762, |
|
"rewards/rejected": -3.980959415435791, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.004533434076313e-08, |
|
"logits/chosen": -1.5199840068817139, |
|
"logits/rejected": -1.2516810894012451, |
|
"logps/chosen": -318.7385559082031, |
|
"logps/rejected": -670.9912719726562, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.218963623046875, |
|
"rewards/margins": 7.438061714172363, |
|
"rewards/rejected": -3.21909761428833, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.815640347563279e-08, |
|
"logits/chosen": -1.4905387163162231, |
|
"logits/rejected": -1.271468162536621, |
|
"logps/chosen": -392.78240966796875, |
|
"logps/rejected": -901.37548828125, |
|
"loss": 0.0291, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 3.8263752460479736, |
|
"rewards/margins": 7.711263179779053, |
|
"rewards/rejected": -3.8848884105682373, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.626747261050245e-08, |
|
"logits/chosen": -1.5208760499954224, |
|
"logits/rejected": -1.2187827825546265, |
|
"logps/chosen": -320.1753845214844, |
|
"logps/rejected": -740.0049438476562, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.149036884307861, |
|
"rewards/margins": 8.07148551940918, |
|
"rewards/rejected": -3.9224491119384766, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4378541745372115e-08, |
|
"logits/chosen": -1.5195062160491943, |
|
"logits/rejected": -1.2230064868927002, |
|
"logps/chosen": -427.1581115722656, |
|
"logps/rejected": -645.3033447265625, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.26482629776001, |
|
"rewards/margins": 8.393260955810547, |
|
"rewards/rejected": -4.128435134887695, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2489610880241784e-08, |
|
"logits/chosen": -1.525866150856018, |
|
"logits/rejected": -1.2397754192352295, |
|
"logps/chosen": -306.03778076171875, |
|
"logps/rejected": -587.6482543945312, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.998318672180176, |
|
"rewards/margins": 8.605379104614258, |
|
"rewards/rejected": -3.6070590019226074, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.0600680015111446e-08, |
|
"logits/chosen": -1.516852617263794, |
|
"logits/rejected": -1.1658298969268799, |
|
"logps/chosen": -312.1228942871094, |
|
"logps/rejected": -346.4730224609375, |
|
"loss": 0.0445, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.162473678588867, |
|
"rewards/margins": 8.016084671020508, |
|
"rewards/rejected": -3.853611469268799, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.871174914998111e-08, |
|
"logits/chosen": -1.524436354637146, |
|
"logits/rejected": -1.1867830753326416, |
|
"logps/chosen": -326.0414123535156, |
|
"logps/rejected": -485.95166015625, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.58970308303833, |
|
"rewards/margins": 8.010432243347168, |
|
"rewards/rejected": -3.420729875564575, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.682281828485077e-08, |
|
"logits/chosen": -1.520810842514038, |
|
"logits/rejected": -1.1647284030914307, |
|
"logps/chosen": -333.65008544921875, |
|
"logps/rejected": -827.0930786132812, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.306121349334717, |
|
"rewards/margins": 8.471829414367676, |
|
"rewards/rejected": -4.165709018707275, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_logits/chosen": -1.5196325778961182, |
|
"eval_logits/rejected": -1.2036585807800293, |
|
"eval_logps/chosen": -376.93743896484375, |
|
"eval_logps/rejected": -598.0059204101562, |
|
"eval_loss": 0.0383492186665535, |
|
"eval_rewards/accuracies": 0.9856902360916138, |
|
"eval_rewards/chosen": 4.298529148101807, |
|
"eval_rewards/margins": 8.147479057312012, |
|
"eval_rewards/rejected": -3.848950147628784, |
|
"eval_runtime": 558.5691, |
|
"eval_samples_per_second": 17.008, |
|
"eval_steps_per_second": 0.532, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4933887419720436e-08, |
|
"logits/chosen": -1.5408833026885986, |
|
"logits/rejected": -1.2266209125518799, |
|
"logps/chosen": -303.4648132324219, |
|
"logps/rejected": -399.6147155761719, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.5173773765563965, |
|
"rewards/margins": 8.121113777160645, |
|
"rewards/rejected": -3.6037354469299316, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.30449565545901e-08, |
|
"logits/chosen": -1.5411306619644165, |
|
"logits/rejected": -1.2380434274673462, |
|
"logps/chosen": -333.23699951171875, |
|
"logps/rejected": -425.56365966796875, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.0661211013793945, |
|
"rewards/margins": 7.654998779296875, |
|
"rewards/rejected": -3.588876724243164, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1156025689459764e-08, |
|
"logits/chosen": -1.5202014446258545, |
|
"logits/rejected": -1.249887466430664, |
|
"logps/chosen": -415.40435791015625, |
|
"logps/rejected": -705.8953857421875, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.5467915534973145, |
|
"rewards/margins": 8.021774291992188, |
|
"rewards/rejected": -3.474982500076294, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.926709482432943e-08, |
|
"logits/chosen": -1.5168020725250244, |
|
"logits/rejected": -1.1783835887908936, |
|
"logps/chosen": -368.14337158203125, |
|
"logps/rejected": -583.8258056640625, |
|
"loss": 0.0346, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.783400058746338, |
|
"rewards/margins": 8.449257850646973, |
|
"rewards/rejected": -3.6658573150634766, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7378163959199092e-08, |
|
"logits/chosen": -1.5222840309143066, |
|
"logits/rejected": -1.2157676219940186, |
|
"logps/chosen": -377.74932861328125, |
|
"logps/rejected": -491.2120056152344, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.284539222717285, |
|
"rewards/margins": 8.006728172302246, |
|
"rewards/rejected": -3.7221896648406982, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5489233094068758e-08, |
|
"logits/chosen": -1.5340877771377563, |
|
"logits/rejected": -1.2179819345474243, |
|
"logps/chosen": -341.79974365234375, |
|
"logps/rejected": -472.01611328125, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.628954887390137, |
|
"rewards/margins": 8.175573348999023, |
|
"rewards/rejected": -3.546616315841675, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3600302228938419e-08, |
|
"logits/chosen": -1.5086033344268799, |
|
"logits/rejected": -1.2281320095062256, |
|
"logps/chosen": -449.17156982421875, |
|
"logps/rejected": -668.3966064453125, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 4.049460411071777, |
|
"rewards/margins": 7.482198238372803, |
|
"rewards/rejected": -3.4327378273010254, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1711371363808084e-08, |
|
"logits/chosen": -1.533616304397583, |
|
"logits/rejected": -1.2686628103256226, |
|
"logps/chosen": -389.5843200683594, |
|
"logps/rejected": -875.8938598632812, |
|
"loss": 0.0386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.17772102355957, |
|
"rewards/margins": 8.47387981414795, |
|
"rewards/rejected": -4.296158790588379, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.822440498677748e-09, |
|
"logits/chosen": -1.5015050172805786, |
|
"logits/rejected": -1.2158093452453613, |
|
"logps/chosen": -532.0431518554688, |
|
"logps/rejected": -657.7987670898438, |
|
"loss": 0.037, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 3.858596086502075, |
|
"rewards/margins": 7.628444671630859, |
|
"rewards/rejected": -3.769848346710205, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.933509633547412e-09, |
|
"logits/chosen": -1.5313704013824463, |
|
"logits/rejected": -1.2360942363739014, |
|
"logps/chosen": -339.72064208984375, |
|
"logps/rejected": -532.837158203125, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.4045090675354, |
|
"rewards/margins": 8.255632400512695, |
|
"rewards/rejected": -3.851123332977295, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.5156338214874268, |
|
"eval_logits/rejected": -1.1996530294418335, |
|
"eval_logps/chosen": -376.83685302734375, |
|
"eval_logps/rejected": -597.8652954101562, |
|
"eval_loss": 0.03792084753513336, |
|
"eval_rewards/accuracies": 0.9873737096786499, |
|
"eval_rewards/chosen": 4.308588981628418, |
|
"eval_rewards/margins": 8.143476486206055, |
|
"eval_rewards/rejected": -3.8348886966705322, |
|
"eval_runtime": 561.0021, |
|
"eval_samples_per_second": 16.934, |
|
"eval_steps_per_second": 0.529, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.044578768417076e-09, |
|
"logits/chosen": -1.5284096002578735, |
|
"logits/rejected": -1.2779542207717896, |
|
"logps/chosen": -324.67864990234375, |
|
"logps/rejected": -710.6837768554688, |
|
"loss": 0.0286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.9714195728302, |
|
"rewards/margins": 7.755260467529297, |
|
"rewards/rejected": -3.7838408946990967, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.15564790328674e-09, |
|
"logits/chosen": -1.5134741067886353, |
|
"logits/rejected": -1.2182211875915527, |
|
"logps/chosen": -336.8125915527344, |
|
"logps/rejected": -749.4577026367188, |
|
"loss": 0.028, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.103480815887451, |
|
"rewards/margins": 7.714101314544678, |
|
"rewards/rejected": -3.6106209754943848, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.2667170381564033e-09, |
|
"logits/chosen": -1.5044059753417969, |
|
"logits/rejected": -1.248711347579956, |
|
"logps/chosen": -317.59130859375, |
|
"logps/rejected": -411.42706298828125, |
|
"loss": 0.038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.9643845558166504, |
|
"rewards/margins": 7.606657981872559, |
|
"rewards/rejected": -3.64227294921875, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7778617302606723e-10, |
|
"logits/chosen": -1.4944725036621094, |
|
"logits/rejected": -1.208418369293213, |
|
"logps/chosen": -398.1802673339844, |
|
"logps/rejected": -528.9981689453125, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 4.150896072387695, |
|
"rewards/margins": 7.481464385986328, |
|
"rewards/rejected": -3.3305678367614746, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2942, |
|
"total_flos": 0.0, |
|
"train_loss": 0.11494619559330763, |
|
"train_runtime": 36321.6775, |
|
"train_samples_per_second": 5.184, |
|
"train_steps_per_second": 0.081 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2942, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|