|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1102, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009074410163339383, |
|
"grad_norm": 158.3370033102814, |
|
"learning_rate": 4.504504504504504e-09, |
|
"logits/chosen": -3.340815544128418, |
|
"logits/rejected": -3.192225456237793, |
|
"logps/chosen": -502.579833984375, |
|
"logps/rejected": -285.80780029296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009074410163339383, |
|
"grad_norm": 130.1834663583104, |
|
"learning_rate": 4.504504504504504e-08, |
|
"logits/chosen": -3.252624988555908, |
|
"logits/rejected": -3.2026939392089844, |
|
"logps/chosen": -445.1510925292969, |
|
"logps/rejected": -257.1930847167969, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.004173497203737497, |
|
"rewards/margins": -0.005616632290184498, |
|
"rewards/rejected": 0.0014431348536163568, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.018148820326678767, |
|
"grad_norm": 133.1160873140461, |
|
"learning_rate": 9.009009009009008e-08, |
|
"logits/chosen": -3.2142014503479004, |
|
"logits/rejected": -3.092050075531006, |
|
"logps/chosen": -358.178466796875, |
|
"logps/rejected": -255.81381225585938, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.02352432906627655, |
|
"rewards/margins": 0.018155094236135483, |
|
"rewards/rejected": 0.005369235761463642, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02722323049001815, |
|
"grad_norm": 105.51382523990524, |
|
"learning_rate": 1.3513513513513515e-07, |
|
"logits/chosen": -3.195345163345337, |
|
"logits/rejected": -3.0827088356018066, |
|
"logps/chosen": -436.8555603027344, |
|
"logps/rejected": -256.4704284667969, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.1778702437877655, |
|
"rewards/margins": 0.18295857310295105, |
|
"rewards/rejected": -0.005088324658572674, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.036297640653357534, |
|
"grad_norm": 60.7347284030573, |
|
"learning_rate": 1.8018018018018017e-07, |
|
"logits/chosen": -3.2239010334014893, |
|
"logits/rejected": -3.095165729522705, |
|
"logps/chosen": -457.87548828125, |
|
"logps/rejected": -252.0406494140625, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.5447695851325989, |
|
"rewards/margins": 0.5851655006408691, |
|
"rewards/rejected": -0.04039595648646355, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.045372050816696916, |
|
"grad_norm": 54.695722961592466, |
|
"learning_rate": 2.2522522522522522e-07, |
|
"logits/chosen": -3.176532506942749, |
|
"logits/rejected": -3.0848500728607178, |
|
"logps/chosen": -388.94012451171875, |
|
"logps/rejected": -237.63821411132812, |
|
"loss": 0.4294, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.0301157236099243, |
|
"rewards/margins": 1.13114595413208, |
|
"rewards/rejected": -0.10103032737970352, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0544464609800363, |
|
"grad_norm": 47.472511720710024, |
|
"learning_rate": 2.702702702702703e-07, |
|
"logits/chosen": -3.2208728790283203, |
|
"logits/rejected": -3.113288640975952, |
|
"logps/chosen": -359.95257568359375, |
|
"logps/rejected": -240.6640625, |
|
"loss": 0.3796, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.4194958209991455, |
|
"rewards/margins": 1.7440767288208008, |
|
"rewards/rejected": -0.3245808184146881, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06352087114337568, |
|
"grad_norm": 39.52379333696054, |
|
"learning_rate": 3.153153153153153e-07, |
|
"logits/chosen": -3.226762056350708, |
|
"logits/rejected": -3.15112042427063, |
|
"logps/chosen": -369.8200378417969, |
|
"logps/rejected": -225.3522491455078, |
|
"loss": 0.3449, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.488443374633789, |
|
"rewards/margins": 1.912201166152954, |
|
"rewards/rejected": -0.4237578511238098, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07259528130671507, |
|
"grad_norm": 44.60507224682583, |
|
"learning_rate": 3.6036036036036033e-07, |
|
"logits/chosen": -3.221963882446289, |
|
"logits/rejected": -3.1375503540039062, |
|
"logps/chosen": -367.0074462890625, |
|
"logps/rejected": -247.7465362548828, |
|
"loss": 0.3376, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.3955223560333252, |
|
"rewards/margins": 1.9550836086273193, |
|
"rewards/rejected": -0.5595611929893494, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08166969147005444, |
|
"grad_norm": 39.08954176751689, |
|
"learning_rate": 4.054054054054054e-07, |
|
"logits/chosen": -3.2832207679748535, |
|
"logits/rejected": -3.2185757160186768, |
|
"logps/chosen": -425.96160888671875, |
|
"logps/rejected": -284.1032409667969, |
|
"loss": 0.2831, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.7428745031356812, |
|
"rewards/margins": 2.6255106925964355, |
|
"rewards/rejected": -0.8826361894607544, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09074410163339383, |
|
"grad_norm": 31.865193019318827, |
|
"learning_rate": 4.5045045045045043e-07, |
|
"logits/chosen": -3.2463021278381348, |
|
"logits/rejected": -3.1695780754089355, |
|
"logps/chosen": -405.6779479980469, |
|
"logps/rejected": -280.05279541015625, |
|
"loss": 0.2737, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.48948073387146, |
|
"rewards/margins": 2.4863460063934326, |
|
"rewards/rejected": -0.9968653917312622, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0998185117967332, |
|
"grad_norm": 37.24886466287313, |
|
"learning_rate": 4.954954954954955e-07, |
|
"logits/chosen": -3.3158822059631348, |
|
"logits/rejected": -3.2599291801452637, |
|
"logps/chosen": -335.0594177246094, |
|
"logps/rejected": -253.5969696044922, |
|
"loss": 0.2908, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.3872979879379272, |
|
"rewards/margins": 2.4369797706604004, |
|
"rewards/rejected": -1.0496819019317627, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1088929219600726, |
|
"grad_norm": 36.665353860098634, |
|
"learning_rate": 4.99898253844669e-07, |
|
"logits/chosen": -3.3115856647491455, |
|
"logits/rejected": -3.2060322761535645, |
|
"logps/chosen": -380.2586975097656, |
|
"logps/rejected": -309.19830322265625, |
|
"loss": 0.2488, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.6321680545806885, |
|
"rewards/margins": 3.540356397628784, |
|
"rewards/rejected": -1.9081882238388062, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11796733212341198, |
|
"grad_norm": 37.06555340402035, |
|
"learning_rate": 4.995466450646198e-07, |
|
"logits/chosen": -3.3821232318878174, |
|
"logits/rejected": -3.252227783203125, |
|
"logps/chosen": -381.7076110839844, |
|
"logps/rejected": -268.3893737792969, |
|
"loss": 0.2668, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.502217173576355, |
|
"rewards/margins": 3.5684256553649902, |
|
"rewards/rejected": -2.066208600997925, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12704174228675136, |
|
"grad_norm": 38.75162651406194, |
|
"learning_rate": 4.989442707764628e-07, |
|
"logits/chosen": -3.352313995361328, |
|
"logits/rejected": -3.257378101348877, |
|
"logps/chosen": -374.43670654296875, |
|
"logps/rejected": -287.6412658691406, |
|
"loss": 0.2587, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.585739016532898, |
|
"rewards/margins": 3.8027961254119873, |
|
"rewards/rejected": -2.217057466506958, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13611615245009073, |
|
"grad_norm": 43.21076330965974, |
|
"learning_rate": 4.980917362966688e-07, |
|
"logits/chosen": -3.3749313354492188, |
|
"logits/rejected": -3.275294542312622, |
|
"logps/chosen": -417.59979248046875, |
|
"logps/rejected": -307.0452575683594, |
|
"loss": 0.2517, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.4160921573638916, |
|
"rewards/margins": 4.021966457366943, |
|
"rewards/rejected": -2.605874538421631, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14519056261343014, |
|
"grad_norm": 50.071879952898506, |
|
"learning_rate": 4.969898983237597e-07, |
|
"logits/chosen": -3.4258790016174316, |
|
"logits/rejected": -3.311413526535034, |
|
"logps/chosen": -355.394287109375, |
|
"logps/rejected": -277.78228759765625, |
|
"loss": 0.2454, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.4633300304412842, |
|
"rewards/margins": 3.9215309619903564, |
|
"rewards/rejected": -2.4582009315490723, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1542649727767695, |
|
"grad_norm": 37.68782415609405, |
|
"learning_rate": 4.95639864077426e-07, |
|
"logits/chosen": -3.341951370239258, |
|
"logits/rejected": -3.242036819458008, |
|
"logps/chosen": -414.0884704589844, |
|
"logps/rejected": -292.1954650878906, |
|
"loss": 0.26, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.594613790512085, |
|
"rewards/margins": 4.542968273162842, |
|
"rewards/rejected": -2.948354482650757, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16333938294010888, |
|
"grad_norm": 38.50311716438203, |
|
"learning_rate": 4.940429901858992e-07, |
|
"logits/chosen": -3.315183162689209, |
|
"logits/rejected": -3.1880416870117188, |
|
"logps/chosen": -362.5171813964844, |
|
"logps/rejected": -274.8582763671875, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.4722703695297241, |
|
"rewards/margins": 4.3128228187561035, |
|
"rewards/rejected": -2.8405520915985107, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 34.06267671066188, |
|
"learning_rate": 4.922008813226972e-07, |
|
"logits/chosen": -3.3597846031188965, |
|
"logits/rejected": -3.1644554138183594, |
|
"logps/chosen": -399.7878723144531, |
|
"logps/rejected": -301.4942932128906, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.6164276599884033, |
|
"rewards/margins": 4.526951789855957, |
|
"rewards/rejected": -2.9105238914489746, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.18148820326678766, |
|
"grad_norm": 46.205969565119815, |
|
"learning_rate": 4.901153885941126e-07, |
|
"logits/chosen": -3.3047378063201904, |
|
"logits/rejected": -3.1578073501586914, |
|
"logps/chosen": -414.8160705566406, |
|
"logps/rejected": -362.454345703125, |
|
"loss": 0.2326, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.8784027099609375, |
|
"rewards/margins": 4.937838554382324, |
|
"rewards/rejected": -3.059436082839966, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19056261343012704, |
|
"grad_norm": 31.435836455070017, |
|
"learning_rate": 4.877886076790663e-07, |
|
"logits/chosen": -3.190352201461792, |
|
"logits/rejected": -3.0550923347473145, |
|
"logps/chosen": -373.290771484375, |
|
"logps/rejected": -295.98297119140625, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.5484044551849365, |
|
"rewards/margins": 4.609506607055664, |
|
"rewards/rejected": -3.0611023902893066, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1996370235934664, |
|
"grad_norm": 39.42562554641845, |
|
"learning_rate": 4.852228767231913e-07, |
|
"logits/chosen": -3.2841200828552246, |
|
"logits/rejected": -3.1063590049743652, |
|
"logps/chosen": -388.2396240234375, |
|
"logps/rejected": -296.4252014160156, |
|
"loss": 0.2312, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.832167625427246, |
|
"rewards/margins": 5.544095039367676, |
|
"rewards/rejected": -3.7119274139404297, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.20871143375680581, |
|
"grad_norm": 46.72987633281948, |
|
"learning_rate": 4.824207739892674e-07, |
|
"logits/chosen": -3.2899200916290283, |
|
"logits/rejected": -3.1556496620178223, |
|
"logps/chosen": -410.05963134765625, |
|
"logps/rejected": -361.39483642578125, |
|
"loss": 0.2179, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.3326658010482788, |
|
"rewards/margins": 4.7414703369140625, |
|
"rewards/rejected": -3.4088046550750732, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2177858439201452, |
|
"grad_norm": 37.19856056372189, |
|
"learning_rate": 4.793851152663654e-07, |
|
"logits/chosen": -3.265838623046875, |
|
"logits/rejected": -3.1473007202148438, |
|
"logps/chosen": -372.2582092285156, |
|
"logps/rejected": -298.2706604003906, |
|
"loss": 0.2378, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.1389844417572021, |
|
"rewards/margins": 4.751265048980713, |
|
"rewards/rejected": -3.612281084060669, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.22686025408348456, |
|
"grad_norm": 46.60591133283558, |
|
"learning_rate": 4.7611895104030507e-07, |
|
"logits/chosen": -3.2685317993164062, |
|
"logits/rejected": -3.161949634552002, |
|
"logps/chosen": -396.34747314453125, |
|
"logps/rejected": -335.84747314453125, |
|
"loss": 0.2157, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.336761713027954, |
|
"rewards/margins": 5.206587791442871, |
|
"rewards/rejected": -3.869826078414917, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23593466424682397, |
|
"grad_norm": 48.51885326305408, |
|
"learning_rate": 4.726255634282693e-07, |
|
"logits/chosen": -3.294827699661255, |
|
"logits/rejected": -3.185404062271118, |
|
"logps/chosen": -391.83551025390625, |
|
"logps/rejected": -374.4935607910156, |
|
"loss": 0.2463, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.8340435028076172, |
|
"rewards/margins": 4.343258857727051, |
|
"rewards/rejected": -3.5092151165008545, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.24500907441016334, |
|
"grad_norm": 32.347739963126486, |
|
"learning_rate": 4.689084628806562e-07, |
|
"logits/chosen": -3.2203621864318848, |
|
"logits/rejected": -3.1058781147003174, |
|
"logps/chosen": -343.33447265625, |
|
"logps/rejected": -298.6334533691406, |
|
"loss": 0.2047, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.4291167259216309, |
|
"rewards/margins": 5.545358657836914, |
|
"rewards/rejected": -4.116241931915283, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2540834845735027, |
|
"grad_norm": 45.442868522798584, |
|
"learning_rate": 4.6497138465348296e-07, |
|
"logits/chosen": -3.3757805824279785, |
|
"logits/rejected": -3.2095916271209717, |
|
"logps/chosen": -373.3706970214844, |
|
"logps/rejected": -310.359619140625, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.2218859195709229, |
|
"rewards/margins": 4.990644931793213, |
|
"rewards/rejected": -3.768759250640869, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2631578947368421, |
|
"grad_norm": 37.199549797615454, |
|
"learning_rate": 4.608182850548852e-07, |
|
"logits/chosen": -3.3329250812530518, |
|
"logits/rejected": -3.2051258087158203, |
|
"logps/chosen": -367.7585754394531, |
|
"logps/rejected": -309.1279602050781, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.8593353033065796, |
|
"rewards/margins": 5.791568756103516, |
|
"rewards/rejected": -3.932232618331909, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.27223230490018147, |
|
"grad_norm": 175.22451085843974, |
|
"learning_rate": 4.564533374694852e-07, |
|
"logits/chosen": -3.2539546489715576, |
|
"logits/rejected": -3.164071559906006, |
|
"logps/chosen": -435.7051696777344, |
|
"logps/rejected": -320.7638854980469, |
|
"loss": 0.1834, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.294732689857483, |
|
"rewards/margins": 5.408836841583252, |
|
"rewards/rejected": -4.1141037940979, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2813067150635209, |
|
"grad_norm": 49.820868636075424, |
|
"learning_rate": 4.518809281646232e-07, |
|
"logits/chosen": -3.2305614948272705, |
|
"logits/rejected": -3.1455864906311035, |
|
"logps/chosen": -393.7672119140625, |
|
"logps/rejected": -333.42657470703125, |
|
"loss": 0.1685, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.6657005548477173, |
|
"rewards/margins": 5.660979270935059, |
|
"rewards/rejected": -3.9952797889709473, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.29038112522686027, |
|
"grad_norm": 46.38355610514617, |
|
"learning_rate": 4.4710565188266623e-07, |
|
"logits/chosen": -3.2362568378448486, |
|
"logits/rejected": -3.151102304458618, |
|
"logps/chosen": -399.2664794921875, |
|
"logps/rejected": -307.7845458984375, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.6355903148651123, |
|
"rewards/margins": 5.9738664627075195, |
|
"rewards/rejected": -4.338275909423828, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.29945553539019965, |
|
"grad_norm": 42.82294462919112, |
|
"learning_rate": 4.4213230722382343e-07, |
|
"logits/chosen": -3.2030301094055176, |
|
"logits/rejected": -3.0990149974823, |
|
"logps/chosen": -394.6158752441406, |
|
"logps/rejected": -334.71051025390625, |
|
"loss": 0.1802, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.584474802017212, |
|
"rewards/margins": 6.304975986480713, |
|
"rewards/rejected": -4.720500946044922, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.308529945553539, |
|
"grad_norm": 48.08164749388606, |
|
"learning_rate": 4.3696589182410805e-07, |
|
"logits/chosen": -3.2478835582733154, |
|
"logits/rejected": -3.144749879837036, |
|
"logps/chosen": -355.66729736328125, |
|
"logps/rejected": -341.45648193359375, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.5237865447998047, |
|
"rewards/margins": 5.312397480010986, |
|
"rewards/rejected": -3.7886109352111816, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3176043557168784, |
|
"grad_norm": 39.92094020376778, |
|
"learning_rate": 4.3161159733329143e-07, |
|
"logits/chosen": -3.326545238494873, |
|
"logits/rejected": -3.246845245361328, |
|
"logps/chosen": -414.8565979003906, |
|
"logps/rejected": -401.8109130859375, |
|
"loss": 0.2384, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.3785138130187988, |
|
"rewards/margins": 5.425638198852539, |
|
"rewards/rejected": -4.047124862670898, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.32667876588021777, |
|
"grad_norm": 26.862261807926526, |
|
"learning_rate": 4.2607480419789587e-07, |
|
"logits/chosen": -3.271552324295044, |
|
"logits/rejected": -3.2207393646240234, |
|
"logps/chosen": -332.0171813964844, |
|
"logps/rejected": -289.7067565917969, |
|
"loss": 0.1943, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.2600226402282715, |
|
"rewards/margins": 5.030722141265869, |
|
"rewards/rejected": -3.7706997394561768, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.33575317604355714, |
|
"grad_norm": 33.49994098039321, |
|
"learning_rate": 4.2036107625446783e-07, |
|
"logits/chosen": -3.3395228385925293, |
|
"logits/rejected": -3.1348748207092285, |
|
"logps/chosen": -394.46795654296875, |
|
"logps/rejected": -317.6743469238281, |
|
"loss": 0.1864, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.9414106607437134, |
|
"rewards/margins": 6.639167785644531, |
|
"rewards/rejected": -4.697756767272949, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 33.33388789905036, |
|
"learning_rate": 4.1447615513856635e-07, |
|
"logits/chosen": -3.2808823585510254, |
|
"logits/rejected": -3.1995129585266113, |
|
"logps/chosen": -409.93603515625, |
|
"logps/rejected": -363.3692932128906, |
|
"loss": 0.1947, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.1632466316223145, |
|
"rewards/margins": 5.149783611297607, |
|
"rewards/rejected": -3.986537218093872, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.35390199637023595, |
|
"grad_norm": 42.856588981749276, |
|
"learning_rate": 4.084259545150832e-07, |
|
"logits/chosen": -3.324871778488159, |
|
"logits/rejected": -3.1859161853790283, |
|
"logps/chosen": -393.6763916015625, |
|
"logps/rejected": -311.64404296875, |
|
"loss": 0.2091, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.2227169275283813, |
|
"rewards/margins": 5.600234031677246, |
|
"rewards/rejected": -4.377516746520996, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3629764065335753, |
|
"grad_norm": 44.671991447679176, |
|
"learning_rate": 4.022165541356941e-07, |
|
"logits/chosen": -3.30537748336792, |
|
"logits/rejected": -3.205775499343872, |
|
"logps/chosen": -396.2132873535156, |
|
"logps/rejected": -329.81768798828125, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.6245359182357788, |
|
"rewards/margins": 6.8077392578125, |
|
"rewards/rejected": -5.183202743530273, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3720508166969147, |
|
"grad_norm": 66.71100346246702, |
|
"learning_rate": 3.9585419372941163e-07, |
|
"logits/chosen": -3.2828071117401123, |
|
"logits/rejected": -3.2080435752868652, |
|
"logps/chosen": -383.0317687988281, |
|
"logps/rejected": -321.2015686035156, |
|
"loss": 0.228, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.194445013999939, |
|
"rewards/margins": 5.658787727355957, |
|
"rewards/rejected": -4.464343070983887, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3811252268602541, |
|
"grad_norm": 43.403396807175234, |
|
"learning_rate": 3.893452667323793e-07, |
|
"logits/chosen": -3.319455623626709, |
|
"logits/rejected": -3.226341962814331, |
|
"logps/chosen": -390.3877258300781, |
|
"logps/rejected": -356.02667236328125, |
|
"loss": 0.1792, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.921933889389038, |
|
"rewards/margins": 6.734816074371338, |
|
"rewards/rejected": -4.8128814697265625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.39019963702359345, |
|
"grad_norm": 33.07955087603256, |
|
"learning_rate": 3.826963138632079e-07, |
|
"logits/chosen": -3.2265000343322754, |
|
"logits/rejected": -3.149766445159912, |
|
"logps/chosen": -372.9231262207031, |
|
"logps/rejected": -309.909423828125, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.2967065572738647, |
|
"rewards/margins": 6.030789852142334, |
|
"rewards/rejected": -4.73408317565918, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3992740471869328, |
|
"grad_norm": 37.11726657072608, |
|
"learning_rate": 3.759140165503101e-07, |
|
"logits/chosen": -3.2834110260009766, |
|
"logits/rejected": -3.13962984085083, |
|
"logps/chosen": -365.4574279785156, |
|
"logps/rejected": -315.04095458984375, |
|
"loss": 0.2033, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.5437448024749756, |
|
"rewards/margins": 6.540799140930176, |
|
"rewards/rejected": -4.997054100036621, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.40834845735027225, |
|
"grad_norm": 31.621123892211624, |
|
"learning_rate": 3.6900519021783783e-07, |
|
"logits/chosen": -3.241823196411133, |
|
"logits/rejected": -3.1715283393859863, |
|
"logps/chosen": -383.9208984375, |
|
"logps/rejected": -330.8930358886719, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2258026599884033, |
|
"rewards/margins": 5.701479911804199, |
|
"rewards/rejected": -4.475677013397217, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.41742286751361163, |
|
"grad_norm": 49.178031353029425, |
|
"learning_rate": 3.619767774369694e-07, |
|
"logits/chosen": -3.3129935264587402, |
|
"logits/rejected": -3.1908164024353027, |
|
"logps/chosen": -372.14959716796875, |
|
"logps/rejected": -343.7088623046875, |
|
"loss": 0.243, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.1361382007598877, |
|
"rewards/margins": 5.055792331695557, |
|
"rewards/rejected": -3.919654369354248, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.426497277676951, |
|
"grad_norm": 37.34866908331328, |
|
"learning_rate": 3.548358409494291e-07, |
|
"logits/chosen": -3.297877788543701, |
|
"logits/rejected": -3.2215073108673096, |
|
"logps/chosen": -404.14013671875, |
|
"logps/rejected": -313.212890625, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.1549508571624756, |
|
"rewards/margins": 5.121293067932129, |
|
"rewards/rejected": -3.9663422107696533, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4355716878402904, |
|
"grad_norm": 48.882738312246666, |
|
"learning_rate": 3.475895565702479e-07, |
|
"logits/chosen": -3.2060375213623047, |
|
"logits/rejected": -3.1445508003234863, |
|
"logps/chosen": -401.10638427734375, |
|
"logps/rejected": -344.1682434082031, |
|
"loss": 0.322, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.2614433765411377, |
|
"rewards/margins": 5.999361515045166, |
|
"rewards/rejected": -4.737917900085449, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.44464609800362975, |
|
"grad_norm": 50.68639708460402, |
|
"learning_rate": 3.402452059769006e-07, |
|
"logits/chosen": -3.1942269802093506, |
|
"logits/rejected": -3.0844810009002686, |
|
"logps/chosen": -388.79132080078125, |
|
"logps/rejected": -315.6092834472656, |
|
"loss": 0.2099, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.3437281847000122, |
|
"rewards/margins": 5.917460918426514, |
|
"rewards/rejected": -4.573733329772949, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4537205081669691, |
|
"grad_norm": 50.14897433584961, |
|
"learning_rate": 3.3281016939206175e-07, |
|
"logits/chosen": -3.2095024585723877, |
|
"logits/rejected": -3.1448185443878174, |
|
"logps/chosen": -387.31158447265625, |
|
"logps/rejected": -345.4943542480469, |
|
"loss": 0.2088, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.2966068983078003, |
|
"rewards/margins": 5.815266132354736, |
|
"rewards/rejected": -4.518658638000488, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4627949183303085, |
|
"grad_norm": 37.26501610114464, |
|
"learning_rate": 3.2529191816733575e-07, |
|
"logits/chosen": -3.2022509574890137, |
|
"logits/rejected": -3.110914707183838, |
|
"logps/chosen": -398.9407653808594, |
|
"logps/rejected": -354.12310791015625, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.4531505107879639, |
|
"rewards/margins": 5.942654609680176, |
|
"rewards/rejected": -4.489503860473633, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.47186932849364793, |
|
"grad_norm": 72.82731512227907, |
|
"learning_rate": 3.1769800727541315e-07, |
|
"logits/chosen": -3.122022867202759, |
|
"logits/rejected": -3.0256314277648926, |
|
"logps/chosen": -382.6126708984375, |
|
"logps/rejected": -322.1177673339844, |
|
"loss": 0.1792, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.351405143737793, |
|
"rewards/margins": 6.904001712799072, |
|
"rewards/rejected": -5.5525970458984375, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4809437386569873, |
|
"grad_norm": 38.26822885645478, |
|
"learning_rate": 3.1003606771819666e-07, |
|
"logits/chosen": -3.139383554458618, |
|
"logits/rejected": -3.029106616973877, |
|
"logps/chosen": -392.1285705566406, |
|
"logps/rejected": -341.97955322265625, |
|
"loss": 0.1913, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.1525547504425049, |
|
"rewards/margins": 6.076541900634766, |
|
"rewards/rejected": -4.92398738861084, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4900181488203267, |
|
"grad_norm": 43.882325645673525, |
|
"learning_rate": 3.023137988585276e-07, |
|
"logits/chosen": -3.178436279296875, |
|
"logits/rejected": -3.1166467666625977, |
|
"logps/chosen": -387.3483581542969, |
|
"logps/rejected": -383.4141845703125, |
|
"loss": 0.2216, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.2282485961914062, |
|
"rewards/margins": 5.9733991622924805, |
|
"rewards/rejected": -4.745150089263916, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.49909255898366606, |
|
"grad_norm": 44.30893309428354, |
|
"learning_rate": 2.945389606832165e-07, |
|
"logits/chosen": -3.1245813369750977, |
|
"logits/rejected": -3.1115365028381348, |
|
"logps/chosen": -399.7410583496094, |
|
"logps/rejected": -330.3243713378906, |
|
"loss": 0.2036, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.6770175695419312, |
|
"rewards/margins": 7.239003658294678, |
|
"rewards/rejected": -5.561986446380615, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5081669691470054, |
|
"grad_norm": 49.55956318241512, |
|
"learning_rate": 2.8671936600515445e-07, |
|
"logits/chosen": -3.155513286590576, |
|
"logits/rejected": -3.0905396938323975, |
|
"logps/chosen": -374.02392578125, |
|
"logps/rejected": -352.85986328125, |
|
"loss": 0.1808, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.1260972023010254, |
|
"rewards/margins": 5.719089984893799, |
|
"rewards/rejected": -4.592992782592773, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 43.67166571714464, |
|
"learning_rate": 2.788628726123399e-07, |
|
"logits/chosen": -3.174683094024658, |
|
"logits/rejected": -3.1034023761749268, |
|
"logps/chosen": -368.4584655761719, |
|
"logps/rejected": -301.5198669433594, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.258647084236145, |
|
"rewards/margins": 6.08292293548584, |
|
"rewards/rejected": -4.824276447296143, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 46.763150412882524, |
|
"learning_rate": 2.7097737537171095e-07, |
|
"logits/chosen": -3.193993091583252, |
|
"logits/rejected": -3.1491267681121826, |
|
"logps/chosen": -379.1547546386719, |
|
"logps/rejected": -357.4938049316406, |
|
"loss": 0.1895, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.0861809253692627, |
|
"rewards/margins": 5.892585754394531, |
|
"rewards/rejected": -4.8064045906066895, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5353901996370236, |
|
"grad_norm": 28.50813723914452, |
|
"learning_rate": 2.6307079829571685e-07, |
|
"logits/chosen": -3.1775803565979004, |
|
"logits/rejected": -3.1690754890441895, |
|
"logps/chosen": -396.2727966308594, |
|
"logps/rejected": -364.2826232910156, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.2243824005126953, |
|
"rewards/margins": 6.1176557540893555, |
|
"rewards/rejected": -4.893273830413818, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5444646098003629, |
|
"grad_norm": 28.704553800715182, |
|
"learning_rate": 2.551510865796032e-07, |
|
"logits/chosen": -3.1417794227600098, |
|
"logits/rejected": -3.1004185676574707, |
|
"logps/chosen": -321.55340576171875, |
|
"logps/rejected": -337.394287109375, |
|
"loss": 0.1878, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.9132893681526184, |
|
"rewards/margins": 6.268902778625488, |
|
"rewards/rejected": -5.355614185333252, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5535390199637024, |
|
"grad_norm": 33.895452413293604, |
|
"learning_rate": 2.472261986174088e-07, |
|
"logits/chosen": -3.1212687492370605, |
|
"logits/rejected": -3.1359126567840576, |
|
"logps/chosen": -430.43408203125, |
|
"logps/rejected": -387.6915588378906, |
|
"loss": 0.2053, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.5770806670188904, |
|
"rewards/margins": 5.136279106140137, |
|
"rewards/rejected": -4.559198379516602, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5626134301270418, |
|
"grad_norm": 35.42714003898933, |
|
"learning_rate": 2.393040980047015e-07, |
|
"logits/chosen": -3.2134900093078613, |
|
"logits/rejected": -3.1361842155456543, |
|
"logps/chosen": -425.0316467285156, |
|
"logps/rejected": -361.84063720703125, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.0797230005264282, |
|
"rewards/margins": 6.7416672706604, |
|
"rewards/rejected": -5.661944389343262, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5716878402903811, |
|
"grad_norm": 34.07816065316919, |
|
"learning_rate": 2.3139274553608494e-07, |
|
"logits/chosen": -3.184861898422241, |
|
"logits/rejected": -3.1559784412384033, |
|
"logps/chosen": -404.4765319824219, |
|
"logps/rejected": -341.35546875, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.9906010627746582, |
|
"rewards/margins": 6.139868259429932, |
|
"rewards/rejected": -5.149266719818115, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5807622504537205, |
|
"grad_norm": 42.85248856046431, |
|
"learning_rate": 2.2350009120552156e-07, |
|
"logits/chosen": -3.1936917304992676, |
|
"logits/rejected": -3.1695635318756104, |
|
"logps/chosen": -406.64471435546875, |
|
"logps/rejected": -391.30450439453125, |
|
"loss": 0.2067, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.1682602167129517, |
|
"rewards/margins": 6.4593048095703125, |
|
"rewards/rejected": -5.29104471206665, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5898366606170599, |
|
"grad_norm": 55.772327844139255, |
|
"learning_rate": 2.1563406621750825e-07, |
|
"logits/chosen": -3.1266651153564453, |
|
"logits/rejected": -3.0548195838928223, |
|
"logps/chosen": -370.1761474609375, |
|
"logps/rejected": -340.8184509277344, |
|
"loss": 0.2141, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.114101767539978, |
|
"rewards/margins": 6.014601230621338, |
|
"rewards/rejected": -4.9004998207092285, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5989110707803993, |
|
"grad_norm": 25.791382900724862, |
|
"learning_rate": 2.0780257501713346e-07, |
|
"logits/chosen": -3.165015459060669, |
|
"logits/rejected": -3.1734094619750977, |
|
"logps/chosen": -415.572998046875, |
|
"logps/rejected": -385.7120666503906, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.1960872411727905, |
|
"rewards/margins": 6.418343544006348, |
|
"rewards/rejected": -5.222255706787109, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6079854809437386, |
|
"grad_norm": 33.330744516004074, |
|
"learning_rate": 2.000134873470243e-07, |
|
"logits/chosen": -3.128190279006958, |
|
"logits/rejected": -3.1104788780212402, |
|
"logps/chosen": -341.00494384765625, |
|
"logps/rejected": -329.31768798828125, |
|
"loss": 0.1785, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.1156467199325562, |
|
"rewards/margins": 6.29996395111084, |
|
"rewards/rejected": -5.184317111968994, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.617059891107078, |
|
"grad_norm": 33.150812329384884, |
|
"learning_rate": 1.922746303391655e-07, |
|
"logits/chosen": -3.105457305908203, |
|
"logits/rejected": -3.0872020721435547, |
|
"logps/chosen": -394.4687805175781, |
|
"logps/rejected": -330.22650146484375, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.7894136905670166, |
|
"rewards/margins": 7.119010925292969, |
|
"rewards/rejected": -5.329598426818848, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6261343012704175, |
|
"grad_norm": 37.824280577969375, |
|
"learning_rate": 1.8459378064953754e-07, |
|
"logits/chosen": -3.1590168476104736, |
|
"logits/rejected": -3.0799336433410645, |
|
"logps/chosen": -410.97802734375, |
|
"logps/rejected": -351.85650634765625, |
|
"loss": 0.2003, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.496854543685913, |
|
"rewards/margins": 6.54131555557251, |
|
"rewards/rejected": -5.044460773468018, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6352087114337568, |
|
"grad_norm": 42.16693290472239, |
|
"learning_rate": 1.7697865664347694e-07, |
|
"logits/chosen": -3.1934874057769775, |
|
"logits/rejected": -3.130568265914917, |
|
"logps/chosen": -386.4817810058594, |
|
"logps/rejected": -316.8409118652344, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.163108229637146, |
|
"rewards/margins": 5.6252617835998535, |
|
"rewards/rejected": -4.462153434753418, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6442831215970962, |
|
"grad_norm": 51.69448179259041, |
|
"learning_rate": 1.6943691063961213e-07, |
|
"logits/chosen": -3.1876442432403564, |
|
"logits/rejected": -3.130868434906006, |
|
"logps/chosen": -434.44073486328125, |
|
"logps/rejected": -342.75146484375, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.4867048263549805, |
|
"rewards/margins": 6.818478584289551, |
|
"rewards/rejected": -5.331774711608887, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6533575317604355, |
|
"grad_norm": 42.65687224603704, |
|
"learning_rate": 1.6197612122016846e-07, |
|
"logits/chosen": -3.15063214302063, |
|
"logits/rejected": -3.128218650817871, |
|
"logps/chosen": -396.96038818359375, |
|
"logps/rejected": -371.03314208984375, |
|
"loss": 0.1706, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.3427283763885498, |
|
"rewards/margins": 6.249272346496582, |
|
"rewards/rejected": -4.906544208526611, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.662431941923775, |
|
"grad_norm": 44.262085666597265, |
|
"learning_rate": 1.5460378561536985e-07, |
|
"logits/chosen": -3.130981206893921, |
|
"logits/rejected": -3.078493356704712, |
|
"logps/chosen": -367.4677734375, |
|
"logps/rejected": -304.9905090332031, |
|
"loss": 0.1785, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2416002750396729, |
|
"rewards/margins": 6.065240383148193, |
|
"rewards/rejected": -4.823639869689941, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6715063520871143, |
|
"grad_norm": 27.300679345139994, |
|
"learning_rate": 1.473273121695898e-07, |
|
"logits/chosen": -3.127115488052368, |
|
"logits/rejected": -3.073434591293335, |
|
"logps/chosen": -399.8476867675781, |
|
"logps/rejected": -370.6973571777344, |
|
"loss": 0.1835, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.0761568546295166, |
|
"rewards/margins": 5.881572246551514, |
|
"rewards/rejected": -4.805415630340576, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6805807622504537, |
|
"grad_norm": 50.790995812359, |
|
"learning_rate": 1.4015401289682214e-07, |
|
"logits/chosen": -3.123532772064209, |
|
"logits/rejected": -3.080094814300537, |
|
"logps/chosen": -331.3045654296875, |
|
"logps/rejected": -314.36358642578125, |
|
"loss": 0.2493, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.7594553232192993, |
|
"rewards/margins": 5.120793342590332, |
|
"rewards/rejected": -4.3613386154174805, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 34.305160719125155, |
|
"learning_rate": 1.3309109613295335e-07, |
|
"logits/chosen": -3.1643004417419434, |
|
"logits/rejected": -3.095179796218872, |
|
"logps/chosen": -415.06854248046875, |
|
"logps/rejected": -358.4546203613281, |
|
"loss": 0.1776, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.9015072584152222, |
|
"rewards/margins": 5.74955415725708, |
|
"rewards/rejected": -4.84804630279541, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6987295825771325, |
|
"grad_norm": 31.66819953926833, |
|
"learning_rate": 1.2614565929221848e-07, |
|
"logits/chosen": -3.2078864574432373, |
|
"logits/rejected": -3.0819454193115234, |
|
"logps/chosen": -367.291748046875, |
|
"logps/rejected": -346.97088623046875, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.1401704549789429, |
|
"rewards/margins": 6.157609939575195, |
|
"rewards/rejected": -5.017439365386963, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7078039927404719, |
|
"grad_norm": 36.80087686819721, |
|
"learning_rate": 1.1932468173512137e-07, |
|
"logits/chosen": -3.1448097229003906, |
|
"logits/rejected": -3.0595924854278564, |
|
"logps/chosen": -415.1175842285156, |
|
"logps/rejected": -330.16680908203125, |
|
"loss": 0.1789, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.58966863155365, |
|
"rewards/margins": 6.8600358963012695, |
|
"rewards/rejected": -5.270366668701172, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7168784029038112, |
|
"grad_norm": 35.47381560617302, |
|
"learning_rate": 1.1263501775498438e-07, |
|
"logits/chosen": -3.1903061866760254, |
|
"logits/rejected": -3.108388900756836, |
|
"logps/chosen": -358.5484313964844, |
|
"logps/rejected": -358.26031494140625, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.6974457502365112, |
|
"rewards/margins": 4.846839427947998, |
|
"rewards/rejected": -4.1493940353393555, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7259528130671506, |
|
"grad_norm": 40.57531341722196, |
|
"learning_rate": 1.0608338969017682e-07, |
|
"logits/chosen": -3.199059009552002, |
|
"logits/rejected": -3.125755786895752, |
|
"logps/chosen": -452.31317138671875, |
|
"logps/rejected": -391.41912841796875, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.6242631673812866, |
|
"rewards/margins": 6.886317253112793, |
|
"rewards/rejected": -5.262054443359375, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.73502722323049, |
|
"grad_norm": 33.304705085054344, |
|
"learning_rate": 9.96763811689425e-08, |
|
"logits/chosen": -3.137302875518799, |
|
"logits/rejected": -3.026451826095581, |
|
"logps/chosen": -385.8514404296875, |
|
"logps/rejected": -368.15789794921875, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.4752086400985718, |
|
"rewards/margins": 6.488913059234619, |
|
"rewards/rejected": -5.013703346252441, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7441016333938294, |
|
"grad_norm": 47.310232796094745, |
|
"learning_rate": 9.3420430493615e-08, |
|
"logits/chosen": -3.0361504554748535, |
|
"logits/rejected": -2.984409809112549, |
|
"logps/chosen": -363.8970031738281, |
|
"logps/rejected": -331.4267272949219, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.094775676727295, |
|
"rewards/margins": 6.252727508544922, |
|
"rewards/rejected": -5.1579508781433105, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7531760435571688, |
|
"grad_norm": 36.729029651490606, |
|
"learning_rate": 8.732182417086903e-08, |
|
"logits/chosen": -3.07252836227417, |
|
"logits/rejected": -3.0885097980499268, |
|
"logps/chosen": -402.6160583496094, |
|
"logps/rejected": -361.8660888671875, |
|
"loss": 0.1873, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.8262115716934204, |
|
"rewards/margins": 7.684056282043457, |
|
"rewards/rejected": -5.857844829559326, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7622504537205081, |
|
"grad_norm": 38.26434214333302, |
|
"learning_rate": 8.138669059450778e-08, |
|
"logits/chosen": -3.0775837898254395, |
|
"logits/rejected": -3.071493625640869, |
|
"logps/chosen": -388.62445068359375, |
|
"logps/rejected": -359.02850341796875, |
|
"loss": 0.1844, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.0169572830200195, |
|
"rewards/margins": 6.112603187561035, |
|
"rewards/rejected": -5.095646858215332, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7713248638838476, |
|
"grad_norm": 46.50356797804205, |
|
"learning_rate": 7.562099388713702e-08, |
|
"logits/chosen": -3.1154263019561768, |
|
"logits/rejected": -3.1136341094970703, |
|
"logps/chosen": -388.99761962890625, |
|
"logps/rejected": -352.0494689941406, |
|
"loss": 0.1793, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4719061851501465, |
|
"rewards/margins": 6.116942882537842, |
|
"rewards/rejected": -4.6450371742248535, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7803992740471869, |
|
"grad_norm": 32.98126258960774, |
|
"learning_rate": 7.003052790691089e-08, |
|
"logits/chosen": -3.0627052783966064, |
|
"logits/rejected": -3.0874767303466797, |
|
"logps/chosen": -384.46795654296875, |
|
"logps/rejected": -338.3650817871094, |
|
"loss": 0.1731, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.2397066354751587, |
|
"rewards/margins": 6.408433437347412, |
|
"rewards/rejected": -5.168726921081543, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7894736842105263, |
|
"grad_norm": 44.77250114819241, |
|
"learning_rate": 6.462091042537576e-08, |
|
"logits/chosen": -3.144641876220703, |
|
"logits/rejected": -3.09205961227417, |
|
"logps/chosen": -467.0062561035156, |
|
"logps/rejected": -395.1363830566406, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.8845285177230835, |
|
"rewards/margins": 7.703507423400879, |
|
"rewards/rejected": -5.818978786468506, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7985480943738656, |
|
"grad_norm": 118.93536447485907, |
|
"learning_rate": 5.9397577482259043e-08, |
|
"logits/chosen": -3.0359034538269043, |
|
"logits/rejected": -3.035188674926758, |
|
"logps/chosen": -356.08587646484375, |
|
"logps/rejected": -359.81884765625, |
|
"loss": 0.1989, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.9912107586860657, |
|
"rewards/margins": 6.407823085784912, |
|
"rewards/rejected": -5.41661262512207, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8076225045372051, |
|
"grad_norm": 44.83410379465266, |
|
"learning_rate": 5.436577792287841e-08, |
|
"logits/chosen": -3.1080565452575684, |
|
"logits/rejected": -3.024658679962158, |
|
"logps/chosen": -350.00421142578125, |
|
"logps/rejected": -313.59234619140625, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.5829899311065674, |
|
"rewards/margins": 7.088255882263184, |
|
"rewards/rejected": -5.505266189575195, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8166969147005445, |
|
"grad_norm": 38.85789898863309, |
|
"learning_rate": 4.953056812365958e-08, |
|
"logits/chosen": -3.08073353767395, |
|
"logits/rejected": -3.0578064918518066, |
|
"logps/chosen": -371.7347717285156, |
|
"logps/rejected": -363.08514404296875, |
|
"loss": 0.1924, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.1432292461395264, |
|
"rewards/margins": 6.080376148223877, |
|
"rewards/rejected": -4.9371466636657715, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8257713248638838, |
|
"grad_norm": 30.66043802418784, |
|
"learning_rate": 4.489680691106279e-08, |
|
"logits/chosen": -3.1917724609375, |
|
"logits/rejected": -3.0947258472442627, |
|
"logps/chosen": -456.81982421875, |
|
"logps/rejected": -366.9622497558594, |
|
"loss": 0.1746, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.2509504556655884, |
|
"rewards/margins": 6.275618076324463, |
|
"rewards/rejected": -5.024667739868164, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8348457350272233, |
|
"grad_norm": 31.60875396882119, |
|
"learning_rate": 4.046915067902443e-08, |
|
"logits/chosen": -3.182232141494751, |
|
"logits/rejected": -3.031777858734131, |
|
"logps/chosen": -385.55963134765625, |
|
"logps/rejected": -349.5807800292969, |
|
"loss": 0.1717, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.7857484817504883, |
|
"rewards/margins": 7.652297019958496, |
|
"rewards/rejected": -5.866549491882324, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8439201451905626, |
|
"grad_norm": 44.88851150727403, |
|
"learning_rate": 3.625204870981974e-08, |
|
"logits/chosen": -3.136620044708252, |
|
"logits/rejected": -3.059222459793091, |
|
"logps/chosen": -377.5750427246094, |
|
"logps/rejected": -349.54327392578125, |
|
"loss": 0.1835, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.178045630455017, |
|
"rewards/margins": 6.5388946533203125, |
|
"rewards/rejected": -5.360849380493164, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.852994555353902, |
|
"grad_norm": 45.78958197714914, |
|
"learning_rate": 3.2249738703049175e-08, |
|
"logits/chosen": -3.097872257232666, |
|
"logits/rejected": -3.0364792346954346, |
|
"logps/chosen": -413.3067321777344, |
|
"logps/rejected": -384.83538818359375, |
|
"loss": 0.1759, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.3575388193130493, |
|
"rewards/margins": 6.335871696472168, |
|
"rewards/rejected": -4.978333473205566, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 35.84076730560574, |
|
"learning_rate": 2.8466242517240142e-08, |
|
"logits/chosen": -3.125441551208496, |
|
"logits/rejected": -3.012545108795166, |
|
"logps/chosen": -380.4359436035156, |
|
"logps/rejected": -364.7444152832031, |
|
"loss": 0.1642, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.327865719795227, |
|
"rewards/margins": 6.545771598815918, |
|
"rewards/rejected": -5.2179059982299805, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8711433756805808, |
|
"grad_norm": 44.73068485554119, |
|
"learning_rate": 2.4905362128344652e-08, |
|
"logits/chosen": -3.1192257404327393, |
|
"logits/rejected": -3.086743116378784, |
|
"logps/chosen": -386.00775146484375, |
|
"logps/rejected": -357.411865234375, |
|
"loss": 0.1977, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.0490394830703735, |
|
"rewards/margins": 5.79413366317749, |
|
"rewards/rejected": -4.745095252990723, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8802177858439202, |
|
"grad_norm": 46.24126513458063, |
|
"learning_rate": 2.1570675809193554e-08, |
|
"logits/chosen": -3.0832366943359375, |
|
"logits/rejected": -3.063671350479126, |
|
"logps/chosen": -352.98785400390625, |
|
"logps/rejected": -328.38775634765625, |
|
"loss": 0.1693, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.1795412302017212, |
|
"rewards/margins": 6.687762260437012, |
|
"rewards/rejected": -5.508220672607422, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8892921960072595, |
|
"grad_norm": 40.90013103941717, |
|
"learning_rate": 1.846553453374586e-08, |
|
"logits/chosen": -3.10916805267334, |
|
"logits/rejected": -3.096108913421631, |
|
"logps/chosen": -332.032958984375, |
|
"logps/rejected": -333.1717529296875, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.7762575149536133, |
|
"rewards/margins": 5.550572395324707, |
|
"rewards/rejected": -4.774314880371094, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8983666061705989, |
|
"grad_norm": 34.87323084074699, |
|
"learning_rate": 1.559305860974805e-08, |
|
"logits/chosen": -3.114400625228882, |
|
"logits/rejected": -3.0474765300750732, |
|
"logps/chosen": -375.05072021484375, |
|
"logps/rejected": -326.6473693847656, |
|
"loss": 0.1674, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.794228196144104, |
|
"rewards/margins": 7.251187324523926, |
|
"rewards/rejected": -5.456958293914795, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9074410163339383, |
|
"grad_norm": 68.79219384818164, |
|
"learning_rate": 1.2956134543185449e-08, |
|
"logits/chosen": -3.1376454830169678, |
|
"logits/rejected": -3.045968532562256, |
|
"logps/chosen": -385.92266845703125, |
|
"logps/rejected": -303.8971252441406, |
|
"loss": 0.1882, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.1436764001846313, |
|
"rewards/margins": 6.106300354003906, |
|
"rewards/rejected": -4.962623596191406, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9165154264972777, |
|
"grad_norm": 41.53283307570751, |
|
"learning_rate": 1.0557412137677884e-08, |
|
"logits/chosen": -3.101411819458008, |
|
"logits/rejected": -3.023592472076416, |
|
"logps/chosen": -395.31597900390625, |
|
"logps/rejected": -361.7346496582031, |
|
"loss": 0.1771, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.987866222858429, |
|
"rewards/margins": 5.9940056800842285, |
|
"rewards/rejected": -5.006139755249023, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.925589836660617, |
|
"grad_norm": 38.24151872414285, |
|
"learning_rate": 8.399301831733403e-09, |
|
"logits/chosen": -3.124439239501953, |
|
"logits/rejected": -3.0538604259490967, |
|
"logps/chosen": -365.45355224609375, |
|
"logps/rejected": -359.1190490722656, |
|
"loss": 0.1784, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.254525899887085, |
|
"rewards/margins": 6.763253688812256, |
|
"rewards/rejected": -5.508728981018066, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9346642468239564, |
|
"grad_norm": 42.11264132797008, |
|
"learning_rate": 6.483972276536576e-09, |
|
"logits/chosen": -3.1174588203430176, |
|
"logits/rejected": -3.0175864696502686, |
|
"logps/chosen": -428.16937255859375, |
|
"logps/rejected": -335.56707763671875, |
|
"loss": 0.1856, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.0685935020446777, |
|
"rewards/margins": 6.026675701141357, |
|
"rewards/rejected": -4.95808219909668, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9437386569872959, |
|
"grad_norm": 48.30353514815893, |
|
"learning_rate": 4.813348156704866e-09, |
|
"logits/chosen": -3.0982871055603027, |
|
"logits/rejected": -3.007462978363037, |
|
"logps/chosen": -365.95343017578125, |
|
"logps/rejected": -376.6304626464844, |
|
"loss": 0.1849, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.2602510452270508, |
|
"rewards/margins": 7.303647041320801, |
|
"rewards/rejected": -6.043395042419434, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9528130671506352, |
|
"grad_norm": 42.00692162522527, |
|
"learning_rate": 3.389108256203338e-09, |
|
"logits/chosen": -3.146289348602295, |
|
"logits/rejected": -2.9762940406799316, |
|
"logps/chosen": -395.01666259765625, |
|
"logps/rejected": -327.5099792480469, |
|
"loss": 0.1715, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2526695728302002, |
|
"rewards/margins": 6.668323516845703, |
|
"rewards/rejected": -5.415653705596924, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9618874773139746, |
|
"grad_norm": 65.97199815970751, |
|
"learning_rate": 2.2126837713609403e-09, |
|
"logits/chosen": -3.0963053703308105, |
|
"logits/rejected": -3.0015339851379395, |
|
"logps/chosen": -363.32708740234375, |
|
"logps/rejected": -339.29852294921875, |
|
"loss": 0.1755, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.2676588296890259, |
|
"rewards/margins": 6.559351444244385, |
|
"rewards/rejected": -5.29169225692749, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.9709618874773139, |
|
"grad_norm": 46.66363739025269, |
|
"learning_rate": 1.2852568726837987e-09, |
|
"logits/chosen": -3.1610124111175537, |
|
"logits/rejected": -3.092536449432373, |
|
"logps/chosen": -431.3716735839844, |
|
"logps/rejected": -377.6732177734375, |
|
"loss": 0.2216, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.3350967168807983, |
|
"rewards/margins": 6.358585834503174, |
|
"rewards/rejected": -5.023488998413086, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.9800362976406534, |
|
"grad_norm": 30.34640117074649, |
|
"learning_rate": 6.077595169105277e-10, |
|
"logits/chosen": -3.076800584793091, |
|
"logits/rejected": -3.0786032676696777, |
|
"logps/chosen": -358.31878662109375, |
|
"logps/rejected": -334.080810546875, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.380605936050415, |
|
"rewards/margins": 6.937164306640625, |
|
"rewards/rejected": -5.556557655334473, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9891107078039928, |
|
"grad_norm": 32.7478445581115, |
|
"learning_rate": 1.8087251050369344e-10, |
|
"logits/chosen": -3.1240782737731934, |
|
"logits/rejected": -3.0358004570007324, |
|
"logps/chosen": -377.9797668457031, |
|
"logps/rejected": -362.23028564453125, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.7212746143341064, |
|
"rewards/margins": 7.479613304138184, |
|
"rewards/rejected": -5.758338451385498, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9981851179673321, |
|
"grad_norm": 39.30875311875232, |
|
"learning_rate": 5.024825517951914e-12, |
|
"logits/chosen": -3.1050829887390137, |
|
"logits/rejected": -3.1044745445251465, |
|
"logps/chosen": -381.04168701171875, |
|
"logps/rejected": -363.96197509765625, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.0158889293670654, |
|
"rewards/margins": 5.7684783935546875, |
|
"rewards/rejected": -4.752589225769043, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1102, |
|
"total_flos": 0.0, |
|
"train_loss": 0.22562925693344074, |
|
"train_runtime": 6258.663, |
|
"train_samples_per_second": 11.267, |
|
"train_steps_per_second": 0.176 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1102, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|