|
{
  "best_metric": 0.13600419461727142,
  "best_model_checkpoint": "ghgmetricsv1/checkpoint-205",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 205,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 2.9317479133605957,
      "learning_rate": 9.523809523809525e-07,
      "loss": 1.3932,
      "step": 2
    },
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 3.9777166843414307,
      "learning_rate": 1.904761904761905e-06,
      "loss": 1.3836,
      "step": 4
    },
    {
      "epoch": 0.14634146341463414,
      "grad_norm": 1.704893946647644,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 1.3873,
      "step": 6
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 6.335275650024414,
      "learning_rate": 3.80952380952381e-06,
      "loss": 1.3632,
      "step": 8
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 4.805781841278076,
      "learning_rate": 4.761904761904762e-06,
      "loss": 1.3751,
      "step": 10
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 3.296025037765503,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 1.3728,
      "step": 12
    },
    {
      "epoch": 0.34146341463414637,
      "grad_norm": 5.443967819213867,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.3971,
      "step": 14
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 2.8683745861053467,
      "learning_rate": 7.61904761904762e-06,
      "loss": 1.3835,
      "step": 16
    },
    {
      "epoch": 0.43902439024390244,
      "grad_norm": 3.625200033187866,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.3608,
      "step": 18
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 3.082287549972534,
      "learning_rate": 9.523809523809525e-06,
      "loss": 1.3707,
      "step": 20
    },
    {
      "epoch": 0.5365853658536586,
      "grad_norm": 3.2846016883850098,
      "learning_rate": 9.945652173913044e-06,
      "loss": 1.3676,
      "step": 22
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 2.4396913051605225,
      "learning_rate": 9.836956521739131e-06,
      "loss": 1.3729,
      "step": 24
    },
    {
      "epoch": 0.6341463414634146,
      "grad_norm": 3.262892246246338,
      "learning_rate": 9.728260869565218e-06,
      "loss": 1.3281,
      "step": 26
    },
    {
      "epoch": 0.6829268292682927,
      "grad_norm": 3.9962172508239746,
      "learning_rate": 9.619565217391305e-06,
      "loss": 1.3489,
      "step": 28
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 3.4944751262664795,
      "learning_rate": 9.510869565217392e-06,
      "loss": 1.3397,
      "step": 30
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 3.8272864818573,
      "learning_rate": 9.402173913043479e-06,
      "loss": 1.3642,
      "step": 32
    },
    {
      "epoch": 0.8292682926829268,
      "grad_norm": 3.0262584686279297,
      "learning_rate": 9.293478260869566e-06,
      "loss": 1.2988,
      "step": 34
    },
    {
      "epoch": 0.8780487804878049,
      "grad_norm": 3.4564623832702637,
      "learning_rate": 9.184782608695653e-06,
      "loss": 1.3577,
      "step": 36
    },
    {
      "epoch": 0.926829268292683,
      "grad_norm": 3.0925350189208984,
      "learning_rate": 9.07608695652174e-06,
      "loss": 1.3337,
      "step": 38
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 5.996058940887451,
      "learning_rate": 8.967391304347827e-06,
      "loss": 1.2585,
      "step": 40
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6951219512195121,
      "eval_f1_macro": 0.6114890400604687,
      "eval_f1_micro": 0.6951219512195121,
      "eval_f1_weighted": 0.60425308334716,
      "eval_loss": 1.2632431983947754,
      "eval_precision_macro": 0.5512016718913271,
      "eval_precision_micro": 0.6951219512195121,
      "eval_precision_weighted": 0.5440400642250938,
      "eval_recall_macro": 0.7023809523809523,
      "eval_recall_micro": 0.6951219512195121,
      "eval_recall_weighted": 0.6951219512195121,
      "eval_runtime": 0.4935,
      "eval_samples_per_second": 166.174,
      "eval_steps_per_second": 12.159,
      "step": 41
    },
    {
      "epoch": 1.024390243902439,
      "grad_norm": 3.56406307220459,
      "learning_rate": 8.858695652173914e-06,
      "loss": 1.2581,
      "step": 42
    },
    {
      "epoch": 1.0731707317073171,
      "grad_norm": 4.123223781585693,
      "learning_rate": 8.750000000000001e-06,
      "loss": 1.2903,
      "step": 44
    },
    {
      "epoch": 1.1219512195121952,
      "grad_norm": 5.944389343261719,
      "learning_rate": 8.641304347826088e-06,
      "loss": 1.2028,
      "step": 46
    },
    {
      "epoch": 1.170731707317073,
      "grad_norm": 6.736316680908203,
      "learning_rate": 8.532608695652175e-06,
      "loss": 1.1808,
      "step": 48
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 5.414186000823975,
      "learning_rate": 8.423913043478262e-06,
      "loss": 1.2169,
      "step": 50
    },
    {
      "epoch": 1.2682926829268293,
      "grad_norm": 4.436485767364502,
      "learning_rate": 8.315217391304349e-06,
      "loss": 1.0759,
      "step": 52
    },
    {
      "epoch": 1.3170731707317074,
      "grad_norm": 5.241353511810303,
      "learning_rate": 8.206521739130436e-06,
      "loss": 0.969,
      "step": 54
    },
    {
      "epoch": 1.3658536585365852,
      "grad_norm": 5.724743843078613,
      "learning_rate": 8.097826086956523e-06,
      "loss": 1.0252,
      "step": 56
    },
    {
      "epoch": 1.4146341463414633,
      "grad_norm": 7.156280517578125,
      "learning_rate": 7.98913043478261e-06,
      "loss": 0.9933,
      "step": 58
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 5.4803032875061035,
      "learning_rate": 7.880434782608695e-06,
      "loss": 1.0366,
      "step": 60
    },
    {
      "epoch": 1.5121951219512195,
      "grad_norm": 6.876482009887695,
      "learning_rate": 7.771739130434784e-06,
      "loss": 0.9037,
      "step": 62
    },
    {
      "epoch": 1.5609756097560976,
      "grad_norm": 6.233717918395996,
      "learning_rate": 7.66304347826087e-06,
      "loss": 0.9415,
      "step": 64
    },
    {
      "epoch": 1.6097560975609757,
      "grad_norm": 4.58766508102417,
      "learning_rate": 7.5543478260869576e-06,
      "loss": 0.8246,
      "step": 66
    },
    {
      "epoch": 1.6585365853658538,
      "grad_norm": 3.917952537536621,
      "learning_rate": 7.445652173913044e-06,
      "loss": 0.7553,
      "step": 68
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 6.8940110206604,
      "learning_rate": 7.3369565217391315e-06,
      "loss": 0.8116,
      "step": 70
    },
    {
      "epoch": 1.7560975609756098,
      "grad_norm": 5.17503023147583,
      "learning_rate": 7.228260869565218e-06,
      "loss": 0.7152,
      "step": 72
    },
    {
      "epoch": 1.8048780487804879,
      "grad_norm": 4.659473419189453,
      "learning_rate": 7.119565217391305e-06,
      "loss": 0.6917,
      "step": 74
    },
    {
      "epoch": 1.8536585365853657,
      "grad_norm": 4.061087608337402,
      "learning_rate": 7.0108695652173915e-06,
      "loss": 0.6462,
      "step": 76
    },
    {
      "epoch": 1.9024390243902438,
      "grad_norm": 4.6313042640686035,
      "learning_rate": 6.9021739130434785e-06,
      "loss": 0.542,
      "step": 78
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 4.6593337059021,
      "learning_rate": 6.793478260869566e-06,
      "loss": 0.4713,
      "step": 80
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.978389739990234,
      "learning_rate": 6.6847826086956524e-06,
      "loss": 0.5677,
      "step": 82
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9512195121951219,
      "eval_f1_macro": 0.950956937799043,
      "eval_f1_micro": 0.9512195121951219,
      "eval_f1_weighted": 0.9508694130003501,
      "eval_loss": 0.47162312269210815,
      "eval_precision_macro": 0.9583333333333334,
      "eval_precision_micro": 0.9512195121951219,
      "eval_precision_weighted": 0.9593495934959351,
      "eval_recall_macro": 0.9523809523809523,
      "eval_recall_micro": 0.9512195121951219,
      "eval_recall_weighted": 0.9512195121951219,
      "eval_runtime": 0.4717,
      "eval_samples_per_second": 173.839,
      "eval_steps_per_second": 12.72,
      "step": 82
    },
    {
      "epoch": 2.048780487804878,
      "grad_norm": 7.042616844177246,
      "learning_rate": 6.57608695652174e-06,
      "loss": 0.616,
      "step": 84
    },
    {
      "epoch": 2.097560975609756,
      "grad_norm": 3.9137861728668213,
      "learning_rate": 6.467391304347826e-06,
      "loss": 0.4503,
      "step": 86
    },
    {
      "epoch": 2.1463414634146343,
      "grad_norm": 4.030788898468018,
      "learning_rate": 6.358695652173914e-06,
      "loss": 0.4646,
      "step": 88
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 3.972332000732422,
      "learning_rate": 6.25e-06,
      "loss": 0.3834,
      "step": 90
    },
    {
      "epoch": 2.2439024390243905,
      "grad_norm": 4.146308422088623,
      "learning_rate": 6.141304347826087e-06,
      "loss": 0.4568,
      "step": 92
    },
    {
      "epoch": 2.292682926829268,
      "grad_norm": 5.081860542297363,
      "learning_rate": 6.032608695652174e-06,
      "loss": 0.3908,
      "step": 94
    },
    {
      "epoch": 2.341463414634146,
      "grad_norm": 4.0336737632751465,
      "learning_rate": 5.923913043478261e-06,
      "loss": 0.3683,
      "step": 96
    },
    {
      "epoch": 2.3902439024390243,
      "grad_norm": 4.34877347946167,
      "learning_rate": 5.815217391304349e-06,
      "loss": 0.3549,
      "step": 98
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 4.02614688873291,
      "learning_rate": 5.706521739130435e-06,
      "loss": 0.2872,
      "step": 100
    },
    {
      "epoch": 2.4878048780487805,
      "grad_norm": 3.376811981201172,
      "learning_rate": 5.597826086956523e-06,
      "loss": 0.2727,
      "step": 102
    },
    {
      "epoch": 2.5365853658536586,
      "grad_norm": 3.333051919937134,
      "learning_rate": 5.489130434782609e-06,
      "loss": 0.4225,
      "step": 104
    },
    {
      "epoch": 2.5853658536585367,
      "grad_norm": 5.0091233253479,
      "learning_rate": 5.380434782608695e-06,
      "loss": 0.3101,
      "step": 106
    },
    {
      "epoch": 2.6341463414634148,
      "grad_norm": 2.833436965942383,
      "learning_rate": 5.271739130434783e-06,
      "loss": 0.2518,
      "step": 108
    },
    {
      "epoch": 2.682926829268293,
      "grad_norm": 5.041337966918945,
      "learning_rate": 5.16304347826087e-06,
      "loss": 0.2779,
      "step": 110
    },
    {
      "epoch": 2.7317073170731705,
      "grad_norm": 2.2699122428894043,
      "learning_rate": 5.054347826086957e-06,
      "loss": 0.2463,
      "step": 112
    },
    {
      "epoch": 2.7804878048780486,
      "grad_norm": 4.098880290985107,
      "learning_rate": 4.945652173913044e-06,
      "loss": 0.2166,
      "step": 114
    },
    {
      "epoch": 2.8292682926829267,
      "grad_norm": 2.4703168869018555,
      "learning_rate": 4.836956521739131e-06,
      "loss": 0.208,
      "step": 116
    },
    {
      "epoch": 2.8780487804878048,
      "grad_norm": 1.5095465183258057,
      "learning_rate": 4.728260869565218e-06,
      "loss": 0.162,
      "step": 118
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 2.051690101623535,
      "learning_rate": 4.619565217391305e-06,
      "loss": 0.1421,
      "step": 120
    },
    {
      "epoch": 2.975609756097561,
      "grad_norm": 3.00022292137146,
      "learning_rate": 4.510869565217392e-06,
      "loss": 0.1608,
      "step": 122
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9512195121951219,
      "eval_f1_macro": 0.950956937799043,
      "eval_f1_micro": 0.9512195121951219,
      "eval_f1_weighted": 0.9508694130003501,
      "eval_loss": 0.168848916888237,
      "eval_precision_macro": 0.9583333333333334,
      "eval_precision_micro": 0.9512195121951219,
      "eval_precision_weighted": 0.9593495934959351,
      "eval_recall_macro": 0.9523809523809523,
      "eval_recall_micro": 0.9512195121951219,
      "eval_recall_weighted": 0.9512195121951219,
      "eval_runtime": 0.4909,
      "eval_samples_per_second": 167.047,
      "eval_steps_per_second": 12.223,
      "step": 123
    },
    {
      "epoch": 3.024390243902439,
      "grad_norm": 3.7563741207122803,
      "learning_rate": 4.402173913043479e-06,
      "loss": 0.1614,
      "step": 124
    },
    {
      "epoch": 3.073170731707317,
      "grad_norm": 2.9541540145874023,
      "learning_rate": 4.293478260869566e-06,
      "loss": 0.1264,
      "step": 126
    },
    {
      "epoch": 3.1219512195121952,
      "grad_norm": 6.017066955566406,
      "learning_rate": 4.184782608695653e-06,
      "loss": 0.222,
      "step": 128
    },
    {
      "epoch": 3.1707317073170733,
      "grad_norm": 1.9936599731445312,
      "learning_rate": 4.07608695652174e-06,
      "loss": 0.1367,
      "step": 130
    },
    {
      "epoch": 3.2195121951219514,
      "grad_norm": 1.2631906270980835,
      "learning_rate": 3.967391304347827e-06,
      "loss": 0.1826,
      "step": 132
    },
    {
      "epoch": 3.2682926829268295,
      "grad_norm": 1.4446691274642944,
      "learning_rate": 3.8586956521739136e-06,
      "loss": 0.1043,
      "step": 134
    },
    {
      "epoch": 3.317073170731707,
      "grad_norm": 2.9397401809692383,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.115,
      "step": 136
    },
    {
      "epoch": 3.3658536585365852,
      "grad_norm": 4.312378883361816,
      "learning_rate": 3.6413043478260875e-06,
      "loss": 0.2012,
      "step": 138
    },
    {
      "epoch": 3.4146341463414633,
      "grad_norm": 3.6363883018493652,
      "learning_rate": 3.5326086956521745e-06,
      "loss": 0.1988,
      "step": 140
    },
    {
      "epoch": 3.4634146341463414,
      "grad_norm": 0.9747779369354248,
      "learning_rate": 3.4239130434782614e-06,
      "loss": 0.1,
      "step": 142
    },
    {
      "epoch": 3.5121951219512195,
      "grad_norm": 1.1869757175445557,
      "learning_rate": 3.315217391304348e-06,
      "loss": 0.0899,
      "step": 144
    },
    {
      "epoch": 3.5609756097560976,
      "grad_norm": 4.576517581939697,
      "learning_rate": 3.206521739130435e-06,
      "loss": 0.1466,
      "step": 146
    },
    {
      "epoch": 3.6097560975609757,
      "grad_norm": 0.8520755171775818,
      "learning_rate": 3.097826086956522e-06,
      "loss": 0.0733,
      "step": 148
    },
    {
      "epoch": 3.658536585365854,
      "grad_norm": 0.8718931078910828,
      "learning_rate": 2.989130434782609e-06,
      "loss": 0.0739,
      "step": 150
    },
    {
      "epoch": 3.7073170731707314,
      "grad_norm": 5.841705322265625,
      "learning_rate": 2.880434782608696e-06,
      "loss": 0.1244,
      "step": 152
    },
    {
      "epoch": 3.7560975609756095,
      "grad_norm": 1.4700547456741333,
      "learning_rate": 2.771739130434783e-06,
      "loss": 0.0914,
      "step": 154
    },
    {
      "epoch": 3.8048780487804876,
      "grad_norm": 1.7812182903289795,
      "learning_rate": 2.6630434782608698e-06,
      "loss": 0.3075,
      "step": 156
    },
    {
      "epoch": 3.8536585365853657,
      "grad_norm": 1.647403597831726,
      "learning_rate": 2.554347826086957e-06,
      "loss": 0.0708,
      "step": 158
    },
    {
      "epoch": 3.902439024390244,
      "grad_norm": 5.995601177215576,
      "learning_rate": 2.4456521739130437e-06,
      "loss": 0.2148,
      "step": 160
    },
    {
      "epoch": 3.951219512195122,
      "grad_norm": 0.676275372505188,
      "learning_rate": 2.3369565217391307e-06,
      "loss": 0.0611,
      "step": 162
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.365096092224121,
      "learning_rate": 2.2282608695652176e-06,
      "loss": 0.0752,
      "step": 164
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9512195121951219,
      "eval_f1_macro": 0.950956937799043,
      "eval_f1_micro": 0.9512195121951219,
      "eval_f1_weighted": 0.9508694130003501,
      "eval_loss": 0.1366794854402542,
      "eval_precision_macro": 0.9583333333333334,
      "eval_precision_micro": 0.9512195121951219,
      "eval_precision_weighted": 0.9593495934959351,
      "eval_recall_macro": 0.9523809523809523,
      "eval_recall_micro": 0.9512195121951219,
      "eval_recall_weighted": 0.9512195121951219,
      "eval_runtime": 0.4791,
      "eval_samples_per_second": 171.169,
      "eval_steps_per_second": 12.525,
      "step": 164
    },
    {
      "epoch": 4.048780487804878,
      "grad_norm": 0.8058091402053833,
      "learning_rate": 2.1195652173913046e-06,
      "loss": 0.0717,
      "step": 166
    },
    {
      "epoch": 4.097560975609756,
      "grad_norm": 1.4378560781478882,
      "learning_rate": 2.0108695652173916e-06,
      "loss": 0.1008,
      "step": 168
    },
    {
      "epoch": 4.146341463414634,
      "grad_norm": 4.043826103210449,
      "learning_rate": 1.9021739130434785e-06,
      "loss": 0.1217,
      "step": 170
    },
    {
      "epoch": 4.195121951219512,
      "grad_norm": 0.8742417097091675,
      "learning_rate": 1.7934782608695653e-06,
      "loss": 0.1098,
      "step": 172
    },
    {
      "epoch": 4.2439024390243905,
      "grad_norm": 2.178234577178955,
      "learning_rate": 1.6847826086956522e-06,
      "loss": 0.1247,
      "step": 174
    },
    {
      "epoch": 4.2926829268292686,
      "grad_norm": 0.7950467467308044,
      "learning_rate": 1.5760869565217394e-06,
      "loss": 0.0651,
      "step": 176
    },
    {
      "epoch": 4.341463414634147,
      "grad_norm": 0.954619824886322,
      "learning_rate": 1.4673913043478264e-06,
      "loss": 0.0552,
      "step": 178
    },
    {
      "epoch": 4.390243902439025,
      "grad_norm": 6.469586372375488,
      "learning_rate": 1.3586956521739131e-06,
      "loss": 0.2431,
      "step": 180
    },
    {
      "epoch": 4.439024390243903,
      "grad_norm": 0.6579374074935913,
      "learning_rate": 1.25e-06,
      "loss": 0.0656,
      "step": 182
    },
    {
      "epoch": 4.487804878048781,
      "grad_norm": 4.1302361488342285,
      "learning_rate": 1.141304347826087e-06,
      "loss": 0.1081,
      "step": 184
    },
    {
      "epoch": 4.536585365853659,
      "grad_norm": 1.015049934387207,
      "learning_rate": 1.032608695652174e-06,
      "loss": 0.1111,
      "step": 186
    },
    {
      "epoch": 4.585365853658536,
      "grad_norm": 0.679076611995697,
      "learning_rate": 9.239130434782609e-07,
      "loss": 0.0595,
      "step": 188
    },
    {
      "epoch": 4.634146341463414,
      "grad_norm": 1.741278052330017,
      "learning_rate": 8.152173913043479e-07,
      "loss": 0.1325,
      "step": 190
    },
    {
      "epoch": 4.682926829268292,
      "grad_norm": 5.028622150421143,
      "learning_rate": 7.065217391304348e-07,
      "loss": 0.173,
      "step": 192
    },
    {
      "epoch": 4.7317073170731705,
      "grad_norm": 0.735034704208374,
      "learning_rate": 5.978260869565218e-07,
      "loss": 0.0457,
      "step": 194
    },
    {
      "epoch": 4.780487804878049,
      "grad_norm": 7.007546424865723,
      "learning_rate": 4.891304347826088e-07,
      "loss": 0.1926,
      "step": 196
    },
    {
      "epoch": 4.829268292682927,
      "grad_norm": 0.8504419922828674,
      "learning_rate": 3.804347826086957e-07,
      "loss": 0.1623,
      "step": 198
    },
    {
      "epoch": 4.878048780487805,
      "grad_norm": 1.4162288904190063,
      "learning_rate": 2.7173913043478264e-07,
      "loss": 0.0726,
      "step": 200
    },
    {
      "epoch": 4.926829268292683,
      "grad_norm": 0.7092806696891785,
      "learning_rate": 1.6304347826086958e-07,
      "loss": 0.0515,
      "step": 202
    },
    {
      "epoch": 4.975609756097561,
      "grad_norm": 0.7422751784324646,
      "learning_rate": 5.4347826086956524e-08,
      "loss": 0.0845,
      "step": 204
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9512195121951219,
      "eval_f1_macro": 0.950956937799043,
      "eval_f1_micro": 0.9512195121951219,
      "eval_f1_weighted": 0.9508694130003501,
      "eval_loss": 0.13600419461727142,
      "eval_precision_macro": 0.9583333333333334,
      "eval_precision_micro": 0.9512195121951219,
      "eval_precision_weighted": 0.9593495934959351,
      "eval_recall_macro": 0.9523809523809523,
      "eval_recall_micro": 0.9512195121951219,
      "eval_recall_weighted": 0.9512195121951219,
      "eval_runtime": 0.491,
      "eval_samples_per_second": 167.022,
      "eval_steps_per_second": 12.221,
      "step": 205
    }
  ],
  "logging_steps": 2,
  "max_steps": 205,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 215929561128960.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}