|
{ |
|
"best_metric": 1.2113300561904907, |
|
"best_model_checkpoint": "cars-countries/checkpoint-78", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": Infinity, |
|
"learning_rate": 0.0, |
|
"loss": 1.781, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.921228408813477, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.5569, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 7.8902506828308105, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.5381, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.90084457397461, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 1.5947, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.790699005126953, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.3412, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 9.438414573669434, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.5891, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 14.554863929748535, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.5911, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 11.261236190795898, |
|
"learning_rate": 4.375e-05, |
|
"loss": 1.6023, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 10.684854507446289, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5315, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 8.2311429977417, |
|
"learning_rate": 4.928571428571429e-05, |
|
"loss": 1.4993, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 8.519936561584473, |
|
"learning_rate": 4.8571428571428576e-05, |
|
"loss": 1.5576, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 7.320441246032715, |
|
"learning_rate": 4.785714285714286e-05, |
|
"loss": 1.5395, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 19.024276733398438, |
|
"learning_rate": 4.714285714285714e-05, |
|
"loss": 1.5867, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 8.801590919494629, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 1.2448, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 8.235298156738281, |
|
"learning_rate": 4.5714285714285716e-05, |
|
"loss": 1.4907, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 9.275227546691895, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.4567, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 9.200013160705566, |
|
"learning_rate": 4.428571428571428e-05, |
|
"loss": 1.6051, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 7.5380120277404785, |
|
"learning_rate": 4.3571428571428576e-05, |
|
"loss": 1.3018, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.66368579864502, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.2577, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 6.4247260093688965, |
|
"learning_rate": 4.214285714285714e-05, |
|
"loss": 1.3549, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 7.779500484466553, |
|
"learning_rate": 4.1428571428571437e-05, |
|
"loss": 1.3177, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 9.328712463378906, |
|
"learning_rate": 4.0714285714285717e-05, |
|
"loss": 1.9277, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 9.957062721252441, |
|
"learning_rate": 4e-05, |
|
"loss": 1.543, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 11.664092063903809, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 1.4666, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.613643646240234, |
|
"learning_rate": 3.857142857142858e-05, |
|
"loss": 1.6037, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 16.03534698486328, |
|
"learning_rate": 3.785714285714286e-05, |
|
"loss": 1.2129, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.39215686274509803, |
|
"eval_f1_macro": 0.17172161172161174, |
|
"eval_f1_micro": 0.39215686274509803, |
|
"eval_f1_weighted": 0.26543130072541843, |
|
"eval_loss": 1.3392212390899658, |
|
"eval_precision_macro": 0.32999999999999996, |
|
"eval_precision_micro": 0.39215686274509803, |
|
"eval_precision_weighted": 0.4598039215686274, |
|
"eval_recall_macro": 0.2365079365079365, |
|
"eval_recall_micro": 0.39215686274509803, |
|
"eval_recall_weighted": 0.39215686274509803, |
|
"eval_runtime": 0.9569, |
|
"eval_samples_per_second": 53.295, |
|
"eval_steps_per_second": 4.18, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 7.75885009765625, |
|
"learning_rate": 3.7142857142857143e-05, |
|
"loss": 1.3228, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 8.9802827835083, |
|
"learning_rate": 3.642857142857143e-05, |
|
"loss": 1.2236, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 11.71562671661377, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 1.6211, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 8.007612228393555, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.0492, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 8.764470100402832, |
|
"learning_rate": 3.428571428571429e-05, |
|
"loss": 1.2495, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 9.003642082214355, |
|
"learning_rate": 3.357142857142857e-05, |
|
"loss": 1.4517, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 8.626983642578125, |
|
"learning_rate": 3.285714285714286e-05, |
|
"loss": 1.188, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 6.939526557922363, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 1.1346, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 8.789880752563477, |
|
"learning_rate": 3.142857142857143e-05, |
|
"loss": 1.4955, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 8.395410537719727, |
|
"learning_rate": 3.071428571428572e-05, |
|
"loss": 1.1301, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 9.601696968078613, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3088, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 9.192988395690918, |
|
"learning_rate": 2.9285714285714288e-05, |
|
"loss": 1.3069, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 10.932126998901367, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.5361, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 8.457172393798828, |
|
"learning_rate": 2.785714285714286e-05, |
|
"loss": 1.0766, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 8.215372085571289, |
|
"learning_rate": 2.714285714285714e-05, |
|
"loss": 1.1533, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 9.27467155456543, |
|
"learning_rate": 2.642857142857143e-05, |
|
"loss": 1.2253, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 8.538656234741211, |
|
"learning_rate": 2.5714285714285714e-05, |
|
"loss": 1.5666, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 9.004700660705566, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.2428, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 9.92335319519043, |
|
"learning_rate": 2.4285714285714288e-05, |
|
"loss": 1.3294, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 6.969778537750244, |
|
"learning_rate": 2.357142857142857e-05, |
|
"loss": 0.892, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 9.29692554473877, |
|
"learning_rate": 2.2857142857142858e-05, |
|
"loss": 0.8958, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 8.588207244873047, |
|
"learning_rate": 2.214285714285714e-05, |
|
"loss": 0.965, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 7.432168483734131, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 1.1908, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 8.40340518951416, |
|
"learning_rate": 2.0714285714285718e-05, |
|
"loss": 1.3141, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 11.749902725219727, |
|
"learning_rate": 2e-05, |
|
"loss": 1.664, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": Infinity, |
|
"learning_rate": 2e-05, |
|
"loss": 2.0425, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.47058823529411764, |
|
"eval_f1_macro": 0.30023902651021295, |
|
"eval_f1_micro": 0.47058823529411764, |
|
"eval_f1_weighted": 0.3905676133991189, |
|
"eval_loss": 1.2403396368026733, |
|
"eval_precision_macro": 0.4878048780487805, |
|
"eval_precision_micro": 0.47058823529411764, |
|
"eval_precision_weighted": 0.5961262553802008, |
|
"eval_recall_macro": 0.32301587301587303, |
|
"eval_recall_micro": 0.47058823529411764, |
|
"eval_recall_weighted": 0.47058823529411764, |
|
"eval_runtime": 0.9273, |
|
"eval_samples_per_second": 54.997, |
|
"eval_steps_per_second": 4.314, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 11.03630256652832, |
|
"learning_rate": 1.928571428571429e-05, |
|
"loss": 1.1866, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 7.543542861938477, |
|
"learning_rate": 1.8571428571428572e-05, |
|
"loss": 1.1695, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 8.078568458557129, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.9876, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 9.406330108642578, |
|
"learning_rate": 1.7142857142857145e-05, |
|
"loss": 1.4199, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 8.94027328491211, |
|
"learning_rate": 1.642857142857143e-05, |
|
"loss": 1.0628, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 7.433661937713623, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 1.0264, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 7.517592430114746, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.8727, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 11.105234146118164, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 1.0733, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 8.228996276855469, |
|
"learning_rate": 1.357142857142857e-05, |
|
"loss": 0.7755, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 10.02564525604248, |
|
"learning_rate": 1.2857142857142857e-05, |
|
"loss": 1.3597, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 7.33097505569458, |
|
"learning_rate": 1.2142857142857144e-05, |
|
"loss": 0.8739, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 11.741021156311035, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 1.3264, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 10.197936058044434, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 1.1468, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 9.967548370361328, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2385, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 6.901878356933594, |
|
"learning_rate": 9.285714285714286e-06, |
|
"loss": 0.7635, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 7.190940856933594, |
|
"learning_rate": 8.571428571428573e-06, |
|
"loss": 0.8255, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 9.144484519958496, |
|
"learning_rate": 7.857142857142858e-06, |
|
"loss": 1.0753, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 6.83949089050293, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 1.0511, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 11.242728233337402, |
|
"learning_rate": 6.428571428571429e-06, |
|
"loss": 1.6024, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 9.73320484161377, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 1.0241, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 13.551000595092773, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0143, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 9.739935874938965, |
|
"learning_rate": 4.285714285714286e-06, |
|
"loss": 1.528, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 7.910869598388672, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 0.9389, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 9.845719337463379, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 1.2907, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 6.709384441375732, |
|
"learning_rate": 2.142857142857143e-06, |
|
"loss": 1.0141, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 16.175195693969727, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 1.4561, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5098039215686274, |
|
"eval_f1_macro": 0.35240829346092506, |
|
"eval_f1_micro": 0.5098039215686274, |
|
"eval_f1_weighted": 0.43996622572473965, |
|
"eval_loss": 1.2113300561904907, |
|
"eval_precision_macro": 0.443963963963964, |
|
"eval_precision_micro": 0.5098039215686274, |
|
"eval_precision_weighted": 0.5037272566684332, |
|
"eval_recall_macro": 0.36746031746031743, |
|
"eval_recall_micro": 0.5098039215686274, |
|
"eval_recall_weighted": 0.5098039215686274, |
|
"eval_runtime": 0.9222, |
|
"eval_samples_per_second": 55.303, |
|
"eval_steps_per_second": 4.337, |
|
"step": 78 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 78, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 4.696140837323981e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|