|
{ |
|
"best_metric": 6.5507588386535645, |
|
"best_model_checkpoint": "poetry-author/checkpoint-4960", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 4960, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4193548387096776e-06, |
|
"loss": 7.2029, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.919354838709678e-06, |
|
"loss": 7.1816, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.419354838709678e-06, |
|
"loss": 7.1418, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.919354838709679e-06, |
|
"loss": 7.1678, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.2379032258064517e-05, |
|
"loss": 7.1274, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4879032258064519e-05, |
|
"loss": 7.1285, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.7379032258064517e-05, |
|
"loss": 7.108, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9879032258064516e-05, |
|
"loss": 7.0809, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2379032258064516e-05, |
|
"loss": 7.1026, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4879032258064516e-05, |
|
"loss": 7.0691, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.737903225806452e-05, |
|
"loss": 7.0714, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.9879032258064516e-05, |
|
"loss": 7.0847, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.2379032258064515e-05, |
|
"loss": 7.0491, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.487903225806452e-05, |
|
"loss": 7.0685, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7379032258064515e-05, |
|
"loss": 7.038, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.987903225806452e-05, |
|
"loss": 7.0614, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.2379032258064514e-05, |
|
"loss": 7.0234, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.487903225806452e-05, |
|
"loss": 6.9863, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.737903225806452e-05, |
|
"loss": 6.9664, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.987903225806452e-05, |
|
"loss": 6.9782, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.02056451612903226, |
|
"eval_f1_macro": 0.0006143466739070911, |
|
"eval_f1_micro": 0.02056451612903226, |
|
"eval_f1_weighted": 0.004146182182163288, |
|
"eval_loss": 6.874168395996094, |
|
"eval_precision_macro": 0.0003535209813895348, |
|
"eval_precision_micro": 0.02056451612903226, |
|
"eval_precision_weighted": 0.0024119848598185387, |
|
"eval_recall_macro": 0.0037010590753155195, |
|
"eval_recall_micro": 0.02056451612903226, |
|
"eval_recall_weighted": 0.02056451612903226, |
|
"eval_runtime": 5.5652, |
|
"eval_samples_per_second": 445.625, |
|
"eval_steps_per_second": 27.852, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.9735663082437276e-05, |
|
"loss": 6.9144, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.94578853046595e-05, |
|
"loss": 6.8315, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.9180107526881726e-05, |
|
"loss": 6.8663, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.89068100358423e-05, |
|
"loss": 6.837, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.862903225806452e-05, |
|
"loss": 6.7745, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.835125448028674e-05, |
|
"loss": 6.8747, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.807347670250896e-05, |
|
"loss": 6.8005, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.7795698924731186e-05, |
|
"loss": 6.8131, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.751792114695341e-05, |
|
"loss": 6.7554, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.724014336917563e-05, |
|
"loss": 6.774, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.696236559139785e-05, |
|
"loss": 6.7964, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.6684587813620074e-05, |
|
"loss": 6.8653, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.64068100358423e-05, |
|
"loss": 6.8209, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.612903225806452e-05, |
|
"loss": 6.7755, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.585125448028674e-05, |
|
"loss": 6.8384, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.557347670250896e-05, |
|
"loss": 6.7932, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.5295698924731187e-05, |
|
"loss": 6.8017, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.5017921146953405e-05, |
|
"loss": 6.7575, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.474014336917563e-05, |
|
"loss": 6.7087, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.4462365591397856e-05, |
|
"loss": 6.8306, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.027822580645161292, |
|
"eval_f1_macro": 0.002895880527713299, |
|
"eval_f1_micro": 0.027822580645161292, |
|
"eval_f1_weighted": 0.011518078961075304, |
|
"eval_loss": 6.696753025054932, |
|
"eval_precision_macro": 0.002399325952306467, |
|
"eval_precision_micro": 0.027822580645161292, |
|
"eval_precision_weighted": 0.008800287744676547, |
|
"eval_recall_macro": 0.005697363214948901, |
|
"eval_recall_micro": 0.027822580645161292, |
|
"eval_recall_weighted": 0.027822580645161292, |
|
"eval_runtime": 5.6109, |
|
"eval_samples_per_second": 441.997, |
|
"eval_steps_per_second": 27.625, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.4184587813620074e-05, |
|
"loss": 6.5756, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.390681003584229e-05, |
|
"loss": 6.5611, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.362903225806452e-05, |
|
"loss": 6.6159, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.335125448028674e-05, |
|
"loss": 6.5746, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.307347670250896e-05, |
|
"loss": 6.4498, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.279569892473119e-05, |
|
"loss": 6.5829, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.2517921146953405e-05, |
|
"loss": 6.4411, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.224014336917563e-05, |
|
"loss": 6.5321, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.196236559139785e-05, |
|
"loss": 6.5179, |
|
"step": 3038 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.1684587813620074e-05, |
|
"loss": 6.5047, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.14068100358423e-05, |
|
"loss": 6.5946, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.112903225806452e-05, |
|
"loss": 6.519, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 4.0851254480286736e-05, |
|
"loss": 6.4516, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.057347670250896e-05, |
|
"loss": 6.5272, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.029569892473119e-05, |
|
"loss": 6.5356, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.0017921146953405e-05, |
|
"loss": 6.5008, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.974014336917563e-05, |
|
"loss": 6.5357, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.94668458781362e-05, |
|
"loss": 6.5125, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.918906810035842e-05, |
|
"loss": 6.5182, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.891129032258065e-05, |
|
"loss": 6.4911, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.03951612903225806, |
|
"eval_f1_macro": 0.005772741775243502, |
|
"eval_f1_micro": 0.03951612903225806, |
|
"eval_f1_weighted": 0.01871310289711212, |
|
"eval_loss": 6.559847354888916, |
|
"eval_precision_macro": 0.005108167219150119, |
|
"eval_precision_micro": 0.03951612903225806, |
|
"eval_precision_weighted": 0.015116924332361547, |
|
"eval_recall_macro": 0.010902113654450544, |
|
"eval_recall_micro": 0.03951612903225806, |
|
"eval_recall_weighted": 0.03951612903225806, |
|
"eval_runtime": 5.604, |
|
"eval_samples_per_second": 442.538, |
|
"eval_steps_per_second": 27.659, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.863351254480287e-05, |
|
"loss": 6.111, |
|
"step": 3782 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.835573476702509e-05, |
|
"loss": 6.2635, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.8077956989247316e-05, |
|
"loss": 6.1807, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.780017921146954e-05, |
|
"loss": 6.2969, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.752240143369175e-05, |
|
"loss": 6.1509, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.724462365591398e-05, |
|
"loss": 6.1196, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.69668458781362e-05, |
|
"loss": 6.1425, |
|
"step": 4154 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.668906810035843e-05, |
|
"loss": 6.1487, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.641129032258065e-05, |
|
"loss": 6.1336, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.6133512544802866e-05, |
|
"loss": 6.1035, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.585573476702509e-05, |
|
"loss": 6.168, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3.5577956989247316e-05, |
|
"loss": 6.283, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.5300179211469535e-05, |
|
"loss": 6.146, |
|
"step": 4526 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.502240143369176e-05, |
|
"loss": 6.1099, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 3.4744623655913985e-05, |
|
"loss": 6.2278, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3.4466845878136204e-05, |
|
"loss": 6.0957, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.418906810035842e-05, |
|
"loss": 6.0431, |
|
"step": 4774 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 3.391129032258065e-05, |
|
"loss": 6.0954, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 3.363351254480287e-05, |
|
"loss": 6.1678, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.335573476702509e-05, |
|
"loss": 6.1257, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.04596774193548387, |
|
"eval_f1_macro": 0.007553613837545024, |
|
"eval_f1_micro": 0.04596774193548386, |
|
"eval_f1_weighted": 0.02342410713485415, |
|
"eval_loss": 6.5507588386535645, |
|
"eval_precision_macro": 0.0064405619945614515, |
|
"eval_precision_micro": 0.04596774193548387, |
|
"eval_precision_weighted": 0.020352886732684962, |
|
"eval_recall_macro": 0.017759772448305164, |
|
"eval_recall_micro": 0.04596774193548387, |
|
"eval_recall_weighted": 0.04596774193548387, |
|
"eval_runtime": 5.6061, |
|
"eval_samples_per_second": 442.376, |
|
"eval_steps_per_second": 27.649, |
|
"step": 4960 |
|
} |
|
], |
|
"logging_steps": 62, |
|
"max_steps": 12400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2638150224863232.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|