{
  "best_metric": 0.7976659536361694,
  "best_model_checkpoint": "sbert-ru-huawei-rureviews-no-duplicates/checkpoint-589",
  "epoch": 0.9987282746926663,
  "eval_steps": 500,
  "global_step": 589,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0423908435777872,
      "grad_norm": 18.781570434570312,
      "learning_rate": 7.062146892655368e-06,
      "loss": 1.5601,
      "step": 25
    },
    {
      "epoch": 0.0847816871555744,
      "grad_norm": 12.51102066040039,
      "learning_rate": 1.4124293785310736e-05,
      "loss": 1.0521,
      "step": 50
    },
    {
      "epoch": 0.1271725307333616,
      "grad_norm": 24.600730895996094,
      "learning_rate": 2.1186440677966103e-05,
      "loss": 0.8671,
      "step": 75
    },
    {
      "epoch": 0.1695633743111488,
      "grad_norm": 14.450693130493164,
      "learning_rate": 2.8248587570621472e-05,
      "loss": 0.8607,
      "step": 100
    },
    {
      "epoch": 0.21195421788893598,
      "grad_norm": 15.730051040649414,
      "learning_rate": 3.531073446327684e-05,
      "loss": 0.8658,
      "step": 125
    },
    {
      "epoch": 0.2543450614667232,
      "grad_norm": 15.596332550048828,
      "learning_rate": 4.2372881355932206e-05,
      "loss": 0.8358,
      "step": 150
    },
    {
      "epoch": 0.29673590504451036,
      "grad_norm": 14.379067420959473,
      "learning_rate": 4.9435028248587575e-05,
      "loss": 0.8535,
      "step": 175
    },
    {
      "epoch": 0.3391267486222976,
      "grad_norm": 14.174772262573242,
      "learning_rate": 4.927672955974843e-05,
      "loss": 0.8428,
      "step": 200
    },
    {
      "epoch": 0.3815175922000848,
      "grad_norm": 15.868408203125,
      "learning_rate": 4.849056603773585e-05,
      "loss": 0.8436,
      "step": 225
    },
    {
      "epoch": 0.42390843577787196,
      "grad_norm": 12.947571754455566,
      "learning_rate": 4.7704402515723276e-05,
      "loss": 0.8498,
      "step": 250
    },
    {
      "epoch": 0.4662992793556592,
      "grad_norm": 12.922489166259766,
      "learning_rate": 4.691823899371069e-05,
      "loss": 0.8728,
      "step": 275
    },
    {
      "epoch": 0.5086901229334464,
      "grad_norm": 16.116744995117188,
      "learning_rate": 4.616352201257862e-05,
      "loss": 0.8058,
      "step": 300
    },
    {
      "epoch": 0.5510809665112336,
      "grad_norm": 13.213577270507812,
      "learning_rate": 4.537735849056604e-05,
      "loss": 0.8092,
      "step": 325
    },
    {
      "epoch": 0.5934718100890207,
      "grad_norm": 13.749724388122559,
      "learning_rate": 4.459119496855346e-05,
      "loss": 0.8277,
      "step": 350
    },
    {
      "epoch": 0.6358626536668079,
      "grad_norm": 14.099920272827148,
      "learning_rate": 4.3805031446540885e-05,
      "loss": 0.8172,
      "step": 375
    },
    {
      "epoch": 0.6782534972445952,
      "grad_norm": 11.363203048706055,
      "learning_rate": 4.301886792452831e-05,
      "loss": 0.8397,
      "step": 400
    },
    {
      "epoch": 0.7206443408223824,
      "grad_norm": 13.129080772399902,
      "learning_rate": 4.2232704402515726e-05,
      "loss": 0.8768,
      "step": 425
    },
    {
      "epoch": 0.7630351844001696,
      "grad_norm": 13.105059623718262,
      "learning_rate": 4.1477987421383654e-05,
      "loss": 0.8453,
      "step": 450
    },
    {
      "epoch": 0.8054260279779568,
      "grad_norm": 17.31393814086914,
      "learning_rate": 4.069182389937107e-05,
      "loss": 0.8207,
      "step": 475
    },
    {
      "epoch": 0.8478168715557439,
      "grad_norm": 15.73338794708252,
      "learning_rate": 3.9905660377358494e-05,
      "loss": 0.8093,
      "step": 500
    },
    {
      "epoch": 0.8902077151335311,
      "grad_norm": 10.630894660949707,
      "learning_rate": 3.911949685534592e-05,
      "loss": 0.7984,
      "step": 525
    },
    {
      "epoch": 0.9325985587113184,
      "grad_norm": 14.764472961425781,
      "learning_rate": 3.8333333333333334e-05,
      "loss": 0.7914,
      "step": 550
    },
    {
      "epoch": 0.9749894022891056,
      "grad_norm": 14.726776123046875,
      "learning_rate": 3.754716981132076e-05,
      "loss": 0.7818,
      "step": 575
    },
    {
      "epoch": 0.9987282746926663,
      "eval_accuracy": 0.6771595124536302,
      "eval_f1_macro": 0.47781142535236576,
      "eval_f1_micro": 0.6771595124536302,
      "eval_f1_weighted": 0.6542674498710257,
      "eval_loss": 0.7976659536361694,
      "eval_precision_macro": 0.6650293707575838,
      "eval_precision_micro": 0.6771595124536302,
      "eval_precision_weighted": 0.6930699230580497,
      "eval_recall_macro": 0.5021643205507915,
      "eval_recall_micro": 0.6771595124536302,
      "eval_recall_weighted": 0.6771595124536302,
      "eval_runtime": 60.5427,
      "eval_samples_per_second": 155.84,
      "eval_steps_per_second": 4.873,
      "step": 589
    }
  ],
  "logging_steps": 25,
  "max_steps": 1767,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7587586671312896e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}