|
{ |
|
"best_metric": 19.7665, |
|
"best_model_checkpoint": "bin/liputan6-pt-pl50/checkpoint-252", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0756858587265015, |
|
"learning_rate": 0.0008, |
|
"loss": 4.7245, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 30.652, |
|
"eval_loss": 3.9912047386169434, |
|
"eval_rouge1": 16.8276, |
|
"eval_rouge2": 3.6927, |
|
"eval_rougeL": 14.367, |
|
"eval_rougeLsum": 15.3151, |
|
"eval_runtime": 761.8947, |
|
"eval_samples_per_second": 1.313, |
|
"eval_steps_per_second": 0.042, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0456533432006836, |
|
"learning_rate": 0.0006, |
|
"loss": 3.9104, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 35.104, |
|
"eval_loss": 3.8609139919281006, |
|
"eval_rouge1": 17.712, |
|
"eval_rouge2": 4.2061, |
|
"eval_rougeL": 14.9465, |
|
"eval_rougeLsum": 15.9818, |
|
"eval_runtime": 1158.8796, |
|
"eval_samples_per_second": 0.863, |
|
"eval_steps_per_second": 0.028, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.1262755393981934, |
|
"learning_rate": 0.0004, |
|
"loss": 3.6651, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 30.749, |
|
"eval_loss": 3.8036019802093506, |
|
"eval_rouge1": 18.8508, |
|
"eval_rouge2": 4.6943, |
|
"eval_rougeL": 15.8363, |
|
"eval_rougeLsum": 17.0134, |
|
"eval_runtime": 740.0114, |
|
"eval_samples_per_second": 1.351, |
|
"eval_steps_per_second": 0.043, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.2107006311416626, |
|
"learning_rate": 0.0002, |
|
"loss": 3.4442, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 28.31, |
|
"eval_loss": 3.7532596588134766, |
|
"eval_rouge1": 19.7665, |
|
"eval_rouge2": 5.1425, |
|
"eval_rougeL": 16.7615, |
|
"eval_rougeLsum": 18.1456, |
|
"eval_runtime": 520.3377, |
|
"eval_samples_per_second": 1.922, |
|
"eval_steps_per_second": 0.061, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.1439872980117798, |
|
"learning_rate": 0.0, |
|
"loss": 3.2664, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 29.142, |
|
"eval_loss": 3.7381248474121094, |
|
"eval_rouge1": 19.5385, |
|
"eval_rouge2": 5.1106, |
|
"eval_rougeL": 16.7601, |
|
"eval_rougeLsum": 17.9271, |
|
"eval_runtime": 664.3792, |
|
"eval_samples_per_second": 1.505, |
|
"eval_steps_per_second": 0.048, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 315, |
|
"total_flos": 3877644533760000.0, |
|
"train_loss": 3.802120681036086, |
|
"train_runtime": 4251.699, |
|
"train_samples_per_second": 1.176, |
|
"train_steps_per_second": 0.074 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 3877644533760000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|