{ "best_metric": 34.8229, "best_model_checkpoint": "bin/liputan6-pt-pl50/checkpoint-315", "epoch": 5.0, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.9081947207450867, "learning_rate": 0.0008, "loss": 4.2782, "step": 63 }, { "epoch": 1.0, "eval_gen_len": 38.037, "eval_loss": 3.2600271701812744, "eval_rouge1": 25.0139, "eval_rouge2": 13.1669, "eval_rougeL": 22.4852, "eval_rougeLsum": 23.5026, "eval_runtime": 1079.1281, "eval_samples_per_second": 0.927, "eval_steps_per_second": 0.03, "step": 63 }, { "epoch": 2.0, "grad_norm": 0.9287102818489075, "learning_rate": 0.0006, "loss": 3.3831, "step": 126 }, { "epoch": 2.0, "eval_gen_len": 51.621, "eval_loss": 3.011786460876465, "eval_rouge1": 28.0005, "eval_rouge2": 15.5199, "eval_rougeL": 25.1006, "eval_rougeLsum": 26.3175, "eval_runtime": 1436.7107, "eval_samples_per_second": 0.696, "eval_steps_per_second": 0.022, "step": 126 }, { "epoch": 3.0, "grad_norm": 0.8845603466033936, "learning_rate": 0.0004, "loss": 3.0732, "step": 189 }, { "epoch": 3.0, "eval_gen_len": 51.938, "eval_loss": 2.822599172592163, "eval_rouge1": 31.6641, "eval_rouge2": 18.1569, "eval_rougeL": 27.8004, "eval_rougeLsum": 29.8463, "eval_runtime": 1387.7431, "eval_samples_per_second": 0.721, "eval_steps_per_second": 0.023, "step": 189 }, { "epoch": 4.0, "grad_norm": 0.9532793164253235, "learning_rate": 0.0002, "loss": 2.83, "step": 252 }, { "epoch": 4.0, "eval_gen_len": 51.327, "eval_loss": 2.718118667602539, "eval_rouge1": 34.3328, "eval_rouge2": 21.5065, "eval_rougeL": 30.323, "eval_rougeLsum": 32.3623, "eval_runtime": 1087.9011, "eval_samples_per_second": 0.919, "eval_steps_per_second": 0.029, "step": 252 }, { "epoch": 5.0, "grad_norm": 0.9524036049842834, "learning_rate": 0.0, "loss": 2.6441, "step": 315 }, { "epoch": 5.0, "eval_gen_len": 52.623, "eval_loss": 2.672222137451172, "eval_rouge1": 34.8229, "eval_rouge2": 22.044, "eval_rougeL": 30.8324, "eval_rougeLsum": 33.0138, "eval_runtime": 1171.6265, "eval_samples_per_second": 0.854, "eval_steps_per_second": 0.027, "step": 315 }, { "epoch": 5.0, "step": 315, "total_flos": 3877644533760000.0, "train_loss": 3.241740926106771, "train_runtime": 6571.5037, "train_samples_per_second": 0.761, "train_steps_per_second": 0.048 } ], "logging_steps": 500, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 3877644533760000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }