{ "best_metric": NaN, "best_model_checkpoint": "autotrain-hu2ir-jotv8/checkpoint-826", "epoch": 1.0, "eval_steps": 500, "global_step": 826, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 12.926700592041016, "learning_rate": 7.459677419354839e-06, "loss": 6.1713, "step": 41 }, { "epoch": 0.1, "grad_norm": 5.053235054016113, "learning_rate": 1.5524193548387098e-05, "loss": 2.7995, "step": 82 }, { "epoch": 0.15, "grad_norm": 2.710062026977539, "learning_rate": 2.3790322580645163e-05, "loss": 1.076, "step": 123 }, { "epoch": 0.2, "grad_norm": 3.625478982925415, "learning_rate": 3.205645161290323e-05, "loss": 0.9209, "step": 164 }, { "epoch": 0.25, "grad_norm": 1.7764992713928223, "learning_rate": 4.032258064516129e-05, "loss": 0.8778, "step": 205 }, { "epoch": 0.3, "grad_norm": 1.6964770555496216, "learning_rate": 4.858870967741936e-05, "loss": 0.8273, "step": 246 }, { "epoch": 0.35, "grad_norm": 1.7574260234832764, "learning_rate": 4.923766816143498e-05, "loss": 0.8135, "step": 287 }, { "epoch": 0.4, "grad_norm": 1.3349592685699463, "learning_rate": 4.831838565022422e-05, "loss": 0.8023, "step": 328 }, { "epoch": 0.45, "grad_norm": 1.5791077613830566, "learning_rate": 4.769058295964125e-05, "loss": 0.7892, "step": 369 }, { "epoch": 0.5, "grad_norm": NaN, "learning_rate": 4.76457399103139e-05, "loss": 0.0972, "step": 410 }, { "epoch": 0.55, "grad_norm": NaN, "learning_rate": 4.760089686098655e-05, "loss": 0.1088, "step": 451 }, { "epoch": 0.6, "grad_norm": NaN, "learning_rate": 4.757847533632287e-05, "loss": 0.0467, "step": 492 }, { "epoch": 0.65, "grad_norm": NaN, "learning_rate": 4.757847533632287e-05, "loss": 0.0, "step": 533 }, { "epoch": 0.69, "grad_norm": NaN, "learning_rate": 4.757847533632287e-05, "loss": 0.0, "step": 574 }, { "epoch": 0.74, "grad_norm": NaN, "learning_rate": 4.755605381165919e-05, "loss": 0.1944, "step": 615 }, { "epoch": 0.79, "grad_norm": NaN, "learning_rate": 4.755605381165919e-05, "loss": 0.0, "step": 656 }, { "epoch": 0.84, "grad_norm": NaN, "learning_rate": 4.755605381165919e-05, "loss": 0.0, "step": 697 }, { "epoch": 0.89, "grad_norm": NaN, "learning_rate": 4.755605381165919e-05, "loss": 0.0, "step": 738 }, { "epoch": 0.94, "grad_norm": NaN, "learning_rate": 4.755605381165919e-05, "loss": 0.0, "step": 779 }, { "epoch": 0.99, "grad_norm": NaN, "learning_rate": 4.755605381165919e-05, "loss": 0.0, "step": 820 }, { "epoch": 1.0, "eval_gen_len": 0.0, "eval_loss": NaN, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 57.261, "eval_samples_per_second": 28.833, "eval_steps_per_second": 1.816, "step": 826 } ], "logging_steps": 41, "max_steps": 2478, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1005084998369280.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }