{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.9984, "global_step": 6240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.3489128649234772, "eval_mse": 0.34891289472579956, "eval_runtime": 4.7891, "eval_samples_per_second": 208.807, "eval_steps_per_second": 26.101, "step": 312 }, { "epoch": 1.6, "learning_rate": 4.331812181114852e-05, "loss": 0.3355, "step": 500 }, { "epoch": 2.0, "eval_loss": 0.23772406578063965, "eval_mse": 0.23772406578063965, "eval_runtime": 4.8126, "eval_samples_per_second": 207.789, "eval_steps_per_second": 25.974, "step": 624 }, { "epoch": 3.0, "eval_loss": 0.2444075644016266, "eval_mse": 0.2444075644016266, "eval_runtime": 4.8651, "eval_samples_per_second": 205.548, "eval_steps_per_second": 25.693, "step": 936 }, { "epoch": 3.2, "learning_rate": 3.9544766252686105e-05, "loss": 0.1182, "step": 1000 }, { "epoch": 4.0, "eval_loss": 0.3121347725391388, "eval_mse": 0.3121347725391388, "eval_runtime": 4.9119, "eval_samples_per_second": 203.588, "eval_steps_per_second": 25.448, "step": 1248 }, { "epoch": 4.81, "learning_rate": 3.577141069422369e-05, "loss": 0.0651, "step": 1500 }, { "epoch": 5.0, "eval_loss": 0.22353506088256836, "eval_mse": 0.22353507578372955, "eval_runtime": 4.9339, "eval_samples_per_second": 202.678, "eval_steps_per_second": 25.335, "step": 1560 }, { "epoch": 6.0, "eval_loss": 0.19977863132953644, "eval_mse": 0.19977861642837524, "eval_runtime": 4.7835, "eval_samples_per_second": 209.051, "eval_steps_per_second": 26.131, "step": 1872 }, { "epoch": 6.41, "learning_rate": 3.199805513576128e-05, "loss": 0.0498, "step": 2000 }, { "epoch": 7.0, "eval_loss": 0.2336214929819107, "eval_mse": 0.2336214929819107, "eval_runtime": 4.7754, "eval_samples_per_second": 209.407, "eval_steps_per_second": 26.176, "step": 2184 }, { "epoch": 8.0, "eval_loss": 0.21180056035518646, "eval_mse": 0.21180060505867004, "eval_runtime": 4.9099, "eval_samples_per_second": 203.671, "eval_steps_per_second": 25.459, "step": 2496 }, { "epoch": 8.01, "learning_rate": 2.822469957729886e-05, "loss": 0.0358, "step": 2500 }, { "epoch": 9.0, "eval_loss": 0.22979474067687988, "eval_mse": 0.22979475557804108, "eval_runtime": 4.8765, "eval_samples_per_second": 205.065, "eval_steps_per_second": 25.633, "step": 2808 }, { "epoch": 9.61, "learning_rate": 2.445134401883645e-05, "loss": 0.0279, "step": 3000 }, { "epoch": 10.0, "eval_loss": 0.2303524762392044, "eval_mse": 0.2303524762392044, "eval_runtime": 4.9464, "eval_samples_per_second": 202.165, "eval_steps_per_second": 25.271, "step": 3120 }, { "epoch": 11.0, "eval_loss": 0.21912191808223724, "eval_mse": 0.21912193298339844, "eval_runtime": 4.8937, "eval_samples_per_second": 204.345, "eval_steps_per_second": 25.543, "step": 3432 }, { "epoch": 11.22, "learning_rate": 2.0677988460374033e-05, "loss": 0.0236, "step": 3500 }, { "epoch": 12.0, "eval_loss": 0.20294061303138733, "eval_mse": 0.20294061303138733, "eval_runtime": 4.6751, "eval_samples_per_second": 213.901, "eval_steps_per_second": 26.738, "step": 3744 }, { "epoch": 12.82, "learning_rate": 1.6904632901911617e-05, "loss": 0.0195, "step": 4000 }, { "epoch": 13.0, "eval_loss": 0.21014319360256195, "eval_mse": 0.21014319360256195, "eval_runtime": 4.8248, "eval_samples_per_second": 207.262, "eval_steps_per_second": 25.908, "step": 4056 }, { "epoch": 14.0, "eval_loss": 0.2216099053621292, "eval_mse": 0.2216099053621292, "eval_runtime": 4.7789, "eval_samples_per_second": 209.252, "eval_steps_per_second": 26.157, "step": 4368 }, { "epoch": 14.42, "learning_rate": 1.3131277343449203e-05, "loss": 0.0156, "step": 4500 }, { "epoch": 15.0, "eval_loss": 0.21098460257053375, "eval_mse": 0.21098460257053375, "eval_runtime": 4.7959, "eval_samples_per_second": 208.509, "eval_steps_per_second": 26.064, "step": 4680 }, { "epoch": 16.0, "eval_loss": 0.21815571188926697, "eval_mse": 0.21815571188926697, "eval_runtime": 4.9214, "eval_samples_per_second": 203.196, "eval_steps_per_second": 25.4, "step": 4992 }, { "epoch": 16.03, "learning_rate": 9.357921784986788e-06, "loss": 0.0127, "step": 5000 }, { "epoch": 17.0, "eval_loss": 0.21420633792877197, "eval_mse": 0.21420633792877197, "eval_runtime": 4.7835, "eval_samples_per_second": 209.054, "eval_steps_per_second": 26.132, "step": 5304 }, { "epoch": 17.63, "learning_rate": 5.5845662265243735e-06, "loss": 0.0109, "step": 5500 }, { "epoch": 18.0, "eval_loss": 0.2126861810684204, "eval_mse": 0.2126861810684204, "eval_runtime": 4.8516, "eval_samples_per_second": 206.117, "eval_steps_per_second": 25.765, "step": 5616 }, { "epoch": 19.0, "eval_loss": 0.21384698152542114, "eval_mse": 0.21384698152542114, "eval_runtime": 4.9235, "eval_samples_per_second": 203.109, "eval_steps_per_second": 25.389, "step": 5928 }, { "epoch": 19.23, "learning_rate": 1.8112106680619593e-06, "loss": 0.0094, "step": 6000 }, { "epoch": 20.0, "eval_loss": 0.2143700271844864, "eval_mse": 0.2143700271844864, "eval_runtime": 4.7927, "eval_samples_per_second": 208.649, "eval_steps_per_second": 26.081, "step": 6240 }, { "epoch": 20.0, "step": 6240, "total_flos": 6577191107414016.0, "train_loss": 0.05834093816005267, "train_runtime": 2383.5567, "train_samples_per_second": 41.954, "train_steps_per_second": 2.618 } ], "max_steps": 6240, "num_train_epochs": 20, "total_flos": 6577191107414016.0, "trial_name": null, "trial_params": null }