|
{ |
|
"best_metric": 1.0877293348312378, |
|
"best_model_checkpoint": "./outputs/checkpoint-4100", |
|
"epoch": 2.9879781420765026, |
|
"eval_steps": 100, |
|
"global_step": 4100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7655, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.6318098306655884, |
|
"eval_runtime": 431.2124, |
|
"eval_samples_per_second": 14.55, |
|
"eval_steps_per_second": 1.82, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6075, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.5865398645401, |
|
"eval_runtime": 418.8687, |
|
"eval_samples_per_second": 14.978, |
|
"eval_steps_per_second": 1.874, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.575, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.555732250213623, |
|
"eval_runtime": 419.0983, |
|
"eval_samples_per_second": 14.97, |
|
"eval_steps_per_second": 1.873, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5438, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.5291692018508911, |
|
"eval_runtime": 418.8898, |
|
"eval_samples_per_second": 14.978, |
|
"eval_steps_per_second": 1.874, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5106, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5064970254898071, |
|
"eval_runtime": 419.133, |
|
"eval_samples_per_second": 14.969, |
|
"eval_steps_per_second": 1.873, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4939, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.4864610433578491, |
|
"eval_runtime": 418.2311, |
|
"eval_samples_per_second": 15.001, |
|
"eval_steps_per_second": 1.877, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4713, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.4674705266952515, |
|
"eval_runtime": 418.2014, |
|
"eval_samples_per_second": 15.002, |
|
"eval_steps_per_second": 1.877, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4616, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.4496861696243286, |
|
"eval_runtime": 418.2918, |
|
"eval_samples_per_second": 14.999, |
|
"eval_steps_per_second": 1.877, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4323, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.4320642948150635, |
|
"eval_runtime": 418.2292, |
|
"eval_samples_per_second": 15.001, |
|
"eval_steps_per_second": 1.877, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4266, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.4162260293960571, |
|
"eval_runtime": 417.8585, |
|
"eval_samples_per_second": 15.015, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4124, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.4014889001846313, |
|
"eval_runtime": 417.75, |
|
"eval_samples_per_second": 15.019, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3846, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.3861923217773438, |
|
"eval_runtime": 417.77, |
|
"eval_samples_per_second": 15.018, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3934, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.371946096420288, |
|
"eval_runtime": 417.7963, |
|
"eval_samples_per_second": 15.017, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3557, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.3575325012207031, |
|
"eval_runtime": 417.6653, |
|
"eval_samples_per_second": 15.022, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3316, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.3450016975402832, |
|
"eval_runtime": 417.6497, |
|
"eval_samples_per_second": 15.022, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3211, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.3318936824798584, |
|
"eval_runtime": 418.2286, |
|
"eval_samples_per_second": 15.001, |
|
"eval_steps_per_second": 1.877, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3213, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.3180677890777588, |
|
"eval_runtime": 418.7732, |
|
"eval_samples_per_second": 14.982, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2972, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.3067623376846313, |
|
"eval_runtime": 417.8183, |
|
"eval_samples_per_second": 15.016, |
|
"eval_steps_per_second": 1.879, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.289, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.294016718864441, |
|
"eval_runtime": 418.0677, |
|
"eval_samples_per_second": 15.007, |
|
"eval_steps_per_second": 1.878, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.2830415964126587, |
|
"eval_runtime": 448.8239, |
|
"eval_samples_per_second": 13.979, |
|
"eval_steps_per_second": 1.749, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2666, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.271437406539917, |
|
"eval_runtime": 417.7696, |
|
"eval_samples_per_second": 15.018, |
|
"eval_steps_per_second": 1.879, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2456, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.2596800327301025, |
|
"eval_runtime": 418.2044, |
|
"eval_samples_per_second": 15.002, |
|
"eval_steps_per_second": 1.877, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2442, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.2501691579818726, |
|
"eval_runtime": 418.0356, |
|
"eval_samples_per_second": 15.008, |
|
"eval_steps_per_second": 1.878, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2415, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.2380064725875854, |
|
"eval_runtime": 417.8498, |
|
"eval_samples_per_second": 15.015, |
|
"eval_steps_per_second": 1.879, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2229, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.227720856666565, |
|
"eval_runtime": 417.9232, |
|
"eval_samples_per_second": 15.012, |
|
"eval_steps_per_second": 1.878, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2112, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.2167377471923828, |
|
"eval_runtime": 418.696, |
|
"eval_samples_per_second": 14.985, |
|
"eval_steps_per_second": 1.875, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2038, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.2063579559326172, |
|
"eval_runtime": 417.9549, |
|
"eval_samples_per_second": 15.011, |
|
"eval_steps_per_second": 1.878, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1797, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.1974104642868042, |
|
"eval_runtime": 418.4692, |
|
"eval_samples_per_second": 14.993, |
|
"eval_steps_per_second": 1.876, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1547, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.1865276098251343, |
|
"eval_runtime": 417.7842, |
|
"eval_samples_per_second": 15.017, |
|
"eval_steps_per_second": 1.879, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1572, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.1763572692871094, |
|
"eval_runtime": 417.556, |
|
"eval_samples_per_second": 15.026, |
|
"eval_steps_per_second": 1.88, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1399, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.1691069602966309, |
|
"eval_runtime": 417.8778, |
|
"eval_samples_per_second": 15.014, |
|
"eval_steps_per_second": 1.879, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1534, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 1.1825001239776611, |
|
"eval_runtime": 339.9447, |
|
"eval_samples_per_second": 18.456, |
|
"eval_steps_per_second": 2.309, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1428, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 1.169150948524475, |
|
"eval_runtime": 346.6169, |
|
"eval_samples_per_second": 18.101, |
|
"eval_steps_per_second": 2.265, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1263, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.1573188304901123, |
|
"eval_runtime": 340.3731, |
|
"eval_samples_per_second": 18.433, |
|
"eval_steps_per_second": 2.306, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1238, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.1467550992965698, |
|
"eval_runtime": 340.2433, |
|
"eval_samples_per_second": 18.44, |
|
"eval_steps_per_second": 2.307, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1167, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.136830449104309, |
|
"eval_runtime": 340.4238, |
|
"eval_samples_per_second": 18.43, |
|
"eval_steps_per_second": 2.306, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1115, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.1273517608642578, |
|
"eval_runtime": 340.9753, |
|
"eval_samples_per_second": 18.4, |
|
"eval_steps_per_second": 2.302, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0948, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 1.1172122955322266, |
|
"eval_runtime": 340.8373, |
|
"eval_samples_per_second": 18.408, |
|
"eval_steps_per_second": 2.303, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0913, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.1067975759506226, |
|
"eval_runtime": 341.0366, |
|
"eval_samples_per_second": 18.397, |
|
"eval_steps_per_second": 2.302, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0811, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 1.0958919525146484, |
|
"eval_runtime": 340.8306, |
|
"eval_samples_per_second": 18.408, |
|
"eval_steps_per_second": 2.303, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 1.07, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.0877293348312378, |
|
"eval_runtime": 357.5953, |
|
"eval_samples_per_second": 17.545, |
|
"eval_steps_per_second": 2.195, |
|
"step": 4100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.3292894134103962e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|