ramdhanfirdaus's picture
Training in progress, step 4100, checkpoint
9006650
{
"best_metric": 1.0877293348312378,
"best_model_checkpoint": "./outputs/checkpoint-4100",
"epoch": 2.9879781420765026,
"eval_steps": 100,
"global_step": 4100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 0.0002,
"loss": 1.7655,
"step": 100
},
{
"epoch": 0.07,
"eval_loss": 1.6318098306655884,
"eval_runtime": 431.2124,
"eval_samples_per_second": 14.55,
"eval_steps_per_second": 1.82,
"step": 100
},
{
"epoch": 0.15,
"learning_rate": 0.0002,
"loss": 1.6075,
"step": 200
},
{
"epoch": 0.15,
"eval_loss": 1.5865398645401,
"eval_runtime": 418.8687,
"eval_samples_per_second": 14.978,
"eval_steps_per_second": 1.874,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 0.0002,
"loss": 1.575,
"step": 300
},
{
"epoch": 0.22,
"eval_loss": 1.555732250213623,
"eval_runtime": 419.0983,
"eval_samples_per_second": 14.97,
"eval_steps_per_second": 1.873,
"step": 300
},
{
"epoch": 0.29,
"learning_rate": 0.0002,
"loss": 1.5438,
"step": 400
},
{
"epoch": 0.29,
"eval_loss": 1.5291692018508911,
"eval_runtime": 418.8898,
"eval_samples_per_second": 14.978,
"eval_steps_per_second": 1.874,
"step": 400
},
{
"epoch": 0.36,
"learning_rate": 0.0002,
"loss": 1.5106,
"step": 500
},
{
"epoch": 0.36,
"eval_loss": 1.5064970254898071,
"eval_runtime": 419.133,
"eval_samples_per_second": 14.969,
"eval_steps_per_second": 1.873,
"step": 500
},
{
"epoch": 0.44,
"learning_rate": 0.0002,
"loss": 1.4939,
"step": 600
},
{
"epoch": 0.44,
"eval_loss": 1.4864610433578491,
"eval_runtime": 418.2311,
"eval_samples_per_second": 15.001,
"eval_steps_per_second": 1.877,
"step": 600
},
{
"epoch": 0.51,
"learning_rate": 0.0002,
"loss": 1.4713,
"step": 700
},
{
"epoch": 0.51,
"eval_loss": 1.4674705266952515,
"eval_runtime": 418.2014,
"eval_samples_per_second": 15.002,
"eval_steps_per_second": 1.877,
"step": 700
},
{
"epoch": 0.58,
"learning_rate": 0.0002,
"loss": 1.4616,
"step": 800
},
{
"epoch": 0.58,
"eval_loss": 1.4496861696243286,
"eval_runtime": 418.2918,
"eval_samples_per_second": 14.999,
"eval_steps_per_second": 1.877,
"step": 800
},
{
"epoch": 0.66,
"learning_rate": 0.0002,
"loss": 1.4323,
"step": 900
},
{
"epoch": 0.66,
"eval_loss": 1.4320642948150635,
"eval_runtime": 418.2292,
"eval_samples_per_second": 15.001,
"eval_steps_per_second": 1.877,
"step": 900
},
{
"epoch": 0.73,
"learning_rate": 0.0002,
"loss": 1.4266,
"step": 1000
},
{
"epoch": 0.73,
"eval_loss": 1.4162260293960571,
"eval_runtime": 417.8585,
"eval_samples_per_second": 15.015,
"eval_steps_per_second": 1.879,
"step": 1000
},
{
"epoch": 0.8,
"learning_rate": 0.0002,
"loss": 1.4124,
"step": 1100
},
{
"epoch": 0.8,
"eval_loss": 1.4014889001846313,
"eval_runtime": 417.75,
"eval_samples_per_second": 15.019,
"eval_steps_per_second": 1.879,
"step": 1100
},
{
"epoch": 0.87,
"learning_rate": 0.0002,
"loss": 1.3846,
"step": 1200
},
{
"epoch": 0.87,
"eval_loss": 1.3861923217773438,
"eval_runtime": 417.77,
"eval_samples_per_second": 15.018,
"eval_steps_per_second": 1.879,
"step": 1200
},
{
"epoch": 0.95,
"learning_rate": 0.0002,
"loss": 1.3934,
"step": 1300
},
{
"epoch": 0.95,
"eval_loss": 1.371946096420288,
"eval_runtime": 417.7963,
"eval_samples_per_second": 15.017,
"eval_steps_per_second": 1.879,
"step": 1300
},
{
"epoch": 1.02,
"learning_rate": 0.0002,
"loss": 1.3557,
"step": 1400
},
{
"epoch": 1.02,
"eval_loss": 1.3575325012207031,
"eval_runtime": 417.6653,
"eval_samples_per_second": 15.022,
"eval_steps_per_second": 1.879,
"step": 1400
},
{
"epoch": 1.09,
"learning_rate": 0.0002,
"loss": 1.3316,
"step": 1500
},
{
"epoch": 1.09,
"eval_loss": 1.3450016975402832,
"eval_runtime": 417.6497,
"eval_samples_per_second": 15.022,
"eval_steps_per_second": 1.88,
"step": 1500
},
{
"epoch": 1.17,
"learning_rate": 0.0002,
"loss": 1.3211,
"step": 1600
},
{
"epoch": 1.17,
"eval_loss": 1.3318936824798584,
"eval_runtime": 418.2286,
"eval_samples_per_second": 15.001,
"eval_steps_per_second": 1.877,
"step": 1600
},
{
"epoch": 1.24,
"learning_rate": 0.0002,
"loss": 1.3213,
"step": 1700
},
{
"epoch": 1.24,
"eval_loss": 1.3180677890777588,
"eval_runtime": 418.7732,
"eval_samples_per_second": 14.982,
"eval_steps_per_second": 1.875,
"step": 1700
},
{
"epoch": 1.31,
"learning_rate": 0.0002,
"loss": 1.2972,
"step": 1800
},
{
"epoch": 1.31,
"eval_loss": 1.3067623376846313,
"eval_runtime": 417.8183,
"eval_samples_per_second": 15.016,
"eval_steps_per_second": 1.879,
"step": 1800
},
{
"epoch": 1.38,
"learning_rate": 0.0002,
"loss": 1.289,
"step": 1900
},
{
"epoch": 1.38,
"eval_loss": 1.294016718864441,
"eval_runtime": 418.0677,
"eval_samples_per_second": 15.007,
"eval_steps_per_second": 1.878,
"step": 1900
},
{
"epoch": 1.46,
"learning_rate": 0.0002,
"loss": 1.2764,
"step": 2000
},
{
"epoch": 1.46,
"eval_loss": 1.2830415964126587,
"eval_runtime": 448.8239,
"eval_samples_per_second": 13.979,
"eval_steps_per_second": 1.749,
"step": 2000
},
{
"epoch": 1.53,
"learning_rate": 0.0002,
"loss": 1.2666,
"step": 2100
},
{
"epoch": 1.53,
"eval_loss": 1.271437406539917,
"eval_runtime": 417.7696,
"eval_samples_per_second": 15.018,
"eval_steps_per_second": 1.879,
"step": 2100
},
{
"epoch": 1.6,
"learning_rate": 0.0002,
"loss": 1.2456,
"step": 2200
},
{
"epoch": 1.6,
"eval_loss": 1.2596800327301025,
"eval_runtime": 418.2044,
"eval_samples_per_second": 15.002,
"eval_steps_per_second": 1.877,
"step": 2200
},
{
"epoch": 1.68,
"learning_rate": 0.0002,
"loss": 1.2442,
"step": 2300
},
{
"epoch": 1.68,
"eval_loss": 1.2501691579818726,
"eval_runtime": 418.0356,
"eval_samples_per_second": 15.008,
"eval_steps_per_second": 1.878,
"step": 2300
},
{
"epoch": 1.75,
"learning_rate": 0.0002,
"loss": 1.2415,
"step": 2400
},
{
"epoch": 1.75,
"eval_loss": 1.2380064725875854,
"eval_runtime": 417.8498,
"eval_samples_per_second": 15.015,
"eval_steps_per_second": 1.879,
"step": 2400
},
{
"epoch": 1.82,
"learning_rate": 0.0002,
"loss": 1.2229,
"step": 2500
},
{
"epoch": 1.82,
"eval_loss": 1.227720856666565,
"eval_runtime": 417.9232,
"eval_samples_per_second": 15.012,
"eval_steps_per_second": 1.878,
"step": 2500
},
{
"epoch": 1.89,
"learning_rate": 0.0002,
"loss": 1.2112,
"step": 2600
},
{
"epoch": 1.89,
"eval_loss": 1.2167377471923828,
"eval_runtime": 418.696,
"eval_samples_per_second": 14.985,
"eval_steps_per_second": 1.875,
"step": 2600
},
{
"epoch": 1.97,
"learning_rate": 0.0002,
"loss": 1.2038,
"step": 2700
},
{
"epoch": 1.97,
"eval_loss": 1.2063579559326172,
"eval_runtime": 417.9549,
"eval_samples_per_second": 15.011,
"eval_steps_per_second": 1.878,
"step": 2700
},
{
"epoch": 2.04,
"learning_rate": 0.0002,
"loss": 1.1797,
"step": 2800
},
{
"epoch": 2.04,
"eval_loss": 1.1974104642868042,
"eval_runtime": 418.4692,
"eval_samples_per_second": 14.993,
"eval_steps_per_second": 1.876,
"step": 2800
},
{
"epoch": 2.11,
"learning_rate": 0.0002,
"loss": 1.1547,
"step": 2900
},
{
"epoch": 2.11,
"eval_loss": 1.1865276098251343,
"eval_runtime": 417.7842,
"eval_samples_per_second": 15.017,
"eval_steps_per_second": 1.879,
"step": 2900
},
{
"epoch": 2.19,
"learning_rate": 0.0002,
"loss": 1.1572,
"step": 3000
},
{
"epoch": 2.19,
"eval_loss": 1.1763572692871094,
"eval_runtime": 417.556,
"eval_samples_per_second": 15.026,
"eval_steps_per_second": 1.88,
"step": 3000
},
{
"epoch": 2.26,
"learning_rate": 0.0002,
"loss": 1.1399,
"step": 3100
},
{
"epoch": 2.26,
"eval_loss": 1.1691069602966309,
"eval_runtime": 417.8778,
"eval_samples_per_second": 15.014,
"eval_steps_per_second": 1.879,
"step": 3100
},
{
"epoch": 2.33,
"learning_rate": 0.0002,
"loss": 1.1534,
"step": 3200
},
{
"epoch": 2.33,
"eval_loss": 1.1825001239776611,
"eval_runtime": 339.9447,
"eval_samples_per_second": 18.456,
"eval_steps_per_second": 2.309,
"step": 3200
},
{
"epoch": 2.41,
"learning_rate": 0.0002,
"loss": 1.1428,
"step": 3300
},
{
"epoch": 2.41,
"eval_loss": 1.169150948524475,
"eval_runtime": 346.6169,
"eval_samples_per_second": 18.101,
"eval_steps_per_second": 2.265,
"step": 3300
},
{
"epoch": 2.48,
"learning_rate": 0.0002,
"loss": 1.1263,
"step": 3400
},
{
"epoch": 2.48,
"eval_loss": 1.1573188304901123,
"eval_runtime": 340.3731,
"eval_samples_per_second": 18.433,
"eval_steps_per_second": 2.306,
"step": 3400
},
{
"epoch": 2.55,
"learning_rate": 0.0002,
"loss": 1.1238,
"step": 3500
},
{
"epoch": 2.55,
"eval_loss": 1.1467550992965698,
"eval_runtime": 340.2433,
"eval_samples_per_second": 18.44,
"eval_steps_per_second": 2.307,
"step": 3500
},
{
"epoch": 2.62,
"learning_rate": 0.0002,
"loss": 1.1167,
"step": 3600
},
{
"epoch": 2.62,
"eval_loss": 1.136830449104309,
"eval_runtime": 340.4238,
"eval_samples_per_second": 18.43,
"eval_steps_per_second": 2.306,
"step": 3600
},
{
"epoch": 2.7,
"learning_rate": 0.0002,
"loss": 1.1115,
"step": 3700
},
{
"epoch": 2.7,
"eval_loss": 1.1273517608642578,
"eval_runtime": 340.9753,
"eval_samples_per_second": 18.4,
"eval_steps_per_second": 2.302,
"step": 3700
},
{
"epoch": 2.77,
"learning_rate": 0.0002,
"loss": 1.0948,
"step": 3800
},
{
"epoch": 2.77,
"eval_loss": 1.1172122955322266,
"eval_runtime": 340.8373,
"eval_samples_per_second": 18.408,
"eval_steps_per_second": 2.303,
"step": 3800
},
{
"epoch": 2.84,
"learning_rate": 0.0002,
"loss": 1.0913,
"step": 3900
},
{
"epoch": 2.84,
"eval_loss": 1.1067975759506226,
"eval_runtime": 341.0366,
"eval_samples_per_second": 18.397,
"eval_steps_per_second": 2.302,
"step": 3900
},
{
"epoch": 2.92,
"learning_rate": 0.0002,
"loss": 1.0811,
"step": 4000
},
{
"epoch": 2.92,
"eval_loss": 1.0958919525146484,
"eval_runtime": 340.8306,
"eval_samples_per_second": 18.408,
"eval_steps_per_second": 2.303,
"step": 4000
},
{
"epoch": 2.99,
"learning_rate": 0.0002,
"loss": 1.07,
"step": 4100
},
{
"epoch": 2.99,
"eval_loss": 1.0877293348312378,
"eval_runtime": 357.5953,
"eval_samples_per_second": 17.545,
"eval_steps_per_second": 2.195,
"step": 4100
}
],
"logging_steps": 100,
"max_steps": 4116,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 1.3292894134103962e+18,
"trial_name": null,
"trial_params": null
}