gpt2-ulasan-ecommerce / trainer_state.json
sekarmulyani's picture
Upload 12 files
85d7167
raw
history blame contribute delete
No virus
11.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 24.0,
"eval_steps": 500,
"global_step": 28368,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.42,
"learning_rate": 9.894247038917091e-06,
"loss": 3.5932,
"step": 500
},
{
"epoch": 0.85,
"learning_rate": 9.78849407783418e-06,
"loss": 3.336,
"step": 1000
},
{
"epoch": 1.0,
"eval_loss": 3.2544538974761963,
"eval_runtime": 6.101,
"eval_samples_per_second": 41.961,
"eval_steps_per_second": 5.245,
"step": 1182
},
{
"epoch": 1.27,
"learning_rate": 9.68274111675127e-06,
"loss": 3.2562,
"step": 1500
},
{
"epoch": 1.69,
"learning_rate": 9.57698815566836e-06,
"loss": 3.1967,
"step": 2000
},
{
"epoch": 2.0,
"eval_loss": 3.1576380729675293,
"eval_runtime": 6.0951,
"eval_samples_per_second": 42.001,
"eval_steps_per_second": 5.25,
"step": 2364
},
{
"epoch": 2.12,
"learning_rate": 9.47123519458545e-06,
"loss": 3.1613,
"step": 2500
},
{
"epoch": 2.54,
"learning_rate": 9.36548223350254e-06,
"loss": 3.1268,
"step": 3000
},
{
"epoch": 2.96,
"learning_rate": 9.25972927241963e-06,
"loss": 3.1148,
"step": 3500
},
{
"epoch": 3.0,
"eval_loss": 3.1073131561279297,
"eval_runtime": 6.0936,
"eval_samples_per_second": 42.011,
"eval_steps_per_second": 5.251,
"step": 3546
},
{
"epoch": 3.38,
"learning_rate": 9.15397631133672e-06,
"loss": 3.0804,
"step": 4000
},
{
"epoch": 3.81,
"learning_rate": 9.048223350253808e-06,
"loss": 3.0814,
"step": 4500
},
{
"epoch": 4.0,
"eval_loss": 3.074193000793457,
"eval_runtime": 6.0939,
"eval_samples_per_second": 42.009,
"eval_steps_per_second": 5.251,
"step": 4728
},
{
"epoch": 4.23,
"learning_rate": 8.942470389170898e-06,
"loss": 3.054,
"step": 5000
},
{
"epoch": 4.65,
"learning_rate": 8.836717428087988e-06,
"loss": 3.0355,
"step": 5500
},
{
"epoch": 5.0,
"eval_loss": 3.04995059967041,
"eval_runtime": 6.09,
"eval_samples_per_second": 42.036,
"eval_steps_per_second": 5.254,
"step": 5910
},
{
"epoch": 5.08,
"learning_rate": 8.730964467005076e-06,
"loss": 3.0365,
"step": 6000
},
{
"epoch": 5.5,
"learning_rate": 8.625211505922166e-06,
"loss": 3.0104,
"step": 6500
},
{
"epoch": 5.92,
"learning_rate": 8.519458544839256e-06,
"loss": 3.0126,
"step": 7000
},
{
"epoch": 6.0,
"eval_loss": 3.0317230224609375,
"eval_runtime": 6.0899,
"eval_samples_per_second": 42.037,
"eval_steps_per_second": 5.255,
"step": 7092
},
{
"epoch": 6.35,
"learning_rate": 8.413705583756346e-06,
"loss": 2.9923,
"step": 7500
},
{
"epoch": 6.77,
"learning_rate": 8.307952622673435e-06,
"loss": 2.9902,
"step": 8000
},
{
"epoch": 7.0,
"eval_loss": 3.0167293548583984,
"eval_runtime": 6.0906,
"eval_samples_per_second": 42.032,
"eval_steps_per_second": 5.254,
"step": 8274
},
{
"epoch": 7.19,
"learning_rate": 8.202199661590525e-06,
"loss": 2.9783,
"step": 8500
},
{
"epoch": 7.61,
"learning_rate": 8.096446700507615e-06,
"loss": 2.9722,
"step": 9000
},
{
"epoch": 8.0,
"eval_loss": 3.004361391067505,
"eval_runtime": 6.0481,
"eval_samples_per_second": 42.327,
"eval_steps_per_second": 5.291,
"step": 9456
},
{
"epoch": 8.04,
"learning_rate": 7.990693739424705e-06,
"loss": 2.9628,
"step": 9500
},
{
"epoch": 8.46,
"learning_rate": 7.884940778341795e-06,
"loss": 2.9593,
"step": 10000
},
{
"epoch": 8.88,
"learning_rate": 7.779187817258885e-06,
"loss": 2.9485,
"step": 10500
},
{
"epoch": 9.0,
"eval_loss": 2.9940547943115234,
"eval_runtime": 6.1451,
"eval_samples_per_second": 41.659,
"eval_steps_per_second": 5.207,
"step": 10638
},
{
"epoch": 9.31,
"learning_rate": 7.673434856175973e-06,
"loss": 2.9405,
"step": 11000
},
{
"epoch": 9.73,
"learning_rate": 7.567681895093063e-06,
"loss": 2.943,
"step": 11500
},
{
"epoch": 10.0,
"eval_loss": 2.9857802391052246,
"eval_runtime": 6.2619,
"eval_samples_per_second": 40.882,
"eval_steps_per_second": 5.11,
"step": 11820
},
{
"epoch": 10.15,
"learning_rate": 7.461928934010153e-06,
"loss": 2.9243,
"step": 12000
},
{
"epoch": 10.58,
"learning_rate": 7.356175972927243e-06,
"loss": 2.9228,
"step": 12500
},
{
"epoch": 11.0,
"learning_rate": 7.2504230118443316e-06,
"loss": 2.9216,
"step": 13000
},
{
"epoch": 11.0,
"eval_loss": 2.9776651859283447,
"eval_runtime": 6.2854,
"eval_samples_per_second": 40.73,
"eval_steps_per_second": 5.091,
"step": 13002
},
{
"epoch": 11.42,
"learning_rate": 7.144670050761422e-06,
"loss": 2.9118,
"step": 13500
},
{
"epoch": 11.84,
"learning_rate": 7.038917089678512e-06,
"loss": 2.911,
"step": 14000
},
{
"epoch": 12.0,
"eval_loss": 2.9713006019592285,
"eval_runtime": 6.1107,
"eval_samples_per_second": 41.894,
"eval_steps_per_second": 5.237,
"step": 14184
},
{
"epoch": 12.27,
"learning_rate": 6.933164128595601e-06,
"loss": 2.9038,
"step": 14500
},
{
"epoch": 12.69,
"learning_rate": 6.827411167512691e-06,
"loss": 2.8924,
"step": 15000
},
{
"epoch": 13.0,
"eval_loss": 2.9653375148773193,
"eval_runtime": 6.1162,
"eval_samples_per_second": 41.856,
"eval_steps_per_second": 5.232,
"step": 15366
},
{
"epoch": 13.11,
"learning_rate": 6.721658206429781e-06,
"loss": 2.9025,
"step": 15500
},
{
"epoch": 13.54,
"learning_rate": 6.61590524534687e-06,
"loss": 2.8886,
"step": 16000
},
{
"epoch": 13.96,
"learning_rate": 6.51015228426396e-06,
"loss": 2.8882,
"step": 16500
},
{
"epoch": 14.0,
"eval_loss": 2.960761547088623,
"eval_runtime": 6.1211,
"eval_samples_per_second": 41.822,
"eval_steps_per_second": 5.228,
"step": 16548
},
{
"epoch": 14.38,
"learning_rate": 6.40439932318105e-06,
"loss": 2.8777,
"step": 17000
},
{
"epoch": 14.81,
"learning_rate": 6.298646362098139e-06,
"loss": 2.8826,
"step": 17500
},
{
"epoch": 15.0,
"eval_loss": 2.9559221267700195,
"eval_runtime": 6.0998,
"eval_samples_per_second": 41.969,
"eval_steps_per_second": 5.246,
"step": 17730
},
{
"epoch": 15.23,
"learning_rate": 6.1928934010152285e-06,
"loss": 2.8796,
"step": 18000
},
{
"epoch": 15.65,
"learning_rate": 6.0871404399323185e-06,
"loss": 2.8697,
"step": 18500
},
{
"epoch": 16.0,
"eval_loss": 2.952040672302246,
"eval_runtime": 6.2485,
"eval_samples_per_second": 40.97,
"eval_steps_per_second": 5.121,
"step": 18912
},
{
"epoch": 16.07,
"learning_rate": 5.981387478849409e-06,
"loss": 2.8645,
"step": 19000
},
{
"epoch": 16.5,
"learning_rate": 5.875634517766498e-06,
"loss": 2.8678,
"step": 19500
},
{
"epoch": 16.92,
"learning_rate": 5.769881556683588e-06,
"loss": 2.8616,
"step": 20000
},
{
"epoch": 17.0,
"eval_loss": 2.948793888092041,
"eval_runtime": 6.2711,
"eval_samples_per_second": 40.822,
"eval_steps_per_second": 5.103,
"step": 20094
},
{
"epoch": 17.34,
"learning_rate": 5.664128595600678e-06,
"loss": 2.8548,
"step": 20500
},
{
"epoch": 17.77,
"learning_rate": 5.558375634517766e-06,
"loss": 2.8529,
"step": 21000
},
{
"epoch": 18.0,
"eval_loss": 2.945361614227295,
"eval_runtime": 6.3517,
"eval_samples_per_second": 40.304,
"eval_steps_per_second": 5.038,
"step": 21276
},
{
"epoch": 18.19,
"learning_rate": 5.452622673434856e-06,
"loss": 2.8557,
"step": 21500
},
{
"epoch": 18.61,
"learning_rate": 5.346869712351946e-06,
"loss": 2.8448,
"step": 22000
},
{
"epoch": 19.0,
"eval_loss": 2.9428470134735107,
"eval_runtime": 6.2219,
"eval_samples_per_second": 41.145,
"eval_steps_per_second": 5.143,
"step": 22458
},
{
"epoch": 19.04,
"learning_rate": 5.241116751269036e-06,
"loss": 2.8458,
"step": 22500
},
{
"epoch": 19.46,
"learning_rate": 5.1353637901861255e-06,
"loss": 2.8462,
"step": 23000
},
{
"epoch": 19.88,
"learning_rate": 5.0296108291032155e-06,
"loss": 2.84,
"step": 23500
},
{
"epoch": 20.0,
"eval_loss": 2.940398693084717,
"eval_runtime": 6.2496,
"eval_samples_per_second": 40.962,
"eval_steps_per_second": 5.12,
"step": 23640
},
{
"epoch": 20.3,
"learning_rate": 4.923857868020305e-06,
"loss": 2.8349,
"step": 24000
},
{
"epoch": 20.73,
"learning_rate": 4.818104906937395e-06,
"loss": 2.8285,
"step": 24500
},
{
"epoch": 21.0,
"eval_loss": 2.938441276550293,
"eval_runtime": 6.1601,
"eval_samples_per_second": 41.558,
"eval_steps_per_second": 5.195,
"step": 24822
},
{
"epoch": 21.15,
"learning_rate": 4.712351945854484e-06,
"loss": 2.8345,
"step": 25000
},
{
"epoch": 21.57,
"learning_rate": 4.606598984771574e-06,
"loss": 2.8302,
"step": 25500
},
{
"epoch": 22.0,
"learning_rate": 4.500846023688664e-06,
"loss": 2.8266,
"step": 26000
},
{
"epoch": 22.0,
"eval_loss": 2.9362807273864746,
"eval_runtime": 6.0955,
"eval_samples_per_second": 41.998,
"eval_steps_per_second": 5.25,
"step": 26004
},
{
"epoch": 22.42,
"learning_rate": 4.395093062605753e-06,
"loss": 2.819,
"step": 26500
},
{
"epoch": 22.84,
"learning_rate": 4.289340101522843e-06,
"loss": 2.8232,
"step": 27000
},
{
"epoch": 23.0,
"eval_loss": 2.934544324874878,
"eval_runtime": 6.1597,
"eval_samples_per_second": 41.561,
"eval_steps_per_second": 5.195,
"step": 27186
},
{
"epoch": 23.27,
"learning_rate": 4.183587140439932e-06,
"loss": 2.8213,
"step": 27500
},
{
"epoch": 23.69,
"learning_rate": 4.0778341793570224e-06,
"loss": 2.8136,
"step": 28000
}
],
"logging_steps": 500,
"max_steps": 47280,
"num_train_epochs": 40,
"save_steps": 9456,
"total_flos": 4.4459884412928e+16,
"trial_name": null,
"trial_params": null
}