|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"best_supernet_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 22132, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.09, |
|
"learning_rate": 2.998341292195967e-05, |
|
"loss": 4.6579, |
|
"step": 500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.18, |
|
"learning_rate": 2.9933287793976817e-05, |
|
"loss": 2.3167, |
|
"step": 1000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.27, |
|
"learning_rate": 2.9849736210934906e-05, |
|
"loss": 1.85, |
|
"step": 1500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.36, |
|
"learning_rate": 2.9732945193487002e-05, |
|
"loss": 1.6287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.45, |
|
"learning_rate": 2.9583176164954607e-05, |
|
"loss": 1.4613, |
|
"step": 2500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.54, |
|
"learning_rate": 2.9401161507934368e-05, |
|
"loss": 1.3555, |
|
"step": 3000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.63, |
|
"learning_rate": 2.9186579260115404e-05, |
|
"loss": 1.2711, |
|
"step": 3500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.72, |
|
"learning_rate": 2.8940241978801994e-05, |
|
"loss": 1.23, |
|
"step": 4000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.81, |
|
"learning_rate": 2.8662701061795924e-05, |
|
"loss": 1.1722, |
|
"step": 4500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.9, |
|
"learning_rate": 2.8355224073338655e-05, |
|
"loss": 1.0992, |
|
"step": 5000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.99, |
|
"learning_rate": 2.8017267123155836e-05, |
|
"loss": 1.102, |
|
"step": 5500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 128, 2: 384, 3: 384, 4: 512, 5: 384, 6: 320, 7: 384, 8: 734, 9: 721, 10: 785, 11: 669, 12: 672, 13: 625, 14: 637, 15: 412})])", |
|
"epoch": 1.0, |
|
"eval_exact_match": 74.08703878902554, |
|
"eval_f1": 82.68222981097581, |
|
"eval_runtime": 15.4387, |
|
"eval_samples_per_second": 698.503, |
|
"eval_steps_per_second": 5.506, |
|
"step": 5533 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", |
|
"epoch": 1.0, |
|
"eval_exact_match": 77.8240302743614, |
|
"eval_f1": 85.45764678262142, |
|
"eval_runtime": 17.4036, |
|
"eval_samples_per_second": 619.64, |
|
"eval_steps_per_second": 4.884, |
|
"step": 5533 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.08, |
|
"learning_rate": 2.7650935243960826e-05, |
|
"loss": 0.8759, |
|
"step": 6000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.17, |
|
"learning_rate": 2.7255580438167598e-05, |
|
"loss": 0.8374, |
|
"step": 6500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.27, |
|
"learning_rate": 2.6832792918668526e-05, |
|
"loss": 0.8171, |
|
"step": 7000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.36, |
|
"learning_rate": 2.6384443369274024e-05, |
|
"loss": 0.8208, |
|
"step": 7500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.45, |
|
"learning_rate": 2.5909738712880504e-05, |
|
"loss": 0.8126, |
|
"step": 8000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.54, |
|
"learning_rate": 2.5410613855401778e-05, |
|
"loss": 0.8021, |
|
"step": 8500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.63, |
|
"learning_rate": 2.4888186030687985e-05, |
|
"loss": 0.7983, |
|
"step": 9000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.72, |
|
"learning_rate": 2.434473504350374e-05, |
|
"loss": 0.7963, |
|
"step": 9500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.81, |
|
"learning_rate": 2.3779299593596958e-05, |
|
"loss": 0.787, |
|
"step": 10000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.9, |
|
"learning_rate": 2.3194212686665838e-05, |
|
"loss": 0.7865, |
|
"step": 10500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.99, |
|
"learning_rate": 2.259078397276668e-05, |
|
"loss": 0.7539, |
|
"step": 11000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 128, 2: 384, 3: 384, 4: 512, 5: 384, 6: 320, 7: 384, 8: 734, 9: 721, 10: 785, 11: 669, 12: 672, 13: 625, 14: 637, 15: 412})])", |
|
"epoch": 2.0, |
|
"eval_exact_match": 76.00756859035005, |
|
"eval_f1": 84.32391737009917, |
|
"eval_runtime": 15.8101, |
|
"eval_samples_per_second": 682.096, |
|
"eval_steps_per_second": 5.376, |
|
"step": 11066 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", |
|
"epoch": 2.0, |
|
"eval_exact_match": 78.63765373699148, |
|
"eval_f1": 86.26458598190044, |
|
"eval_runtime": 17.107, |
|
"eval_samples_per_second": 630.386, |
|
"eval_steps_per_second": 4.969, |
|
"step": 11066 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.08, |
|
"learning_rate": 2.1971621037713612e-05, |
|
"loss": 0.6107, |
|
"step": 11500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.17, |
|
"learning_rate": 2.1335628655810864e-05, |
|
"loss": 0.5949, |
|
"step": 12000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.26, |
|
"learning_rate": 2.0686768233292663e-05, |
|
"loss": 0.5874, |
|
"step": 12500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.35, |
|
"learning_rate": 2.0023892013135002e-05, |
|
"loss": 0.595, |
|
"step": 13000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.44, |
|
"learning_rate": 1.934977038587642e-05, |
|
"loss": 0.5811, |
|
"step": 13500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.53, |
|
"learning_rate": 1.866591229560134e-05, |
|
"loss": 0.587, |
|
"step": 14000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.62, |
|
"learning_rate": 1.797384848035196e-05, |
|
"loss": 0.5865, |
|
"step": 14500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.71, |
|
"learning_rate": 1.7275128045746396e-05, |
|
"loss": 0.58, |
|
"step": 15000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.8, |
|
"learning_rate": 1.657131499748308e-05, |
|
"loss": 0.5749, |
|
"step": 15500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.89, |
|
"learning_rate": 1.5863984740493084e-05, |
|
"loss": 0.5778, |
|
"step": 16000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.98, |
|
"learning_rate": 1.5156139956150232e-05, |
|
"loss": 0.578, |
|
"step": 16500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 128, 2: 384, 3: 384, 4: 512, 5: 384, 6: 320, 7: 384, 8: 734, 9: 721, 10: 785, 11: 669, 12: 672, 13: 625, 14: 637, 15: 412})])", |
|
"epoch": 3.0, |
|
"eval_exact_match": 77.12393566698202, |
|
"eval_f1": 85.18935202750461, |
|
"eval_runtime": 15.5394, |
|
"eval_samples_per_second": 693.978, |
|
"eval_steps_per_second": 5.47, |
|
"step": 16599 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", |
|
"epoch": 3.0, |
|
"eval_exact_match": 79.72563859981078, |
|
"eval_f1": 87.10479444519025, |
|
"eval_runtime": 28.4742, |
|
"eval_samples_per_second": 378.728, |
|
"eval_steps_per_second": 2.985, |
|
"step": 16599 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.07, |
|
"learning_rate": 1.4447947066739665e-05, |
|
"loss": 0.5046, |
|
"step": 17000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.16, |
|
"learning_rate": 1.3739570482441875e-05, |
|
"loss": 0.4814, |
|
"step": 17500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.25, |
|
"learning_rate": 1.303401522531631e-05, |
|
"loss": 0.4852, |
|
"step": 18000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.34, |
|
"learning_rate": 1.2332860600029118e-05, |
|
"loss": 0.4817, |
|
"step": 18500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.43, |
|
"learning_rate": 1.163767606093589e-05, |
|
"loss": 0.4739, |
|
"step": 19000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.52, |
|
"learning_rate": 1.0950017699037782e-05, |
|
"loss": 0.476, |
|
"step": 19500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.61, |
|
"learning_rate": 1.0271424758849935e-05, |
|
"loss": 0.4713, |
|
"step": 20000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.71, |
|
"learning_rate": 9.603416192978867e-06, |
|
"loss": 0.4699, |
|
"step": 20500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.8, |
|
"learning_rate": 8.94748726212097e-06, |
|
"loss": 0.484, |
|
"step": 21000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.89, |
|
"learning_rate": 8.305106188092613e-06, |
|
"loss": 0.4669, |
|
"step": 21500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.98, |
|
"learning_rate": 7.677710867383705e-06, |
|
"loss": 0.4711, |
|
"step": 22000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 128, 2: 384, 3: 384, 4: 512, 5: 384, 6: 320, 7: 384, 8: 734, 9: 721, 10: 785, 11: 669, 12: 672, 13: 625, 14: 637, 15: 412})])", |
|
"epoch": 4.0, |
|
"eval_exact_match": 77.52128666035951, |
|
"eval_f1": 85.45904836346396, |
|
"eval_runtime": 24.1677, |
|
"eval_samples_per_second": 446.215, |
|
"eval_steps_per_second": 3.517, |
|
"step": 22132 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", |
|
"epoch": 4.0, |
|
"eval_exact_match": 80.12298959318827, |
|
"eval_f1": 87.32509460850044, |
|
"eval_runtime": 29.2081, |
|
"eval_samples_per_second": 369.212, |
|
"eval_steps_per_second": 2.91, |
|
"step": 22132 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 22132, |
|
"total_flos": 2.057632914980045e+16, |
|
"train_loss": 0.9007888646059434, |
|
"train_runtime": 22370.0869, |
|
"train_samples_per_second": 15.829, |
|
"train_steps_per_second": 0.989 |
|
} |
|
], |
|
"max_steps": 22132, |
|
"min_subnet_acc": null, |
|
"min_subnet_best_acc": null, |
|
"num_train_epochs": 4, |
|
"supernet_acc": null, |
|
"supernet_best_acc": null, |
|
"total_flos": 2.057632914980045e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|