m4lw4r3exe's picture
Upload with huggingface_hub
ddf00f9
raw
history blame
4.28 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.6859016393442623,
"global_step": 49152,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22,
"learning_rate": 0.0004984426897459585,
"loss": 0.5454,
"step": 4096
},
{
"epoch": 0.22,
"eval_loss": 0.5005695223808289,
"eval_runtime": 32.9622,
"eval_samples_per_second": 104.271,
"eval_steps_per_second": 6.523,
"step": 4096
},
{
"epoch": 0.45,
"learning_rate": 0.0004934687023955681,
"loss": 0.4849,
"step": 8192
},
{
"epoch": 0.45,
"eval_loss": 0.480915367603302,
"eval_runtime": 32.9785,
"eval_samples_per_second": 104.219,
"eval_steps_per_second": 6.519,
"step": 8192
},
{
"epoch": 0.67,
"learning_rate": 0.000485144849673373,
"loss": 0.4518,
"step": 12288
},
{
"epoch": 0.67,
"eval_loss": 0.46660953760147095,
"eval_runtime": 32.8208,
"eval_samples_per_second": 104.72,
"eval_steps_per_second": 6.551,
"step": 12288
},
{
"epoch": 0.9,
"learning_rate": 0.0004735848873631612,
"loss": 0.4282,
"step": 16384
},
{
"epoch": 0.9,
"eval_loss": 0.46097490191459656,
"eval_runtime": 32.8311,
"eval_samples_per_second": 104.687,
"eval_steps_per_second": 6.549,
"step": 16384
},
{
"epoch": 1.12,
"learning_rate": 0.0004589518403420676,
"loss": 0.4145,
"step": 20480
},
{
"epoch": 1.12,
"eval_loss": 0.45063599944114685,
"eval_runtime": 32.8912,
"eval_samples_per_second": 104.496,
"eval_steps_per_second": 6.537,
"step": 20480
},
{
"epoch": 1.34,
"learning_rate": 0.0004414445597486605,
"loss": 0.399,
"step": 24576
},
{
"epoch": 1.34,
"eval_loss": 0.44468095898628235,
"eval_runtime": 32.706,
"eval_samples_per_second": 105.088,
"eval_steps_per_second": 6.574,
"step": 24576
},
{
"epoch": 1.57,
"learning_rate": 0.00042130386669061293,
"loss": 0.3882,
"step": 28672
},
{
"epoch": 1.57,
"eval_loss": 0.44857361912727356,
"eval_runtime": 32.9754,
"eval_samples_per_second": 104.229,
"eval_steps_per_second": 6.52,
"step": 28672
},
{
"epoch": 1.79,
"learning_rate": 0.0003988010477498867,
"loss": 0.3767,
"step": 32768
},
{
"epoch": 1.79,
"eval_loss": 0.44354742765426636,
"eval_runtime": 32.8219,
"eval_samples_per_second": 104.717,
"eval_steps_per_second": 6.55,
"step": 32768
},
{
"epoch": 2.01,
"learning_rate": 0.0003742566178542921,
"loss": 0.3676,
"step": 36864
},
{
"epoch": 2.01,
"eval_loss": 0.43940743803977966,
"eval_runtime": 32.9279,
"eval_samples_per_second": 104.38,
"eval_steps_per_second": 6.529,
"step": 36864
},
{
"epoch": 2.24,
"learning_rate": 0.0003479963856008823,
"loss": 0.3577,
"step": 40960
},
{
"epoch": 2.24,
"eval_loss": 0.4323909878730774,
"eval_runtime": 33.0718,
"eval_samples_per_second": 103.926,
"eval_steps_per_second": 6.501,
"step": 40960
},
{
"epoch": 2.46,
"learning_rate": 0.0003203943839704654,
"loss": 0.3517,
"step": 45056
},
{
"epoch": 2.46,
"eval_loss": 0.43262797594070435,
"eval_runtime": 32.8978,
"eval_samples_per_second": 104.475,
"eval_steps_per_second": 6.535,
"step": 45056
},
{
"epoch": 2.69,
"learning_rate": 0.000291817008494138,
"loss": 0.3427,
"step": 49152
},
{
"epoch": 2.69,
"eval_loss": 0.4303751289844513,
"eval_runtime": 32.7306,
"eval_samples_per_second": 105.009,
"eval_steps_per_second": 6.569,
"step": 49152
}
],
"max_steps": 109800,
"num_train_epochs": 6,
"total_flos": 2.28488651552981e+17,
"trial_name": null,
"trial_params": null
}