minimario's picture
add checkpoint-200
1d223b8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.030917575675597338,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.999226963512678e-06,
"loss": 1.1361,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 9.998453927025357e-06,
"loss": 0.7353,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 9.997680890538034e-06,
"loss": 0.6909,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 9.996907854050712e-06,
"loss": 0.6498,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 9.99613481756339e-06,
"loss": 0.6414,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 9.995361781076068e-06,
"loss": 0.6415,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 9.994588744588745e-06,
"loss": 0.6317,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 9.993815708101423e-06,
"loss": 0.6378,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 9.9930426716141e-06,
"loss": 0.6347,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 9.992269635126779e-06,
"loss": 0.5924,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 9.991496598639456e-06,
"loss": 0.6046,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 9.990723562152135e-06,
"loss": 0.6045,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 9.989950525664813e-06,
"loss": 0.6,
"step": 65
},
{
"epoch": 0.01,
"learning_rate": 9.98917748917749e-06,
"loss": 0.5504,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 9.988404452690169e-06,
"loss": 0.5747,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 9.987631416202846e-06,
"loss": 0.5526,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 9.986858379715523e-06,
"loss": 0.5958,
"step": 85
},
{
"epoch": 0.01,
"learning_rate": 9.9860853432282e-06,
"loss": 0.608,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 9.985312306740878e-06,
"loss": 0.5988,
"step": 95
},
{
"epoch": 0.02,
"learning_rate": 9.984539270253557e-06,
"loss": 0.5861,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 9.983766233766234e-06,
"loss": 0.5749,
"step": 105
},
{
"epoch": 0.02,
"learning_rate": 9.982993197278913e-06,
"loss": 0.5498,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 9.98222016079159e-06,
"loss": 0.5841,
"step": 115
},
{
"epoch": 0.02,
"learning_rate": 9.981447124304268e-06,
"loss": 0.5973,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 9.980674087816947e-06,
"loss": 0.5954,
"step": 125
},
{
"epoch": 0.02,
"learning_rate": 9.979901051329624e-06,
"loss": 0.527,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 9.979128014842301e-06,
"loss": 0.5321,
"step": 135
},
{
"epoch": 0.02,
"learning_rate": 9.978354978354979e-06,
"loss": 0.5781,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 9.977581941867656e-06,
"loss": 0.5119,
"step": 145
},
{
"epoch": 0.02,
"learning_rate": 9.976808905380335e-06,
"loss": 0.5271,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 9.976035868893012e-06,
"loss": 0.5814,
"step": 155
},
{
"epoch": 0.02,
"learning_rate": 9.97526283240569e-06,
"loss": 0.518,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 9.974489795918369e-06,
"loss": 0.5335,
"step": 165
},
{
"epoch": 0.03,
"learning_rate": 9.973716759431046e-06,
"loss": 0.5062,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 9.972943722943725e-06,
"loss": 0.5253,
"step": 175
},
{
"epoch": 0.03,
"learning_rate": 9.972170686456402e-06,
"loss": 0.5856,
"step": 180
},
{
"epoch": 0.03,
"learning_rate": 9.97139764996908e-06,
"loss": 0.5196,
"step": 185
},
{
"epoch": 0.03,
"learning_rate": 9.970624613481757e-06,
"loss": 0.4764,
"step": 190
},
{
"epoch": 0.03,
"learning_rate": 9.969851576994434e-06,
"loss": 0.5254,
"step": 195
},
{
"epoch": 0.03,
"learning_rate": 9.969078540507111e-06,
"loss": 0.5442,
"step": 200
},
{
"epoch": 0.03,
"eval_accuracy": 0.5804400673190799,
"eval_accuracy_sklearn": 0.5804400673190799,
"eval_f1": 0.5294915349019279,
"eval_loss": 0.7918509840965271,
"eval_precision": 0.6370946036872561,
"eval_recall": 0.45298409281186464,
"eval_runtime": 4914.2737,
"eval_samples_per_second": 16.323,
"eval_steps_per_second": 2.04,
"step": 200
}
],
"max_steps": 64680,
"num_train_epochs": 10,
"total_flos": 2.37535583797248e+16,
"trial_name": null,
"trial_params": null
}