|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.003338898163606, |
|
"eval_steps": 10, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5245, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.3355263157894737, |
|
"eval_loss": 0.6506821513175964, |
|
"eval_runtime": 16.9243, |
|
"eval_samples_per_second": 17.962, |
|
"eval_steps_per_second": 4.491, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6666, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.3815789473684211, |
|
"eval_loss": 0.6464425325393677, |
|
"eval_runtime": 16.9195, |
|
"eval_samples_per_second": 17.967, |
|
"eval_steps_per_second": 4.492, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6527, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.3684210526315789, |
|
"eval_loss": 0.6426967978477478, |
|
"eval_runtime": 16.9282, |
|
"eval_samples_per_second": 17.958, |
|
"eval_steps_per_second": 4.49, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6168, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.3980263157894737, |
|
"eval_loss": 0.6321499943733215, |
|
"eval_runtime": 17.0092, |
|
"eval_samples_per_second": 17.873, |
|
"eval_steps_per_second": 4.468, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6584, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.39144736842105265, |
|
"eval_loss": 0.6181844472885132, |
|
"eval_runtime": 16.9419, |
|
"eval_samples_per_second": 17.944, |
|
"eval_steps_per_second": 4.486, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.586, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.4144736842105263, |
|
"eval_loss": 0.6244160532951355, |
|
"eval_runtime": 16.9269, |
|
"eval_samples_per_second": 17.96, |
|
"eval_steps_per_second": 4.49, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5924, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.4342105263157895, |
|
"eval_loss": 0.6033625602722168, |
|
"eval_runtime": 16.9236, |
|
"eval_samples_per_second": 17.963, |
|
"eval_steps_per_second": 4.491, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6069, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.4375, |
|
"eval_loss": 0.6096391677856445, |
|
"eval_runtime": 16.9238, |
|
"eval_samples_per_second": 17.963, |
|
"eval_steps_per_second": 4.491, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5999, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.4407894736842105, |
|
"eval_loss": 0.6095999479293823, |
|
"eval_runtime": 16.9342, |
|
"eval_samples_per_second": 17.952, |
|
"eval_steps_per_second": 4.488, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6206, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.45723684210526316, |
|
"eval_loss": 0.607021152973175, |
|
"eval_runtime": 16.9304, |
|
"eval_samples_per_second": 17.956, |
|
"eval_steps_per_second": 4.489, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5793, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.45723684210526316, |
|
"eval_loss": 0.601601243019104, |
|
"eval_runtime": 16.9375, |
|
"eval_samples_per_second": 17.948, |
|
"eval_steps_per_second": 4.487, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6208, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.4605263157894737, |
|
"eval_loss": 0.5902404189109802, |
|
"eval_runtime": 16.924, |
|
"eval_samples_per_second": 17.963, |
|
"eval_steps_per_second": 4.491, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5622, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.4769736842105263, |
|
"eval_loss": 0.5775408744812012, |
|
"eval_runtime": 16.9329, |
|
"eval_samples_per_second": 17.953, |
|
"eval_steps_per_second": 4.488, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5502, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.46710526315789475, |
|
"eval_loss": 0.57607102394104, |
|
"eval_runtime": 16.9226, |
|
"eval_samples_per_second": 17.964, |
|
"eval_steps_per_second": 4.491, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5958, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4901315789473684, |
|
"eval_loss": 0.5606401562690735, |
|
"eval_runtime": 16.929, |
|
"eval_samples_per_second": 17.957, |
|
"eval_steps_per_second": 4.489, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4558, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.47368421052631576, |
|
"eval_loss": 0.5839833617210388, |
|
"eval_runtime": 16.9304, |
|
"eval_samples_per_second": 17.956, |
|
"eval_steps_per_second": 4.489, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4411, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.4901315789473684, |
|
"eval_loss": 0.5631235837936401, |
|
"eval_runtime": 16.9238, |
|
"eval_samples_per_second": 17.963, |
|
"eval_steps_per_second": 4.491, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4144, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.5744868516921997, |
|
"eval_runtime": 16.9382, |
|
"eval_samples_per_second": 17.948, |
|
"eval_steps_per_second": 4.487, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4647, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_accuracy": 0.4605263157894737, |
|
"eval_loss": 0.593177080154419, |
|
"eval_runtime": 16.932, |
|
"eval_samples_per_second": 17.954, |
|
"eval_steps_per_second": 4.489, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4504, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.5098684210526315, |
|
"eval_loss": 0.5798581838607788, |
|
"eval_runtime": 16.9337, |
|
"eval_samples_per_second": 17.952, |
|
"eval_steps_per_second": 4.488, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4299, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.4934210526315789, |
|
"eval_loss": 0.64882493019104, |
|
"eval_runtime": 16.9391, |
|
"eval_samples_per_second": 17.947, |
|
"eval_steps_per_second": 4.487, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.425, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.5131578947368421, |
|
"eval_loss": 0.5704348683357239, |
|
"eval_runtime": 16.9325, |
|
"eval_samples_per_second": 17.954, |
|
"eval_steps_per_second": 4.488, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4152, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.506578947368421, |
|
"eval_loss": 0.5582014322280884, |
|
"eval_runtime": 16.9258, |
|
"eval_samples_per_second": 17.961, |
|
"eval_steps_per_second": 4.49, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.425, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.5328947368421053, |
|
"eval_loss": 0.5488855838775635, |
|
"eval_runtime": 16.9288, |
|
"eval_samples_per_second": 17.958, |
|
"eval_steps_per_second": 4.489, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.446, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.5197368421052632, |
|
"eval_loss": 0.5479023456573486, |
|
"eval_runtime": 16.9319, |
|
"eval_samples_per_second": 17.954, |
|
"eval_steps_per_second": 4.489, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3908, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.5164473684210527, |
|
"eval_loss": 0.5564107894897461, |
|
"eval_runtime": 16.9414, |
|
"eval_samples_per_second": 17.944, |
|
"eval_steps_per_second": 4.486, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.443, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.5032894736842105, |
|
"eval_loss": 0.5418796539306641, |
|
"eval_runtime": 16.9208, |
|
"eval_samples_per_second": 17.966, |
|
"eval_steps_per_second": 4.492, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4081, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.506578947368421, |
|
"eval_loss": 0.5948407053947449, |
|
"eval_runtime": 16.9289, |
|
"eval_samples_per_second": 17.957, |
|
"eval_steps_per_second": 4.489, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3944, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.5394736842105263, |
|
"eval_loss": 0.554680347442627, |
|
"eval_runtime": 16.9311, |
|
"eval_samples_per_second": 17.955, |
|
"eval_steps_per_second": 4.489, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4005, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5361842105263158, |
|
"eval_loss": 0.5615983009338379, |
|
"eval_runtime": 16.9277, |
|
"eval_samples_per_second": 17.959, |
|
"eval_steps_per_second": 4.49, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 300, |
|
"total_flos": 7.380159778455552e+16, |
|
"train_loss": 0.5479170862833659, |
|
"train_runtime": 1462.1781, |
|
"train_samples_per_second": 3.283, |
|
"train_steps_per_second": 0.205 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 150, |
|
"total_flos": 7.380159778455552e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|