|
{ |
|
"best_metric": 2.588986396789551, |
|
"best_model_checkpoint": "output/radiohead/checkpoint-240", |
|
"epoch": 8.0, |
|
"global_step": 240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001280093426996125, |
|
"loss": 3.2073, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00010290000000000001, |
|
"loss": 2.9899, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 6.86e-05, |
|
"loss": 2.8292, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.4300000000000014e-05, |
|
"loss": 2.8291, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.190657300387505e-06, |
|
"loss": 2.8479, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.9261, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.8060529232025146, |
|
"eval_runtime": 2.0848, |
|
"eval_samples_per_second": 22.545, |
|
"eval_steps_per_second": 2.878, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.190657300387513e-06, |
|
"loss": 2.6517, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.429999999999997e-05, |
|
"loss": 2.7329, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 6.859999999999999e-05, |
|
"loss": 2.7524, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00010290000000000001, |
|
"loss": 2.5591, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00012800934269961248, |
|
"loss": 2.6201, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 2.5207, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.7088782787323, |
|
"eval_runtime": 2.1182, |
|
"eval_samples_per_second": 22.189, |
|
"eval_steps_per_second": 2.833, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00012800934269961253, |
|
"loss": 2.3985, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00010289999999999998, |
|
"loss": 2.3027, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.860000000000001e-05, |
|
"loss": 2.1903, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.4300000000000054e-05, |
|
"loss": 2.2458, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 9.190657300387498e-06, |
|
"loss": 2.2898, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.2945, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.6637511253356934, |
|
"eval_runtime": 2.1338, |
|
"eval_samples_per_second": 22.027, |
|
"eval_steps_per_second": 2.812, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 9.190657300387474e-06, |
|
"loss": 2.1611, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.4300000000000014e-05, |
|
"loss": 2.0392, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.859999999999997e-05, |
|
"loss": 2.3157, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00010289999999999994, |
|
"loss": 2.1789, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0001280093426996125, |
|
"loss": 2.1359, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 2.0964, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.617748737335205, |
|
"eval_runtime": 2.1438, |
|
"eval_samples_per_second": 21.924, |
|
"eval_steps_per_second": 2.799, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.00012800934269961248, |
|
"loss": 1.8717, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.00010290000000000009, |
|
"loss": 1.9838, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 6.860000000000003e-05, |
|
"loss": 2.0446, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3.429999999999996e-05, |
|
"loss": 1.8428, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 9.190657300387574e-06, |
|
"loss": 1.9594, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.0192, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.6335015296936035, |
|
"eval_runtime": 2.1534, |
|
"eval_samples_per_second": 21.826, |
|
"eval_steps_per_second": 2.786, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 9.190657300387467e-06, |
|
"loss": 1.7401, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 3.429999999999989e-05, |
|
"loss": 1.8589, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 6.859999999999984e-05, |
|
"loss": 1.7454, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00010290000000000003, |
|
"loss": 1.8506, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.00012800934269961248, |
|
"loss": 1.7711, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 1.6952, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.604886293411255, |
|
"eval_runtime": 2.1277, |
|
"eval_samples_per_second": 22.09, |
|
"eval_steps_per_second": 2.82, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00012800934269961253, |
|
"loss": 1.7717, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00010290000000000012, |
|
"loss": 1.577, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 6.859999999999993e-05, |
|
"loss": 1.5197, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 3.429999999999998e-05, |
|
"loss": 1.6176, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 9.19065730038752e-06, |
|
"loss": 1.562, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.6157, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.6068503856658936, |
|
"eval_runtime": 2.1309, |
|
"eval_samples_per_second": 22.056, |
|
"eval_steps_per_second": 2.816, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 9.190657300387459e-06, |
|
"loss": 1.5445, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 3.429999999999988e-05, |
|
"loss": 1.5187, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 6.859999999999982e-05, |
|
"loss": 1.4775, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.00010290000000000002, |
|
"loss": 1.3245, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00012800934269961248, |
|
"loss": 1.6077, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 1.5085, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.588986396789551, |
|
"eval_runtime": 2.1323, |
|
"eval_samples_per_second": 22.042, |
|
"eval_steps_per_second": 2.814, |
|
"step": 240 |
|
} |
|
], |
|
"max_steps": 300, |
|
"num_train_epochs": 10, |
|
"total_flos": 243524173824000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|