|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 6400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5625e-06, |
|
"loss": 2.5618, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.125e-06, |
|
"loss": 2.4474, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"loss": 2.3184, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.25e-06, |
|
"loss": 2.2942, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 2.26, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 2.2509, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.09375e-05, |
|
"loss": 2.2584, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.25e-05, |
|
"loss": 2.2217, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.4062500000000001e-05, |
|
"loss": 2.2244, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 2.181, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.71875e-05, |
|
"loss": 2.1925, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 2.1933, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.0312500000000002e-05, |
|
"loss": 2.1838, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.1875e-05, |
|
"loss": 2.1899, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.34375e-05, |
|
"loss": 2.1938, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.1664, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6562500000000002e-05, |
|
"loss": 2.1748, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.8125000000000003e-05, |
|
"loss": 2.184, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.96875e-05, |
|
"loss": 2.1452, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.125e-05, |
|
"loss": 2.155, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.2812500000000005e-05, |
|
"loss": 2.1591, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.4375e-05, |
|
"loss": 2.143, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.59375e-05, |
|
"loss": 2.1634, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 2.1416, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.90625e-05, |
|
"loss": 2.1293, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.0625000000000005e-05, |
|
"loss": 2.1376, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.21875e-05, |
|
"loss": 2.1396, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.375e-05, |
|
"loss": 2.1207, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.5312500000000004e-05, |
|
"loss": 2.123, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 2.1427, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.8437500000000005e-05, |
|
"loss": 2.1515, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1454, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.15625e-05, |
|
"loss": 2.1264, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.3125000000000004e-05, |
|
"loss": 2.1426, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5.46875e-05, |
|
"loss": 2.105, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5.6250000000000005e-05, |
|
"loss": 2.1243, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5.78125e-05, |
|
"loss": 2.1041, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5.9375e-05, |
|
"loss": 2.124, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.0937500000000004e-05, |
|
"loss": 2.1276, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.25e-05, |
|
"loss": 2.1318, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.40625e-05, |
|
"loss": 2.1337, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.562500000000001e-05, |
|
"loss": 2.1159, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.71875e-05, |
|
"loss": 2.1183, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.875e-05, |
|
"loss": 2.1199, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.031250000000001e-05, |
|
"loss": 2.1157, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.1875e-05, |
|
"loss": 2.09, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.34375e-05, |
|
"loss": 2.1108, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 2.0931, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.65625e-05, |
|
"loss": 2.1224, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.8125e-05, |
|
"loss": 2.0918, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.96875e-05, |
|
"loss": 2.1121, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.125000000000001e-05, |
|
"loss": 2.1009, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.28125e-05, |
|
"loss": 2.1047, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.4375e-05, |
|
"loss": 2.0922, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.593750000000001e-05, |
|
"loss": 2.1091, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.75e-05, |
|
"loss": 2.1061, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.90625e-05, |
|
"loss": 2.1206, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.062500000000001e-05, |
|
"loss": 2.0945, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.21875e-05, |
|
"loss": 2.0924, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.375e-05, |
|
"loss": 2.1014, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.53125e-05, |
|
"loss": 2.0984, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.687500000000001e-05, |
|
"loss": 2.0709, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.84375e-05, |
|
"loss": 2.1051, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1052, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6366426454087312, |
|
"eval_loss": 1.9433484077453613, |
|
"eval_runtime": 50910.5556, |
|
"eval_samples_per_second": 6.44, |
|
"eval_steps_per_second": 0.805, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 6400, |
|
"total_flos": 2.707934115004416e+17, |
|
"train_loss": 2.156327223777771, |
|
"train_runtime": 107381.8296, |
|
"train_samples_per_second": 3.814, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"max_steps": 6400, |
|
"num_train_epochs": 9223372036854775807, |
|
"total_flos": 2.707934115004416e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|