|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9968, |
|
"eval_steps": 500, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": "0.0000e+00", |
|
"loss": 2.2666, |
|
"slid_loss": 2.2666, |
|
"step": 1, |
|
"time": 42.16 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.2601, |
|
"slid_loss": 2.2634, |
|
"step": 2, |
|
"time": 34.12 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.3071, |
|
"slid_loss": 2.2779, |
|
"step": 3, |
|
"time": 33.4 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.1847, |
|
"slid_loss": 2.2546, |
|
"step": 4, |
|
"time": 33.28 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.2277, |
|
"slid_loss": 2.2492, |
|
"step": 5, |
|
"time": 34.62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.1922, |
|
"slid_loss": 2.2397, |
|
"step": 6, |
|
"time": 32.87 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.168, |
|
"slid_loss": 2.2295, |
|
"step": 7, |
|
"time": 33.59 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.2024, |
|
"slid_loss": 2.2261, |
|
"step": 8, |
|
"time": 33.64 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.1198, |
|
"slid_loss": 2.2143, |
|
"step": 9, |
|
"time": 35.32 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.139, |
|
"slid_loss": 2.2068, |
|
"step": 10, |
|
"time": 33.38 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.1052, |
|
"slid_loss": 2.1975, |
|
"step": 11, |
|
"time": 33.38 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.1561, |
|
"slid_loss": 2.1941, |
|
"step": 12, |
|
"time": 33.0 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.085, |
|
"slid_loss": 2.1857, |
|
"step": 13, |
|
"time": 32.73 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.1404, |
|
"slid_loss": 2.1824, |
|
"step": 14, |
|
"time": 33.91 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0282, |
|
"slid_loss": 2.1722, |
|
"step": 15, |
|
"time": 32.97 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0576, |
|
"slid_loss": 2.165, |
|
"step": 16, |
|
"time": 32.89 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0584, |
|
"slid_loss": 2.1587, |
|
"step": 17, |
|
"time": 33.64 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.086, |
|
"slid_loss": 2.1547, |
|
"step": 18, |
|
"time": 35.21 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0918, |
|
"slid_loss": 2.1514, |
|
"step": 19, |
|
"time": 33.29 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0255, |
|
"slid_loss": 2.1451, |
|
"step": 20, |
|
"time": 33.69 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0119, |
|
"slid_loss": 2.1387, |
|
"step": 21, |
|
"time": 33.5 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.9633, |
|
"slid_loss": 2.1308, |
|
"step": 22, |
|
"time": 35.21 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0063, |
|
"slid_loss": 2.1254, |
|
"step": 23, |
|
"time": 32.96 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 2.0122, |
|
"slid_loss": 2.1206, |
|
"step": 24, |
|
"time": 33.34 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.9364, |
|
"slid_loss": 2.1133, |
|
"step": 25, |
|
"time": 33.35 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.9493, |
|
"slid_loss": 2.107, |
|
"step": 26, |
|
"time": 33.24 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.9124, |
|
"slid_loss": 2.0998, |
|
"step": 27, |
|
"time": 33.34 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.9077, |
|
"slid_loss": 2.0929, |
|
"step": 28, |
|
"time": 33.03 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.9838, |
|
"slid_loss": 2.0891, |
|
"step": 29, |
|
"time": 34.5 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.988, |
|
"slid_loss": 2.0858, |
|
"step": 30, |
|
"time": 33.39 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.9561, |
|
"slid_loss": 2.0816, |
|
"step": 31, |
|
"time": 33.25 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8664, |
|
"slid_loss": 2.0749, |
|
"step": 32, |
|
"time": 32.75 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8385, |
|
"slid_loss": 2.0677, |
|
"step": 33, |
|
"time": 33.61 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8827, |
|
"slid_loss": 2.0623, |
|
"step": 34, |
|
"time": 33.48 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8249, |
|
"slid_loss": 2.0555, |
|
"step": 35, |
|
"time": 33.62 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8204, |
|
"slid_loss": 2.049, |
|
"step": 36, |
|
"time": 33.21 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8761, |
|
"slid_loss": 2.0443, |
|
"step": 37, |
|
"time": 32.95 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8621, |
|
"slid_loss": 2.0395, |
|
"step": 38, |
|
"time": 33.02 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7632, |
|
"slid_loss": 2.0324, |
|
"step": 39, |
|
"time": 32.9 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.8407, |
|
"slid_loss": 2.0276, |
|
"step": 40, |
|
"time": 192.32 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7514, |
|
"slid_loss": 2.0209, |
|
"step": 41, |
|
"time": 33.2 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7342, |
|
"slid_loss": 2.014, |
|
"step": 42, |
|
"time": 33.32 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7591, |
|
"slid_loss": 2.0081, |
|
"step": 43, |
|
"time": 32.94 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7156, |
|
"slid_loss": 2.0015, |
|
"step": 44, |
|
"time": 32.85 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7146, |
|
"slid_loss": 1.9951, |
|
"step": 45, |
|
"time": 32.84 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7197, |
|
"slid_loss": 1.9891, |
|
"step": 46, |
|
"time": 32.83 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.6992, |
|
"slid_loss": 1.9829, |
|
"step": 47, |
|
"time": 33.24 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.7154, |
|
"slid_loss": 1.9774, |
|
"step": 48, |
|
"time": 34.15 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.6725, |
|
"slid_loss": 1.9711, |
|
"step": 49, |
|
"time": 35.49 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.6221, |
|
"slid_loss": 1.9642, |
|
"step": 50, |
|
"time": 33.02 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.656, |
|
"slid_loss": 1.9581, |
|
"step": 51, |
|
"time": 33.54 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.6232, |
|
"slid_loss": 1.9517, |
|
"step": 52, |
|
"time": 33.15 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.6363, |
|
"slid_loss": 1.9457, |
|
"step": 53, |
|
"time": 33.17 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.6079, |
|
"slid_loss": 1.9395, |
|
"step": 54, |
|
"time": 32.8 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.5803, |
|
"slid_loss": 1.9329, |
|
"step": 55, |
|
"time": 33.72 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.5249, |
|
"slid_loss": 1.9257, |
|
"step": 56, |
|
"time": 33.48 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.624, |
|
"slid_loss": 1.9204, |
|
"step": 57, |
|
"time": 33.19 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.5509, |
|
"slid_loss": 1.914, |
|
"step": 58, |
|
"time": 32.7 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.5339, |
|
"slid_loss": 1.9076, |
|
"step": 59, |
|
"time": 34.98 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.559, |
|
"slid_loss": 1.9017, |
|
"step": 60, |
|
"time": 33.29 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.4958, |
|
"slid_loss": 1.8951, |
|
"step": 61, |
|
"time": 32.61 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.4871, |
|
"slid_loss": 1.8885, |
|
"step": 62, |
|
"time": 33.46 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.4523, |
|
"slid_loss": 1.8816, |
|
"step": 63, |
|
"time": 32.93 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.4786, |
|
"slid_loss": 1.8753, |
|
"step": 64, |
|
"time": 33.78 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.4455, |
|
"slid_loss": 1.8687, |
|
"step": 65, |
|
"time": 32.82 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.4159, |
|
"slid_loss": 1.8618, |
|
"step": 66, |
|
"time": 34.87 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.3869, |
|
"slid_loss": 1.8547, |
|
"step": 67, |
|
"time": 33.06 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.3814, |
|
"slid_loss": 1.8478, |
|
"step": 68, |
|
"time": 34.85 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.3668, |
|
"slid_loss": 1.8408, |
|
"step": 69, |
|
"time": 33.18 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.4419, |
|
"slid_loss": 1.8351, |
|
"step": 70, |
|
"time": 34.61 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.3532, |
|
"slid_loss": 1.8283, |
|
"step": 71, |
|
"time": 33.92 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.343, |
|
"slid_loss": 1.8216, |
|
"step": 72, |
|
"time": 32.6 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.3843, |
|
"slid_loss": 1.8156, |
|
"step": 73, |
|
"time": 32.92 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.3455, |
|
"slid_loss": 1.8092, |
|
"step": 74, |
|
"time": 33.47 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.3042, |
|
"slid_loss": 1.8025, |
|
"step": 75, |
|
"time": 33.54 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.347, |
|
"slid_loss": 1.7965, |
|
"step": 76, |
|
"time": 33.22 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.237, |
|
"slid_loss": 1.7892, |
|
"step": 77, |
|
"time": 33.25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": "5.0000e-06", |
|
"loss": 1.1854, |
|
"slid_loss": 1.7815, |
|
"step": 78, |
|
"time": 33.47 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 78, |
|
"time": 167.03, |
|
"total_flos": 0.0, |
|
"train_loss": 1.781490119603964, |
|
"train_runtime": 2945.5278, |
|
"train_samples_per_second": 6.79, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 78, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|