{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9968, "eval_steps": 500, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": "0.0000e+00", "loss": 2.2666, "slid_loss": 2.2666, "step": 1, "time": 42.16 }, { "epoch": 0.05, "learning_rate": "5.0000e-06", "loss": 2.2601, "slid_loss": 2.2634, "step": 2, "time": 34.12 }, { "epoch": 0.08, "learning_rate": "5.0000e-06", "loss": 2.3071, "slid_loss": 2.2779, "step": 3, "time": 33.4 }, { "epoch": 0.1, "learning_rate": "5.0000e-06", "loss": 2.1847, "slid_loss": 2.2546, "step": 4, "time": 33.28 }, { "epoch": 0.13, "learning_rate": "5.0000e-06", "loss": 2.2277, "slid_loss": 2.2492, "step": 5, "time": 34.62 }, { "epoch": 0.15, "learning_rate": "5.0000e-06", "loss": 2.1922, "slid_loss": 2.2397, "step": 6, "time": 32.87 }, { "epoch": 0.18, "learning_rate": "5.0000e-06", "loss": 2.168, "slid_loss": 2.2295, "step": 7, "time": 33.59 }, { "epoch": 0.2, "learning_rate": "5.0000e-06", "loss": 2.2024, "slid_loss": 2.2261, "step": 8, "time": 33.64 }, { "epoch": 0.23, "learning_rate": "5.0000e-06", "loss": 2.1198, "slid_loss": 2.2143, "step": 9, "time": 35.32 }, { "epoch": 0.26, "learning_rate": "5.0000e-06", "loss": 2.139, "slid_loss": 2.2068, "step": 10, "time": 33.38 }, { "epoch": 0.28, "learning_rate": "5.0000e-06", "loss": 2.1052, "slid_loss": 2.1975, "step": 11, "time": 33.38 }, { "epoch": 0.31, "learning_rate": "5.0000e-06", "loss": 2.1561, "slid_loss": 2.1941, "step": 12, "time": 33.0 }, { "epoch": 0.33, "learning_rate": "5.0000e-06", "loss": 2.085, "slid_loss": 2.1857, "step": 13, "time": 32.73 }, { "epoch": 0.36, "learning_rate": "5.0000e-06", "loss": 2.1404, "slid_loss": 2.1824, "step": 14, "time": 33.91 }, { "epoch": 0.38, "learning_rate": "5.0000e-06", "loss": 2.0282, "slid_loss": 2.1722, "step": 15, "time": 32.97 }, { "epoch": 0.41, "learning_rate": "5.0000e-06", "loss": 2.0576, "slid_loss": 2.165, "step": 16, "time": 32.89 }, { "epoch": 0.44, "learning_rate": "5.0000e-06", "loss": 2.0584, "slid_loss": 2.1587, "step": 17, "time": 33.64 }, { "epoch": 0.46, "learning_rate": "5.0000e-06", "loss": 2.086, "slid_loss": 2.1547, "step": 18, "time": 35.21 }, { "epoch": 0.49, "learning_rate": "5.0000e-06", "loss": 2.0918, "slid_loss": 2.1514, "step": 19, "time": 33.29 }, { "epoch": 0.51, "learning_rate": "5.0000e-06", "loss": 2.0255, "slid_loss": 2.1451, "step": 20, "time": 33.69 }, { "epoch": 0.54, "learning_rate": "5.0000e-06", "loss": 2.0119, "slid_loss": 2.1387, "step": 21, "time": 33.5 }, { "epoch": 0.56, "learning_rate": "5.0000e-06", "loss": 1.9633, "slid_loss": 2.1308, "step": 22, "time": 35.21 }, { "epoch": 0.59, "learning_rate": "5.0000e-06", "loss": 2.0063, "slid_loss": 2.1254, "step": 23, "time": 32.96 }, { "epoch": 0.61, "learning_rate": "5.0000e-06", "loss": 2.0122, "slid_loss": 2.1206, "step": 24, "time": 33.34 }, { "epoch": 0.64, "learning_rate": "5.0000e-06", "loss": 1.9364, "slid_loss": 2.1133, "step": 25, "time": 33.35 }, { "epoch": 0.67, "learning_rate": "5.0000e-06", "loss": 1.9493, "slid_loss": 2.107, "step": 26, "time": 33.24 }, { "epoch": 0.69, "learning_rate": "5.0000e-06", "loss": 1.9124, "slid_loss": 2.0998, "step": 27, "time": 33.34 }, { "epoch": 0.72, "learning_rate": "5.0000e-06", "loss": 1.9077, "slid_loss": 2.0929, "step": 28, "time": 33.03 }, { "epoch": 0.74, "learning_rate": "5.0000e-06", "loss": 1.9838, "slid_loss": 2.0891, "step": 29, "time": 34.5 }, { "epoch": 0.77, "learning_rate": "5.0000e-06", "loss": 1.988, "slid_loss": 2.0858, "step": 30, "time": 33.39 }, { "epoch": 0.79, "learning_rate": "5.0000e-06", "loss": 1.9561, "slid_loss": 2.0816, "step": 31, "time": 33.25 }, { "epoch": 0.82, "learning_rate": "5.0000e-06", "loss": 1.8664, "slid_loss": 2.0749, "step": 32, "time": 32.75 }, { "epoch": 0.84, "learning_rate": "5.0000e-06", "loss": 1.8385, "slid_loss": 2.0677, "step": 33, "time": 33.61 }, { "epoch": 0.87, "learning_rate": "5.0000e-06", "loss": 1.8827, "slid_loss": 2.0623, "step": 34, "time": 33.48 }, { "epoch": 0.9, "learning_rate": "5.0000e-06", "loss": 1.8249, "slid_loss": 2.0555, "step": 35, "time": 33.62 }, { "epoch": 0.92, "learning_rate": "5.0000e-06", "loss": 1.8204, "slid_loss": 2.049, "step": 36, "time": 33.21 }, { "epoch": 0.95, "learning_rate": "5.0000e-06", "loss": 1.8761, "slid_loss": 2.0443, "step": 37, "time": 32.95 }, { "epoch": 0.97, "learning_rate": "5.0000e-06", "loss": 1.8621, "slid_loss": 2.0395, "step": 38, "time": 33.02 }, { "epoch": 1.0, "learning_rate": "5.0000e-06", "loss": 1.7632, "slid_loss": 2.0324, "step": 39, "time": 32.9 }, { "epoch": 1.02, "learning_rate": "5.0000e-06", "loss": 1.8407, "slid_loss": 2.0276, "step": 40, "time": 192.32 }, { "epoch": 1.05, "learning_rate": "5.0000e-06", "loss": 1.7514, "slid_loss": 2.0209, "step": 41, "time": 33.2 }, { "epoch": 1.08, "learning_rate": "5.0000e-06", "loss": 1.7342, "slid_loss": 2.014, "step": 42, "time": 33.32 }, { "epoch": 1.1, "learning_rate": "5.0000e-06", "loss": 1.7591, "slid_loss": 2.0081, "step": 43, "time": 32.94 }, { "epoch": 1.13, "learning_rate": "5.0000e-06", "loss": 1.7156, "slid_loss": 2.0015, "step": 44, "time": 32.85 }, { "epoch": 1.15, "learning_rate": "5.0000e-06", "loss": 1.7146, "slid_loss": 1.9951, "step": 45, "time": 32.84 }, { "epoch": 1.18, "learning_rate": "5.0000e-06", "loss": 1.7197, "slid_loss": 1.9891, "step": 46, "time": 32.83 }, { "epoch": 1.2, "learning_rate": "5.0000e-06", "loss": 1.6992, "slid_loss": 1.9829, "step": 47, "time": 33.24 }, { "epoch": 1.23, "learning_rate": "5.0000e-06", "loss": 1.7154, "slid_loss": 1.9774, "step": 48, "time": 34.15 }, { "epoch": 1.25, "learning_rate": "5.0000e-06", "loss": 1.6725, "slid_loss": 1.9711, "step": 49, "time": 35.49 }, { "epoch": 1.28, "learning_rate": "5.0000e-06", "loss": 1.6221, "slid_loss": 1.9642, "step": 50, "time": 33.02 }, { "epoch": 1.31, "learning_rate": "5.0000e-06", "loss": 1.656, "slid_loss": 1.9581, "step": 51, "time": 33.54 }, { "epoch": 1.33, "learning_rate": "5.0000e-06", "loss": 1.6232, "slid_loss": 1.9517, "step": 52, "time": 33.15 }, { "epoch": 1.36, "learning_rate": "5.0000e-06", "loss": 1.6363, "slid_loss": 1.9457, "step": 53, "time": 33.17 }, { "epoch": 1.38, "learning_rate": "5.0000e-06", "loss": 1.6079, "slid_loss": 1.9395, "step": 54, "time": 32.8 }, { "epoch": 1.41, "learning_rate": "5.0000e-06", "loss": 1.5803, "slid_loss": 1.9329, "step": 55, "time": 33.72 }, { "epoch": 1.43, "learning_rate": "5.0000e-06", "loss": 1.5249, "slid_loss": 1.9257, "step": 56, "time": 33.48 }, { "epoch": 1.46, "learning_rate": "5.0000e-06", "loss": 1.624, "slid_loss": 1.9204, "step": 57, "time": 33.19 }, { "epoch": 1.48, "learning_rate": "5.0000e-06", "loss": 1.5509, "slid_loss": 1.914, "step": 58, "time": 32.7 }, { "epoch": 1.51, "learning_rate": "5.0000e-06", "loss": 1.5339, "slid_loss": 1.9076, "step": 59, "time": 34.98 }, { "epoch": 1.54, "learning_rate": "5.0000e-06", "loss": 1.559, "slid_loss": 1.9017, "step": 60, "time": 33.29 }, { "epoch": 1.56, "learning_rate": "5.0000e-06", "loss": 1.4958, "slid_loss": 1.8951, "step": 61, "time": 32.61 }, { "epoch": 1.59, "learning_rate": "5.0000e-06", "loss": 1.4871, "slid_loss": 1.8885, "step": 62, "time": 33.46 }, { "epoch": 1.61, "learning_rate": "5.0000e-06", "loss": 1.4523, "slid_loss": 1.8816, "step": 63, "time": 32.93 }, { "epoch": 1.64, "learning_rate": "5.0000e-06", "loss": 1.4786, "slid_loss": 1.8753, "step": 64, "time": 33.78 }, { "epoch": 1.66, "learning_rate": "5.0000e-06", "loss": 1.4455, "slid_loss": 1.8687, "step": 65, "time": 32.82 }, { "epoch": 1.69, "learning_rate": "5.0000e-06", "loss": 1.4159, "slid_loss": 1.8618, "step": 66, "time": 34.87 }, { "epoch": 1.72, "learning_rate": "5.0000e-06", "loss": 1.3869, "slid_loss": 1.8547, "step": 67, "time": 33.06 }, { "epoch": 1.74, "learning_rate": "5.0000e-06", "loss": 1.3814, "slid_loss": 1.8478, "step": 68, "time": 34.85 }, { "epoch": 1.77, "learning_rate": "5.0000e-06", "loss": 1.3668, "slid_loss": 1.8408, "step": 69, "time": 33.18 }, { "epoch": 1.79, "learning_rate": "5.0000e-06", "loss": 1.4419, "slid_loss": 1.8351, "step": 70, "time": 34.61 }, { "epoch": 1.82, "learning_rate": "5.0000e-06", "loss": 1.3532, "slid_loss": 1.8283, "step": 71, "time": 33.92 }, { "epoch": 1.84, "learning_rate": "5.0000e-06", "loss": 1.343, "slid_loss": 1.8216, "step": 72, "time": 32.6 }, { "epoch": 1.87, "learning_rate": "5.0000e-06", "loss": 1.3843, "slid_loss": 1.8156, "step": 73, "time": 32.92 }, { "epoch": 1.89, "learning_rate": "5.0000e-06", "loss": 1.3455, "slid_loss": 1.8092, "step": 74, "time": 33.47 }, { "epoch": 1.92, "learning_rate": "5.0000e-06", "loss": 1.3042, "slid_loss": 1.8025, "step": 75, "time": 33.54 }, { "epoch": 1.95, "learning_rate": "5.0000e-06", "loss": 1.347, "slid_loss": 1.7965, "step": 76, "time": 33.22 }, { "epoch": 1.97, "learning_rate": "5.0000e-06", "loss": 1.237, "slid_loss": 1.7892, "step": 77, "time": 33.25 }, { "epoch": 2.0, "learning_rate": "5.0000e-06", "loss": 1.1854, "slid_loss": 1.7815, "step": 78, "time": 33.47 }, { "epoch": 2.0, "step": 78, "time": 167.03, "total_flos": 0.0, "train_loss": 1.781490119603964, "train_runtime": 2945.5278, "train_samples_per_second": 6.79, "train_steps_per_second": 0.026 } ], "logging_steps": 1.0, "max_steps": 78, "num_train_epochs": 2, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }