{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.99979512395001, "global_step": 19520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 0.0004871926229508197, "loss": 0.4245, "step": 500 }, { "epoch": 0.41, "learning_rate": 0.00047438524590163935, "loss": 0.3278, "step": 1000 }, { "epoch": 0.61, "learning_rate": 0.000461577868852459, "loss": 0.2985, "step": 1500 }, { "epoch": 0.82, "learning_rate": 0.00044877049180327874, "loss": 0.2814, "step": 2000 }, { "epoch": 1.02, "learning_rate": 0.0004359631147540984, "loss": 0.2548, "step": 2500 }, { "epoch": 1.23, "learning_rate": 0.0004231557377049181, "loss": 0.2388, "step": 3000 }, { "epoch": 1.43, "learning_rate": 0.0004103483606557377, "loss": 0.2264, "step": 3500 }, { "epoch": 1.64, "learning_rate": 0.00039754098360655736, "loss": 0.2204, "step": 4000 }, { "epoch": 1.84, "learning_rate": 0.00038473360655737703, "loss": 0.2129, "step": 4500 }, { "epoch": 2.05, "learning_rate": 0.0003719262295081967, "loss": 0.2088, "step": 5000 }, { "epoch": 2.25, "learning_rate": 0.00035911885245901637, "loss": 0.1916, "step": 5500 }, { "epoch": 2.46, "learning_rate": 0.0003463114754098361, "loss": 0.182, "step": 6000 }, { "epoch": 2.66, "learning_rate": 0.00033350409836065576, "loss": 0.1856, "step": 6500 }, { "epoch": 2.87, "learning_rate": 0.00032069672131147543, "loss": 0.1852, "step": 7000 }, { "epoch": 3.07, "learning_rate": 0.0003078893442622951, "loss": 0.1718, "step": 7500 }, { "epoch": 3.28, "learning_rate": 0.00029508196721311476, "loss": 0.1642, "step": 8000 }, { "epoch": 3.48, "learning_rate": 0.00028227459016393443, "loss": 0.1619, "step": 8500 }, { "epoch": 3.69, "learning_rate": 0.0002694672131147541, "loss": 0.1571, "step": 9000 }, { "epoch": 3.89, "learning_rate": 0.00025665983606557377, "loss": 0.161, "step": 9500 }, { "epoch": 4.1, "learning_rate": 0.00024385245901639344, "loss": 0.1409, "step": 10000 }, { "epoch": 4.3, "learning_rate": 0.00023104508196721314, "loss": 0.1418, "step": 10500 }, { "epoch": 4.51, "learning_rate": 0.0002182377049180328, "loss": 0.1456, "step": 11000 }, { "epoch": 4.71, "learning_rate": 0.00020543032786885247, "loss": 0.1405, "step": 11500 }, { "epoch": 4.92, "learning_rate": 0.00019262295081967211, "loss": 0.1415, "step": 12000 }, { "epoch": 5.12, "learning_rate": 0.0001798155737704918, "loss": 0.1316, "step": 12500 }, { "epoch": 5.33, "learning_rate": 0.00016700819672131148, "loss": 0.1265, "step": 13000 }, { "epoch": 5.53, "learning_rate": 0.00015420081967213115, "loss": 0.126, "step": 13500 }, { "epoch": 5.74, "learning_rate": 0.00014139344262295082, "loss": 0.125, "step": 14000 }, { "epoch": 5.94, "learning_rate": 0.0001285860655737705, "loss": 0.1277, "step": 14500 }, { "epoch": 6.15, "learning_rate": 0.00011577868852459017, "loss": 0.1184, "step": 15000 }, { "epoch": 6.35, "learning_rate": 0.00010297131147540984, "loss": 0.1152, "step": 15500 }, { "epoch": 6.56, "learning_rate": 9.016393442622952e-05, "loss": 0.1173, "step": 16000 }, { "epoch": 6.76, "learning_rate": 7.735655737704917e-05, "loss": 0.1131, "step": 16500 }, { "epoch": 6.97, "learning_rate": 6.454918032786886e-05, "loss": 0.114, "step": 17000 }, { "epoch": 7.17, "learning_rate": 5.1741803278688525e-05, "loss": 0.1112, "step": 17500 }, { "epoch": 7.38, "learning_rate": 3.89344262295082e-05, "loss": 0.1097, "step": 18000 }, { "epoch": 7.58, "learning_rate": 2.612704918032787e-05, "loss": 0.106, "step": 18500 }, { "epoch": 7.79, "learning_rate": 1.331967213114754e-05, "loss": 0.108, "step": 19000 }, { "epoch": 7.99, "learning_rate": 5.122950819672132e-07, "loss": 0.1062, "step": 19500 }, { "epoch": 8.0, "step": 19520, "total_flos": 1.2620192221249536e+17, "train_loss": 0.1722577346397228, "train_runtime": 32063.6353, "train_samples_per_second": 19.484, "train_steps_per_second": 0.609 } ], "max_steps": 19520, "num_train_epochs": 8, "total_flos": 1.2620192221249536e+17, "trial_name": null, "trial_params": null }