{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.997867803837953, "eval_steps": 500, "global_step": 234, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0042643923240938165, "grad_norm": 25.654364462455888, "learning_rate": 4.1666666666666667e-07, "loss": 1.5118, "step": 1 }, { "epoch": 0.021321961620469083, "grad_norm": 9.217679826848354, "learning_rate": 2.0833333333333334e-06, "loss": 1.4588, "step": 5 }, { "epoch": 0.042643923240938165, "grad_norm": 3.8364568135386343, "learning_rate": 4.166666666666667e-06, "loss": 1.1844, "step": 10 }, { "epoch": 0.06396588486140725, "grad_norm": 2.7685426084723606, "learning_rate": 6.25e-06, "loss": 1.0266, "step": 15 }, { "epoch": 0.08528784648187633, "grad_norm": 2.714167254334698, "learning_rate": 8.333333333333334e-06, "loss": 0.9764, "step": 20 }, { "epoch": 0.10660980810234541, "grad_norm": 2.575043424767955, "learning_rate": 9.999440509051367e-06, "loss": 0.9474, "step": 25 }, { "epoch": 0.1279317697228145, "grad_norm": 2.5473126644658635, "learning_rate": 9.979871469976197e-06, "loss": 0.9265, "step": 30 }, { "epoch": 0.14925373134328357, "grad_norm": 2.7728522540985927, "learning_rate": 9.932452969617607e-06, "loss": 0.9103, "step": 35 }, { "epoch": 0.17057569296375266, "grad_norm": 2.262656802643975, "learning_rate": 9.857450191464337e-06, "loss": 0.9089, "step": 40 }, { "epoch": 0.19189765458422176, "grad_norm": 2.2929860595064353, "learning_rate": 9.755282581475769e-06, "loss": 0.8839, "step": 45 }, { "epoch": 0.21321961620469082, "grad_norm": 2.9962187125117556, "learning_rate": 9.626521502369984e-06, "loss": 0.8779, "step": 50 }, { "epoch": 0.2345415778251599, "grad_norm": 2.4461853196937744, "learning_rate": 9.471887038331686e-06, "loss": 0.8655, "step": 55 }, { "epoch": 0.255863539445629, "grad_norm": 2.548713200713329, "learning_rate": 9.292243968009332e-06, "loss": 0.8452, "step": 60 }, { "epoch": 0.2771855010660981, "grad_norm": 2.354080355646257, "learning_rate": 9.088596928322158e-06, "loss": 0.8453, "step": 65 }, { "epoch": 0.29850746268656714, "grad_norm": 2.3350186621937494, "learning_rate": 8.862084796122998e-06, "loss": 0.8213, "step": 70 }, { "epoch": 0.31982942430703626, "grad_norm": 2.352888208422696, "learning_rate": 8.613974319136959e-06, "loss": 0.8087, "step": 75 }, { "epoch": 0.3411513859275053, "grad_norm": 2.626490865987853, "learning_rate": 8.345653031794292e-06, "loss": 0.8, "step": 80 }, { "epoch": 0.3624733475479744, "grad_norm": 2.2564126156464934, "learning_rate": 8.058621495575032e-06, "loss": 0.7883, "step": 85 }, { "epoch": 0.3837953091684435, "grad_norm": 2.536678489630529, "learning_rate": 7.754484907260513e-06, "loss": 0.7797, "step": 90 }, { "epoch": 0.4051172707889126, "grad_norm": 2.330261835490306, "learning_rate": 7.434944122021837e-06, "loss": 0.7704, "step": 95 }, { "epoch": 0.42643923240938164, "grad_norm": 2.375473887900136, "learning_rate": 7.101786141547829e-06, "loss": 0.7491, "step": 100 }, { "epoch": 0.44776119402985076, "grad_norm": 2.31845485895562, "learning_rate": 6.7568741204067145e-06, "loss": 0.7426, "step": 105 }, { "epoch": 0.4690831556503198, "grad_norm": 2.2326175780721513, "learning_rate": 6.402136946530014e-06, "loss": 0.7366, "step": 110 }, { "epoch": 0.4904051172707889, "grad_norm": 2.444799836226394, "learning_rate": 6.039558454088796e-06, "loss": 0.7294, "step": 115 }, { "epoch": 0.511727078891258, "grad_norm": 2.42023799653421, "learning_rate": 5.671166329088278e-06, "loss": 
0.7346, "step": 120 }, { "epoch": 0.5330490405117271, "grad_norm": 2.525769921790198, "learning_rate": 5.299020769725172e-06, "loss": 0.716, "step": 125 }, { "epoch": 0.5543710021321961, "grad_norm": 2.210624855154462, "learning_rate": 4.9252029649236835e-06, "loss": 0.7087, "step": 130 }, { "epoch": 0.5756929637526652, "grad_norm": 2.260417777455262, "learning_rate": 4.551803455482833e-06, "loss": 0.6979, "step": 135 }, { "epoch": 0.5970149253731343, "grad_norm": 2.5410734519213847, "learning_rate": 4.180910442924312e-06, "loss": 0.6758, "step": 140 }, { "epoch": 0.6183368869936035, "grad_norm": 2.2197214614990983, "learning_rate": 3.8145981114225135e-06, "loss": 0.6832, "step": 145 }, { "epoch": 0.6396588486140725, "grad_norm": 2.417478197312417, "learning_rate": 3.4549150281252635e-06, "loss": 0.6705, "step": 150 }, { "epoch": 0.6609808102345416, "grad_norm": 2.193206874567919, "learning_rate": 3.1038726867353587e-06, "loss": 0.6909, "step": 155 }, { "epoch": 0.6823027718550106, "grad_norm": 2.3141978562259133, "learning_rate": 2.7634342584218364e-06, "loss": 0.678, "step": 160 }, { "epoch": 0.7036247334754797, "grad_norm": 2.20282691421215, "learning_rate": 2.43550361297047e-06, "loss": 0.6646, "step": 165 }, { "epoch": 0.7249466950959488, "grad_norm": 2.3241432733966962, "learning_rate": 2.1219146715716332e-06, "loss": 0.6633, "step": 170 }, { "epoch": 0.746268656716418, "grad_norm": 2.3658483418520464, "learning_rate": 1.8244211507891064e-06, "loss": 0.6516, "step": 175 }, { "epoch": 0.767590618336887, "grad_norm": 2.259696417637488, "learning_rate": 1.544686755065677e-06, "loss": 0.6418, "step": 180 }, { "epoch": 0.7889125799573561, "grad_norm": 2.284368925546414, "learning_rate": 1.2842758726130283e-06, "loss": 0.6405, "step": 185 }, { "epoch": 0.8102345415778252, "grad_norm": 2.2174015564488223, "learning_rate": 1.044644826718295e-06, "loss": 0.6359, "step": 190 }, { "epoch": 0.8315565031982942, "grad_norm": 2.3098966859462076, "learning_rate": 8.271337313934869e-07, "loss": 0.6232, "step": 195 }, { "epoch": 0.8528784648187633, "grad_norm": 2.240425165408693, "learning_rate": 6.329589969143518e-07, "loss": 0.6263, "step": 200 }, { "epoch": 0.8742004264392325, "grad_norm": 2.203409177091297, "learning_rate": 4.632065271606756e-07, "loss": 0.6299, "step": 205 }, { "epoch": 0.8955223880597015, "grad_norm": 2.1702011902470724, "learning_rate": 3.18825646801314e-07, "loss": 0.636, "step": 210 }, { "epoch": 0.9168443496801706, "grad_norm": 2.202446820245564, "learning_rate": 2.006237922855553e-07, "loss": 0.6182, "step": 215 }, { "epoch": 0.9381663113006397, "grad_norm": 2.118840248626809, "learning_rate": 1.0926199633097156e-07, "loss": 0.609, "step": 220 }, { "epoch": 0.9594882729211087, "grad_norm": 2.168175873632397, "learning_rate": 4.52511911603265e-08, "loss": 0.6173, "step": 225 }, { "epoch": 0.9808102345415778, "grad_norm": 2.2624619803066617, "learning_rate": 8.949351161324227e-09, "loss": 0.6207, "step": 230 }, { "epoch": 0.997867803837953, "eval_loss": 0.7390011548995972, "eval_runtime": 106.273, "eval_samples_per_second": 3.67, "eval_steps_per_second": 0.922, "step": 234 }, { "epoch": 0.997867803837953, "step": 234, "total_flos": 48942494515200.0, "train_loss": 0.7695284368645432, "train_runtime": 7306.9109, "train_samples_per_second": 1.026, "train_steps_per_second": 0.032 } ], "logging_steps": 5, "max_steps": 234, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, 
"should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 48942494515200.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }