{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "eval_steps": 1, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 26.392181396484375, "learning_rate": 2.5e-05, "loss": 1.2683, "step": 1 }, { "epoch": 0.016, "eval_exact_match": 0.14285714285714285, "eval_f1_a": 0.544, "eval_f1_m": 0.5331374974232117, "eval_loss": 0.9992128610610962, "eval_runtime": 12.4039, "eval_samples_per_second": 20.155, "eval_steps_per_second": 2.58, "step": 1 }, { "epoch": 0.032, "grad_norm": 19.146682739257812, "learning_rate": 5e-05, "loss": 1.0276, "step": 2 }, { "epoch": 0.032, "eval_exact_match": 0.12244897959183673, "eval_f1_a": 0.5344129554655871, "eval_f1_m": 0.5208925994640281, "eval_loss": 0.9813457131385803, "eval_runtime": 12.2968, "eval_samples_per_second": 20.331, "eval_steps_per_second": 2.602, "step": 2 }, { "epoch": 0.048, "grad_norm": 25.024534225463867, "learning_rate": 4.959016393442623e-05, "loss": 1.1935, "step": 3 }, { "epoch": 0.048, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4979253112033195, "eval_f1_m": 0.4637011514562535, "eval_loss": 0.9183418154716492, "eval_runtime": 12.5058, "eval_samples_per_second": 19.991, "eval_steps_per_second": 2.559, "step": 3 }, { "epoch": 0.064, "grad_norm": 18.022754669189453, "learning_rate": 4.918032786885246e-05, "loss": 0.9304, "step": 4 }, { "epoch": 0.064, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.43243243243243246, "eval_f1_m": 0.42084400859911064, "eval_loss": 0.8643535375595093, "eval_runtime": 12.5036, "eval_samples_per_second": 19.994, "eval_steps_per_second": 2.559, "step": 4 }, { "epoch": 0.08, "grad_norm": 17.011953353881836, "learning_rate": 4.8770491803278687e-05, "loss": 1.0187, "step": 5 }, { "epoch": 0.08, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.44131455399061037, "eval_f1_m": 0.435791589363018, "eval_loss": 0.8290175795555115, "eval_runtime": 12.501, "eval_samples_per_second": 19.998, "eval_steps_per_second": 2.56, "step": 5 }, { "epoch": 0.096, "grad_norm": 9.931246757507324, "learning_rate": 4.836065573770492e-05, "loss": 0.781, "step": 6 }, { "epoch": 0.096, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.44761904761904764, "eval_f1_m": 0.44164457990988604, "eval_loss": 0.803396463394165, "eval_runtime": 12.603, "eval_samples_per_second": 19.837, "eval_steps_per_second": 2.539, "step": 6 }, { "epoch": 0.112, "grad_norm": 34.31171417236328, "learning_rate": 4.795081967213115e-05, "loss": 1.2274, "step": 7 }, { "epoch": 0.112, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.416243654822335, "eval_f1_m": 0.42698010606173864, "eval_loss": 0.7892948985099792, "eval_runtime": 12.5071, "eval_samples_per_second": 19.989, "eval_steps_per_second": 2.559, "step": 7 }, { "epoch": 0.128, "grad_norm": 19.665807723999023, "learning_rate": 4.754098360655738e-05, "loss": 1.0631, "step": 8 }, { "epoch": 0.128, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.3734939759036145, "eval_f1_m": 0.44151661996800046, "eval_loss": 0.779171884059906, "eval_runtime": 12.401, "eval_samples_per_second": 20.16, "eval_steps_per_second": 2.58, "step": 8 }, { "epoch": 0.144, "grad_norm": 6.531087875366211, "learning_rate": 4.713114754098361e-05, "loss": 0.6823, "step": 9 }, { "epoch": 0.144, "eval_exact_match": 0.0, "eval_f1_a": 0.24657534246575338, "eval_f1_m": 0.38393596199718655, "eval_loss": 0.7732793092727661, "eval_runtime": 12.3963, "eval_samples_per_second": 20.167, "eval_steps_per_second": 2.581, "step": 9 }, { "epoch": 0.16, "grad_norm": 9.769821166992188, "learning_rate": 4.672131147540984e-05, "loss": 0.6971, "step": 10 }, { "epoch": 0.16, "eval_exact_match": 0.0, "eval_f1_a": 0.24827586206896554, "eval_f1_m": 0.38636550524305635, "eval_loss": 0.7666972875595093, "eval_runtime": 12.3966, "eval_samples_per_second": 20.167, "eval_steps_per_second": 2.581, "step": 10 }, { "epoch": 0.176, "grad_norm": 18.875186920166016, "learning_rate": 4.631147540983607e-05, "loss": 0.8232, "step": 11 }, { "epoch": 0.176, "eval_exact_match": 0.0, "eval_f1_a": 0.2028985507246377, "eval_f1_m": 0.37477658396025737, "eval_loss": 0.7616797089576721, "eval_runtime": 12.293, "eval_samples_per_second": 20.337, "eval_steps_per_second": 2.603, "step": 11 }, { "epoch": 0.192, "grad_norm": 12.508312225341797, "learning_rate": 4.59016393442623e-05, "loss": 0.4358, "step": 12 }, { "epoch": 0.192, "eval_exact_match": 0.0, "eval_f1_a": 0.2142857142857143, "eval_f1_m": 0.37786431255819003, "eval_loss": 0.7548164129257202, "eval_runtime": 12.2924, "eval_samples_per_second": 20.338, "eval_steps_per_second": 2.603, "step": 12 }, { "epoch": 0.208, "grad_norm": 13.73599624633789, "learning_rate": 4.549180327868853e-05, "loss": 0.7875, "step": 13 }, { "epoch": 0.208, "eval_exact_match": 0.0, "eval_f1_a": 0.21582733812949637, "eval_f1_m": 0.37958707958707955, "eval_loss": 0.7478398680686951, "eval_runtime": 12.293, "eval_samples_per_second": 20.337, "eval_steps_per_second": 2.603, "step": 13 }, { "epoch": 0.224, "grad_norm": 17.087984085083008, "learning_rate": 4.508196721311476e-05, "loss": 0.9012, "step": 14 }, { "epoch": 0.224, "eval_exact_match": 0.0, "eval_f1_a": 0.2377622377622378, "eval_f1_m": 0.38485918843061695, "eval_loss": 0.7398672103881836, "eval_runtime": 12.294, "eval_samples_per_second": 20.335, "eval_steps_per_second": 2.603, "step": 14 }, { "epoch": 0.24, "grad_norm": 17.19772720336914, "learning_rate": 4.467213114754098e-05, "loss": 0.8344, "step": 15 }, { "epoch": 0.24, "eval_exact_match": 0.0, "eval_f1_a": 0.24657534246575338, "eval_f1_m": 0.38393596199718655, "eval_loss": 0.7301445603370667, "eval_runtime": 12.3925, "eval_samples_per_second": 20.173, "eval_steps_per_second": 2.582, "step": 15 }, { "epoch": 0.256, "grad_norm": 9.059144973754883, "learning_rate": 4.426229508196721e-05, "loss": 0.6957, "step": 16 }, { "epoch": 0.256, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3184713375796179, "eval_f1_m": 0.4192099850263115, "eval_loss": 0.7225546836853027, "eval_runtime": 12.2951, "eval_samples_per_second": 20.333, "eval_steps_per_second": 2.603, "step": 16 }, { "epoch": 0.272, "grad_norm": 4.891639709472656, "learning_rate": 4.3852459016393444e-05, "loss": 0.6085, "step": 17 }, { "epoch": 0.272, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.3878787878787879, "eval_f1_m": 0.4543832318142042, "eval_loss": 0.7196054458618164, "eval_runtime": 12.2946, "eval_samples_per_second": 20.334, "eval_steps_per_second": 2.603, "step": 17 }, { "epoch": 0.288, "grad_norm": 11.897939682006836, "learning_rate": 4.3442622950819674e-05, "loss": 0.7777, "step": 18 }, { "epoch": 0.288, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.3976608187134503, "eval_f1_m": 0.4537870592792561, "eval_loss": 0.7144648432731628, "eval_runtime": 12.3977, "eval_samples_per_second": 20.165, "eval_steps_per_second": 2.581, "step": 18 }, { "epoch": 0.304, "grad_norm": 11.019088745117188, "learning_rate": 4.3032786885245904e-05, "loss": 0.6732, "step": 19 }, { "epoch": 0.304, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.39285714285714285, "eval_f1_m": 0.4570021548412905, "eval_loss": 0.7114218473434448, "eval_runtime": 12.3988, "eval_samples_per_second": 20.163, "eval_steps_per_second": 2.581, "step": 19 }, { "epoch": 0.32, "grad_norm": 9.549395561218262, "learning_rate": 4.262295081967213e-05, "loss": 0.7477, "step": 20 }, { "epoch": 0.32, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4046242774566474, "eval_f1_m": 0.4577005477515681, "eval_loss": 0.709179699420929, "eval_runtime": 12.4018, "eval_samples_per_second": 20.158, "eval_steps_per_second": 2.58, "step": 20 }, { "epoch": 0.336, "grad_norm": 6.315933704376221, "learning_rate": 4.2213114754098365e-05, "loss": 0.6196, "step": 21 }, { "epoch": 0.336, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4069767441860465, "eval_f1_m": 0.4574949710153791, "eval_loss": 0.7092265486717224, "eval_runtime": 12.2969, "eval_samples_per_second": 20.33, "eval_steps_per_second": 2.602, "step": 21 }, { "epoch": 0.352, "grad_norm": 13.465363502502441, "learning_rate": 4.1803278688524595e-05, "loss": 0.8407, "step": 22 }, { "epoch": 0.352, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4333333333333333, "eval_f1_m": 0.4608648720893619, "eval_loss": 0.7104062438011169, "eval_runtime": 12.3998, "eval_samples_per_second": 20.162, "eval_steps_per_second": 2.581, "step": 22 }, { "epoch": 0.368, "grad_norm": 5.6240763664245605, "learning_rate": 4.1393442622950826e-05, "loss": 0.6945, "step": 23 }, { "epoch": 0.368, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4175824175824176, "eval_f1_m": 0.4523820850351463, "eval_loss": 0.7099843621253967, "eval_runtime": 12.5009, "eval_samples_per_second": 19.999, "eval_steps_per_second": 2.56, "step": 23 }, { "epoch": 0.384, "grad_norm": 11.571455001831055, "learning_rate": 4.098360655737705e-05, "loss": 0.7534, "step": 24 }, { "epoch": 0.384, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.42774566473988435, "eval_f1_m": 0.4625007645415809, "eval_loss": 0.7112500071525574, "eval_runtime": 12.505, "eval_samples_per_second": 19.992, "eval_steps_per_second": 2.559, "step": 24 }, { "epoch": 0.4, "grad_norm": 9.693904876708984, "learning_rate": 4.057377049180328e-05, "loss": 0.6428, "step": 25 }, { "epoch": 0.4, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.32258064516129026, "eval_f1_m": 0.42786660784860053, "eval_loss": 0.7106679677963257, "eval_runtime": 12.5005, "eval_samples_per_second": 19.999, "eval_steps_per_second": 2.56, "step": 25 }, { "epoch": 0.416, "grad_norm": 5.713263034820557, "learning_rate": 4.016393442622951e-05, "loss": 0.649, "step": 26 }, { "epoch": 0.416, "eval_exact_match": 0.0, "eval_f1_a": 0.2638888888888889, "eval_f1_m": 0.4046327255510929, "eval_loss": 0.7118203043937683, "eval_runtime": 12.4034, "eval_samples_per_second": 20.156, "eval_steps_per_second": 2.58, "step": 26 }, { "epoch": 0.432, "grad_norm": 7.6466965675354, "learning_rate": 3.975409836065574e-05, "loss": 0.7344, "step": 27 }, { "epoch": 0.432, "eval_exact_match": 0.0, "eval_f1_a": 0.18045112781954886, "eval_f1_m": 0.3727819799248371, "eval_loss": 0.7153554558753967, "eval_runtime": 12.4059, "eval_samples_per_second": 20.152, "eval_steps_per_second": 2.579, "step": 27 }, { "epoch": 0.448, "grad_norm": 12.54140567779541, "learning_rate": 3.934426229508197e-05, "loss": 0.5762, "step": 28 }, { "epoch": 0.448, "eval_exact_match": 0.0, "eval_f1_a": 0.13846153846153844, "eval_f1_m": 0.3607012601910561, "eval_loss": 0.7164726853370667, "eval_runtime": 12.4987, "eval_samples_per_second": 20.002, "eval_steps_per_second": 2.56, "step": 28 }, { "epoch": 0.464, "grad_norm": 7.025119781494141, "learning_rate": 3.89344262295082e-05, "loss": 0.6606, "step": 29 }, { "epoch": 0.464, "eval_exact_match": 0.0, "eval_f1_a": 0.11111111111111112, "eval_f1_m": 0.35377706194032715, "eval_loss": 0.719851553440094, "eval_runtime": 12.599, "eval_samples_per_second": 19.843, "eval_steps_per_second": 2.54, "step": 29 }, { "epoch": 0.48, "grad_norm": 16.650728225708008, "learning_rate": 3.8524590163934424e-05, "loss": 0.6886, "step": 30 }, { "epoch": 0.48, "eval_exact_match": 0.0, "eval_f1_a": 0.05454545454545454, "eval_f1_m": 0.3549857455469701, "eval_loss": 0.7235468626022339, "eval_runtime": 12.5049, "eval_samples_per_second": 19.992, "eval_steps_per_second": 2.559, "step": 30 }, { "epoch": 0.496, "grad_norm": 9.604567527770996, "learning_rate": 3.8114754098360655e-05, "loss": 0.6693, "step": 31 }, { "epoch": 0.496, "eval_exact_match": 0.0, "eval_f1_a": 0.03773584905660377, "eval_f1_m": 0.3570682452135034, "eval_loss": 0.730707049369812, "eval_runtime": 12.6051, "eval_samples_per_second": 19.833, "eval_steps_per_second": 2.539, "step": 31 }, { "epoch": 0.512, "grad_norm": 10.662591934204102, "learning_rate": 3.7704918032786885e-05, "loss": 0.8203, "step": 32 }, { "epoch": 0.512, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7325780987739563, "eval_runtime": 12.5102, "eval_samples_per_second": 19.984, "eval_steps_per_second": 2.558, "step": 32 }, { "epoch": 0.528, "grad_norm": 5.810702323913574, "learning_rate": 3.729508196721312e-05, "loss": 0.6831, "step": 33 }, { "epoch": 0.528, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7296562790870667, "eval_runtime": 12.5052, "eval_samples_per_second": 19.992, "eval_steps_per_second": 2.559, "step": 33 }, { "epoch": 0.544, "grad_norm": 6.527573585510254, "learning_rate": 3.6885245901639346e-05, "loss": 0.6942, "step": 34 }, { "epoch": 0.544, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7321445345878601, "eval_runtime": 12.6062, "eval_samples_per_second": 19.831, "eval_steps_per_second": 2.538, "step": 34 }, { "epoch": 0.56, "grad_norm": 15.23520565032959, "learning_rate": 3.6475409836065576e-05, "loss": 0.7819, "step": 35 }, { "epoch": 0.56, "eval_exact_match": 0.0, "eval_f1_a": 0.019417475728155338, "eval_f1_m": 0.3550274288869728, "eval_loss": 0.7335781455039978, "eval_runtime": 12.6066, "eval_samples_per_second": 19.831, "eval_steps_per_second": 2.538, "step": 35 }, { "epoch": 0.576, "grad_norm": 12.970118522644043, "learning_rate": 3.6065573770491806e-05, "loss": 0.5959, "step": 36 }, { "epoch": 0.576, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.3489535707722984, "eval_loss": 0.7367480397224426, "eval_runtime": 12.5014, "eval_samples_per_second": 19.998, "eval_steps_per_second": 2.56, "step": 36 }, { "epoch": 0.592, "grad_norm": 9.984509468078613, "learning_rate": 3.5655737704918037e-05, "loss": 0.5692, "step": 37 }, { "epoch": 0.592, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.3489535707722984, "eval_loss": 0.7453652620315552, "eval_runtime": 12.4005, "eval_samples_per_second": 20.16, "eval_steps_per_second": 2.581, "step": 37 }, { "epoch": 0.608, "grad_norm": 9.310087203979492, "learning_rate": 3.524590163934427e-05, "loss": 0.8538, "step": 38 }, { "epoch": 0.608, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7506445050239563, "eval_runtime": 12.5041, "eval_samples_per_second": 19.993, "eval_steps_per_second": 2.559, "step": 38 }, { "epoch": 0.624, "grad_norm": 17.32353401184082, "learning_rate": 3.483606557377049e-05, "loss": 0.7333, "step": 39 }, { "epoch": 0.624, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7485976815223694, "eval_runtime": 12.605, "eval_samples_per_second": 19.833, "eval_steps_per_second": 2.539, "step": 39 }, { "epoch": 0.64, "grad_norm": 9.058735847473145, "learning_rate": 3.442622950819672e-05, "loss": 0.4784, "step": 40 }, { "epoch": 0.64, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7543515563011169, "eval_runtime": 12.4985, "eval_samples_per_second": 20.002, "eval_steps_per_second": 2.56, "step": 40 }, { "epoch": 0.656, "grad_norm": 15.80337142944336, "learning_rate": 3.401639344262295e-05, "loss": 0.8348, "step": 41 }, { "epoch": 0.656, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7519648671150208, "eval_runtime": 12.4002, "eval_samples_per_second": 20.161, "eval_steps_per_second": 2.581, "step": 41 }, { "epoch": 0.672, "grad_norm": 18.595117568969727, "learning_rate": 3.360655737704918e-05, "loss": 0.8212, "step": 42 }, { "epoch": 0.672, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7456523180007935, "eval_runtime": 12.5047, "eval_samples_per_second": 19.992, "eval_steps_per_second": 2.559, "step": 42 }, { "epoch": 0.688, "grad_norm": 9.790151596069336, "learning_rate": 3.319672131147541e-05, "loss": 0.9515, "step": 43 }, { "epoch": 0.688, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.3489535707722984, "eval_loss": 0.7356074452400208, "eval_runtime": 12.5029, "eval_samples_per_second": 19.995, "eval_steps_per_second": 2.559, "step": 43 }, { "epoch": 0.704, "grad_norm": 19.172956466674805, "learning_rate": 3.2786885245901635e-05, "loss": 0.8488, "step": 44 }, { "epoch": 0.704, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7283183336257935, "eval_runtime": 12.6008, "eval_samples_per_second": 19.84, "eval_steps_per_second": 2.54, "step": 44 }, { "epoch": 0.72, "grad_norm": 10.353903770446777, "learning_rate": 3.237704918032787e-05, "loss": 0.6007, "step": 45 }, { "epoch": 0.72, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7258906364440918, "eval_runtime": 12.4066, "eval_samples_per_second": 20.15, "eval_steps_per_second": 2.579, "step": 45 }, { "epoch": 0.736, "grad_norm": 10.669143676757812, "learning_rate": 3.19672131147541e-05, "loss": 0.8167, "step": 46 }, { "epoch": 0.736, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7255585789680481, "eval_runtime": 12.6073, "eval_samples_per_second": 19.83, "eval_steps_per_second": 2.538, "step": 46 }, { "epoch": 0.752, "grad_norm": 12.651082992553711, "learning_rate": 3.155737704918033e-05, "loss": 0.7909, "step": 47 }, { "epoch": 0.752, "eval_exact_match": 0.0, "eval_f1_a": 0.019047619047619046, "eval_f1_m": 0.35264647650602043, "eval_loss": 0.7224531173706055, "eval_runtime": 12.5046, "eval_samples_per_second": 19.993, "eval_steps_per_second": 2.559, "step": 47 }, { "epoch": 0.768, "grad_norm": 16.77728843688965, "learning_rate": 3.114754098360656e-05, "loss": 0.7277, "step": 48 }, { "epoch": 0.768, "eval_exact_match": 0.0, "eval_f1_a": 0.037383177570093455, "eval_f1_m": 0.3553675649413946, "eval_loss": 0.7222617268562317, "eval_runtime": 12.5048, "eval_samples_per_second": 19.992, "eval_steps_per_second": 2.559, "step": 48 }, { "epoch": 0.784, "grad_norm": 9.990218162536621, "learning_rate": 3.073770491803279e-05, "loss": 0.6484, "step": 49 }, { "epoch": 0.784, "eval_exact_match": 0.0, "eval_f1_a": 0.018867924528301886, "eval_f1_m": 0.35094579623391153, "eval_loss": 0.7233593463897705, "eval_runtime": 12.3979, "eval_samples_per_second": 20.165, "eval_steps_per_second": 2.581, "step": 49 }, { "epoch": 0.8, "grad_norm": 19.805315017700195, "learning_rate": 3.0327868852459017e-05, "loss": 0.7366, "step": 50 }, { "epoch": 0.8, "eval_exact_match": 0.0, "eval_f1_a": 0.05454545454545454, "eval_f1_m": 0.35851045213290117, "eval_loss": 0.7192890644073486, "eval_runtime": 12.394, "eval_samples_per_second": 20.171, "eval_steps_per_second": 2.582, "step": 50 }, { "epoch": 0.816, "grad_norm": 6.260676860809326, "learning_rate": 2.9918032786885248e-05, "loss": 0.6544, "step": 51 }, { "epoch": 0.816, "eval_exact_match": 0.0, "eval_f1_a": 0.07207207207207207, "eval_f1_m": 0.3631490108530924, "eval_loss": 0.7178593873977661, "eval_runtime": 12.399, "eval_samples_per_second": 20.163, "eval_steps_per_second": 2.581, "step": 51 }, { "epoch": 0.832, "grad_norm": 6.211192607879639, "learning_rate": 2.9508196721311478e-05, "loss": 0.5797, "step": 52 }, { "epoch": 0.832, "eval_exact_match": 0.0, "eval_f1_a": 0.07142857142857144, "eval_f1_m": 0.36225194873154054, "eval_loss": 0.7176406383514404, "eval_runtime": 12.2942, "eval_samples_per_second": 20.335, "eval_steps_per_second": 2.603, "step": 52 }, { "epoch": 0.848, "grad_norm": 19.603347778320312, "learning_rate": 2.9098360655737705e-05, "loss": 0.613, "step": 53 }, { "epoch": 0.848, "eval_exact_match": 0.0, "eval_f1_a": 0.08928571428571429, "eval_f1_m": 0.36680547343812653, "eval_loss": 0.719406247138977, "eval_runtime": 12.2965, "eval_samples_per_second": 20.331, "eval_steps_per_second": 2.602, "step": 53 }, { "epoch": 0.864, "grad_norm": 7.0200042724609375, "learning_rate": 2.8688524590163935e-05, "loss": 0.7246, "step": 54 }, { "epoch": 0.864, "eval_exact_match": 0.0, "eval_f1_a": 0.03773584905660377, "eval_f1_m": 0.35666626624009584, "eval_loss": 0.7230820059776306, "eval_runtime": 12.3942, "eval_samples_per_second": 20.171, "eval_steps_per_second": 2.582, "step": 54 }, { "epoch": 0.88, "grad_norm": 7.6928791999816895, "learning_rate": 2.8278688524590162e-05, "loss": 0.723, "step": 55 }, { "epoch": 0.88, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7283515334129333, "eval_runtime": 12.4051, "eval_samples_per_second": 20.153, "eval_steps_per_second": 2.58, "step": 55 }, { "epoch": 0.896, "grad_norm": 9.09825611114502, "learning_rate": 2.7868852459016392e-05, "loss": 0.526, "step": 56 }, { "epoch": 0.896, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.736648440361023, "eval_runtime": 12.5063, "eval_samples_per_second": 19.99, "eval_steps_per_second": 2.559, "step": 56 }, { "epoch": 0.912, "grad_norm": 12.663421630859375, "learning_rate": 2.7459016393442626e-05, "loss": 0.7038, "step": 57 }, { "epoch": 0.912, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.3489535707722984, "eval_loss": 0.7481816411018372, "eval_runtime": 12.4036, "eval_samples_per_second": 20.155, "eval_steps_per_second": 2.58, "step": 57 }, { "epoch": 0.928, "grad_norm": 15.922131538391113, "learning_rate": 2.7049180327868856e-05, "loss": 0.7805, "step": 58 }, { "epoch": 0.928, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7556718587875366, "eval_runtime": 12.4053, "eval_samples_per_second": 20.153, "eval_steps_per_second": 2.58, "step": 58 }, { "epoch": 0.944, "grad_norm": 15.116573333740234, "learning_rate": 2.6639344262295087e-05, "loss": 0.7645, "step": 59 }, { "epoch": 0.944, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7615898251533508, "eval_runtime": 12.301, "eval_samples_per_second": 20.324, "eval_steps_per_second": 2.601, "step": 59 }, { "epoch": 0.96, "grad_norm": 16.11196517944336, "learning_rate": 2.6229508196721314e-05, "loss": 0.802, "step": 60 }, { "epoch": 0.96, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.762087881565094, "eval_runtime": 12.4045, "eval_samples_per_second": 20.154, "eval_steps_per_second": 2.58, "step": 60 }, { "epoch": 0.976, "grad_norm": 11.784616470336914, "learning_rate": 2.5819672131147544e-05, "loss": 0.7266, "step": 61 }, { "epoch": 0.976, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7605761885643005, "eval_runtime": 12.5067, "eval_samples_per_second": 19.989, "eval_steps_per_second": 2.559, "step": 61 }, { "epoch": 0.992, "grad_norm": 5.855625152587891, "learning_rate": 2.540983606557377e-05, "loss": 0.6895, "step": 62 }, { "epoch": 0.992, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7616230249404907, "eval_runtime": 12.4021, "eval_samples_per_second": 20.158, "eval_steps_per_second": 2.58, "step": 62 }, { "epoch": 1.008, "grad_norm": 12.561187744140625, "learning_rate": 2.5e-05, "loss": 0.8057, "step": 63 }, { "epoch": 1.008, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7573671936988831, "eval_runtime": 12.5096, "eval_samples_per_second": 19.985, "eval_steps_per_second": 2.558, "step": 63 }, { "epoch": 1.024, "grad_norm": 10.385027885437012, "learning_rate": 2.459016393442623e-05, "loss": 0.7454, "step": 64 }, { "epoch": 1.024, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.3489535707722984, "eval_loss": 0.7491288781166077, "eval_runtime": 12.4029, "eval_samples_per_second": 20.157, "eval_steps_per_second": 2.58, "step": 64 }, { "epoch": 1.04, "grad_norm": 15.67496395111084, "learning_rate": 2.418032786885246e-05, "loss": 0.877, "step": 65 }, { "epoch": 1.04, "eval_exact_match": 0.0, "eval_f1_a": 0.019417475728155338, "eval_f1_m": 0.3550274288869728, "eval_loss": 0.7429726719856262, "eval_runtime": 12.4019, "eval_samples_per_second": 20.158, "eval_steps_per_second": 2.58, "step": 65 }, { "epoch": 1.056, "grad_norm": 14.67371654510498, "learning_rate": 2.377049180327869e-05, "loss": 0.7684, "step": 66 }, { "epoch": 1.056, "eval_exact_match": 0.0, "eval_f1_a": 0.019230769230769232, "eval_f1_m": 0.35394517780472173, "eval_loss": 0.7341777086257935, "eval_runtime": 12.3064, "eval_samples_per_second": 20.315, "eval_steps_per_second": 2.6, "step": 66 }, { "epoch": 1.072, "grad_norm": 11.17091178894043, "learning_rate": 2.336065573770492e-05, "loss": 0.6712, "step": 67 }, { "epoch": 1.072, "eval_exact_match": 0.0, "eval_f1_a": 0.03669724770642201, "eval_f1_m": 0.34935849464660995, "eval_loss": 0.724734365940094, "eval_runtime": 12.4001, "eval_samples_per_second": 20.161, "eval_steps_per_second": 2.581, "step": 67 }, { "epoch": 1.088, "grad_norm": 7.933443546295166, "learning_rate": 2.295081967213115e-05, "loss": 0.545, "step": 68 }, { "epoch": 1.088, "eval_exact_match": 0.0, "eval_f1_a": 0.1, "eval_f1_m": 0.35936518130395684, "eval_loss": 0.7180312275886536, "eval_runtime": 12.6064, "eval_samples_per_second": 19.831, "eval_steps_per_second": 2.538, "step": 68 }, { "epoch": 1.104, "grad_norm": 6.20879602432251, "learning_rate": 2.254098360655738e-05, "loss": 0.5563, "step": 69 }, { "epoch": 1.104, "eval_exact_match": 0.0, "eval_f1_a": 0.15503875968992248, "eval_f1_m": 0.3728975672853224, "eval_loss": 0.7145351767539978, "eval_runtime": 12.3015, "eval_samples_per_second": 20.323, "eval_steps_per_second": 2.601, "step": 69 }, { "epoch": 1.12, "grad_norm": 5.216189861297607, "learning_rate": 2.2131147540983607e-05, "loss": 0.5797, "step": 70 }, { "epoch": 1.12, "eval_exact_match": 0.0, "eval_f1_a": 0.2222222222222222, "eval_f1_m": 0.3975035168912719, "eval_loss": 0.7122148275375366, "eval_runtime": 12.2962, "eval_samples_per_second": 20.331, "eval_steps_per_second": 2.602, "step": 70 }, { "epoch": 1.1360000000000001, "grad_norm": 4.51043701171875, "learning_rate": 2.1721311475409837e-05, "loss": 0.7785, "step": 71 }, { "epoch": 1.1360000000000001, "eval_exact_match": 0.0, "eval_f1_a": 0.2571428571428571, "eval_f1_m": 0.4030671709243137, "eval_loss": 0.7087773680686951, "eval_runtime": 12.2926, "eval_samples_per_second": 20.337, "eval_steps_per_second": 2.603, "step": 71 }, { "epoch": 1.152, "grad_norm": 9.325611114501953, "learning_rate": 2.1311475409836064e-05, "loss": 0.757, "step": 72 }, { "epoch": 1.152, "eval_exact_match": 0.0, "eval_f1_a": 0.3013698630136986, "eval_f1_m": 0.4158932903830863, "eval_loss": 0.7072968482971191, "eval_runtime": 12.3042, "eval_samples_per_second": 20.318, "eval_steps_per_second": 2.601, "step": 72 }, { "epoch": 1.168, "grad_norm": 15.549028396606445, "learning_rate": 2.0901639344262298e-05, "loss": 0.85, "step": 73 }, { "epoch": 1.168, "eval_exact_match": 0.0, "eval_f1_a": 0.3013698630136986, "eval_f1_m": 0.4126730412444698, "eval_loss": 0.7068906426429749, "eval_runtime": 12.3976, "eval_samples_per_second": 20.165, "eval_steps_per_second": 2.581, "step": 73 }, { "epoch": 1.184, "grad_norm": 7.413804531097412, "learning_rate": 2.0491803278688525e-05, "loss": 0.6694, "step": 74 }, { "epoch": 1.184, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.32258064516129026, "eval_f1_m": 0.41783029895274804, "eval_loss": 0.7051249742507935, "eval_runtime": 12.3988, "eval_samples_per_second": 20.163, "eval_steps_per_second": 2.581, "step": 74 }, { "epoch": 1.2, "grad_norm": 13.059216499328613, "learning_rate": 2.0081967213114755e-05, "loss": 0.7654, "step": 75 }, { "epoch": 1.2, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3658536585365854, "eval_f1_m": 0.4367546285913633, "eval_loss": 0.7052109241485596, "eval_runtime": 12.3021, "eval_samples_per_second": 20.322, "eval_steps_per_second": 2.601, "step": 75 }, { "epoch": 1.216, "grad_norm": 18.173431396484375, "learning_rate": 1.9672131147540985e-05, "loss": 0.7874, "step": 76 }, { "epoch": 1.216, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.393063583815029, "eval_f1_m": 0.4441113308460247, "eval_loss": 0.7044414281845093, "eval_runtime": 12.3005, "eval_samples_per_second": 20.324, "eval_steps_per_second": 2.602, "step": 76 }, { "epoch": 1.232, "grad_norm": 5.066444396972656, "learning_rate": 1.9262295081967212e-05, "loss": 0.585, "step": 77 }, { "epoch": 1.232, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.4067796610169491, "eval_f1_m": 0.44523701921661113, "eval_loss": 0.7045234441757202, "eval_runtime": 12.4024, "eval_samples_per_second": 20.157, "eval_steps_per_second": 2.58, "step": 77 }, { "epoch": 1.248, "grad_norm": 15.81064224243164, "learning_rate": 1.8852459016393442e-05, "loss": 0.7125, "step": 78 }, { "epoch": 1.248, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.4347826086956522, "eval_f1_m": 0.45324236421175196, "eval_loss": 0.7049570083618164, "eval_runtime": 12.2997, "eval_samples_per_second": 20.326, "eval_steps_per_second": 2.602, "step": 78 }, { "epoch": 1.264, "grad_norm": 15.786576271057129, "learning_rate": 1.8442622950819673e-05, "loss": 0.674, "step": 79 }, { "epoch": 1.264, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.45263157894736844, "eval_f1_m": 0.45977191742497864, "eval_loss": 0.7056093811988831, "eval_runtime": 12.297, "eval_samples_per_second": 20.33, "eval_steps_per_second": 2.602, "step": 79 }, { "epoch": 1.28, "grad_norm": 20.205602645874023, "learning_rate": 1.8032786885245903e-05, "loss": 0.7886, "step": 80 }, { "epoch": 1.28, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.450261780104712, "eval_f1_m": 0.45734237417910883, "eval_loss": 0.706125020980835, "eval_runtime": 12.2957, "eval_samples_per_second": 20.332, "eval_steps_per_second": 2.603, "step": 80 }, { "epoch": 1.296, "grad_norm": 7.352344036102295, "learning_rate": 1.7622950819672133e-05, "loss": 0.521, "step": 81 }, { "epoch": 1.296, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.44329896907216493, "eval_f1_m": 0.44956970467174556, "eval_loss": 0.7077500224113464, "eval_runtime": 12.4027, "eval_samples_per_second": 20.157, "eval_steps_per_second": 2.58, "step": 81 }, { "epoch": 1.312, "grad_norm": 9.654190063476562, "learning_rate": 1.721311475409836e-05, "loss": 0.7754, "step": 82 }, { "epoch": 1.312, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.45226130653266333, "eval_f1_m": 0.45042921930677027, "eval_loss": 0.7080312371253967, "eval_runtime": 12.2984, "eval_samples_per_second": 20.328, "eval_steps_per_second": 2.602, "step": 82 }, { "epoch": 1.328, "grad_norm": 19.156660079956055, "learning_rate": 1.680327868852459e-05, "loss": 0.7493, "step": 83 }, { "epoch": 1.328, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4577114427860697, "eval_f1_m": 0.4507745089377742, "eval_loss": 0.7095780968666077, "eval_runtime": 12.2981, "eval_samples_per_second": 20.328, "eval_steps_per_second": 2.602, "step": 83 }, { "epoch": 1.3439999999999999, "grad_norm": 4.014058589935303, "learning_rate": 1.6393442622950818e-05, "loss": 0.7076, "step": 84 }, { "epoch": 1.3439999999999999, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.45999999999999996, "eval_f1_m": 0.45553641369967895, "eval_loss": 0.7096328139305115, "eval_runtime": 12.298, "eval_samples_per_second": 20.329, "eval_steps_per_second": 2.602, "step": 84 }, { "epoch": 1.3599999999999999, "grad_norm": 12.296324729919434, "learning_rate": 1.598360655737705e-05, "loss": 0.6649, "step": 85 }, { "epoch": 1.3599999999999999, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4577114427860697, "eval_f1_m": 0.4507745089377742, "eval_loss": 0.7094843983650208, "eval_runtime": 12.2932, "eval_samples_per_second": 20.337, "eval_steps_per_second": 2.603, "step": 85 }, { "epoch": 1.376, "grad_norm": 8.357748985290527, "learning_rate": 1.557377049180328e-05, "loss": 0.6199, "step": 86 }, { "epoch": 1.376, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.45544554455445546, "eval_f1_m": 0.44890376063845455, "eval_loss": 0.7101484537124634, "eval_runtime": 12.2925, "eval_samples_per_second": 20.338, "eval_steps_per_second": 2.603, "step": 86 }, { "epoch": 1.392, "grad_norm": 5.569490909576416, "learning_rate": 1.5163934426229509e-05, "loss": 0.7666, "step": 87 }, { "epoch": 1.392, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4577114427860697, "eval_f1_m": 0.4507745089377742, "eval_loss": 0.7110859155654907, "eval_runtime": 12.3978, "eval_samples_per_second": 20.165, "eval_steps_per_second": 2.581, "step": 87 }, { "epoch": 1.408, "grad_norm": 20.458703994750977, "learning_rate": 1.4754098360655739e-05, "loss": 0.7505, "step": 88 }, { "epoch": 1.408, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4577114427860697, "eval_f1_m": 0.4507745089377742, "eval_loss": 0.7112030982971191, "eval_runtime": 12.4088, "eval_samples_per_second": 20.147, "eval_steps_per_second": 2.579, "step": 88 }, { "epoch": 1.424, "grad_norm": 17.410215377807617, "learning_rate": 1.4344262295081968e-05, "loss": 0.7426, "step": 89 }, { "epoch": 1.424, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.45, "eval_f1_m": 0.4473113054745707, "eval_loss": 0.7114999890327454, "eval_runtime": 12.4068, "eval_samples_per_second": 20.15, "eval_steps_per_second": 2.579, "step": 89 }, { "epoch": 1.44, "grad_norm": 12.000615119934082, "learning_rate": 1.3934426229508196e-05, "loss": 0.8439, "step": 90 }, { "epoch": 1.44, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.45, "eval_f1_m": 0.4473113054745707, "eval_loss": 0.7099843621253967, "eval_runtime": 12.3004, "eval_samples_per_second": 20.325, "eval_steps_per_second": 2.602, "step": 90 }, { "epoch": 1.456, "grad_norm": 9.254725456237793, "learning_rate": 1.3524590163934428e-05, "loss": 0.7011, "step": 91 }, { "epoch": 1.456, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.44102564102564107, "eval_f1_m": 0.44650038170446327, "eval_loss": 0.7092187404632568, "eval_runtime": 12.6142, "eval_samples_per_second": 19.819, "eval_steps_per_second": 2.537, "step": 91 }, { "epoch": 1.472, "grad_norm": 9.848048210144043, "learning_rate": 1.3114754098360657e-05, "loss": 0.689, "step": 92 }, { "epoch": 1.472, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.45263157894736844, "eval_f1_m": 0.4581459923296658, "eval_loss": 0.7082812786102295, "eval_runtime": 12.4011, "eval_samples_per_second": 20.159, "eval_steps_per_second": 2.58, "step": 92 }, { "epoch": 1.488, "grad_norm": 17.459680557250977, "learning_rate": 1.2704918032786885e-05, "loss": 0.8424, "step": 93 }, { "epoch": 1.488, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.450261780104712, "eval_f1_m": 0.45620235773296997, "eval_loss": 0.7073437571525574, "eval_runtime": 12.4025, "eval_samples_per_second": 20.157, "eval_steps_per_second": 2.58, "step": 93 }, { "epoch": 1.504, "grad_norm": 7.91733980178833, "learning_rate": 1.2295081967213116e-05, "loss": 0.6224, "step": 94 }, { "epoch": 1.504, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4468085106382979, "eval_f1_m": 0.4566639709496852, "eval_loss": 0.7057031393051147, "eval_runtime": 12.4036, "eval_samples_per_second": 20.155, "eval_steps_per_second": 2.58, "step": 94 }, { "epoch": 1.52, "grad_norm": 17.14729881286621, "learning_rate": 1.1885245901639344e-05, "loss": 0.5743, "step": 95 }, { "epoch": 1.52, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4491978609625668, "eval_f1_m": 0.4579678258249688, "eval_loss": 0.7053046822547913, "eval_runtime": 12.4028, "eval_samples_per_second": 20.157, "eval_steps_per_second": 2.58, "step": 95 }, { "epoch": 1.536, "grad_norm": 10.893065452575684, "learning_rate": 1.1475409836065575e-05, "loss": 0.7041, "step": 96 }, { "epoch": 1.536, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.44324324324324327, "eval_f1_m": 0.45810145070349156, "eval_loss": 0.7051210999488831, "eval_runtime": 12.3907, "eval_samples_per_second": 20.176, "eval_steps_per_second": 2.583, "step": 96 }, { "epoch": 1.552, "grad_norm": 9.716265678405762, "learning_rate": 1.1065573770491803e-05, "loss": 0.6443, "step": 97 }, { "epoch": 1.552, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.4262295081967213, "eval_f1_m": 0.4496709356403233, "eval_loss": 0.7061210870742798, "eval_runtime": 12.2983, "eval_samples_per_second": 20.328, "eval_steps_per_second": 2.602, "step": 97 }, { "epoch": 1.568, "grad_norm": 17.185867309570312, "learning_rate": 1.0655737704918032e-05, "loss": 0.6588, "step": 98 }, { "epoch": 1.568, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.430939226519337, "eval_f1_m": 0.4559391572146673, "eval_loss": 0.704464852809906, "eval_runtime": 12.4036, "eval_samples_per_second": 20.155, "eval_steps_per_second": 2.58, "step": 98 }, { "epoch": 1.584, "grad_norm": 9.352198600769043, "learning_rate": 1.0245901639344262e-05, "loss": 0.6669, "step": 99 }, { "epoch": 1.584, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.41860465116279066, "eval_f1_m": 0.45876048721186774, "eval_loss": 0.7052500247955322, "eval_runtime": 12.2869, "eval_samples_per_second": 20.347, "eval_steps_per_second": 2.604, "step": 99 }, { "epoch": 1.6, "grad_norm": 10.983183860778809, "learning_rate": 9.836065573770493e-06, "loss": 0.667, "step": 100 }, { "epoch": 1.6, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3976608187134503, "eval_f1_m": 0.44378116214850916, "eval_loss": 0.7053242325782776, "eval_runtime": 12.292, "eval_samples_per_second": 20.338, "eval_steps_per_second": 2.603, "step": 100 }, { "epoch": 1.616, "grad_norm": 8.352677345275879, "learning_rate": 9.426229508196721e-06, "loss": 0.6028, "step": 101 }, { "epoch": 1.616, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3780487804878049, "eval_f1_m": 0.440360093421318, "eval_loss": 0.7049765586853027, "eval_runtime": 12.3969, "eval_samples_per_second": 20.166, "eval_steps_per_second": 2.581, "step": 101 }, { "epoch": 1.6320000000000001, "grad_norm": 18.418481826782227, "learning_rate": 9.016393442622952e-06, "loss": 0.7084, "step": 102 }, { "epoch": 1.6320000000000001, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3827160493827161, "eval_f1_m": 0.4446715982430268, "eval_loss": 0.7034218907356262, "eval_runtime": 12.3991, "eval_samples_per_second": 20.163, "eval_steps_per_second": 2.581, "step": 102 }, { "epoch": 1.6480000000000001, "grad_norm": 10.633659362792969, "learning_rate": 8.60655737704918e-06, "loss": 0.6221, "step": 103 }, { "epoch": 1.6480000000000001, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.33766233766233766, "eval_f1_m": 0.42304736080246286, "eval_loss": 0.7042617201805115, "eval_runtime": 12.2954, "eval_samples_per_second": 20.333, "eval_steps_per_second": 2.603, "step": 103 }, { "epoch": 1.6640000000000001, "grad_norm": 12.377849578857422, "learning_rate": 8.196721311475409e-06, "loss": 0.5413, "step": 104 }, { "epoch": 1.6640000000000001, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3289473684210526, "eval_f1_m": 0.4253797223184978, "eval_loss": 0.7033711075782776, "eval_runtime": 12.3003, "eval_samples_per_second": 20.325, "eval_steps_per_second": 2.602, "step": 104 }, { "epoch": 1.6800000000000002, "grad_norm": 12.551422119140625, "learning_rate": 7.78688524590164e-06, "loss": 0.682, "step": 105 }, { "epoch": 1.6800000000000002, "eval_exact_match": 0.0, "eval_f1_a": 0.2953020134228188, "eval_f1_m": 0.4078515022392573, "eval_loss": 0.7049804925918579, "eval_runtime": 12.495, "eval_samples_per_second": 20.008, "eval_steps_per_second": 2.561, "step": 105 }, { "epoch": 1.696, "grad_norm": 8.237449645996094, "learning_rate": 7.3770491803278695e-06, "loss": 0.7448, "step": 106 }, { "epoch": 1.696, "eval_exact_match": 0.0, "eval_f1_a": 0.3013698630136986, "eval_f1_m": 0.41549131140967865, "eval_loss": 0.7063398361206055, "eval_runtime": 12.4967, "eval_samples_per_second": 20.005, "eval_steps_per_second": 2.561, "step": 106 }, { "epoch": 1.712, "grad_norm": 7.336461067199707, "learning_rate": 6.967213114754098e-06, "loss": 0.4493, "step": 107 }, { "epoch": 1.712, "eval_exact_match": 0.0, "eval_f1_a": 0.2553191489361702, "eval_f1_m": 0.4051889720257066, "eval_loss": 0.7075039148330688, "eval_runtime": 12.403, "eval_samples_per_second": 20.156, "eval_steps_per_second": 2.58, "step": 107 }, { "epoch": 1.728, "grad_norm": 8.348127365112305, "learning_rate": 6.557377049180328e-06, "loss": 0.8977, "step": 108 }, { "epoch": 1.728, "eval_exact_match": 0.0, "eval_f1_a": 0.2608695652173913, "eval_f1_m": 0.4141215926930211, "eval_loss": 0.7066875100135803, "eval_runtime": 12.3958, "eval_samples_per_second": 20.168, "eval_steps_per_second": 2.582, "step": 108 }, { "epoch": 1.744, "grad_norm": 6.762801170349121, "learning_rate": 6.147540983606558e-06, "loss": 0.6888, "step": 109 }, { "epoch": 1.744, "eval_exact_match": 0.0, "eval_f1_a": 0.23529411764705882, "eval_f1_m": 0.4052051689806791, "eval_loss": 0.7079609632492065, "eval_runtime": 12.2957, "eval_samples_per_second": 20.332, "eval_steps_per_second": 2.603, "step": 109 }, { "epoch": 1.76, "grad_norm": 7.54478645324707, "learning_rate": 5.737704918032787e-06, "loss": 0.7079, "step": 110 }, { "epoch": 1.76, "eval_exact_match": 0.0, "eval_f1_a": 0.196969696969697, "eval_f1_m": 0.3930353659945496, "eval_loss": 0.7083203196525574, "eval_runtime": 12.4001, "eval_samples_per_second": 20.161, "eval_steps_per_second": 2.581, "step": 110 }, { "epoch": 1.776, "grad_norm": 6.84658145904541, "learning_rate": 5.327868852459016e-06, "loss": 0.6766, "step": 111 }, { "epoch": 1.776, "eval_exact_match": 0.0, "eval_f1_a": 0.2105263157894737, "eval_f1_m": 0.39676157176157173, "eval_loss": 0.7091640830039978, "eval_runtime": 12.4008, "eval_samples_per_second": 20.16, "eval_steps_per_second": 2.58, "step": 111 }, { "epoch": 1.792, "grad_norm": 16.06954002380371, "learning_rate": 4.918032786885246e-06, "loss": 0.6298, "step": 112 }, { "epoch": 1.792, "eval_exact_match": 0.0, "eval_f1_a": 0.15748031496062992, "eval_f1_m": 0.37861616841208684, "eval_loss": 0.7095195055007935, "eval_runtime": 12.4951, "eval_samples_per_second": 20.008, "eval_steps_per_second": 2.561, "step": 112 }, { "epoch": 1.808, "grad_norm": 5.902178764343262, "learning_rate": 4.508196721311476e-06, "loss": 0.5864, "step": 113 }, { "epoch": 1.808, "eval_exact_match": 0.0, "eval_f1_a": 0.15873015873015875, "eval_f1_m": 0.38075416646845217, "eval_loss": 0.7111679911613464, "eval_runtime": 12.4019, "eval_samples_per_second": 20.158, "eval_steps_per_second": 2.58, "step": 113 }, { "epoch": 1.8239999999999998, "grad_norm": 17.344051361083984, "learning_rate": 4.098360655737704e-06, "loss": 0.7265, "step": 114 }, { "epoch": 1.8239999999999998, "eval_exact_match": 0.0, "eval_f1_a": 0.16, "eval_f1_m": 0.38211471068613934, "eval_loss": 0.7119726538658142, "eval_runtime": 12.3958, "eval_samples_per_second": 20.168, "eval_steps_per_second": 2.582, "step": 114 }, { "epoch": 1.8399999999999999, "grad_norm": 13.185153007507324, "learning_rate": 3.6885245901639347e-06, "loss": 0.5856, "step": 115 }, { "epoch": 1.8399999999999999, "eval_exact_match": 0.0, "eval_f1_a": 0.15873015873015875, "eval_f1_m": 0.37997671262977384, "eval_loss": 0.7116367220878601, "eval_runtime": 12.3902, "eval_samples_per_second": 20.177, "eval_steps_per_second": 2.583, "step": 115 }, { "epoch": 1.8559999999999999, "grad_norm": 6.701231956481934, "learning_rate": 3.278688524590164e-06, "loss": 0.718, "step": 116 }, { "epoch": 1.8559999999999999, "eval_exact_match": 0.0, "eval_f1_a": 0.09836065573770492, "eval_f1_m": 0.361842380975034, "eval_loss": 0.7136015892028809, "eval_runtime": 12.294, "eval_samples_per_second": 20.335, "eval_steps_per_second": 2.603, "step": 116 }, { "epoch": 1.8719999999999999, "grad_norm": 11.730875015258789, "learning_rate": 2.8688524590163937e-06, "loss": 0.7096, "step": 117 }, { "epoch": 1.8719999999999999, "eval_exact_match": 0.0, "eval_f1_a": 0.08333333333333334, "eval_f1_m": 0.35354549079038877, "eval_loss": 0.7125195264816284, "eval_runtime": 12.3992, "eval_samples_per_second": 20.163, "eval_steps_per_second": 2.581, "step": 117 }, { "epoch": 1.888, "grad_norm": 11.764548301696777, "learning_rate": 2.459016393442623e-06, "loss": 0.7648, "step": 118 }, { "epoch": 1.888, "eval_exact_match": 0.0, "eval_f1_a": 0.10169491525423728, "eval_f1_m": 0.36885033900340025, "eval_loss": 0.7145312428474426, "eval_runtime": 12.2937, "eval_samples_per_second": 20.336, "eval_steps_per_second": 2.603, "step": 118 }, { "epoch": 1.904, "grad_norm": 7.357093334197998, "learning_rate": 2.049180327868852e-06, "loss": 0.66, "step": 119 }, { "epoch": 1.904, "eval_exact_match": 0.0, "eval_f1_a": 0.08403361344537816, "eval_f1_m": 0.3581008843763946, "eval_loss": 0.7142617106437683, "eval_runtime": 12.2932, "eval_samples_per_second": 20.336, "eval_steps_per_second": 2.603, "step": 119 }, { "epoch": 1.92, "grad_norm": 12.222306251525879, "learning_rate": 1.639344262295082e-06, "loss": 0.7134, "step": 120 }, { "epoch": 1.92, "eval_exact_match": 0.0, "eval_f1_a": 0.08620689655172413, "eval_f1_m": 0.3664693866224478, "eval_loss": 0.7143398523330688, "eval_runtime": 12.4037, "eval_samples_per_second": 20.155, "eval_steps_per_second": 2.58, "step": 120 }, { "epoch": 1.936, "grad_norm": 11.94378662109375, "learning_rate": 1.2295081967213116e-06, "loss": 0.6269, "step": 121 }, { "epoch": 1.936, "eval_exact_match": 0.0, "eval_f1_a": 0.06837606837606838, "eval_f1_m": 0.35452724712928796, "eval_loss": 0.7161992192268372, "eval_runtime": 12.2894, "eval_samples_per_second": 20.343, "eval_steps_per_second": 2.604, "step": 121 }, { "epoch": 1.952, "grad_norm": 10.898783683776855, "learning_rate": 8.19672131147541e-07, "loss": 0.6692, "step": 122 }, { "epoch": 1.952, "eval_exact_match": 0.0, "eval_f1_a": 0.06779661016949154, "eval_f1_m": 0.35377437355228475, "eval_loss": 0.7155507802963257, "eval_runtime": 12.2897, "eval_samples_per_second": 20.342, "eval_steps_per_second": 2.604, "step": 122 }, { "epoch": 1.968, "grad_norm": 11.34163761138916, "learning_rate": 4.098360655737705e-07, "loss": 0.7837, "step": 123 }, { "epoch": 1.968, "eval_exact_match": 0.0, "eval_f1_a": 0.06837606837606838, "eval_f1_m": 0.3542995836363184, "eval_loss": 0.7164883017539978, "eval_runtime": 12.3974, "eval_samples_per_second": 20.166, "eval_steps_per_second": 2.581, "step": 123 }, { "epoch": 1.984, "grad_norm": 9.480073928833008, "learning_rate": 0.0, "loss": 0.5815, "step": 124 }, { "epoch": 1.984, "eval_exact_match": 0.0, "eval_f1_a": 0.053097345132743355, "eval_f1_m": 0.35416089179594584, "eval_loss": 0.7167656421661377, "eval_runtime": 12.2934, "eval_samples_per_second": 20.336, "eval_steps_per_second": 2.603, "step": 124 }, { "epoch": 1.984, "step": 124, "total_flos": 107566092058624.0, "train_loss": 0.7248697588520665, "train_runtime": 2001.1585, "train_samples_per_second": 0.999, "train_steps_per_second": 0.062 } ], "logging_steps": 1, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 107566092058624.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }