{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 102000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014705882352941176, "grad_norm": 2.69562029838562, "learning_rate": 4.975490196078432e-05, "loss": 2.0405, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.029411764705882353, "grad_norm": 4.156198978424072, "learning_rate": 4.9509803921568634e-05, "loss": 1.6006, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04411764705882353, "grad_norm": 4.104771137237549, "learning_rate": 4.9264705882352944e-05, "loss": 1.4982, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 1500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.058823529411764705, "grad_norm": 5.6248674392700195, "learning_rate": 4.901960784313725e-05, "loss": 1.3869, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 2000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07352941176470588, "grad_norm": 3.814528226852417, "learning_rate": 4.877450980392157e-05, "loss": 1.361, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 2500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08823529411764706, "grad_norm": 4.103758811950684, "learning_rate": 4.8529411764705885e-05, "loss": 1.2968, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 3000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.10294117647058823, "grad_norm": 4.155980587005615, "learning_rate": 4.82843137254902e-05, "loss": 1.2537, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 3500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11764705882352941, "grad_norm": 5.898636341094971, "learning_rate": 4.803921568627452e-05, "loss": 1.201, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 4000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1323529411764706, "grad_norm": 4.946361541748047, "learning_rate": 4.7794117647058826e-05, "loss": 1.244, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 4500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14705882352941177, "grad_norm": 3.6475930213928223, "learning_rate": 4.7549019607843135e-05, "loss": 1.2188, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 5000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16176470588235295, "grad_norm": 6.001659870147705, "learning_rate": 4.730392156862745e-05, "loss": 1.1643, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 5500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17647058823529413, "grad_norm": 4.4559526443481445, "learning_rate": 4.705882352941177e-05, "loss": 1.1959, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 6000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19117647058823528, "grad_norm": 1.5984530448913574, "learning_rate": 4.681372549019608e-05, "loss": 1.1349, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 6500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.20588235294117646, "grad_norm": 3.468794345855713, "learning_rate": 4.656862745098039e-05, "loss": 1.1358, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 7000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22058823529411764, "grad_norm": 5.2478485107421875, "learning_rate": 4.632352941176471e-05, "loss": 1.0712, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 7500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23529411764705882, "grad_norm": 4.6103739738464355, "learning_rate": 4.607843137254902e-05, "loss": 1.089, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 8000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 6.615376949310303, "learning_rate": 4.5833333333333334e-05, "loss": 1.0837, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 8500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2647058823529412, "grad_norm": 8.597518920898438, "learning_rate": 4.558823529411765e-05, "loss": 1.0957, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 9000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27941176470588236, "grad_norm": 4.671794891357422, "learning_rate": 4.5343137254901966e-05, "loss": 1.0809, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 9500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29411764705882354, "grad_norm": 6.989711761474609, "learning_rate": 4.5098039215686275e-05, "loss": 1.0722, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 10000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3088235294117647, "grad_norm": 4.134768486022949, "learning_rate": 4.485294117647059e-05, "loss": 1.06, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 10500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3235294117647059, "grad_norm": 7.303393840789795, "learning_rate": 4.460784313725491e-05, "loss": 0.9973, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 11000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3382352941176471, "grad_norm": 5.387753486633301, "learning_rate": 4.4362745098039216e-05, "loss": 1.0572, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 11500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35294117647058826, "grad_norm": 4.046933650970459, "learning_rate": 4.411764705882353e-05, "loss": 1.0197, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 12000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36764705882352944, "grad_norm": 7.536805152893066, "learning_rate": 4.387254901960784e-05, "loss": 1.0116, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 12500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38235294117647056, "grad_norm": 7.579131126403809, "learning_rate": 4.362745098039216e-05, "loss": 1.0145, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 13000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39705882352941174, "grad_norm": 7.731176376342773, "learning_rate": 4.3382352941176474e-05, "loss": 1.016, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 13500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4117647058823529, "grad_norm": 7.9315690994262695, "learning_rate": 4.313725490196079e-05, "loss": 0.9855, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 14000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4264705882352941, "grad_norm": 3.403750419616699, "learning_rate": 4.28921568627451e-05, "loss": 0.969, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 14500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4411764705882353, "grad_norm": 3.1541624069213867, "learning_rate": 4.2647058823529415e-05, "loss": 0.9733, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 15000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45588235294117646, "grad_norm": 6.017275333404541, "learning_rate": 4.2401960784313724e-05, "loss": 0.9791, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 15500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47058823529411764, "grad_norm": 5.537393569946289, "learning_rate": 4.215686274509804e-05, "loss": 0.9449, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 16000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4852941176470588, "grad_norm": 6.600454330444336, "learning_rate": 4.1911764705882356e-05, "loss": 0.9643, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 16500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 5.120028972625732, "learning_rate": 4.166666666666667e-05, "loss": 0.9715, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 17000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5147058823529411, "grad_norm": 3.404690742492676, "learning_rate": 4.142156862745099e-05, "loss": 0.9885, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 17500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5294117647058824, "grad_norm": 2.9108402729034424, "learning_rate": 4.11764705882353e-05, "loss": 0.9777, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 18000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5441176470588235, "grad_norm": 6.1733832359313965, "learning_rate": 4.0931372549019607e-05, "loss": 0.9525, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 18500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5588235294117647, "grad_norm": 2.5774946212768555, "learning_rate": 4.068627450980392e-05, "loss": 0.9244, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 19000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5735294117647058, "grad_norm": 2.9848501682281494, "learning_rate": 4.044117647058824e-05, "loss": 0.9585, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 19500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5882352941176471, "grad_norm": 3.7758054733276367, "learning_rate": 4.0196078431372555e-05, "loss": 0.9449, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 20000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6029411764705882, "grad_norm": 7.037225246429443, "learning_rate": 3.9950980392156864e-05, "loss": 0.9551, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 20500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6176470588235294, "grad_norm": 2.501617670059204, "learning_rate": 3.970588235294117e-05, "loss": 0.9396, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 21000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6323529411764706, "grad_norm": 3.3458075523376465, "learning_rate": 3.946078431372549e-05, "loss": 0.9053, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 21500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6470588235294118, "grad_norm": 6.687346458435059, "learning_rate": 3.9215686274509805e-05, "loss": 0.9441, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 22000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6617647058823529, "grad_norm": 4.228150844573975, "learning_rate": 3.897058823529412e-05, "loss": 0.9013, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 22500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6764705882352942, "grad_norm": 5.184825420379639, "learning_rate": 3.872549019607844e-05, "loss": 0.8978, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 23000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6911764705882353, "grad_norm": 4.4502058029174805, "learning_rate": 3.8480392156862746e-05, "loss": 0.9099, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 23500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7058823529411765, "grad_norm": 3.760995388031006, "learning_rate": 3.8235294117647055e-05, "loss": 0.9156, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 24000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7205882352941176, "grad_norm": 8.011835098266602, "learning_rate": 3.799019607843137e-05, "loss": 0.8686, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 24500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7352941176470589, "grad_norm": 6.8909382820129395, "learning_rate": 3.774509803921569e-05, "loss": 0.8945, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 25000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 7.357186317443848, "learning_rate": 3.7500000000000003e-05, "loss": 0.8787, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 25500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7647058823529411, "grad_norm": 7.247834205627441, "learning_rate": 3.725490196078432e-05, "loss": 0.8591, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 26000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7794117647058824, "grad_norm": 0.40234723687171936, "learning_rate": 3.700980392156863e-05, "loss": 0.8748, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 26500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7941176470588235, "grad_norm": 8.666840553283691, "learning_rate": 3.6764705882352945e-05, "loss": 0.8754, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 27000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8088235294117647, "grad_norm": 5.52506160736084, "learning_rate": 3.6519607843137254e-05, "loss": 0.8865, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 27500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8235294117647058, "grad_norm": 4.968148231506348, "learning_rate": 3.627450980392157e-05, "loss": 0.8651, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 28000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8382352941176471, "grad_norm": 2.7325375080108643, "learning_rate": 3.6029411764705886e-05, "loss": 0.8577, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 28500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8529411764705882, "grad_norm": 2.859745502471924, "learning_rate": 3.5784313725490195e-05, "loss": 0.8683, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 29000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8676470588235294, "grad_norm": 4.353167533874512, "learning_rate": 3.553921568627451e-05, "loss": 0.8804, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 29500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8823529411764706, "grad_norm": 8.247920989990234, "learning_rate": 3.529411764705883e-05, "loss": 0.8303, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 30000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8970588235294118, "grad_norm": 6.413235664367676, "learning_rate": 3.5049019607843136e-05, "loss": 0.8827, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 30500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9117647058823529, "grad_norm": 6.1504621505737305, "learning_rate": 3.480392156862745e-05, "loss": 0.8411, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 31000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9264705882352942, "grad_norm": 3.2474753856658936, "learning_rate": 3.455882352941177e-05, "loss": 0.8524, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 31500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9411764705882353, "grad_norm": 4.47405481338501, "learning_rate": 3.431372549019608e-05, "loss": 0.8539, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 32000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9558823529411765, "grad_norm": 14.277088165283203, "learning_rate": 3.4068627450980394e-05, "loss": 0.8531, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 32500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9705882352941176, "grad_norm": 0.5225111842155457, "learning_rate": 3.382352941176471e-05, "loss": 0.8423, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 33000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9852941176470589, "grad_norm": 5.679318904876709, "learning_rate": 3.357843137254902e-05, "loss": 0.8335, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 33500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 4.732542514801025, "learning_rate": 3.3333333333333335e-05, "loss": 0.8728, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 34000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0147058823529411, "grad_norm": 5.163674354553223, "learning_rate": 3.308823529411765e-05, "loss": 0.8364, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 34500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0294117647058822, "grad_norm": 4.340713024139404, "learning_rate": 3.284313725490196e-05, "loss": 0.795, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 35000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0441176470588236, "grad_norm": 1.8152037858963013, "learning_rate": 3.2598039215686276e-05, "loss": 0.8103, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 35500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0588235294117647, "grad_norm": 4.854606628417969, "learning_rate": 3.235294117647059e-05, "loss": 0.838, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 36000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0735294117647058, "grad_norm": 9.439714431762695, "learning_rate": 3.210784313725491e-05, "loss": 0.82, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 36500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.088235294117647, "grad_norm": 4.896808624267578, "learning_rate": 3.186274509803922e-05, "loss": 0.854, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 37000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1029411764705883, "grad_norm": 5.517809867858887, "learning_rate": 3.161764705882353e-05, "loss": 0.8085, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 37500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1176470588235294, "grad_norm": 4.532939910888672, "learning_rate": 3.137254901960784e-05, "loss": 0.7907, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 38000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1323529411764706, "grad_norm": 2.4086740016937256, "learning_rate": 3.112745098039216e-05, "loss": 0.767, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 38500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1470588235294117, "grad_norm": 3.4542815685272217, "learning_rate": 3.0882352941176475e-05, "loss": 0.8148, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 39000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.161764705882353, "grad_norm": 3.529639959335327, "learning_rate": 3.063725490196079e-05, "loss": 0.7987, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 39500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1764705882352942, "grad_norm": 4.117021560668945, "learning_rate": 3.0392156862745097e-05, "loss": 0.7714, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 40000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1911764705882353, "grad_norm": 6.675631523132324, "learning_rate": 3.0147058823529413e-05, "loss": 0.7904, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 40500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2058823529411764, "grad_norm": 10.180689811706543, "learning_rate": 2.9901960784313725e-05, "loss": 0.809, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 41000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2205882352941178, "grad_norm": 4.973780632019043, "learning_rate": 2.965686274509804e-05, "loss": 0.8098, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 41500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2352941176470589, "grad_norm": 2.8854587078094482, "learning_rate": 2.9411764705882354e-05, "loss": 0.7711, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 42000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 3.0195820331573486, "learning_rate": 2.916666666666667e-05, "loss": 0.8153, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 42500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2647058823529411, "grad_norm": 7.375023365020752, "learning_rate": 2.8921568627450986e-05, "loss": 0.7773, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 43000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2794117647058822, "grad_norm": 2.4358959197998047, "learning_rate": 2.8676470588235295e-05, "loss": 0.7846, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 43500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2941176470588236, "grad_norm": 3.8631513118743896, "learning_rate": 2.8431372549019608e-05, "loss": 0.7751, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 44000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3088235294117647, "grad_norm": 4.434110164642334, "learning_rate": 2.8186274509803924e-05, "loss": 0.7435, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 44500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3235294117647058, "grad_norm": 0.8194059133529663, "learning_rate": 2.7941176470588236e-05, "loss": 0.7956, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 45000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3382352941176472, "grad_norm": 13.403727531433105, "learning_rate": 2.7696078431372552e-05, "loss": 0.7502, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 45500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3529411764705883, "grad_norm": 6.71666145324707, "learning_rate": 2.7450980392156865e-05, "loss": 0.7996, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 46000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3676470588235294, "grad_norm": 5.671672344207764, "learning_rate": 2.7205882352941174e-05, "loss": 0.785, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 46500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3823529411764706, "grad_norm": 7.542393207550049, "learning_rate": 2.696078431372549e-05, "loss": 0.751, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 47000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3970588235294117, "grad_norm": 9.15674114227295, "learning_rate": 2.6715686274509806e-05, "loss": 0.7809, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 47500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4117647058823528, "grad_norm": 9.020813941955566, "learning_rate": 2.647058823529412e-05, "loss": 0.775, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 48000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4264705882352942, "grad_norm": 5.843054294586182, "learning_rate": 2.6225490196078435e-05, "loss": 0.7526, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 48500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4411764705882353, "grad_norm": 6.700314521789551, "learning_rate": 2.5980392156862747e-05, "loss": 0.7809, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 49000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4558823529411764, "grad_norm": 4.38136625289917, "learning_rate": 2.5735294117647057e-05, "loss": 0.8103, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 49500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4705882352941178, "grad_norm": 4.148223400115967, "learning_rate": 2.5490196078431373e-05, "loss": 0.749, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 50000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4852941176470589, "grad_norm": 3.625410795211792, "learning_rate": 2.5245098039215685e-05, "loss": 0.7584, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 50500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 4.744174480438232, "learning_rate": 2.5e-05, "loss": 0.7611, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 51000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5147058823529411, "grad_norm": 3.2249341011047363, "learning_rate": 2.4754901960784317e-05, "loss": 0.7571, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 51500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5294117647058822, "grad_norm": 20.33920669555664, "learning_rate": 2.4509803921568626e-05, "loss": 0.7638, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 52000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5441176470588234, "grad_norm": 7.30839729309082, "learning_rate": 2.4264705882352942e-05, "loss": 0.7334, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 52500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5588235294117647, "grad_norm": 7.568194389343262, "learning_rate": 2.401960784313726e-05, "loss": 0.7409, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 53000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5735294117647058, "grad_norm": 5.886373519897461, "learning_rate": 2.3774509803921568e-05, "loss": 0.7632, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 53500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5882352941176472, "grad_norm": 3.079610824584961, "learning_rate": 2.3529411764705884e-05, "loss": 0.7436, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 54000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6029411764705883, "grad_norm": 3.6385908126831055, "learning_rate": 2.3284313725490196e-05, "loss": 0.7572, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 54500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6176470588235294, "grad_norm": 3.804089069366455, "learning_rate": 2.303921568627451e-05, "loss": 0.7404, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 55000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6323529411764706, "grad_norm": 8.9433012008667, "learning_rate": 2.2794117647058825e-05, "loss": 0.7431, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 55500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6470588235294117, "grad_norm": 2.727815628051758, "learning_rate": 2.2549019607843138e-05, "loss": 0.7042, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 56000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6617647058823528, "grad_norm": 10.063603401184082, "learning_rate": 2.2303921568627454e-05, "loss": 0.7107, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 56500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6764705882352942, "grad_norm": 4.510873317718506, "learning_rate": 2.2058823529411766e-05, "loss": 0.7535, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 57000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6911764705882353, "grad_norm": 5.605432510375977, "learning_rate": 2.181372549019608e-05, "loss": 0.7505, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 57500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7058823529411766, "grad_norm": 0.6501856446266174, "learning_rate": 2.1568627450980395e-05, "loss": 0.7346, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 58000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7205882352941178, "grad_norm": 5.737814903259277, "learning_rate": 2.1323529411764707e-05, "loss": 0.7569, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 58500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7352941176470589, "grad_norm": 8.297637939453125, "learning_rate": 2.107843137254902e-05, "loss": 0.7421, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 59000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 4.928978443145752, "learning_rate": 2.0833333333333336e-05, "loss": 0.7456, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 59500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7647058823529411, "grad_norm": 2.3863282203674316, "learning_rate": 2.058823529411765e-05, "loss": 0.7474, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 60000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7794117647058822, "grad_norm": 4.122894287109375, "learning_rate": 2.034313725490196e-05, "loss": 0.7338, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 60500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7941176470588234, "grad_norm": 3.6892104148864746, "learning_rate": 2.0098039215686277e-05, "loss": 0.725, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 61000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8088235294117647, "grad_norm": 6.866763591766357, "learning_rate": 1.9852941176470586e-05, "loss": 0.7241, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 61500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8235294117647058, "grad_norm": 1.1342861652374268, "learning_rate": 1.9607843137254903e-05, "loss": 0.7295, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 62000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8382352941176472, "grad_norm": 2.4428770542144775, "learning_rate": 1.936274509803922e-05, "loss": 0.733, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 62500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8529411764705883, "grad_norm": 9.717529296875, "learning_rate": 1.9117647058823528e-05, "loss": 0.7384, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 63000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8676470588235294, "grad_norm": 2.8925726413726807, "learning_rate": 1.8872549019607844e-05, "loss": 0.7347, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 63500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8823529411764706, "grad_norm": 3.295344352722168, "learning_rate": 1.862745098039216e-05, "loss": 0.6938, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 64000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8970588235294117, "grad_norm": 2.7375340461730957, "learning_rate": 1.8382352941176472e-05, "loss": 0.6905, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 64500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9117647058823528, "grad_norm": 6.166092395782471, "learning_rate": 1.8137254901960785e-05, "loss": 0.6869, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 65000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9264705882352942, "grad_norm": 6.4301958084106445, "learning_rate": 1.7892156862745098e-05, "loss": 0.6835, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 65500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9411764705882353, "grad_norm": 4.680153846740723, "learning_rate": 1.7647058823529414e-05, "loss": 0.7159, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 66000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9558823529411766, "grad_norm": 7.453646183013916, "learning_rate": 1.7401960784313726e-05, "loss": 0.7334, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 66500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9705882352941178, "grad_norm": 5.627362251281738, "learning_rate": 1.715686274509804e-05, "loss": 0.6815, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 67000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9852941176470589, "grad_norm": 11.986687660217285, "learning_rate": 1.6911764705882355e-05, "loss": 0.6722, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 67500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0, "grad_norm": 4.421992778778076, "learning_rate": 1.6666666666666667e-05, "loss": 0.673, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 68000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.014705882352941, "grad_norm": 7.424468517303467, "learning_rate": 1.642156862745098e-05, "loss": 0.6993, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 68500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0294117647058822, "grad_norm": 5.982107639312744, "learning_rate": 1.6176470588235296e-05, "loss": 0.6506, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 69000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0441176470588234, "grad_norm": 1.392773151397705, "learning_rate": 1.593137254901961e-05, "loss": 0.6809, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 69500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0588235294117645, "grad_norm": 9.153546333312988, "learning_rate": 1.568627450980392e-05, "loss": 0.7213, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 70000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.073529411764706, "grad_norm": 7.502923965454102, "learning_rate": 1.5441176470588237e-05, "loss": 0.6766, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 70500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.088235294117647, "grad_norm": 5.26950216293335, "learning_rate": 1.5196078431372548e-05, "loss": 0.6549, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 71000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.1029411764705883, "grad_norm": 4.774547100067139, "learning_rate": 1.4950980392156863e-05, "loss": 0.6979, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 71500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.1176470588235294, "grad_norm": 6.183305740356445, "learning_rate": 1.4705882352941177e-05, "loss": 0.6647, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 72000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.1323529411764706, "grad_norm": 6.1788811683654785, "learning_rate": 1.4460784313725493e-05, "loss": 0.7104, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 72500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.1470588235294117, "grad_norm": 6.638744831085205, "learning_rate": 1.4215686274509804e-05, "loss": 0.6424, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 73000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.161764705882353, "grad_norm": 11.812053680419922, "learning_rate": 1.3970588235294118e-05, "loss": 0.7078, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 73500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.176470588235294, "grad_norm": 7.511487007141113, "learning_rate": 1.3725490196078432e-05, "loss": 0.6519, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 74000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.1911764705882355, "grad_norm": 4.150969982147217, "learning_rate": 1.3480392156862745e-05, "loss": 0.6617, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 74500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2058823529411766, "grad_norm": 2.6986491680145264, "learning_rate": 1.323529411764706e-05, "loss": 0.6712, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 75000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2205882352941178, "grad_norm": 5.613584041595459, "learning_rate": 1.2990196078431374e-05, "loss": 0.6358, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 75500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.235294117647059, "grad_norm": 4.334458351135254, "learning_rate": 1.2745098039215686e-05, "loss": 0.7113, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 76000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.25, "grad_norm": 8.442917823791504, "learning_rate": 1.25e-05, "loss": 0.6613, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 76500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.264705882352941, "grad_norm": 5.224609851837158, "learning_rate": 1.2254901960784313e-05, "loss": 0.6748, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 77000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2794117647058822, "grad_norm": 2.9958598613739014, "learning_rate": 1.200980392156863e-05, "loss": 0.6934, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 77500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2941176470588234, "grad_norm": 7.674880027770996, "learning_rate": 1.1764705882352942e-05, "loss": 0.6294, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 78000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.3088235294117645, "grad_norm": 4.707705497741699, "learning_rate": 1.1519607843137254e-05, "loss": 0.6297, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 78500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.323529411764706, "grad_norm": 4.538410663604736, "learning_rate": 1.1274509803921569e-05, "loss": 0.6277, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 79000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.338235294117647, "grad_norm": 5.959723949432373, "learning_rate": 1.1029411764705883e-05, "loss": 0.6544, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 79500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.3529411764705883, "grad_norm": 3.681849479675293, "learning_rate": 1.0784313725490197e-05, "loss": 0.6407, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 80000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.3676470588235294, "grad_norm": 1.0268243551254272, "learning_rate": 1.053921568627451e-05, "loss": 0.6442, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 80500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.3823529411764706, "grad_norm": 3.558901786804199, "learning_rate": 1.0294117647058824e-05, "loss": 0.6604, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 81000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.3970588235294117, "grad_norm": 4.599427223205566, "learning_rate": 1.0049019607843139e-05, "loss": 0.6193, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 81500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.411764705882353, "grad_norm": 3.1800103187561035, "learning_rate": 9.803921568627451e-06, "loss": 0.6674, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 82000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.426470588235294, "grad_norm": 3.8055174350738525, "learning_rate": 9.558823529411764e-06, "loss": 0.6839, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 82500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4411764705882355, "grad_norm": 2.8182101249694824, "learning_rate": 9.31372549019608e-06, "loss": 0.6537, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 83000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4558823529411766, "grad_norm": 6.889364719390869, "learning_rate": 9.068627450980392e-06, "loss": 0.659, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 83500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4705882352941178, "grad_norm": 7.853886604309082, "learning_rate": 8.823529411764707e-06, "loss": 0.67, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 84000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.485294117647059, "grad_norm": 8.194958686828613, "learning_rate": 8.57843137254902e-06, "loss": 0.6585, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 84500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5, "grad_norm": 4.7262797355651855, "learning_rate": 8.333333333333334e-06, "loss": 0.6413, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 85000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.514705882352941, "grad_norm": 7.074121952056885, "learning_rate": 8.088235294117648e-06, "loss": 0.6184, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 85500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5294117647058822, "grad_norm": 5.776811599731445, "learning_rate": 7.84313725490196e-06, "loss": 0.6196, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 86000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5441176470588234, "grad_norm": 1.1076288223266602, "learning_rate": 7.598039215686274e-06, "loss": 0.6491, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 86500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5588235294117645, "grad_norm": 4.6030354499816895, "learning_rate": 7.3529411764705884e-06, "loss": 0.668, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 87000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5735294117647056, "grad_norm": 4.7051682472229, "learning_rate": 7.107843137254902e-06, "loss": 0.6404, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 87500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.588235294117647, "grad_norm": 5.557882308959961, "learning_rate": 6.862745098039216e-06, "loss": 0.6326, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 88000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6029411764705883, "grad_norm": 6.156619548797607, "learning_rate": 6.61764705882353e-06, "loss": 0.6307, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 88500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6176470588235294, "grad_norm": 6.322505474090576, "learning_rate": 6.372549019607843e-06, "loss": 0.6219, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 89000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6323529411764706, "grad_norm": 9.32959270477295, "learning_rate": 6.127450980392157e-06, "loss": 0.6621, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 89500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6470588235294117, "grad_norm": 6.305071830749512, "learning_rate": 5.882352941176471e-06, "loss": 0.6659, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 90000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.661764705882353, "grad_norm": 6.671431064605713, "learning_rate": 5.637254901960784e-06, "loss": 0.6363, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 90500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6764705882352944, "grad_norm": 7.769582748413086, "learning_rate": 5.392156862745099e-06, "loss": 0.5968, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 91000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6911764705882355, "grad_norm": 5.684257507324219, "learning_rate": 5.147058823529412e-06, "loss": 0.6371, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 91500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7058823529411766, "grad_norm": 2.3605005741119385, "learning_rate": 4.901960784313726e-06, "loss": 0.6213, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 92000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7205882352941178, "grad_norm": 2.693053960800171, "learning_rate": 4.65686274509804e-06, "loss": 0.6121, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 92500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.735294117647059, "grad_norm": 5.366748332977295, "learning_rate": 4.411764705882353e-06, "loss": 0.6359, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 93000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.75, "grad_norm": 10.606904983520508, "learning_rate": 4.166666666666667e-06, "loss": 0.626, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 93500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.764705882352941, "grad_norm": 3.8781898021698, "learning_rate": 3.92156862745098e-06, "loss": 0.6125, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 94000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7794117647058822, "grad_norm": 5.4950480461120605, "learning_rate": 3.6764705882352942e-06, "loss": 0.6427, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 94500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7941176470588234, "grad_norm": 9.753504753112793, "learning_rate": 3.431372549019608e-06, "loss": 0.609, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 95000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8088235294117645, "grad_norm": 8.253406524658203, "learning_rate": 3.1862745098039216e-06, "loss": 0.6151, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 95500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8235294117647056, "grad_norm": 2.7147912979125977, "learning_rate": 2.9411764705882355e-06, "loss": 0.6456, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 96000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.838235294117647, "grad_norm": 5.022973537445068, "learning_rate": 2.6960784313725493e-06, "loss": 0.6312, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 96500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8529411764705883, "grad_norm": 12.746963500976562, "learning_rate": 2.450980392156863e-06, "loss": 0.6035, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 97000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8676470588235294, "grad_norm": 4.809776306152344, "learning_rate": 2.2058823529411767e-06, "loss": 0.6058, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 97500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8823529411764706, "grad_norm": 7.464942455291748, "learning_rate": 1.96078431372549e-06, "loss": 0.6275, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 98000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8970588235294117, "grad_norm": 4.378190517425537, "learning_rate": 1.715686274509804e-06, "loss": 0.6096, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 98500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.911764705882353, "grad_norm": 6.932071685791016, "learning_rate": 1.4705882352941177e-06, "loss": 0.6174, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 99000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.9264705882352944, "grad_norm": 1.794121503829956, "learning_rate": 1.2254901960784314e-06, "loss": 0.6046, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 99500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.9411764705882355, "grad_norm": 6.8889055252075195, "learning_rate": 9.80392156862745e-07, "loss": 0.6232, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 100000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.9558823529411766, "grad_norm": 5.478596210479736, "learning_rate": 7.352941176470589e-07, "loss": 0.6378, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 100500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.9705882352941178, "grad_norm": 6.872597694396973, "learning_rate": 4.901960784313725e-07, "loss": 0.6229, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 101000, "total_memory_available (GB)": 94.62 }, { "epoch": 2.985294117647059, "grad_norm": 10.850652694702148, "learning_rate": 2.4509803921568627e-07, "loss": 0.6352, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 101500, "total_memory_available (GB)": 94.62 }, { "epoch": 3.0, "grad_norm": 4.699875831604004, "learning_rate": 0.0, "loss": 0.5992, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 102000, "total_memory_available (GB)": 94.62 }, { "epoch": 3.0, "max_memory_allocated (GB)": 2.09, "memory_allocated (GB)": 1.44, "step": 102000, "total_flos": 6.324139790696448e+19, "total_memory_available (GB)": 94.62, "train_loss": 0.8091567421707453, "train_runtime": 5702.0513, "train_samples_per_second": 143.106, "train_steps_per_second": 17.888 } ], "logging_steps": 500, "max_steps": 102000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.324139790696448e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }