{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 1960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04081632653061224, "grad_norm": 9.978917121887207, "learning_rate": 9.948979591836737e-06, "loss": 0.2617, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08163265306122448, "grad_norm": 7.48874044418335, "learning_rate": 9.89795918367347e-06, "loss": 0.1563, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12244897959183673, "grad_norm": 5.081777572631836, "learning_rate": 9.846938775510205e-06, "loss": 0.1254, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16326530612244897, "grad_norm": 4.443576812744141, "learning_rate": 9.795918367346939e-06, "loss": 0.1113, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.20408163265306123, "grad_norm": 4.356841087341309, "learning_rate": 9.744897959183674e-06, "loss": 0.1257, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24489795918367346, "grad_norm": 1.98320472240448, "learning_rate": 9.693877551020408e-06, "loss": 0.0819, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2857142857142857, "grad_norm": 3.809190273284912, "learning_rate": 9.642857142857144e-06, "loss": 0.1032, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32653061224489793, "grad_norm": 1.645442247390747, "learning_rate": 9.591836734693878e-06, "loss": 0.1124, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3673469387755102, "grad_norm": 3.7085306644439697, "learning_rate": 9.540816326530612e-06, "loss": 0.0847, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.40816326530612246, "grad_norm": 3.9240212440490723, "learning_rate": 9.489795918367348e-06, "loss": 0.0753, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4489795918367347, "grad_norm": 3.737152338027954, "learning_rate": 9.438775510204082e-06, "loss": 0.0723, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4897959183673469, "grad_norm": 2.308751344680786, "learning_rate": 9.387755102040818e-06, "loss": 0.0699, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5306122448979592, "grad_norm": 2.3706369400024414, "learning_rate": 9.336734693877552e-06, "loss": 0.0589, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5714285714285714, "grad_norm": 0.968673050403595, "learning_rate": 9.285714285714288e-06, "loss": 0.0503, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6122448979591837, "grad_norm": 1.9251790046691895, "learning_rate": 9.234693877551022e-06, "loss": 0.0628, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6530612244897959, "grad_norm": 1.7473604679107666, "learning_rate": 9.183673469387756e-06, "loss": 0.0708, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6938775510204082, "grad_norm": 1.9279741048812866, "learning_rate": 9.13265306122449e-06, "loss": 0.075, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7346938775510204, "grad_norm": 1.4570097923278809, "learning_rate": 9.081632653061225e-06, "loss": 0.0614, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7755102040816326, "grad_norm": 2.014692544937134, "learning_rate": 9.03061224489796e-06, "loss": 0.058, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8163265306122449, "grad_norm": 1.7634117603302002, "learning_rate": 8.979591836734695e-06, "loss": 0.0629, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8571428571428571, "grad_norm": 0.7091555595397949, "learning_rate": 8.92857142857143e-06, "loss": 0.0637, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8979591836734694, "grad_norm": 1.073096752166748, "learning_rate": 8.877551020408163e-06, "loss": 0.0603, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9387755102040817, "grad_norm": 0.7938856482505798, "learning_rate": 8.826530612244899e-06, "loss": 0.0538, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9795918367346939, "grad_norm": 1.192353367805481, "learning_rate": 8.775510204081633e-06, "loss": 0.0493, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0204081632653061, "grad_norm": 0.9369480013847351, "learning_rate": 8.724489795918369e-06, "loss": 0.0595, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0612244897959184, "grad_norm": 1.2866365909576416, "learning_rate": 8.673469387755103e-06, "loss": 0.0532, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1020408163265305, "grad_norm": 16.09465980529785, "learning_rate": 8.622448979591837e-06, "loss": 0.0663, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1428571428571428, "grad_norm": 2.3071987628936768, "learning_rate": 8.571428571428571e-06, "loss": 0.0633, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.183673469387755, "grad_norm": 1.2905592918395996, "learning_rate": 8.520408163265307e-06, "loss": 0.0498, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2244897959183674, "grad_norm": 1.4856091737747192, "learning_rate": 8.469387755102042e-06, "loss": 0.0639, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2653061224489797, "grad_norm": 0.5537325739860535, "learning_rate": 8.418367346938776e-06, "loss": 0.0673, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.306122448979592, "grad_norm": 1.2954118251800537, "learning_rate": 8.36734693877551e-06, "loss": 0.0505, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.346938775510204, "grad_norm": 0.6809917092323303, "learning_rate": 8.316326530612246e-06, "loss": 0.0623, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3877551020408163, "grad_norm": 2.596815586090088, "learning_rate": 8.26530612244898e-06, "loss": 0.0552, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4285714285714286, "grad_norm": 1.0378236770629883, "learning_rate": 8.214285714285714e-06, "loss": 0.049, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.469387755102041, "grad_norm": 1.7493040561676025, "learning_rate": 8.16326530612245e-06, "loss": 0.0465, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.510204081632653, "grad_norm": 1.149561882019043, "learning_rate": 8.112244897959184e-06, "loss": 0.0584, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5510204081632653, "grad_norm": 0.8010720014572144, "learning_rate": 8.06122448979592e-06, "loss": 0.047, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5918367346938775, "grad_norm": 0.7010307908058167, "learning_rate": 8.010204081632654e-06, "loss": 0.0649, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6326530612244898, "grad_norm": 2.022503137588501, "learning_rate": 7.959183673469388e-06, "loss": 0.0612, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6734693877551021, "grad_norm": 1.3006742000579834, "learning_rate": 7.908163265306124e-06, "loss": 0.0605, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7142857142857144, "grad_norm": 1.513334035873413, "learning_rate": 7.857142857142858e-06, "loss": 0.054, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7551020408163265, "grad_norm": 0.28943702578544617, "learning_rate": 7.806122448979593e-06, "loss": 0.0673, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7959183673469388, "grad_norm": 1.2818681001663208, "learning_rate": 7.755102040816327e-06, "loss": 0.0614, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.836734693877551, "grad_norm": 0.5026584267616272, "learning_rate": 7.704081632653061e-06, "loss": 0.0443, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8775510204081631, "grad_norm": 0.400056391954422, "learning_rate": 7.653061224489796e-06, "loss": 0.054, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9183673469387754, "grad_norm": 0.7661588191986084, "learning_rate": 7.602040816326531e-06, "loss": 0.0439, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9591836734693877, "grad_norm": 0.3066469728946686, "learning_rate": 7.551020408163265e-06, "loss": 0.0511, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0, "grad_norm": 1.1751477718353271, "learning_rate": 7.500000000000001e-06, "loss": 0.0644, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0408163265306123, "grad_norm": 0.6497346758842468, "learning_rate": 7.448979591836736e-06, "loss": 0.0596, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0816326530612246, "grad_norm": 0.585145890712738, "learning_rate": 7.39795918367347e-06, "loss": 0.0502, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 2.122448979591837, "grad_norm": 1.0224946737289429, "learning_rate": 7.346938775510205e-06, "loss": 0.0462, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 2.163265306122449, "grad_norm": 0.9922281503677368, "learning_rate": 7.295918367346939e-06, "loss": 0.063, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 2.204081632653061, "grad_norm": 0.7550894618034363, "learning_rate": 7.244897959183675e-06, "loss": 0.0595, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2448979591836733, "grad_norm": 1.000552773475647, "learning_rate": 7.193877551020409e-06, "loss": 0.0645, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2857142857142856, "grad_norm": 0.7375513315200806, "learning_rate": 7.1428571428571436e-06, "loss": 0.0597, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 2.326530612244898, "grad_norm": 0.7129970192909241, "learning_rate": 7.091836734693878e-06, "loss": 0.0603, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 2.36734693877551, "grad_norm": 0.8948765993118286, "learning_rate": 7.0408163265306125e-06, "loss": 0.0673, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4081632653061225, "grad_norm": 0.4436047375202179, "learning_rate": 6.989795918367348e-06, "loss": 0.0547, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4489795918367347, "grad_norm": 2.562260627746582, "learning_rate": 6.938775510204082e-06, "loss": 0.044, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 2.489795918367347, "grad_norm": 2.5978403091430664, "learning_rate": 6.887755102040817e-06, "loss": 0.042, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5306122448979593, "grad_norm": 0.8350633978843689, "learning_rate": 6.836734693877551e-06, "loss": 0.0429, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 2.571428571428571, "grad_norm": 1.0908092260360718, "learning_rate": 6.785714285714287e-06, "loss": 0.0815, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 2.612244897959184, "grad_norm": 1.411789059638977, "learning_rate": 6.734693877551021e-06, "loss": 0.0506, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6530612244897958, "grad_norm": 0.7262634038925171, "learning_rate": 6.683673469387756e-06, "loss": 0.0486, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 2.693877551020408, "grad_norm": 0.6718008518218994, "learning_rate": 6.63265306122449e-06, "loss": 0.0478, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7346938775510203, "grad_norm": 0.6992954015731812, "learning_rate": 6.581632653061225e-06, "loss": 0.0876, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7755102040816326, "grad_norm": 3.032949447631836, "learning_rate": 6.530612244897959e-06, "loss": 0.0497, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 2.816326530612245, "grad_norm": 0.544232189655304, "learning_rate": 6.4795918367346946e-06, "loss": 0.0456, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 2.857142857142857, "grad_norm": 0.489704430103302, "learning_rate": 6.4285714285714295e-06, "loss": 0.0438, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8979591836734695, "grad_norm": 0.7447965741157532, "learning_rate": 6.3775510204081635e-06, "loss": 0.0557, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 2.938775510204082, "grad_norm": 3.607469081878662, "learning_rate": 6.326530612244899e-06, "loss": 0.059, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 2.979591836734694, "grad_norm": 0.2003553807735443, "learning_rate": 6.275510204081633e-06, "loss": 0.0349, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 3.020408163265306, "grad_norm": 1.135377287864685, "learning_rate": 6.224489795918368e-06, "loss": 0.0549, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 3.061224489795918, "grad_norm": 0.9238697290420532, "learning_rate": 6.173469387755102e-06, "loss": 0.0627, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 3.1020408163265305, "grad_norm": 0.7442536354064941, "learning_rate": 6.122448979591837e-06, "loss": 0.0528, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 3.142857142857143, "grad_norm": 0.6410558819770813, "learning_rate": 6.071428571428571e-06, "loss": 0.0707, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 3.183673469387755, "grad_norm": 0.4915910065174103, "learning_rate": 6.020408163265307e-06, "loss": 0.0659, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2244897959183674, "grad_norm": 0.33948495984077454, "learning_rate": 5.969387755102042e-06, "loss": 0.0535, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2653061224489797, "grad_norm": 0.9314869046211243, "learning_rate": 5.918367346938776e-06, "loss": 0.0443, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 3.306122448979592, "grad_norm": 0.9704706072807312, "learning_rate": 5.867346938775511e-06, "loss": 0.0562, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 3.3469387755102042, "grad_norm": 0.8564426898956299, "learning_rate": 5.816326530612246e-06, "loss": 0.0466, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 3.387755102040816, "grad_norm": 0.31214070320129395, "learning_rate": 5.7653061224489805e-06, "loss": 0.0488, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4285714285714284, "grad_norm": 0.40054649114608765, "learning_rate": 5.7142857142857145e-06, "loss": 0.0536, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4693877551020407, "grad_norm": 0.476951003074646, "learning_rate": 5.663265306122449e-06, "loss": 0.0819, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 3.510204081632653, "grad_norm": 1.075916051864624, "learning_rate": 5.6122448979591834e-06, "loss": 0.0451, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5510204081632653, "grad_norm": 0.4422233998775482, "learning_rate": 5.561224489795919e-06, "loss": 0.065, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5918367346938775, "grad_norm": 0.7247931361198425, "learning_rate": 5.510204081632653e-06, "loss": 0.0442, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 3.63265306122449, "grad_norm": 0.18422362208366394, "learning_rate": 5.459183673469388e-06, "loss": 0.0295, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 3.673469387755102, "grad_norm": 0.6566686034202576, "learning_rate": 5.408163265306123e-06, "loss": 0.0527, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7142857142857144, "grad_norm": 0.7151392698287964, "learning_rate": 5.357142857142857e-06, "loss": 0.0614, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7551020408163263, "grad_norm": 0.1488690972328186, "learning_rate": 5.306122448979593e-06, "loss": 0.0546, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 3.795918367346939, "grad_norm": 0.472126841545105, "learning_rate": 5.255102040816327e-06, "loss": 0.0514, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 3.836734693877551, "grad_norm": 0.8530511260032654, "learning_rate": 5.204081632653062e-06, "loss": 0.049, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 3.877551020408163, "grad_norm": 1.6832056045532227, "learning_rate": 5.153061224489796e-06, "loss": 0.0603, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9183673469387754, "grad_norm": 0.30192047357559204, "learning_rate": 5.1020408163265315e-06, "loss": 0.0512, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9591836734693877, "grad_norm": 1.3734880685806274, "learning_rate": 5.0510204081632655e-06, "loss": 0.0756, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 4.0, "grad_norm": 0.7525829672813416, "learning_rate": 5e-06, "loss": 0.0715, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 4.040816326530612, "grad_norm": 0.4748665690422058, "learning_rate": 4.948979591836735e-06, "loss": 0.0487, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 4.081632653061225, "grad_norm": 1.340325117111206, "learning_rate": 4.897959183673469e-06, "loss": 0.0638, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 4.122448979591836, "grad_norm": 0.5442948937416077, "learning_rate": 4.846938775510204e-06, "loss": 0.0642, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1010, "total_memory_available (GB)": 94.62 }, { "epoch": 4.163265306122449, "grad_norm": 0.3119046688079834, "learning_rate": 4.795918367346939e-06, "loss": 0.0411, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1020, "total_memory_available (GB)": 94.62 }, { "epoch": 4.204081632653061, "grad_norm": 0.7393902540206909, "learning_rate": 4.744897959183674e-06, "loss": 0.0544, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1030, "total_memory_available (GB)": 94.62 }, { "epoch": 4.244897959183674, "grad_norm": 0.5412510633468628, "learning_rate": 4.693877551020409e-06, "loss": 0.0406, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1040, "total_memory_available (GB)": 94.62 }, { "epoch": 4.285714285714286, "grad_norm": 0.6453996300697327, "learning_rate": 4.642857142857144e-06, "loss": 0.0499, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1050, "total_memory_available (GB)": 94.62 }, { "epoch": 4.326530612244898, "grad_norm": 0.3400985896587372, "learning_rate": 4.591836734693878e-06, "loss": 0.046, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1060, "total_memory_available (GB)": 94.62 }, { "epoch": 4.36734693877551, "grad_norm": 0.5143836736679077, "learning_rate": 4.540816326530613e-06, "loss": 0.0494, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1070, "total_memory_available (GB)": 94.62 }, { "epoch": 4.408163265306122, "grad_norm": 0.38877835869789124, "learning_rate": 4.489795918367348e-06, "loss": 0.0526, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1080, "total_memory_available (GB)": 94.62 }, { "epoch": 4.448979591836735, "grad_norm": 0.38251811265945435, "learning_rate": 4.438775510204082e-06, "loss": 0.051, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1090, "total_memory_available (GB)": 94.62 }, { "epoch": 4.489795918367347, "grad_norm": 0.3022618889808655, "learning_rate": 4.3877551020408165e-06, "loss": 0.0368, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1100, "total_memory_available (GB)": 94.62 }, { "epoch": 4.530612244897959, "grad_norm": 0.12300197780132294, "learning_rate": 4.336734693877551e-06, "loss": 0.0474, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1110, "total_memory_available (GB)": 94.62 }, { "epoch": 4.571428571428571, "grad_norm": 0.7275770902633667, "learning_rate": 4.2857142857142855e-06, "loss": 0.0409, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1120, "total_memory_available (GB)": 94.62 }, { "epoch": 4.612244897959184, "grad_norm": 0.46302053332328796, "learning_rate": 4.234693877551021e-06, "loss": 0.0545, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1130, "total_memory_available (GB)": 94.62 }, { "epoch": 4.653061224489796, "grad_norm": 1.1193764209747314, "learning_rate": 4.183673469387755e-06, "loss": 0.0736, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1140, "total_memory_available (GB)": 94.62 }, { "epoch": 4.6938775510204085, "grad_norm": 0.936698317527771, "learning_rate": 4.13265306122449e-06, "loss": 0.0532, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1150, "total_memory_available (GB)": 94.62 }, { "epoch": 4.73469387755102, "grad_norm": 1.091784119606018, "learning_rate": 4.081632653061225e-06, "loss": 0.064, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1160, "total_memory_available (GB)": 94.62 }, { "epoch": 4.775510204081632, "grad_norm": 0.3371049165725708, "learning_rate": 4.03061224489796e-06, "loss": 0.0557, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1170, "total_memory_available (GB)": 94.62 }, { "epoch": 4.816326530612245, "grad_norm": 0.5533121824264526, "learning_rate": 3.979591836734694e-06, "loss": 0.0449, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1180, "total_memory_available (GB)": 94.62 }, { "epoch": 4.857142857142857, "grad_norm": 1.3483092784881592, "learning_rate": 3.928571428571429e-06, "loss": 0.0551, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1190, "total_memory_available (GB)": 94.62 }, { "epoch": 4.8979591836734695, "grad_norm": 2.4415154457092285, "learning_rate": 3.877551020408164e-06, "loss": 0.0738, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1200, "total_memory_available (GB)": 94.62 }, { "epoch": 4.938775510204081, "grad_norm": 0.4990352690219879, "learning_rate": 3.826530612244898e-06, "loss": 0.0663, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1210, "total_memory_available (GB)": 94.62 }, { "epoch": 4.979591836734694, "grad_norm": 1.045630693435669, "learning_rate": 3.7755102040816327e-06, "loss": 0.0422, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1220, "total_memory_available (GB)": 94.62 }, { "epoch": 5.020408163265306, "grad_norm": 3.719482660293579, "learning_rate": 3.724489795918368e-06, "loss": 0.0531, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1230, "total_memory_available (GB)": 94.62 }, { "epoch": 5.061224489795919, "grad_norm": 0.6931941509246826, "learning_rate": 3.6734693877551024e-06, "loss": 0.0434, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1240, "total_memory_available (GB)": 94.62 }, { "epoch": 5.1020408163265305, "grad_norm": 0.945284903049469, "learning_rate": 3.6224489795918373e-06, "loss": 0.0377, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1250, "total_memory_available (GB)": 94.62 }, { "epoch": 5.142857142857143, "grad_norm": 0.49527707695961, "learning_rate": 3.5714285714285718e-06, "loss": 0.0406, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1260, "total_memory_available (GB)": 94.62 }, { "epoch": 5.183673469387755, "grad_norm": 1.0614029169082642, "learning_rate": 3.5204081632653062e-06, "loss": 0.0614, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1270, "total_memory_available (GB)": 94.62 }, { "epoch": 5.224489795918367, "grad_norm": 1.208749771118164, "learning_rate": 3.469387755102041e-06, "loss": 0.0449, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1280, "total_memory_available (GB)": 94.62 }, { "epoch": 5.26530612244898, "grad_norm": 3.612487554550171, "learning_rate": 3.4183673469387756e-06, "loss": 0.0672, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1290, "total_memory_available (GB)": 94.62 }, { "epoch": 5.3061224489795915, "grad_norm": 0.6228938102722168, "learning_rate": 3.3673469387755105e-06, "loss": 0.0516, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1300, "total_memory_available (GB)": 94.62 }, { "epoch": 5.346938775510204, "grad_norm": 0.586557924747467, "learning_rate": 3.316326530612245e-06, "loss": 0.0674, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1310, "total_memory_available (GB)": 94.62 }, { "epoch": 5.387755102040816, "grad_norm": 0.963624119758606, "learning_rate": 3.2653061224489794e-06, "loss": 0.0621, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1320, "total_memory_available (GB)": 94.62 }, { "epoch": 5.428571428571429, "grad_norm": 1.1783013343811035, "learning_rate": 3.2142857142857147e-06, "loss": 0.0366, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1330, "total_memory_available (GB)": 94.62 }, { "epoch": 5.469387755102041, "grad_norm": 4.429933547973633, "learning_rate": 3.1632653061224496e-06, "loss": 0.0511, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1340, "total_memory_available (GB)": 94.62 }, { "epoch": 5.510204081632653, "grad_norm": 4.795422077178955, "learning_rate": 3.112244897959184e-06, "loss": 0.0601, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1350, "total_memory_available (GB)": 94.62 }, { "epoch": 5.551020408163265, "grad_norm": 0.19068406522274017, "learning_rate": 3.0612244897959185e-06, "loss": 0.0479, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1360, "total_memory_available (GB)": 94.62 }, { "epoch": 5.591836734693878, "grad_norm": 3.7448017597198486, "learning_rate": 3.0102040816326534e-06, "loss": 0.0404, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1370, "total_memory_available (GB)": 94.62 }, { "epoch": 5.63265306122449, "grad_norm": 0.3368137776851654, "learning_rate": 2.959183673469388e-06, "loss": 0.0488, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1380, "total_memory_available (GB)": 94.62 }, { "epoch": 5.673469387755102, "grad_norm": 0.14420035481452942, "learning_rate": 2.908163265306123e-06, "loss": 0.0582, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1390, "total_memory_available (GB)": 94.62 }, { "epoch": 5.714285714285714, "grad_norm": 0.372368723154068, "learning_rate": 2.8571428571428573e-06, "loss": 0.0391, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1400, "total_memory_available (GB)": 94.62 }, { "epoch": 5.755102040816326, "grad_norm": 3.4565131664276123, "learning_rate": 2.8061224489795917e-06, "loss": 0.0616, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1410, "total_memory_available (GB)": 94.62 }, { "epoch": 5.795918367346939, "grad_norm": 3.389681339263916, "learning_rate": 2.7551020408163266e-06, "loss": 0.0675, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1420, "total_memory_available (GB)": 94.62 }, { "epoch": 5.836734693877551, "grad_norm": 0.7100503444671631, "learning_rate": 2.7040816326530615e-06, "loss": 0.036, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1430, "total_memory_available (GB)": 94.62 }, { "epoch": 5.877551020408164, "grad_norm": 0.40696802735328674, "learning_rate": 2.6530612244897964e-06, "loss": 0.0632, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1440, "total_memory_available (GB)": 94.62 }, { "epoch": 5.918367346938775, "grad_norm": 0.7590793967247009, "learning_rate": 2.602040816326531e-06, "loss": 0.0549, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1450, "total_memory_available (GB)": 94.62 }, { "epoch": 5.959183673469388, "grad_norm": 0.48597386479377747, "learning_rate": 2.5510204081632657e-06, "loss": 0.0393, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1460, "total_memory_available (GB)": 94.62 }, { "epoch": 6.0, "grad_norm": 2.6455276012420654, "learning_rate": 2.5e-06, "loss": 0.0566, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1470, "total_memory_available (GB)": 94.62 }, { "epoch": 6.040816326530612, "grad_norm": 2.350471258163452, "learning_rate": 2.4489795918367347e-06, "loss": 0.0509, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1480, "total_memory_available (GB)": 94.62 }, { "epoch": 6.081632653061225, "grad_norm": 3.315977096557617, "learning_rate": 2.3979591836734696e-06, "loss": 0.0523, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1490, "total_memory_available (GB)": 94.62 }, { "epoch": 6.122448979591836, "grad_norm": 1.6327887773513794, "learning_rate": 2.3469387755102044e-06, "loss": 0.0432, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1500, "total_memory_available (GB)": 94.62 }, { "epoch": 6.163265306122449, "grad_norm": 5.029656410217285, "learning_rate": 2.295918367346939e-06, "loss": 0.0468, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1510, "total_memory_available (GB)": 94.62 }, { "epoch": 6.204081632653061, "grad_norm": 3.1543941497802734, "learning_rate": 2.244897959183674e-06, "loss": 0.0471, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1520, "total_memory_available (GB)": 94.62 }, { "epoch": 6.244897959183674, "grad_norm": 1.1178909540176392, "learning_rate": 2.1938775510204083e-06, "loss": 0.0538, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1530, "total_memory_available (GB)": 94.62 }, { "epoch": 6.285714285714286, "grad_norm": 0.7284368276596069, "learning_rate": 2.1428571428571427e-06, "loss": 0.0474, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1540, "total_memory_available (GB)": 94.62 }, { "epoch": 6.326530612244898, "grad_norm": 0.15386615693569183, "learning_rate": 2.0918367346938776e-06, "loss": 0.0327, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1550, "total_memory_available (GB)": 94.62 }, { "epoch": 6.36734693877551, "grad_norm": 12.00415325164795, "learning_rate": 2.0408163265306125e-06, "loss": 0.0568, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1560, "total_memory_available (GB)": 94.62 }, { "epoch": 6.408163265306122, "grad_norm": 0.14763076603412628, "learning_rate": 1.989795918367347e-06, "loss": 0.0389, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1570, "total_memory_available (GB)": 94.62 }, { "epoch": 6.448979591836735, "grad_norm": 0.10665205121040344, "learning_rate": 1.938775510204082e-06, "loss": 0.0526, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1580, "total_memory_available (GB)": 94.62 }, { "epoch": 6.489795918367347, "grad_norm": 0.6945566534996033, "learning_rate": 1.8877551020408163e-06, "loss": 0.0276, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1590, "total_memory_available (GB)": 94.62 }, { "epoch": 6.530612244897959, "grad_norm": 0.6304193735122681, "learning_rate": 1.8367346938775512e-06, "loss": 0.0595, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1600, "total_memory_available (GB)": 94.62 }, { "epoch": 6.571428571428571, "grad_norm": 0.738591194152832, "learning_rate": 1.7857142857142859e-06, "loss": 0.0591, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1610, "total_memory_available (GB)": 94.62 }, { "epoch": 6.612244897959184, "grad_norm": 1.1249669790267944, "learning_rate": 1.7346938775510206e-06, "loss": 0.0444, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1620, "total_memory_available (GB)": 94.62 }, { "epoch": 6.653061224489796, "grad_norm": 0.3204442858695984, "learning_rate": 1.6836734693877552e-06, "loss": 0.041, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1630, "total_memory_available (GB)": 94.62 }, { "epoch": 6.6938775510204085, "grad_norm": 0.6603041887283325, "learning_rate": 1.6326530612244897e-06, "loss": 0.0485, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1640, "total_memory_available (GB)": 94.62 }, { "epoch": 6.73469387755102, "grad_norm": 0.9167451858520508, "learning_rate": 1.5816326530612248e-06, "loss": 0.051, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1650, "total_memory_available (GB)": 94.62 }, { "epoch": 6.775510204081632, "grad_norm": 1.1892409324645996, "learning_rate": 1.5306122448979593e-06, "loss": 0.0577, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1660, "total_memory_available (GB)": 94.62 }, { "epoch": 6.816326530612245, "grad_norm": 1.1679530143737793, "learning_rate": 1.479591836734694e-06, "loss": 0.0581, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1670, "total_memory_available (GB)": 94.62 }, { "epoch": 6.857142857142857, "grad_norm": 4.730435848236084, "learning_rate": 1.4285714285714286e-06, "loss": 0.058, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1680, "total_memory_available (GB)": 94.62 }, { "epoch": 6.8979591836734695, "grad_norm": 2.7492659091949463, "learning_rate": 1.3775510204081633e-06, "loss": 0.0593, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1690, "total_memory_available (GB)": 94.62 }, { "epoch": 6.938775510204081, "grad_norm": 0.29811447858810425, "learning_rate": 1.3265306122448982e-06, "loss": 0.0425, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1700, "total_memory_available (GB)": 94.62 }, { "epoch": 6.979591836734694, "grad_norm": 0.15881459414958954, "learning_rate": 1.2755102040816329e-06, "loss": 0.0625, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1710, "total_memory_available (GB)": 94.62 }, { "epoch": 7.020408163265306, "grad_norm": 0.20461368560791016, "learning_rate": 1.2244897959183673e-06, "loss": 0.0581, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1720, "total_memory_available (GB)": 94.62 }, { "epoch": 7.061224489795919, "grad_norm": 1.1351808309555054, "learning_rate": 1.1734693877551022e-06, "loss": 0.0646, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1730, "total_memory_available (GB)": 94.62 }, { "epoch": 7.1020408163265305, "grad_norm": 0.1654195487499237, "learning_rate": 1.122448979591837e-06, "loss": 0.0472, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1740, "total_memory_available (GB)": 94.62 }, { "epoch": 7.142857142857143, "grad_norm": 0.4366483986377716, "learning_rate": 1.0714285714285714e-06, "loss": 0.0461, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1750, "total_memory_available (GB)": 94.62 }, { "epoch": 7.183673469387755, "grad_norm": 0.5380903482437134, "learning_rate": 1.0204081632653063e-06, "loss": 0.0506, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1760, "total_memory_available (GB)": 94.62 }, { "epoch": 7.224489795918367, "grad_norm": 1.661912441253662, "learning_rate": 9.69387755102041e-07, "loss": 0.0664, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1770, "total_memory_available (GB)": 94.62 }, { "epoch": 7.26530612244898, "grad_norm": 0.4192713499069214, "learning_rate": 9.183673469387756e-07, "loss": 0.0394, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1780, "total_memory_available (GB)": 94.62 }, { "epoch": 7.3061224489795915, "grad_norm": 0.6668973565101624, "learning_rate": 8.673469387755103e-07, "loss": 0.0401, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1790, "total_memory_available (GB)": 94.62 }, { "epoch": 7.346938775510204, "grad_norm": 0.5573325753211975, "learning_rate": 8.163265306122449e-07, "loss": 0.0526, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1800, "total_memory_available (GB)": 94.62 }, { "epoch": 7.387755102040816, "grad_norm": 0.39288291335105896, "learning_rate": 7.653061224489796e-07, "loss": 0.0445, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1810, "total_memory_available (GB)": 94.62 }, { "epoch": 7.428571428571429, "grad_norm": 0.7398673892021179, "learning_rate": 7.142857142857143e-07, "loss": 0.054, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1820, "total_memory_available (GB)": 94.62 }, { "epoch": 7.469387755102041, "grad_norm": 2.143411636352539, "learning_rate": 6.632653061224491e-07, "loss": 0.0458, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1830, "total_memory_available (GB)": 94.62 }, { "epoch": 7.510204081632653, "grad_norm": 0.3958425223827362, "learning_rate": 6.122448979591837e-07, "loss": 0.0641, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1840, "total_memory_available (GB)": 94.62 }, { "epoch": 7.551020408163265, "grad_norm": 2.797384023666382, "learning_rate": 5.612244897959184e-07, "loss": 0.0447, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1850, "total_memory_available (GB)": 94.62 }, { "epoch": 7.591836734693878, "grad_norm": 1.5026339292526245, "learning_rate": 5.102040816326531e-07, "loss": 0.0274, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1860, "total_memory_available (GB)": 94.62 }, { "epoch": 7.63265306122449, "grad_norm": 0.993212103843689, "learning_rate": 4.591836734693878e-07, "loss": 0.0393, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1870, "total_memory_available (GB)": 94.62 }, { "epoch": 7.673469387755102, "grad_norm": 0.16298241913318634, "learning_rate": 4.0816326530612243e-07, "loss": 0.055, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1880, "total_memory_available (GB)": 94.62 }, { "epoch": 7.714285714285714, "grad_norm": 4.067746639251709, "learning_rate": 3.5714285714285716e-07, "loss": 0.0661, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1890, "total_memory_available (GB)": 94.62 }, { "epoch": 7.755102040816326, "grad_norm": 1.387778878211975, "learning_rate": 3.0612244897959183e-07, "loss": 0.0586, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1900, "total_memory_available (GB)": 94.62 }, { "epoch": 7.795918367346939, "grad_norm": 0.6988309621810913, "learning_rate": 2.5510204081632656e-07, "loss": 0.0664, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1910, "total_memory_available (GB)": 94.62 }, { "epoch": 7.836734693877551, "grad_norm": 0.7139838933944702, "learning_rate": 2.0408163265306121e-07, "loss": 0.053, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1920, "total_memory_available (GB)": 94.62 }, { "epoch": 7.877551020408164, "grad_norm": 0.5550429224967957, "learning_rate": 1.5306122448979592e-07, "loss": 0.0458, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1930, "total_memory_available (GB)": 94.62 }, { "epoch": 7.918367346938775, "grad_norm": 1.2351597547531128, "learning_rate": 1.0204081632653061e-07, "loss": 0.0471, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1940, "total_memory_available (GB)": 94.62 }, { "epoch": 7.959183673469388, "grad_norm": 0.6292315125465393, "learning_rate": 5.1020408163265303e-08, "loss": 0.0532, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1950, "total_memory_available (GB)": 94.62 }, { "epoch": 8.0, "grad_norm": 1.6500349044799805, "learning_rate": 0.0, "loss": 0.0453, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1960, "total_memory_available (GB)": 94.62 }, { "epoch": 8.0, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1960, "total_flos": 4.89583144415232e+16, "total_memory_available (GB)": 94.62, "train_loss": 0.057532464606421335, "train_runtime": 1666.2328, "train_samples_per_second": 52.538, "train_steps_per_second": 1.315 } ], "logging_steps": 10, "max_steps": 1960, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.89583144415232e+16, "train_batch_size": 40, "trial_name": null, "trial_params": null }