{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0016353229762878, "global_step": 4896, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019999999917518445, "lm_loss": 0.59375, "loss": 0.5391, "step": 1, "total_loss": 0.59375 }, { "epoch": 0.0, "learning_rate": 0.00019999999670073778, "lm_loss": 0.51953125, "loss": 0.4974, "step": 2, "total_loss": 0.51953125 }, { "epoch": 0.0, "learning_rate": 0.00019999999257666007, "lm_loss": 0.482421875, "loss": 0.4663, "step": 3, "total_loss": 0.482421875 }, { "epoch": 0.0, "learning_rate": 0.00019999998680295136, "lm_loss": 0.359375, "loss": 0.3953, "step": 4, "total_loss": 0.359375 }, { "epoch": 0.0, "learning_rate": 0.00019999997937961177, "lm_loss": 0.333984375, "loss": 0.3252, "step": 5, "total_loss": 0.333984375 }, { "epoch": 0.0, "learning_rate": 0.0001999999703066414, "lm_loss": 0.244140625, "loss": 0.2549, "step": 6, "total_loss": 0.244140625 }, { "epoch": 0.0, "learning_rate": 0.00019999995958404037, "lm_loss": 0.2216796875, "loss": 0.1975, "step": 7, "total_loss": 0.2216796875 }, { "epoch": 0.0, "learning_rate": 0.00019999994721180893, "lm_loss": 0.1484375, "loss": 0.1483, "step": 8, "total_loss": 0.1484375 }, { "epoch": 0.0, "learning_rate": 0.00019999993318994724, "lm_loss": 0.10595703125, "loss": 0.1169, "step": 9, "total_loss": 0.10595703125 }, { "epoch": 0.0, "learning_rate": 0.00019999991751845552, "lm_loss": 0.1162109375, "loss": 0.0988, "step": 10, "total_loss": 0.1162109375 }, { "epoch": 0.0, "learning_rate": 0.00019999990019733407, "lm_loss": 0.095703125, "loss": 0.0734, "step": 11, "total_loss": 0.095703125 }, { "epoch": 0.0, "learning_rate": 0.00019999988122658313, "lm_loss": 0.072265625, "loss": 0.0541, "step": 12, "total_loss": 0.072265625 }, { "epoch": 0.01, "learning_rate": 0.00019999986060620305, "lm_loss": 0.060546875, "loss": 0.0553, "step": 13, "total_loss": 0.060546875 }, { "epoch": 0.01, "learning_rate": 0.00019999983833619416, "lm_loss": 0.068359375, "loss": 0.0543, "step": 14, "total_loss": 0.068359375 }, { "epoch": 0.01, "learning_rate": 0.0001999998144165568, "lm_loss": 0.0634765625, "loss": 0.0447, "step": 15, "total_loss": 0.0634765625 }, { "epoch": 0.01, "learning_rate": 0.0001999997888472914, "lm_loss": 0.0478515625, "loss": 0.0422, "step": 16, "total_loss": 0.0478515625 }, { "epoch": 0.01, "learning_rate": 0.00019999976162839836, "lm_loss": 0.05126953125, "loss": 0.0398, "step": 17, "total_loss": 0.05126953125 }, { "epoch": 0.01, "learning_rate": 0.00019999973275987816, "lm_loss": 0.040283203125, "loss": 0.0423, "step": 18, "total_loss": 0.040283203125 }, { "epoch": 0.01, "learning_rate": 0.00019999970224173126, "lm_loss": 0.030029296875, "loss": 0.0366, "step": 19, "total_loss": 0.030029296875 }, { "epoch": 0.01, "learning_rate": 0.00019999967007395812, "lm_loss": 0.0272216796875, "loss": 0.0382, "step": 20, "total_loss": 0.0272216796875 }, { "epoch": 0.01, "learning_rate": 0.00019999963625655934, "lm_loss": 0.0361328125, "loss": 0.0277, "step": 21, "total_loss": 0.0361328125 }, { "epoch": 0.01, "learning_rate": 0.0001999996007895354, "lm_loss": 0.02685546875, "loss": 0.026, "step": 22, "total_loss": 0.02685546875 }, { "epoch": 0.01, "learning_rate": 0.00019999956367288698, "lm_loss": 0.034912109375, "loss": 0.0308, "step": 23, "total_loss": 0.034912109375 }, { "epoch": 0.01, "learning_rate": 0.00019999952490661464, "lm_loss": 0.0296630859375, "loss": 0.0244, "step": 24, "total_loss": 0.0296630859375 }, { "epoch": 0.01, "learning_rate": 0.00019999948449071902, "lm_loss": 0.034912109375, "loss": 0.0246, "step": 25, "total_loss": 0.034912109375 }, { "epoch": 0.01, "learning_rate": 0.0001999994424252008, "lm_loss": 0.028564453125, "loss": 0.0209, "step": 26, "total_loss": 0.028564453125 }, { "epoch": 0.01, "learning_rate": 0.00019999939871006062, "lm_loss": 0.025390625, "loss": 0.0219, "step": 27, "total_loss": 0.025390625 }, { "epoch": 0.01, "learning_rate": 0.0001999993533452993, "lm_loss": 0.02978515625, "loss": 0.0246, "step": 28, "total_loss": 0.02978515625 }, { "epoch": 0.01, "learning_rate": 0.00019999930633091747, "lm_loss": 0.0223388671875, "loss": 0.0262, "step": 29, "total_loss": 0.0223388671875 }, { "epoch": 0.01, "learning_rate": 0.000199999257666916, "lm_loss": 0.02001953125, "loss": 0.0242, "step": 30, "total_loss": 0.02001953125 }, { "epoch": 0.01, "learning_rate": 0.00019999920735329566, "lm_loss": 0.029296875, "loss": 0.0214, "step": 31, "total_loss": 0.029296875 }, { "epoch": 0.01, "learning_rate": 0.00019999915539005731, "lm_loss": 0.0277099609375, "loss": 0.0207, "step": 32, "total_loss": 0.0277099609375 }, { "epoch": 0.01, "learning_rate": 0.00019999910177720173, "lm_loss": 0.01953125, "loss": 0.0215, "step": 33, "total_loss": 0.01953125 }, { "epoch": 0.01, "learning_rate": 0.0001999990465147299, "lm_loss": 0.01190185546875, "loss": 0.0199, "step": 34, "total_loss": 0.01190185546875 }, { "epoch": 0.01, "learning_rate": 0.00019999898960264265, "lm_loss": 0.01953125, "loss": 0.0199, "step": 35, "total_loss": 0.01953125 }, { "epoch": 0.01, "learning_rate": 0.00019999893104094096, "lm_loss": 0.01263427734375, "loss": 0.0231, "step": 36, "total_loss": 0.01263427734375 }, { "epoch": 0.02, "learning_rate": 0.0001999988708296258, "lm_loss": 0.028076171875, "loss": 0.0206, "step": 37, "total_loss": 0.028076171875 }, { "epoch": 0.02, "learning_rate": 0.00019999880896869816, "lm_loss": 0.01434326171875, "loss": 0.0165, "step": 38, "total_loss": 0.01434326171875 }, { "epoch": 0.02, "learning_rate": 0.00019999874545815902, "lm_loss": 0.0302734375, "loss": 0.0216, "step": 39, "total_loss": 0.0302734375 }, { "epoch": 0.02, "learning_rate": 0.0001999986802980095, "lm_loss": 0.013427734375, "loss": 0.0163, "step": 40, "total_loss": 0.013427734375 }, { "epoch": 0.02, "learning_rate": 0.00019999861348825063, "lm_loss": 0.01495361328125, "loss": 0.0152, "step": 41, "total_loss": 0.01495361328125 }, { "epoch": 0.02, "learning_rate": 0.0001999985450288835, "lm_loss": 0.011962890625, "loss": 0.0168, "step": 42, "total_loss": 0.011962890625 }, { "epoch": 0.02, "learning_rate": 0.00019999847491990926, "lm_loss": 0.0133056640625, "loss": 0.0179, "step": 43, "total_loss": 0.0133056640625 }, { "epoch": 0.02, "learning_rate": 0.00019999840316132906, "lm_loss": 0.0164794921875, "loss": 0.0179, "step": 44, "total_loss": 0.0164794921875 }, { "epoch": 0.02, "learning_rate": 0.0001999983297531441, "lm_loss": 0.0164794921875, "loss": 0.0132, "step": 45, "total_loss": 0.0164794921875 }, { "epoch": 0.02, "learning_rate": 0.00019999825469535558, "lm_loss": 0.01239013671875, "loss": 0.0205, "step": 46, "total_loss": 0.01239013671875 }, { "epoch": 0.02, "learning_rate": 0.00019999817798796473, "lm_loss": 0.020751953125, "loss": 0.0177, "step": 47, "total_loss": 0.020751953125 }, { "epoch": 0.02, "learning_rate": 0.00019999809963097283, "lm_loss": 0.0157470703125, "loss": 0.0153, "step": 48, "total_loss": 0.0157470703125 }, { "epoch": 0.02, "learning_rate": 0.00019999801962438115, "lm_loss": 0.01214599609375, "loss": 0.0189, "step": 49, "total_loss": 0.01214599609375 }, { "epoch": 0.02, "learning_rate": 0.000199997937968191, "lm_loss": 0.0145263671875, "loss": 0.0141, "step": 50, "total_loss": 0.0145263671875 }, { "epoch": 0.02, "learning_rate": 0.00019999785466240383, "lm_loss": 0.02490234375, "loss": 0.0181, "step": 51, "total_loss": 0.02490234375 }, { "epoch": 0.02, "learning_rate": 0.00019999776970702087, "lm_loss": 0.0084228515625, "loss": 0.0161, "step": 52, "total_loss": 0.0084228515625 }, { "epoch": 0.02, "learning_rate": 0.00019999768310204362, "lm_loss": 0.01495361328125, "loss": 0.0173, "step": 53, "total_loss": 0.01495361328125 }, { "epoch": 0.02, "learning_rate": 0.00019999759484747343, "lm_loss": 0.01531982421875, "loss": 0.0185, "step": 54, "total_loss": 0.01531982421875 }, { "epoch": 0.02, "learning_rate": 0.00019999750494331187, "lm_loss": 0.0203857421875, "loss": 0.0156, "step": 55, "total_loss": 0.0203857421875 }, { "epoch": 0.02, "learning_rate": 0.0001999974133895603, "lm_loss": 0.031005859375, "loss": 0.0172, "step": 56, "total_loss": 0.031005859375 }, { "epoch": 0.02, "learning_rate": 0.00019999732018622033, "lm_loss": 0.0250244140625, "loss": 0.0185, "step": 57, "total_loss": 0.0250244140625 }, { "epoch": 0.02, "learning_rate": 0.00019999722533329344, "lm_loss": 0.01495361328125, "loss": 0.019, "step": 58, "total_loss": 0.01495361328125 }, { "epoch": 0.02, "learning_rate": 0.00019999712883078118, "lm_loss": 0.01300048828125, "loss": 0.0147, "step": 59, "total_loss": 0.01300048828125 }, { "epoch": 0.02, "learning_rate": 0.0001999970306786852, "lm_loss": 0.0166015625, "loss": 0.0161, "step": 60, "total_loss": 0.0166015625 }, { "epoch": 0.02, "learning_rate": 0.00019999693087700707, "lm_loss": 0.0172119140625, "loss": 0.0145, "step": 61, "total_loss": 0.0172119140625 }, { "epoch": 0.03, "learning_rate": 0.00019999682942574843, "lm_loss": 0.01251220703125, "loss": 0.0136, "step": 62, "total_loss": 0.01251220703125 }, { "epoch": 0.03, "learning_rate": 0.00019999672632491102, "lm_loss": 0.01519775390625, "loss": 0.0161, "step": 63, "total_loss": 0.01519775390625 }, { "epoch": 0.03, "learning_rate": 0.0001999966215744965, "lm_loss": 0.0133056640625, "loss": 0.0144, "step": 64, "total_loss": 0.0133056640625 }, { "epoch": 0.03, "learning_rate": 0.00019999651517450657, "lm_loss": 0.0206298828125, "loss": 0.0134, "step": 65, "total_loss": 0.0206298828125 }, { "epoch": 0.03, "learning_rate": 0.00019999640712494303, "lm_loss": 0.0093994140625, "loss": 0.0164, "step": 66, "total_loss": 0.0093994140625 }, { "epoch": 0.03, "learning_rate": 0.00019999629742580764, "lm_loss": 0.012939453125, "loss": 0.0164, "step": 67, "total_loss": 0.012939453125 }, { "epoch": 0.03, "learning_rate": 0.00019999618607710221, "lm_loss": 0.0130615234375, "loss": 0.0128, "step": 68, "total_loss": 0.0130615234375 }, { "epoch": 0.03, "learning_rate": 0.0001999960730788286, "lm_loss": 0.0126953125, "loss": 0.017, "step": 69, "total_loss": 0.0126953125 }, { "epoch": 0.03, "learning_rate": 0.00019999595843098864, "lm_loss": 0.020263671875, "loss": 0.0143, "step": 70, "total_loss": 0.020263671875 }, { "epoch": 0.03, "learning_rate": 0.00019999584213358423, "lm_loss": 0.0216064453125, "loss": 0.0151, "step": 71, "total_loss": 0.0216064453125 }, { "epoch": 0.03, "learning_rate": 0.00019999572418661732, "lm_loss": 0.0289306640625, "loss": 0.0137, "step": 72, "total_loss": 0.0289306640625 }, { "epoch": 0.03, "learning_rate": 0.0001999956045900898, "lm_loss": 0.01373291015625, "loss": 0.0151, "step": 73, "total_loss": 0.01373291015625 }, { "epoch": 0.03, "learning_rate": 0.00019999548334400372, "lm_loss": 0.006683349609375, "loss": 0.0124, "step": 74, "total_loss": 0.006683349609375 }, { "epoch": 0.03, "learning_rate": 0.000199995360448361, "lm_loss": 0.01434326171875, "loss": 0.0153, "step": 75, "total_loss": 0.01434326171875 }, { "epoch": 0.03, "learning_rate": 0.0001999952359031637, "lm_loss": 0.006103515625, "loss": 0.016, "step": 76, "total_loss": 0.006103515625 }, { "epoch": 0.03, "learning_rate": 0.0001999951097084139, "lm_loss": 0.0093994140625, "loss": 0.0137, "step": 77, "total_loss": 0.0093994140625 }, { "epoch": 0.03, "learning_rate": 0.00019999498186411364, "lm_loss": 0.0211181640625, "loss": 0.0159, "step": 78, "total_loss": 0.0211181640625 }, { "epoch": 0.03, "learning_rate": 0.000199994852370265, "lm_loss": 0.01080322265625, "loss": 0.0128, "step": 79, "total_loss": 0.01080322265625 }, { "epoch": 0.03, "learning_rate": 0.00019999472122687024, "lm_loss": 0.0174560546875, "loss": 0.0133, "step": 80, "total_loss": 0.0174560546875 }, { "epoch": 0.03, "learning_rate": 0.0001999945884339314, "lm_loss": 0.01104736328125, "loss": 0.014, "step": 81, "total_loss": 0.01104736328125 }, { "epoch": 0.03, "learning_rate": 0.00019999445399145074, "lm_loss": 0.017578125, "loss": 0.0131, "step": 82, "total_loss": 0.017578125 }, { "epoch": 0.03, "learning_rate": 0.00019999431789943044, "lm_loss": 0.01025390625, "loss": 0.0137, "step": 83, "total_loss": 0.01025390625 }, { "epoch": 0.03, "learning_rate": 0.00019999418015787277, "lm_loss": 0.00909423828125, "loss": 0.0149, "step": 84, "total_loss": 0.00909423828125 }, { "epoch": 0.03, "learning_rate": 0.00019999404076677998, "lm_loss": 0.03271484375, "loss": 0.0137, "step": 85, "total_loss": 0.03271484375 }, { "epoch": 0.04, "learning_rate": 0.0001999938997261544, "lm_loss": 0.009033203125, "loss": 0.0126, "step": 86, "total_loss": 0.009033203125 }, { "epoch": 0.04, "learning_rate": 0.0001999937570359983, "lm_loss": 0.01458740234375, "loss": 0.0119, "step": 87, "total_loss": 0.01458740234375 }, { "epoch": 0.04, "learning_rate": 0.0001999936126963141, "lm_loss": 0.0198974609375, "loss": 0.0159, "step": 88, "total_loss": 0.0198974609375 }, { "epoch": 0.04, "learning_rate": 0.00019999346670710418, "lm_loss": 0.01336669921875, "loss": 0.0124, "step": 89, "total_loss": 0.01336669921875 }, { "epoch": 0.04, "learning_rate": 0.00019999331906837085, "lm_loss": 0.01470947265625, "loss": 0.0139, "step": 90, "total_loss": 0.01470947265625 }, { "epoch": 0.04, "learning_rate": 0.00019999316978011668, "lm_loss": 0.00653076171875, "loss": 0.0121, "step": 91, "total_loss": 0.00653076171875 }, { "epoch": 0.04, "learning_rate": 0.00019999301884234406, "lm_loss": 0.0069580078125, "loss": 0.0141, "step": 92, "total_loss": 0.0069580078125 }, { "epoch": 0.04, "learning_rate": 0.00019999286625505547, "lm_loss": 0.0047607421875, "loss": 0.0131, "step": 93, "total_loss": 0.0047607421875 }, { "epoch": 0.04, "learning_rate": 0.00019999271201825343, "lm_loss": 0.005859375, "loss": 0.0146, "step": 94, "total_loss": 0.005859375 }, { "epoch": 0.04, "learning_rate": 0.0001999925561319405, "lm_loss": 0.01434326171875, "loss": 0.0156, "step": 95, "total_loss": 0.01434326171875 }, { "epoch": 0.04, "learning_rate": 0.00019999239859611932, "lm_loss": 0.005859375, "loss": 0.0107, "step": 96, "total_loss": 0.005859375 }, { "epoch": 0.04, "learning_rate": 0.00019999223941079234, "lm_loss": 0.0047607421875, "loss": 0.0117, "step": 97, "total_loss": 0.0047607421875 }, { "epoch": 0.04, "learning_rate": 0.00019999207857596232, "lm_loss": 0.002960205078125, "loss": 0.0113, "step": 98, "total_loss": 0.002960205078125 }, { "epoch": 0.04, "learning_rate": 0.00019999191609163184, "lm_loss": 0.005615234375, "loss": 0.0125, "step": 99, "total_loss": 0.005615234375 }, { "epoch": 0.04, "learning_rate": 0.0001999917519578036, "lm_loss": 0.01287841796875, "loss": 0.011, "step": 100, "total_loss": 0.01287841796875 }, { "epoch": 0.04, "eval_lm_loss": 0.014467097818851471, "eval_loss": 0.015070343390107155, "eval_runtime": 44.0715, "eval_samples_per_second": 22.69, "eval_steps_per_second": 0.204, "eval_total_loss": 0.014467097818851471, "lm_loss": 0.00119781494140625, "step": 100, "total_loss": 0.00119781494140625 }, { "epoch": 0.04, "learning_rate": 0.0001999915861744803, "lm_loss": 0.01519775390625, "loss": 0.0121, "step": 101, "total_loss": 0.01519775390625 }, { "epoch": 0.04, "learning_rate": 0.00019999141874166472, "lm_loss": 0.00830078125, "loss": 0.0114, "step": 102, "total_loss": 0.00830078125 }, { "epoch": 0.04, "learning_rate": 0.00019999124965935954, "lm_loss": 0.01043701171875, "loss": 0.0139, "step": 103, "total_loss": 0.01043701171875 }, { "epoch": 0.04, "learning_rate": 0.0001999910789275676, "lm_loss": 0.0177001953125, "loss": 0.0137, "step": 104, "total_loss": 0.0177001953125 }, { "epoch": 0.04, "learning_rate": 0.00019999090654629174, "lm_loss": 0.007232666015625, "loss": 0.0102, "step": 105, "total_loss": 0.007232666015625 }, { "epoch": 0.04, "learning_rate": 0.00019999073251553476, "lm_loss": 0.01513671875, "loss": 0.0116, "step": 106, "total_loss": 0.01513671875 }, { "epoch": 0.04, "learning_rate": 0.00019999055683529954, "lm_loss": 0.0103759765625, "loss": 0.0135, "step": 107, "total_loss": 0.0103759765625 }, { "epoch": 0.04, "learning_rate": 0.000199990379505589, "lm_loss": 0.020751953125, "loss": 0.0123, "step": 108, "total_loss": 0.020751953125 }, { "epoch": 0.04, "learning_rate": 0.00019999020052640601, "lm_loss": 0.00848388671875, "loss": 0.0119, "step": 109, "total_loss": 0.00848388671875 }, { "epoch": 0.04, "learning_rate": 0.0001999900198977536, "lm_loss": 0.021728515625, "loss": 0.0153, "step": 110, "total_loss": 0.021728515625 }, { "epoch": 0.05, "learning_rate": 0.00019998983761963468, "lm_loss": 0.0166015625, "loss": 0.0119, "step": 111, "total_loss": 0.0166015625 }, { "epoch": 0.05, "learning_rate": 0.00019998965369205228, "lm_loss": 0.005126953125, "loss": 0.0116, "step": 112, "total_loss": 0.005126953125 }, { "epoch": 0.05, "learning_rate": 0.00019998946811500948, "lm_loss": 0.01373291015625, "loss": 0.0127, "step": 113, "total_loss": 0.01373291015625 }, { "epoch": 0.05, "learning_rate": 0.00019998928088850928, "lm_loss": 0.0146484375, "loss": 0.0137, "step": 114, "total_loss": 0.0146484375 }, { "epoch": 0.05, "learning_rate": 0.0001999890920125548, "lm_loss": 0.0172119140625, "loss": 0.0114, "step": 115, "total_loss": 0.0172119140625 }, { "epoch": 0.05, "learning_rate": 0.00019998890148714918, "lm_loss": 0.006378173828125, "loss": 0.0102, "step": 116, "total_loss": 0.006378173828125 }, { "epoch": 0.05, "learning_rate": 0.0001999887093122955, "lm_loss": 0.0146484375, "loss": 0.0132, "step": 117, "total_loss": 0.0146484375 }, { "epoch": 0.05, "learning_rate": 0.00019998851548799694, "lm_loss": 0.02392578125, "loss": 0.0114, "step": 118, "total_loss": 0.02392578125 }, { "epoch": 0.05, "learning_rate": 0.00019998832001425672, "lm_loss": 0.02294921875, "loss": 0.0121, "step": 119, "total_loss": 0.02294921875 }, { "epoch": 0.05, "learning_rate": 0.0001999881228910781, "lm_loss": 0.0113525390625, "loss": 0.0116, "step": 120, "total_loss": 0.0113525390625 }, { "epoch": 0.05, "learning_rate": 0.00019998792411846425, "lm_loss": 0.005615234375, "loss": 0.0104, "step": 121, "total_loss": 0.005615234375 }, { "epoch": 0.05, "learning_rate": 0.00019998772369641851, "lm_loss": 0.01043701171875, "loss": 0.0103, "step": 122, "total_loss": 0.01043701171875 }, { "epoch": 0.05, "learning_rate": 0.0001999875216249442, "lm_loss": 0.0145263671875, "loss": 0.0137, "step": 123, "total_loss": 0.0145263671875 }, { "epoch": 0.05, "learning_rate": 0.00019998731790404458, "lm_loss": 0.019775390625, "loss": 0.0137, "step": 124, "total_loss": 0.019775390625 }, { "epoch": 0.05, "learning_rate": 0.00019998711253372306, "lm_loss": 0.00323486328125, "loss": 0.0121, "step": 125, "total_loss": 0.00323486328125 }, { "epoch": 0.05, "learning_rate": 0.00019998690551398306, "lm_loss": 0.0081787109375, "loss": 0.0132, "step": 126, "total_loss": 0.0081787109375 }, { "epoch": 0.05, "learning_rate": 0.00019998669684482791, "lm_loss": 0.009521484375, "loss": 0.0116, "step": 127, "total_loss": 0.009521484375 }, { "epoch": 0.05, "learning_rate": 0.0001999864865262611, "lm_loss": 0.00958251953125, "loss": 0.0113, "step": 128, "total_loss": 0.00958251953125 }, { "epoch": 0.05, "learning_rate": 0.00019998627455828613, "lm_loss": 0.00921630859375, "loss": 0.0095, "step": 129, "total_loss": 0.00921630859375 }, { "epoch": 0.05, "learning_rate": 0.00019998606094090646, "lm_loss": 0.018798828125, "loss": 0.0134, "step": 130, "total_loss": 0.018798828125 }, { "epoch": 0.05, "learning_rate": 0.0001999858456741256, "lm_loss": 0.0238037109375, "loss": 0.0128, "step": 131, "total_loss": 0.0238037109375 }, { "epoch": 0.05, "learning_rate": 0.00019998562875794712, "lm_loss": 0.01220703125, "loss": 0.0136, "step": 132, "total_loss": 0.01220703125 }, { "epoch": 0.05, "learning_rate": 0.0001999854101923746, "lm_loss": 0.0111083984375, "loss": 0.0109, "step": 133, "total_loss": 0.0111083984375 }, { "epoch": 0.05, "learning_rate": 0.00019998518997741167, "lm_loss": 0.011962890625, "loss": 0.0132, "step": 134, "total_loss": 0.011962890625 }, { "epoch": 0.06, "learning_rate": 0.0001999849681130619, "lm_loss": 0.0111083984375, "loss": 0.0127, "step": 135, "total_loss": 0.0111083984375 }, { "epoch": 0.06, "learning_rate": 0.00019998474459932903, "lm_loss": 0.01446533203125, "loss": 0.0124, "step": 136, "total_loss": 0.01446533203125 }, { "epoch": 0.06, "learning_rate": 0.00019998451943621664, "lm_loss": 0.0164794921875, "loss": 0.0119, "step": 137, "total_loss": 0.0164794921875 }, { "epoch": 0.06, "learning_rate": 0.00019998429262372858, "lm_loss": 0.005645751953125, "loss": 0.012, "step": 138, "total_loss": 0.005645751953125 }, { "epoch": 0.06, "learning_rate": 0.00019998406416186847, "lm_loss": 0.01458740234375, "loss": 0.011, "step": 139, "total_loss": 0.01458740234375 }, { "epoch": 0.06, "learning_rate": 0.00019998383405064014, "lm_loss": 0.019287109375, "loss": 0.0117, "step": 140, "total_loss": 0.019287109375 }, { "epoch": 0.06, "learning_rate": 0.00019998360229004738, "lm_loss": 0.01544189453125, "loss": 0.0112, "step": 141, "total_loss": 0.01544189453125 }, { "epoch": 0.06, "learning_rate": 0.00019998336888009402, "lm_loss": 0.00970458984375, "loss": 0.0133, "step": 142, "total_loss": 0.00970458984375 }, { "epoch": 0.06, "learning_rate": 0.00019998313382078387, "lm_loss": 0.0108642578125, "loss": 0.0112, "step": 143, "total_loss": 0.0108642578125 }, { "epoch": 0.06, "learning_rate": 0.00019998289711212083, "lm_loss": 0.0191650390625, "loss": 0.01, "step": 144, "total_loss": 0.0191650390625 }, { "epoch": 0.06, "learning_rate": 0.00019998265875410885, "lm_loss": 0.01556396484375, "loss": 0.0122, "step": 145, "total_loss": 0.01556396484375 }, { "epoch": 0.06, "learning_rate": 0.0001999824187467518, "lm_loss": 0.00921630859375, "loss": 0.0124, "step": 146, "total_loss": 0.00921630859375 }, { "epoch": 0.06, "learning_rate": 0.00019998217709005366, "lm_loss": 0.01409912109375, "loss": 0.0124, "step": 147, "total_loss": 0.01409912109375 }, { "epoch": 0.06, "learning_rate": 0.00019998193378401843, "lm_loss": 0.0201416015625, "loss": 0.0153, "step": 148, "total_loss": 0.0201416015625 }, { "epoch": 0.06, "learning_rate": 0.0001999816888286501, "lm_loss": 0.00885009765625, "loss": 0.0099, "step": 149, "total_loss": 0.00885009765625 }, { "epoch": 0.06, "learning_rate": 0.00019998144222395272, "lm_loss": 0.006866455078125, "loss": 0.0141, "step": 150, "total_loss": 0.006866455078125 }, { "epoch": 0.06, "learning_rate": 0.00019998119396993035, "lm_loss": 0.005706787109375, "loss": 0.0117, "step": 151, "total_loss": 0.005706787109375 }, { "epoch": 0.06, "learning_rate": 0.00019998094406658713, "lm_loss": 0.0084228515625, "loss": 0.0104, "step": 152, "total_loss": 0.0084228515625 }, { "epoch": 0.06, "learning_rate": 0.00019998069251392714, "lm_loss": 0.007415771484375, "loss": 0.0125, "step": 153, "total_loss": 0.007415771484375 }, { "epoch": 0.06, "learning_rate": 0.0001999804393119545, "lm_loss": 0.01068115234375, "loss": 0.0115, "step": 154, "total_loss": 0.01068115234375 }, { "epoch": 0.06, "learning_rate": 0.00019998018446067345, "lm_loss": 0.01007080078125, "loss": 0.012, "step": 155, "total_loss": 0.01007080078125 }, { "epoch": 0.06, "learning_rate": 0.0001999799279600882, "lm_loss": 0.0157470703125, "loss": 0.0115, "step": 156, "total_loss": 0.0157470703125 }, { "epoch": 0.06, "learning_rate": 0.0001999796698102029, "lm_loss": 0.010009765625, "loss": 0.0114, "step": 157, "total_loss": 0.010009765625 }, { "epoch": 0.06, "learning_rate": 0.00019997941001102188, "lm_loss": 0.0146484375, "loss": 0.0113, "step": 158, "total_loss": 0.0146484375 }, { "epoch": 0.07, "learning_rate": 0.00019997914856254942, "lm_loss": 0.01165771484375, "loss": 0.0115, "step": 159, "total_loss": 0.01165771484375 }, { "epoch": 0.07, "learning_rate": 0.0001999788854647898, "lm_loss": 0.00921630859375, "loss": 0.0128, "step": 160, "total_loss": 0.00921630859375 }, { "epoch": 0.07, "learning_rate": 0.00019997862071774737, "lm_loss": 0.00732421875, "loss": 0.0104, "step": 161, "total_loss": 0.00732421875 }, { "epoch": 0.07, "learning_rate": 0.00019997835432142654, "lm_loss": 0.01177978515625, "loss": 0.0117, "step": 162, "total_loss": 0.01177978515625 }, { "epoch": 0.07, "learning_rate": 0.00019997808627583163, "lm_loss": 0.0115966796875, "loss": 0.0128, "step": 163, "total_loss": 0.0115966796875 }, { "epoch": 0.07, "learning_rate": 0.0001999778165809671, "lm_loss": 0.00933837890625, "loss": 0.0099, "step": 164, "total_loss": 0.00933837890625 }, { "epoch": 0.07, "learning_rate": 0.00019997754523683745, "lm_loss": 0.0054931640625, "loss": 0.0117, "step": 165, "total_loss": 0.0054931640625 }, { "epoch": 0.07, "learning_rate": 0.00019997727224344708, "lm_loss": 0.014892578125, "loss": 0.0124, "step": 166, "total_loss": 0.014892578125 }, { "epoch": 0.07, "learning_rate": 0.00019997699760080048, "lm_loss": 0.01123046875, "loss": 0.0111, "step": 167, "total_loss": 0.01123046875 }, { "epoch": 0.07, "learning_rate": 0.0001999767213089023, "lm_loss": 0.01312255859375, "loss": 0.0114, "step": 168, "total_loss": 0.01312255859375 }, { "epoch": 0.07, "learning_rate": 0.00019997644336775693, "lm_loss": 0.009521484375, "loss": 0.014, "step": 169, "total_loss": 0.009521484375 }, { "epoch": 0.07, "learning_rate": 0.0001999761637773691, "lm_loss": 0.0115966796875, "loss": 0.011, "step": 170, "total_loss": 0.0115966796875 }, { "epoch": 0.07, "learning_rate": 0.00019997588253774335, "lm_loss": 0.007568359375, "loss": 0.0119, "step": 171, "total_loss": 0.007568359375 }, { "epoch": 0.07, "learning_rate": 0.00019997559964888434, "lm_loss": 0.0240478515625, "loss": 0.0146, "step": 172, "total_loss": 0.0240478515625 }, { "epoch": 0.07, "learning_rate": 0.00019997531511079672, "lm_loss": 0.01220703125, "loss": 0.012, "step": 173, "total_loss": 0.01220703125 }, { "epoch": 0.07, "learning_rate": 0.00019997502892348518, "lm_loss": 0.00372314453125, "loss": 0.012, "step": 174, "total_loss": 0.00372314453125 }, { "epoch": 0.07, "learning_rate": 0.00019997474108695448, "lm_loss": 0.0089111328125, "loss": 0.0096, "step": 175, "total_loss": 0.0089111328125 }, { "epoch": 0.07, "learning_rate": 0.00019997445160120937, "lm_loss": 0.004119873046875, "loss": 0.0093, "step": 176, "total_loss": 0.004119873046875 }, { "epoch": 0.07, "learning_rate": 0.00019997416046625458, "lm_loss": 0.00811767578125, "loss": 0.0111, "step": 177, "total_loss": 0.00811767578125 }, { "epoch": 0.07, "learning_rate": 0.00019997386768209492, "lm_loss": 0.016845703125, "loss": 0.014, "step": 178, "total_loss": 0.016845703125 }, { "epoch": 0.07, "learning_rate": 0.00019997357324873526, "lm_loss": 0.006378173828125, "loss": 0.011, "step": 179, "total_loss": 0.006378173828125 }, { "epoch": 0.07, "learning_rate": 0.00019997327716618038, "lm_loss": 0.0145263671875, "loss": 0.0136, "step": 180, "total_loss": 0.0145263671875 }, { "epoch": 0.07, "learning_rate": 0.00019997297943443525, "lm_loss": 0.015625, "loss": 0.0134, "step": 181, "total_loss": 0.015625 }, { "epoch": 0.07, "learning_rate": 0.00019997268005350472, "lm_loss": 0.01202392578125, "loss": 0.013, "step": 182, "total_loss": 0.01202392578125 }, { "epoch": 0.07, "learning_rate": 0.00019997237902339378, "lm_loss": 0.006744384765625, "loss": 0.0117, "step": 183, "total_loss": 0.006744384765625 }, { "epoch": 0.08, "learning_rate": 0.00019997207634410736, "lm_loss": 0.01177978515625, "loss": 0.0099, "step": 184, "total_loss": 0.01177978515625 }, { "epoch": 0.08, "learning_rate": 0.00019997177201565048, "lm_loss": 0.01171875, "loss": 0.0136, "step": 185, "total_loss": 0.01171875 }, { "epoch": 0.08, "learning_rate": 0.00019997146603802812, "lm_loss": 0.01226806640625, "loss": 0.0118, "step": 186, "total_loss": 0.01226806640625 }, { "epoch": 0.08, "learning_rate": 0.00019997115841124537, "lm_loss": 0.01226806640625, "loss": 0.0118, "step": 187, "total_loss": 0.01226806640625 }, { "epoch": 0.08, "learning_rate": 0.00019997084913530726, "lm_loss": 0.00970458984375, "loss": 0.0124, "step": 188, "total_loss": 0.00970458984375 }, { "epoch": 0.08, "learning_rate": 0.00019997053821021893, "lm_loss": 0.0091552734375, "loss": 0.011, "step": 189, "total_loss": 0.0091552734375 }, { "epoch": 0.08, "learning_rate": 0.0001999702256359855, "lm_loss": 0.00665283203125, "loss": 0.01, "step": 190, "total_loss": 0.00665283203125 }, { "epoch": 0.08, "learning_rate": 0.0001999699114126121, "lm_loss": 0.007598876953125, "loss": 0.0115, "step": 191, "total_loss": 0.007598876953125 }, { "epoch": 0.08, "learning_rate": 0.000199969595540104, "lm_loss": 0.006805419921875, "loss": 0.0117, "step": 192, "total_loss": 0.006805419921875 }, { "epoch": 0.08, "learning_rate": 0.0001999692780184663, "lm_loss": 0.005584716796875, "loss": 0.0108, "step": 193, "total_loss": 0.005584716796875 }, { "epoch": 0.08, "learning_rate": 0.00019996895884770427, "lm_loss": 0.01019287109375, "loss": 0.0105, "step": 194, "total_loss": 0.01019287109375 }, { "epoch": 0.08, "learning_rate": 0.00019996863802782322, "lm_loss": 0.01202392578125, "loss": 0.0092, "step": 195, "total_loss": 0.01202392578125 }, { "epoch": 0.08, "learning_rate": 0.0001999683155588284, "lm_loss": 0.01019287109375, "loss": 0.0126, "step": 196, "total_loss": 0.01019287109375 }, { "epoch": 0.08, "learning_rate": 0.00019996799144072515, "lm_loss": 0.0031890869140625, "loss": 0.0087, "step": 197, "total_loss": 0.0031890869140625 }, { "epoch": 0.08, "learning_rate": 0.0001999676656735188, "lm_loss": 0.0174560546875, "loss": 0.0104, "step": 198, "total_loss": 0.0174560546875 }, { "epoch": 0.08, "learning_rate": 0.00019996733825721475, "lm_loss": 0.0152587890625, "loss": 0.0099, "step": 199, "total_loss": 0.0152587890625 }, { "epoch": 0.08, "learning_rate": 0.0001999670091918184, "lm_loss": 0.0186767578125, "loss": 0.0124, "step": 200, "total_loss": 0.0186767578125 }, { "epoch": 0.08, "eval_lm_loss": 0.013095361180603504, "eval_loss": 0.013629074208438396, "eval_runtime": 43.92, "eval_samples_per_second": 22.769, "eval_steps_per_second": 0.205, "eval_total_loss": 0.013095361180603504, "lm_loss": 0.0010986328125, "step": 200, "total_loss": 0.0010986328125 }, { "epoch": 0.08, "learning_rate": 0.00019996667847733512, "lm_loss": 0.005615234375, "loss": 0.0107, "step": 201, "total_loss": 0.005615234375 }, { "epoch": 0.08, "learning_rate": 0.00019996634611377042, "lm_loss": 0.01007080078125, "loss": 0.0107, "step": 202, "total_loss": 0.01007080078125 }, { "epoch": 0.08, "learning_rate": 0.00019996601210112974, "lm_loss": 0.00860595703125, "loss": 0.0105, "step": 203, "total_loss": 0.00860595703125 }, { "epoch": 0.08, "learning_rate": 0.0001999656764394187, "lm_loss": 0.00445556640625, "loss": 0.0114, "step": 204, "total_loss": 0.00445556640625 }, { "epoch": 0.08, "learning_rate": 0.00019996533912864268, "lm_loss": 0.01007080078125, "loss": 0.0124, "step": 205, "total_loss": 0.01007080078125 }, { "epoch": 0.08, "learning_rate": 0.00019996500016880738, "lm_loss": 0.0155029296875, "loss": 0.0114, "step": 206, "total_loss": 0.0155029296875 }, { "epoch": 0.08, "learning_rate": 0.00019996465955991828, "lm_loss": 0.0079345703125, "loss": 0.0096, "step": 207, "total_loss": 0.0079345703125 }, { "epoch": 0.09, "learning_rate": 0.0001999643173019811, "lm_loss": 0.026123046875, "loss": 0.0127, "step": 208, "total_loss": 0.026123046875 }, { "epoch": 0.09, "learning_rate": 0.00019996397339500143, "lm_loss": 0.007415771484375, "loss": 0.0089, "step": 209, "total_loss": 0.007415771484375 }, { "epoch": 0.09, "learning_rate": 0.00019996362783898493, "lm_loss": 0.01416015625, "loss": 0.0129, "step": 210, "total_loss": 0.01416015625 }, { "epoch": 0.09, "learning_rate": 0.0001999632806339373, "lm_loss": 0.01531982421875, "loss": 0.0101, "step": 211, "total_loss": 0.01531982421875 }, { "epoch": 0.09, "learning_rate": 0.00019996293177986435, "lm_loss": 0.012939453125, "loss": 0.0077, "step": 212, "total_loss": 0.012939453125 }, { "epoch": 0.09, "learning_rate": 0.00019996258127677173, "lm_loss": 0.0106201171875, "loss": 0.0104, "step": 213, "total_loss": 0.0106201171875 }, { "epoch": 0.09, "learning_rate": 0.00019996222912466532, "lm_loss": 0.0087890625, "loss": 0.0099, "step": 214, "total_loss": 0.0087890625 }, { "epoch": 0.09, "learning_rate": 0.00019996187532355083, "lm_loss": 0.01336669921875, "loss": 0.0108, "step": 215, "total_loss": 0.01336669921875 }, { "epoch": 0.09, "learning_rate": 0.00019996151987343414, "lm_loss": 0.010986328125, "loss": 0.0105, "step": 216, "total_loss": 0.010986328125 }, { "epoch": 0.09, "learning_rate": 0.00019996116277432114, "lm_loss": 0.0101318359375, "loss": 0.0121, "step": 217, "total_loss": 0.0101318359375 }, { "epoch": 0.09, "learning_rate": 0.00019996080402621765, "lm_loss": 0.01104736328125, "loss": 0.0098, "step": 218, "total_loss": 0.01104736328125 }, { "epoch": 0.09, "learning_rate": 0.0001999604436291297, "lm_loss": 0.00860595703125, "loss": 0.0114, "step": 219, "total_loss": 0.00860595703125 }, { "epoch": 0.09, "learning_rate": 0.00019996008158306312, "lm_loss": 0.01031494140625, "loss": 0.0117, "step": 220, "total_loss": 0.01031494140625 }, { "epoch": 0.09, "learning_rate": 0.00019995971788802397, "lm_loss": 0.01025390625, "loss": 0.012, "step": 221, "total_loss": 0.01025390625 }, { "epoch": 0.09, "learning_rate": 0.0001999593525440182, "lm_loss": 0.00811767578125, "loss": 0.0128, "step": 222, "total_loss": 0.00811767578125 }, { "epoch": 0.09, "learning_rate": 0.00019995898555105185, "lm_loss": 0.01397705078125, "loss": 0.0104, "step": 223, "total_loss": 0.01397705078125 }, { "epoch": 0.09, "learning_rate": 0.00019995861690913093, "lm_loss": 0.0140380859375, "loss": 0.013, "step": 224, "total_loss": 0.0140380859375 }, { "epoch": 0.09, "learning_rate": 0.00019995824661826162, "lm_loss": 0.0191650390625, "loss": 0.0129, "step": 225, "total_loss": 0.0191650390625 }, { "epoch": 0.09, "learning_rate": 0.00019995787467844996, "lm_loss": 0.0159912109375, "loss": 0.011, "step": 226, "total_loss": 0.0159912109375 }, { "epoch": 0.09, "learning_rate": 0.00019995750108970208, "lm_loss": 0.01092529296875, "loss": 0.0112, "step": 227, "total_loss": 0.01092529296875 }, { "epoch": 0.09, "learning_rate": 0.00019995712585202418, "lm_loss": 0.009521484375, "loss": 0.0102, "step": 228, "total_loss": 0.009521484375 }, { "epoch": 0.09, "learning_rate": 0.00019995674896542242, "lm_loss": 0.006134033203125, "loss": 0.0102, "step": 229, "total_loss": 0.006134033203125 }, { "epoch": 0.09, "learning_rate": 0.00019995637042990303, "lm_loss": 0.00872802734375, "loss": 0.0124, "step": 230, "total_loss": 0.00872802734375 }, { "epoch": 0.09, "learning_rate": 0.00019995599024547224, "lm_loss": 0.00909423828125, "loss": 0.0113, "step": 231, "total_loss": 0.00909423828125 }, { "epoch": 0.09, "learning_rate": 0.00019995560841213635, "lm_loss": 0.0072021484375, "loss": 0.01, "step": 232, "total_loss": 0.0072021484375 }, { "epoch": 0.1, "learning_rate": 0.00019995522492990167, "lm_loss": 0.00439453125, "loss": 0.0118, "step": 233, "total_loss": 0.00439453125 }, { "epoch": 0.1, "learning_rate": 0.00019995483979877446, "lm_loss": 0.007476806640625, "loss": 0.0109, "step": 234, "total_loss": 0.007476806640625 }, { "epoch": 0.1, "learning_rate": 0.0001999544530187611, "lm_loss": 0.0067138671875, "loss": 0.0128, "step": 235, "total_loss": 0.0067138671875 }, { "epoch": 0.1, "learning_rate": 0.000199954064589868, "lm_loss": 0.010986328125, "loss": 0.0113, "step": 236, "total_loss": 0.010986328125 }, { "epoch": 0.1, "learning_rate": 0.00019995367451210156, "lm_loss": 0.00518798828125, "loss": 0.0113, "step": 237, "total_loss": 0.00518798828125 }, { "epoch": 0.1, "learning_rate": 0.00019995328278546822, "lm_loss": 0.006011962890625, "loss": 0.0111, "step": 238, "total_loss": 0.006011962890625 }, { "epoch": 0.1, "learning_rate": 0.00019995288940997442, "lm_loss": 0.01171875, "loss": 0.0096, "step": 239, "total_loss": 0.01171875 }, { "epoch": 0.1, "learning_rate": 0.00019995249438562663, "lm_loss": 0.0177001953125, "loss": 0.0118, "step": 240, "total_loss": 0.0177001953125 }, { "epoch": 0.1, "learning_rate": 0.0001999520977124314, "lm_loss": 0.01287841796875, "loss": 0.0106, "step": 241, "total_loss": 0.01287841796875 }, { "epoch": 0.1, "learning_rate": 0.00019995169939039529, "lm_loss": 0.012939453125, "loss": 0.0092, "step": 242, "total_loss": 0.012939453125 }, { "epoch": 0.1, "learning_rate": 0.00019995129941952482, "lm_loss": 0.01007080078125, "loss": 0.0105, "step": 243, "total_loss": 0.01007080078125 }, { "epoch": 0.1, "learning_rate": 0.00019995089779982665, "lm_loss": 0.01708984375, "loss": 0.0148, "step": 244, "total_loss": 0.01708984375 }, { "epoch": 0.1, "learning_rate": 0.00019995049453130735, "lm_loss": 0.01043701171875, "loss": 0.0142, "step": 245, "total_loss": 0.01043701171875 }, { "epoch": 0.1, "learning_rate": 0.00019995008961397356, "lm_loss": 0.01275634765625, "loss": 0.0102, "step": 246, "total_loss": 0.01275634765625 }, { "epoch": 0.1, "learning_rate": 0.00019994968304783205, "lm_loss": 0.015625, "loss": 0.0114, "step": 247, "total_loss": 0.015625 }, { "epoch": 0.1, "learning_rate": 0.00019994927483288946, "lm_loss": 0.011474609375, "loss": 0.0109, "step": 248, "total_loss": 0.011474609375 }, { "epoch": 0.1, "learning_rate": 0.00019994886496915251, "lm_loss": 0.006011962890625, "loss": 0.0113, "step": 249, "total_loss": 0.006011962890625 }, { "epoch": 0.1, "learning_rate": 0.000199948453456628, "lm_loss": 0.007720947265625, "loss": 0.0118, "step": 250, "total_loss": 0.007720947265625 }, { "epoch": 0.1, "learning_rate": 0.0001999480402953227, "lm_loss": 0.01251220703125, "loss": 0.0109, "step": 251, "total_loss": 0.01251220703125 }, { "epoch": 0.1, "learning_rate": 0.00019994762548524342, "lm_loss": 0.0244140625, "loss": 0.0129, "step": 252, "total_loss": 0.0244140625 }, { "epoch": 0.1, "learning_rate": 0.000199947209026397, "lm_loss": 0.02001953125, "loss": 0.0118, "step": 253, "total_loss": 0.02001953125 }, { "epoch": 0.1, "learning_rate": 0.00019994679091879037, "lm_loss": 0.01080322265625, "loss": 0.0111, "step": 254, "total_loss": 0.01080322265625 }, { "epoch": 0.1, "learning_rate": 0.00019994637116243033, "lm_loss": 0.0059814453125, "loss": 0.0107, "step": 255, "total_loss": 0.0059814453125 }, { "epoch": 0.1, "learning_rate": 0.00019994594975732388, "lm_loss": 0.01300048828125, "loss": 0.0144, "step": 256, "total_loss": 0.01300048828125 }, { "epoch": 0.11, "learning_rate": 0.00019994552670347794, "lm_loss": 0.0096435546875, "loss": 0.0112, "step": 257, "total_loss": 0.0096435546875 }, { "epoch": 0.11, "learning_rate": 0.00019994510200089952, "lm_loss": 0.006072998046875, "loss": 0.0079, "step": 258, "total_loss": 0.006072998046875 }, { "epoch": 0.11, "learning_rate": 0.00019994467564959557, "lm_loss": 0.01275634765625, "loss": 0.0094, "step": 259, "total_loss": 0.01275634765625 }, { "epoch": 0.11, "learning_rate": 0.00019994424764957315, "lm_loss": 0.0079345703125, "loss": 0.0108, "step": 260, "total_loss": 0.0079345703125 }, { "epoch": 0.11, "learning_rate": 0.00019994381800083933, "lm_loss": 0.00933837890625, "loss": 0.0108, "step": 261, "total_loss": 0.00933837890625 }, { "epoch": 0.11, "learning_rate": 0.00019994338670340121, "lm_loss": 0.00579833984375, "loss": 0.0111, "step": 262, "total_loss": 0.00579833984375 }, { "epoch": 0.11, "learning_rate": 0.00019994295375726586, "lm_loss": 0.00592041015625, "loss": 0.0138, "step": 263, "total_loss": 0.00592041015625 }, { "epoch": 0.11, "learning_rate": 0.00019994251916244043, "lm_loss": 0.0115966796875, "loss": 0.0091, "step": 264, "total_loss": 0.0115966796875 }, { "epoch": 0.11, "learning_rate": 0.00019994208291893216, "lm_loss": 0.01043701171875, "loss": 0.0126, "step": 265, "total_loss": 0.01043701171875 }, { "epoch": 0.11, "learning_rate": 0.00019994164502674816, "lm_loss": 0.007080078125, "loss": 0.0123, "step": 266, "total_loss": 0.007080078125 }, { "epoch": 0.11, "learning_rate": 0.0001999412054858957, "lm_loss": 0.0159912109375, "loss": 0.0139, "step": 267, "total_loss": 0.0159912109375 }, { "epoch": 0.11, "learning_rate": 0.00019994076429638203, "lm_loss": 0.01556396484375, "loss": 0.0138, "step": 268, "total_loss": 0.01556396484375 }, { "epoch": 0.11, "learning_rate": 0.0001999403214582144, "lm_loss": 0.01263427734375, "loss": 0.0106, "step": 269, "total_loss": 0.01263427734375 }, { "epoch": 0.11, "learning_rate": 0.00019993987697140012, "lm_loss": 0.01513671875, "loss": 0.0109, "step": 270, "total_loss": 0.01513671875 }, { "epoch": 0.11, "learning_rate": 0.00019993943083594656, "lm_loss": 0.00872802734375, "loss": 0.0099, "step": 271, "total_loss": 0.00872802734375 }, { "epoch": 0.11, "learning_rate": 0.00019993898305186103, "lm_loss": 0.0089111328125, "loss": 0.0129, "step": 272, "total_loss": 0.0089111328125 }, { "epoch": 0.11, "learning_rate": 0.00019993853361915096, "lm_loss": 0.0167236328125, "loss": 0.0125, "step": 273, "total_loss": 0.0167236328125 }, { "epoch": 0.11, "learning_rate": 0.00019993808253782373, "lm_loss": 0.0074462890625, "loss": 0.011, "step": 274, "total_loss": 0.0074462890625 }, { "epoch": 0.11, "learning_rate": 0.00019993762980788683, "lm_loss": 0.00927734375, "loss": 0.01, "step": 275, "total_loss": 0.00927734375 }, { "epoch": 0.11, "learning_rate": 0.0001999371754293476, "lm_loss": 0.0084228515625, "loss": 0.0085, "step": 276, "total_loss": 0.0084228515625 }, { "epoch": 0.11, "learning_rate": 0.00019993671940221375, "lm_loss": 0.0167236328125, "loss": 0.0117, "step": 277, "total_loss": 0.0167236328125 }, { "epoch": 0.11, "learning_rate": 0.0001999362617264926, "lm_loss": 0.00872802734375, "loss": 0.0106, "step": 278, "total_loss": 0.00872802734375 }, { "epoch": 0.11, "learning_rate": 0.00019993580240219183, "lm_loss": 0.01507568359375, "loss": 0.0118, "step": 279, "total_loss": 0.01507568359375 }, { "epoch": 0.11, "learning_rate": 0.00019993534142931894, "lm_loss": 0.013671875, "loss": 0.0119, "step": 280, "total_loss": 0.013671875 }, { "epoch": 0.11, "learning_rate": 0.00019993487880788156, "lm_loss": 0.0087890625, "loss": 0.0095, "step": 281, "total_loss": 0.0087890625 }, { "epoch": 0.12, "learning_rate": 0.00019993441453788736, "lm_loss": 0.0086669921875, "loss": 0.0101, "step": 282, "total_loss": 0.0086669921875 }, { "epoch": 0.12, "learning_rate": 0.00019993394861934392, "lm_loss": 0.01123046875, "loss": 0.0122, "step": 283, "total_loss": 0.01123046875 }, { "epoch": 0.12, "learning_rate": 0.000199933481052259, "lm_loss": 0.00689697265625, "loss": 0.0117, "step": 284, "total_loss": 0.00689697265625 }, { "epoch": 0.12, "learning_rate": 0.00019993301183664027, "lm_loss": 0.01220703125, "loss": 0.0097, "step": 285, "total_loss": 0.01220703125 }, { "epoch": 0.12, "learning_rate": 0.00019993254097249547, "lm_loss": 0.010986328125, "loss": 0.0132, "step": 286, "total_loss": 0.010986328125 }, { "epoch": 0.12, "learning_rate": 0.00019993206845983244, "lm_loss": 0.005767822265625, "loss": 0.0101, "step": 287, "total_loss": 0.005767822265625 }, { "epoch": 0.12, "learning_rate": 0.00019993159429865888, "lm_loss": 0.01116943359375, "loss": 0.0113, "step": 288, "total_loss": 0.01116943359375 }, { "epoch": 0.12, "learning_rate": 0.00019993111848898263, "lm_loss": 0.01007080078125, "loss": 0.0098, "step": 289, "total_loss": 0.01007080078125 }, { "epoch": 0.12, "learning_rate": 0.00019993064103081154, "lm_loss": 0.012939453125, "loss": 0.0136, "step": 290, "total_loss": 0.012939453125 }, { "epoch": 0.12, "learning_rate": 0.00019993016192415355, "lm_loss": 0.00811767578125, "loss": 0.0115, "step": 291, "total_loss": 0.00811767578125 }, { "epoch": 0.12, "learning_rate": 0.00019992968116901649, "lm_loss": 0.0103759765625, "loss": 0.0095, "step": 292, "total_loss": 0.0103759765625 }, { "epoch": 0.12, "learning_rate": 0.00019992919876540832, "lm_loss": 0.00921630859375, "loss": 0.0086, "step": 293, "total_loss": 0.00921630859375 }, { "epoch": 0.12, "learning_rate": 0.000199928714713337, "lm_loss": 0.006500244140625, "loss": 0.0098, "step": 294, "total_loss": 0.006500244140625 }, { "epoch": 0.12, "learning_rate": 0.00019992822901281052, "lm_loss": 0.0113525390625, "loss": 0.0115, "step": 295, "total_loss": 0.0113525390625 }, { "epoch": 0.12, "learning_rate": 0.00019992774166383684, "lm_loss": 0.01177978515625, "loss": 0.0091, "step": 296, "total_loss": 0.01177978515625 }, { "epoch": 0.12, "learning_rate": 0.0001999272526664241, "lm_loss": 0.01190185546875, "loss": 0.0107, "step": 297, "total_loss": 0.01190185546875 }, { "epoch": 0.12, "learning_rate": 0.0001999267620205803, "lm_loss": 0.002960205078125, "loss": 0.0096, "step": 298, "total_loss": 0.002960205078125 }, { "epoch": 0.12, "learning_rate": 0.0001999262697263135, "lm_loss": 0.006103515625, "loss": 0.0114, "step": 299, "total_loss": 0.006103515625 }, { "epoch": 0.12, "learning_rate": 0.00019992577578363188, "lm_loss": 0.00579833984375, "loss": 0.0099, "step": 300, "total_loss": 0.00579833984375 }, { "epoch": 0.12, "eval_lm_loss": 0.012582213617861271, "eval_loss": 0.013147125020623207, "eval_runtime": 44.0148, "eval_samples_per_second": 22.72, "eval_steps_per_second": 0.204, "eval_total_loss": 0.012582213617861271, "lm_loss": 0.0034637451171875, "step": 300, "total_loss": 0.0034637451171875 }, { "epoch": 0.12, "learning_rate": 0.00019992528019254359, "lm_loss": 0.01202392578125, "loss": 0.0096, "step": 301, "total_loss": 0.01202392578125 }, { "epoch": 0.12, "learning_rate": 0.00019992478295305678, "lm_loss": 0.01104736328125, "loss": 0.0106, "step": 302, "total_loss": 0.01104736328125 }, { "epoch": 0.12, "learning_rate": 0.00019992428406517964, "lm_loss": 0.015625, "loss": 0.0097, "step": 303, "total_loss": 0.015625 }, { "epoch": 0.12, "learning_rate": 0.00019992378352892045, "lm_loss": 0.006256103515625, "loss": 0.0088, "step": 304, "total_loss": 0.006256103515625 }, { "epoch": 0.12, "learning_rate": 0.00019992328134428743, "lm_loss": 0.007659912109375, "loss": 0.0111, "step": 305, "total_loss": 0.007659912109375 }, { "epoch": 0.13, "learning_rate": 0.00019992277751128886, "lm_loss": 0.007720947265625, "loss": 0.0082, "step": 306, "total_loss": 0.007720947265625 }, { "epoch": 0.13, "learning_rate": 0.00019992227202993306, "lm_loss": 0.010986328125, "loss": 0.0118, "step": 307, "total_loss": 0.010986328125 }, { "epoch": 0.13, "learning_rate": 0.00019992176490022835, "lm_loss": 0.00616455078125, "loss": 0.0125, "step": 308, "total_loss": 0.00616455078125 }, { "epoch": 0.13, "learning_rate": 0.00019992125612218315, "lm_loss": 0.0230712890625, "loss": 0.0107, "step": 309, "total_loss": 0.0230712890625 }, { "epoch": 0.13, "learning_rate": 0.0001999207456958058, "lm_loss": 0.01141357421875, "loss": 0.0093, "step": 310, "total_loss": 0.01141357421875 }, { "epoch": 0.13, "learning_rate": 0.00019992023362110474, "lm_loss": 0.0146484375, "loss": 0.011, "step": 311, "total_loss": 0.0146484375 }, { "epoch": 0.13, "learning_rate": 0.00019991971989808844, "lm_loss": 0.008544921875, "loss": 0.0102, "step": 312, "total_loss": 0.008544921875 }, { "epoch": 0.13, "learning_rate": 0.00019991920452676535, "lm_loss": 0.01092529296875, "loss": 0.0126, "step": 313, "total_loss": 0.01092529296875 }, { "epoch": 0.13, "learning_rate": 0.00019991868750714395, "lm_loss": 0.0074462890625, "loss": 0.011, "step": 314, "total_loss": 0.0074462890625 }, { "epoch": 0.13, "learning_rate": 0.00019991816883923282, "lm_loss": 0.010986328125, "loss": 0.0093, "step": 315, "total_loss": 0.010986328125 }, { "epoch": 0.13, "learning_rate": 0.00019991764852304048, "lm_loss": 0.0115966796875, "loss": 0.0091, "step": 316, "total_loss": 0.0115966796875 }, { "epoch": 0.13, "learning_rate": 0.00019991712655857547, "lm_loss": 0.0087890625, "loss": 0.0112, "step": 317, "total_loss": 0.0087890625 }, { "epoch": 0.13, "learning_rate": 0.0001999166029458465, "lm_loss": 0.00567626953125, "loss": 0.0118, "step": 318, "total_loss": 0.00567626953125 }, { "epoch": 0.13, "learning_rate": 0.0001999160776848622, "lm_loss": 0.00982666015625, "loss": 0.0096, "step": 319, "total_loss": 0.00982666015625 }, { "epoch": 0.13, "learning_rate": 0.0001999155507756311, "lm_loss": 0.016845703125, "loss": 0.0093, "step": 320, "total_loss": 0.016845703125 }, { "epoch": 0.13, "learning_rate": 0.00019991502221816204, "lm_loss": 0.005706787109375, "loss": 0.0096, "step": 321, "total_loss": 0.005706787109375 }, { "epoch": 0.13, "learning_rate": 0.00019991449201246369, "lm_loss": 0.01300048828125, "loss": 0.0105, "step": 322, "total_loss": 0.01300048828125 }, { "epoch": 0.13, "learning_rate": 0.0001999139601585448, "lm_loss": 0.01116943359375, "loss": 0.0117, "step": 323, "total_loss": 0.01116943359375 }, { "epoch": 0.13, "learning_rate": 0.00019991342665641408, "lm_loss": 0.0048828125, "loss": 0.0082, "step": 324, "total_loss": 0.0048828125 }, { "epoch": 0.13, "learning_rate": 0.00019991289150608044, "lm_loss": 0.0089111328125, "loss": 0.0125, "step": 325, "total_loss": 0.0089111328125 }, { "epoch": 0.13, "learning_rate": 0.00019991235470755262, "lm_loss": 0.0106201171875, "loss": 0.01, "step": 326, "total_loss": 0.0106201171875 }, { "epoch": 0.13, "learning_rate": 0.00019991181626083954, "lm_loss": 0.006927490234375, "loss": 0.0099, "step": 327, "total_loss": 0.006927490234375 }, { "epoch": 0.13, "learning_rate": 0.00019991127616595004, "lm_loss": 0.005157470703125, "loss": 0.0116, "step": 328, "total_loss": 0.005157470703125 }, { "epoch": 0.13, "learning_rate": 0.00019991073442289303, "lm_loss": 0.01165771484375, "loss": 0.0098, "step": 329, "total_loss": 0.01165771484375 }, { "epoch": 0.13, "learning_rate": 0.00019991019103167748, "lm_loss": 0.01422119140625, "loss": 0.0108, "step": 330, "total_loss": 0.01422119140625 }, { "epoch": 0.14, "learning_rate": 0.0001999096459923123, "lm_loss": 0.0086669921875, "loss": 0.0098, "step": 331, "total_loss": 0.0086669921875 }, { "epoch": 0.14, "learning_rate": 0.00019990909930480658, "lm_loss": 0.01361083984375, "loss": 0.0093, "step": 332, "total_loss": 0.01361083984375 }, { "epoch": 0.14, "learning_rate": 0.0001999085509691692, "lm_loss": 0.003814697265625, "loss": 0.0104, "step": 333, "total_loss": 0.003814697265625 }, { "epoch": 0.14, "learning_rate": 0.00019990800098540932, "lm_loss": 0.01531982421875, "loss": 0.0093, "step": 334, "total_loss": 0.01531982421875 }, { "epoch": 0.14, "learning_rate": 0.00019990744935353597, "lm_loss": 0.0057373046875, "loss": 0.0089, "step": 335, "total_loss": 0.0057373046875 }, { "epoch": 0.14, "learning_rate": 0.0001999068960735582, "lm_loss": 0.01300048828125, "loss": 0.0093, "step": 336, "total_loss": 0.01300048828125 }, { "epoch": 0.14, "learning_rate": 0.00019990634114548525, "lm_loss": 0.0211181640625, "loss": 0.0109, "step": 337, "total_loss": 0.0211181640625 }, { "epoch": 0.14, "learning_rate": 0.00019990578456932618, "lm_loss": 0.004241943359375, "loss": 0.0108, "step": 338, "total_loss": 0.004241943359375 }, { "epoch": 0.14, "learning_rate": 0.0001999052263450902, "lm_loss": 0.0113525390625, "loss": 0.0101, "step": 339, "total_loss": 0.0113525390625 }, { "epoch": 0.14, "learning_rate": 0.0001999046664727865, "lm_loss": 0.01483154296875, "loss": 0.0101, "step": 340, "total_loss": 0.01483154296875 }, { "epoch": 0.14, "learning_rate": 0.0001999041049524244, "lm_loss": 0.0125732421875, "loss": 0.0105, "step": 341, "total_loss": 0.0125732421875 }, { "epoch": 0.14, "learning_rate": 0.00019990354178401308, "lm_loss": 0.008056640625, "loss": 0.0086, "step": 342, "total_loss": 0.008056640625 }, { "epoch": 0.14, "learning_rate": 0.00019990297696756182, "lm_loss": 0.00933837890625, "loss": 0.0109, "step": 343, "total_loss": 0.00933837890625 }, { "epoch": 0.14, "learning_rate": 0.00019990241050307997, "lm_loss": 0.0047607421875, "loss": 0.0127, "step": 344, "total_loss": 0.0047607421875 }, { "epoch": 0.14, "learning_rate": 0.00019990184239057689, "lm_loss": 0.01068115234375, "loss": 0.0102, "step": 345, "total_loss": 0.01068115234375 }, { "epoch": 0.14, "learning_rate": 0.00019990127263006194, "lm_loss": 0.01470947265625, "loss": 0.0111, "step": 346, "total_loss": 0.01470947265625 }, { "epoch": 0.14, "learning_rate": 0.0001999007012215445, "lm_loss": 0.01129150390625, "loss": 0.0119, "step": 347, "total_loss": 0.01129150390625 }, { "epoch": 0.14, "learning_rate": 0.00019990012816503404, "lm_loss": 0.0087890625, "loss": 0.0102, "step": 348, "total_loss": 0.0087890625 }, { "epoch": 0.14, "learning_rate": 0.00019989955346053995, "lm_loss": 0.01373291015625, "loss": 0.01, "step": 349, "total_loss": 0.01373291015625 }, { "epoch": 0.14, "learning_rate": 0.00019989897710807176, "lm_loss": 0.01397705078125, "loss": 0.0098, "step": 350, "total_loss": 0.01397705078125 }, { "epoch": 0.14, "learning_rate": 0.00019989839910763892, "lm_loss": 0.0169677734375, "loss": 0.0115, "step": 351, "total_loss": 0.0169677734375 }, { "epoch": 0.14, "learning_rate": 0.00019989781945925104, "lm_loss": 0.007354736328125, "loss": 0.0113, "step": 352, "total_loss": 0.007354736328125 }, { "epoch": 0.14, "learning_rate": 0.00019989723816291765, "lm_loss": 0.006591796875, "loss": 0.0078, "step": 353, "total_loss": 0.006591796875 }, { "epoch": 0.14, "learning_rate": 0.0001998966552186483, "lm_loss": 0.01287841796875, "loss": 0.0121, "step": 354, "total_loss": 0.01287841796875 }, { "epoch": 0.15, "learning_rate": 0.0001998960706264527, "lm_loss": 0.007537841796875, "loss": 0.0102, "step": 355, "total_loss": 0.007537841796875 }, { "epoch": 0.15, "learning_rate": 0.0001998954843863404, "lm_loss": 0.0191650390625, "loss": 0.0118, "step": 356, "total_loss": 0.0191650390625 }, { "epoch": 0.15, "learning_rate": 0.0001998948964983211, "lm_loss": 0.00970458984375, "loss": 0.01, "step": 357, "total_loss": 0.00970458984375 }, { "epoch": 0.15, "learning_rate": 0.00019989430696240454, "lm_loss": 0.0078125, "loss": 0.0107, "step": 358, "total_loss": 0.0078125 }, { "epoch": 0.15, "learning_rate": 0.0001998937157786004, "lm_loss": 0.00872802734375, "loss": 0.011, "step": 359, "total_loss": 0.00872802734375 }, { "epoch": 0.15, "learning_rate": 0.00019989312294691844, "lm_loss": 0.00897216796875, "loss": 0.0112, "step": 360, "total_loss": 0.00897216796875 }, { "epoch": 0.15, "learning_rate": 0.00019989252846736846, "lm_loss": 0.005218505859375, "loss": 0.0099, "step": 361, "total_loss": 0.005218505859375 }, { "epoch": 0.15, "learning_rate": 0.00019989193233996023, "lm_loss": 0.0103759765625, "loss": 0.009, "step": 362, "total_loss": 0.0103759765625 }, { "epoch": 0.15, "learning_rate": 0.00019989133456470358, "lm_loss": 0.01092529296875, "loss": 0.0109, "step": 363, "total_loss": 0.01092529296875 }, { "epoch": 0.15, "learning_rate": 0.00019989073514160843, "lm_loss": 0.01031494140625, "loss": 0.0097, "step": 364, "total_loss": 0.01031494140625 }, { "epoch": 0.15, "learning_rate": 0.00019989013407068463, "lm_loss": 0.0086669921875, "loss": 0.0121, "step": 365, "total_loss": 0.0086669921875 }, { "epoch": 0.15, "learning_rate": 0.00019988953135194207, "lm_loss": 0.00396728515625, "loss": 0.0097, "step": 366, "total_loss": 0.00396728515625 }, { "epoch": 0.15, "learning_rate": 0.00019988892698539077, "lm_loss": 0.01348876953125, "loss": 0.012, "step": 367, "total_loss": 0.01348876953125 }, { "epoch": 0.15, "learning_rate": 0.00019988832097104063, "lm_loss": 0.006378173828125, "loss": 0.0116, "step": 368, "total_loss": 0.006378173828125 }, { "epoch": 0.15, "learning_rate": 0.00019988771330890166, "lm_loss": 0.0040283203125, "loss": 0.0078, "step": 369, "total_loss": 0.0040283203125 }, { "epoch": 0.15, "learning_rate": 0.0001998871039989839, "lm_loss": 0.004486083984375, "loss": 0.0091, "step": 370, "total_loss": 0.004486083984375 }, { "epoch": 0.15, "learning_rate": 0.00019988649304129735, "lm_loss": 0.00494384765625, "loss": 0.0098, "step": 371, "total_loss": 0.00494384765625 }, { "epoch": 0.15, "learning_rate": 0.0001998858804358522, "lm_loss": 0.0126953125, "loss": 0.0106, "step": 372, "total_loss": 0.0126953125 }, { "epoch": 0.15, "learning_rate": 0.00019988526618265845, "lm_loss": 0.0128173828125, "loss": 0.0098, "step": 373, "total_loss": 0.0128173828125 }, { "epoch": 0.15, "learning_rate": 0.00019988465028172625, "lm_loss": 0.012939453125, "loss": 0.0114, "step": 374, "total_loss": 0.012939453125 }, { "epoch": 0.15, "learning_rate": 0.00019988403273306582, "lm_loss": 0.0169677734375, "loss": 0.0125, "step": 375, "total_loss": 0.0169677734375 }, { "epoch": 0.15, "learning_rate": 0.0001998834135366873, "lm_loss": 0.010498046875, "loss": 0.0098, "step": 376, "total_loss": 0.010498046875 }, { "epoch": 0.15, "learning_rate": 0.00019988279269260087, "lm_loss": 0.00946044921875, "loss": 0.0111, "step": 377, "total_loss": 0.00946044921875 }, { "epoch": 0.15, "learning_rate": 0.00019988217020081687, "lm_loss": 0.00958251953125, "loss": 0.0097, "step": 378, "total_loss": 0.00958251953125 }, { "epoch": 0.15, "learning_rate": 0.0001998815460613455, "lm_loss": 0.0167236328125, "loss": 0.0129, "step": 379, "total_loss": 0.0167236328125 }, { "epoch": 0.16, "learning_rate": 0.00019988092027419706, "lm_loss": 0.0089111328125, "loss": 0.012, "step": 380, "total_loss": 0.0089111328125 }, { "epoch": 0.16, "learning_rate": 0.00019988029283938188, "lm_loss": 0.010986328125, "loss": 0.0111, "step": 381, "total_loss": 0.010986328125 }, { "epoch": 0.16, "learning_rate": 0.00019987966375691033, "lm_loss": 0.00811767578125, "loss": 0.0096, "step": 382, "total_loss": 0.00811767578125 }, { "epoch": 0.16, "learning_rate": 0.00019987903302679273, "lm_loss": 0.007049560546875, "loss": 0.012, "step": 383, "total_loss": 0.007049560546875 }, { "epoch": 0.16, "learning_rate": 0.00019987840064903957, "lm_loss": 0.00653076171875, "loss": 0.009, "step": 384, "total_loss": 0.00653076171875 }, { "epoch": 0.16, "learning_rate": 0.00019987776662366122, "lm_loss": 0.01165771484375, "loss": 0.0095, "step": 385, "total_loss": 0.01165771484375 }, { "epoch": 0.16, "learning_rate": 0.00019987713095066814, "lm_loss": 0.0050048828125, "loss": 0.0078, "step": 386, "total_loss": 0.0050048828125 }, { "epoch": 0.16, "learning_rate": 0.00019987649363007086, "lm_loss": 0.01318359375, "loss": 0.0098, "step": 387, "total_loss": 0.01318359375 }, { "epoch": 0.16, "learning_rate": 0.00019987585466187983, "lm_loss": 0.0098876953125, "loss": 0.0115, "step": 388, "total_loss": 0.0098876953125 }, { "epoch": 0.16, "learning_rate": 0.00019987521404610565, "lm_loss": 0.01446533203125, "loss": 0.0105, "step": 389, "total_loss": 0.01446533203125 }, { "epoch": 0.16, "learning_rate": 0.00019987457178275887, "lm_loss": 0.006378173828125, "loss": 0.0122, "step": 390, "total_loss": 0.006378173828125 }, { "epoch": 0.16, "learning_rate": 0.00019987392787185006, "lm_loss": 0.01361083984375, "loss": 0.0125, "step": 391, "total_loss": 0.01361083984375 }, { "epoch": 0.16, "learning_rate": 0.00019987328231338988, "lm_loss": 0.004150390625, "loss": 0.0108, "step": 392, "total_loss": 0.004150390625 }, { "epoch": 0.16, "learning_rate": 0.00019987263510738893, "lm_loss": 0.00701904296875, "loss": 0.0091, "step": 393, "total_loss": 0.00701904296875 }, { "epoch": 0.16, "learning_rate": 0.00019987198625385794, "lm_loss": 0.01104736328125, "loss": 0.0114, "step": 394, "total_loss": 0.01104736328125 }, { "epoch": 0.16, "learning_rate": 0.0001998713357528076, "lm_loss": 0.004547119140625, "loss": 0.0086, "step": 395, "total_loss": 0.004547119140625 }, { "epoch": 0.16, "learning_rate": 0.00019987068360424862, "lm_loss": 0.0159912109375, "loss": 0.0132, "step": 396, "total_loss": 0.0159912109375 }, { "epoch": 0.16, "learning_rate": 0.00019987002980819177, "lm_loss": 0.01409912109375, "loss": 0.0097, "step": 397, "total_loss": 0.01409912109375 }, { "epoch": 0.16, "learning_rate": 0.00019986937436464782, "lm_loss": 0.01611328125, "loss": 0.0117, "step": 398, "total_loss": 0.01611328125 }, { "epoch": 0.16, "learning_rate": 0.00019986871727362758, "lm_loss": 0.007415771484375, "loss": 0.009, "step": 399, "total_loss": 0.007415771484375 }, { "epoch": 0.16, "learning_rate": 0.00019986805853514198, "lm_loss": 0.004180908203125, "loss": 0.0089, "step": 400, "total_loss": 0.004180908203125 }, { "epoch": 0.16, "eval_lm_loss": 0.01214557234197855, "eval_loss": 0.012592697516083717, "eval_runtime": 44.0487, "eval_samples_per_second": 22.702, "eval_steps_per_second": 0.204, "eval_total_loss": 0.01214557234197855, "lm_loss": 0.0012359619140625, "step": 400, "total_loss": 0.0012359619140625 }, { "epoch": 0.16, "learning_rate": 0.00019986739814920173, "lm_loss": 0.01226806640625, "loss": 0.0096, "step": 401, "total_loss": 0.01226806640625 }, { "epoch": 0.16, "learning_rate": 0.00019986673611581786, "lm_loss": 0.0107421875, "loss": 0.0093, "step": 402, "total_loss": 0.0107421875 }, { "epoch": 0.16, "learning_rate": 0.0001998660724350012, "lm_loss": 0.01068115234375, "loss": 0.0117, "step": 403, "total_loss": 0.01068115234375 }, { "epoch": 0.17, "learning_rate": 0.00019986540710676277, "lm_loss": 0.0159912109375, "loss": 0.0118, "step": 404, "total_loss": 0.0159912109375 }, { "epoch": 0.17, "learning_rate": 0.00019986474013111352, "lm_loss": 0.00909423828125, "loss": 0.0104, "step": 405, "total_loss": 0.00909423828125 }, { "epoch": 0.17, "learning_rate": 0.0001998640715080644, "lm_loss": 0.01361083984375, "loss": 0.0078, "step": 406, "total_loss": 0.01361083984375 }, { "epoch": 0.17, "learning_rate": 0.00019986340123762653, "lm_loss": 0.01220703125, "loss": 0.0104, "step": 407, "total_loss": 0.01220703125 }, { "epoch": 0.17, "learning_rate": 0.0001998627293198109, "lm_loss": 0.0032806396484375, "loss": 0.0098, "step": 408, "total_loss": 0.0032806396484375 }, { "epoch": 0.17, "learning_rate": 0.00019986205575462862, "lm_loss": 0.01422119140625, "loss": 0.0119, "step": 409, "total_loss": 0.01422119140625 }, { "epoch": 0.17, "learning_rate": 0.0001998613805420908, "lm_loss": 0.01300048828125, "loss": 0.0118, "step": 410, "total_loss": 0.01300048828125 }, { "epoch": 0.17, "learning_rate": 0.00019986070368220856, "lm_loss": 0.01092529296875, "loss": 0.0117, "step": 411, "total_loss": 0.01092529296875 }, { "epoch": 0.17, "learning_rate": 0.00019986002517499309, "lm_loss": 0.00921630859375, "loss": 0.0124, "step": 412, "total_loss": 0.00921630859375 }, { "epoch": 0.17, "learning_rate": 0.00019985934502045558, "lm_loss": 0.00921630859375, "loss": 0.0093, "step": 413, "total_loss": 0.00921630859375 }, { "epoch": 0.17, "learning_rate": 0.00019985866321860725, "lm_loss": 0.01019287109375, "loss": 0.01, "step": 414, "total_loss": 0.01019287109375 }, { "epoch": 0.17, "learning_rate": 0.0001998579797694593, "lm_loss": 0.0179443359375, "loss": 0.0092, "step": 415, "total_loss": 0.0179443359375 }, { "epoch": 0.17, "learning_rate": 0.0001998572946730231, "lm_loss": 0.00787353515625, "loss": 0.0105, "step": 416, "total_loss": 0.00787353515625 }, { "epoch": 0.17, "learning_rate": 0.00019985660792930986, "lm_loss": 0.009765625, "loss": 0.0115, "step": 417, "total_loss": 0.009765625 }, { "epoch": 0.17, "learning_rate": 0.00019985591953833095, "lm_loss": 0.0137939453125, "loss": 0.0115, "step": 418, "total_loss": 0.0137939453125 }, { "epoch": 0.17, "learning_rate": 0.00019985522950009775, "lm_loss": 0.005828857421875, "loss": 0.0082, "step": 419, "total_loss": 0.005828857421875 }, { "epoch": 0.17, "learning_rate": 0.00019985453781462157, "lm_loss": 0.00653076171875, "loss": 0.0093, "step": 420, "total_loss": 0.00653076171875 }, { "epoch": 0.17, "learning_rate": 0.00019985384448191392, "lm_loss": 0.006622314453125, "loss": 0.0104, "step": 421, "total_loss": 0.006622314453125 }, { "epoch": 0.17, "learning_rate": 0.00019985314950198615, "lm_loss": 0.006866455078125, "loss": 0.0105, "step": 422, "total_loss": 0.006866455078125 }, { "epoch": 0.17, "learning_rate": 0.00019985245287484977, "lm_loss": 0.0150146484375, "loss": 0.0145, "step": 423, "total_loss": 0.0150146484375 }, { "epoch": 0.17, "learning_rate": 0.00019985175460051624, "lm_loss": 0.020263671875, "loss": 0.0102, "step": 424, "total_loss": 0.020263671875 }, { "epoch": 0.17, "learning_rate": 0.0001998510546789971, "lm_loss": 0.004241943359375, "loss": 0.0129, "step": 425, "total_loss": 0.004241943359375 }, { "epoch": 0.17, "learning_rate": 0.00019985035311030391, "lm_loss": 0.00555419921875, "loss": 0.0091, "step": 426, "total_loss": 0.00555419921875 }, { "epoch": 0.17, "learning_rate": 0.0001998496498944482, "lm_loss": 0.0093994140625, "loss": 0.0111, "step": 427, "total_loss": 0.0093994140625 }, { "epoch": 0.17, "learning_rate": 0.0001998489450314416, "lm_loss": 0.00982666015625, "loss": 0.0114, "step": 428, "total_loss": 0.00982666015625 }, { "epoch": 0.18, "learning_rate": 0.00019984823852129576, "lm_loss": 0.0230712890625, "loss": 0.0115, "step": 429, "total_loss": 0.0230712890625 }, { "epoch": 0.18, "learning_rate": 0.00019984753036402233, "lm_loss": 0.0123291015625, "loss": 0.0094, "step": 430, "total_loss": 0.0123291015625 }, { "epoch": 0.18, "learning_rate": 0.00019984682055963293, "lm_loss": 0.010009765625, "loss": 0.0087, "step": 431, "total_loss": 0.010009765625 }, { "epoch": 0.18, "learning_rate": 0.00019984610910813935, "lm_loss": 0.0108642578125, "loss": 0.0085, "step": 432, "total_loss": 0.0108642578125 }, { "epoch": 0.18, "learning_rate": 0.00019984539600955326, "lm_loss": 0.0084228515625, "loss": 0.0102, "step": 433, "total_loss": 0.0084228515625 }, { "epoch": 0.18, "learning_rate": 0.00019984468126388647, "lm_loss": 0.01165771484375, "loss": 0.0105, "step": 434, "total_loss": 0.01165771484375 }, { "epoch": 0.18, "learning_rate": 0.0001998439648711507, "lm_loss": 0.011474609375, "loss": 0.0083, "step": 435, "total_loss": 0.011474609375 }, { "epoch": 0.18, "learning_rate": 0.0001998432468313579, "lm_loss": 0.0096435546875, "loss": 0.0106, "step": 436, "total_loss": 0.0096435546875 }, { "epoch": 0.18, "learning_rate": 0.00019984252714451976, "lm_loss": 0.01544189453125, "loss": 0.0094, "step": 437, "total_loss": 0.01544189453125 }, { "epoch": 0.18, "learning_rate": 0.0001998418058106483, "lm_loss": 0.005035400390625, "loss": 0.0081, "step": 438, "total_loss": 0.005035400390625 }, { "epoch": 0.18, "learning_rate": 0.00019984108282975526, "lm_loss": 0.01007080078125, "loss": 0.0127, "step": 439, "total_loss": 0.01007080078125 }, { "epoch": 0.18, "learning_rate": 0.0001998403582018527, "lm_loss": 0.01220703125, "loss": 0.0098, "step": 440, "total_loss": 0.01220703125 }, { "epoch": 0.18, "learning_rate": 0.00019983963192695255, "lm_loss": 0.00946044921875, "loss": 0.0119, "step": 441, "total_loss": 0.00946044921875 }, { "epoch": 0.18, "learning_rate": 0.00019983890400506672, "lm_loss": 0.021240234375, "loss": 0.0115, "step": 442, "total_loss": 0.021240234375 }, { "epoch": 0.18, "learning_rate": 0.00019983817443620732, "lm_loss": 0.01556396484375, "loss": 0.0115, "step": 443, "total_loss": 0.01556396484375 }, { "epoch": 0.18, "learning_rate": 0.00019983744322038627, "lm_loss": 0.01153564453125, "loss": 0.0097, "step": 444, "total_loss": 0.01153564453125 }, { "epoch": 0.18, "learning_rate": 0.00019983671035761572, "lm_loss": 0.0089111328125, "loss": 0.01, "step": 445, "total_loss": 0.0089111328125 }, { "epoch": 0.18, "learning_rate": 0.00019983597584790776, "lm_loss": 0.007476806640625, "loss": 0.0088, "step": 446, "total_loss": 0.007476806640625 }, { "epoch": 0.18, "learning_rate": 0.00019983523969127444, "lm_loss": 0.01007080078125, "loss": 0.0111, "step": 447, "total_loss": 0.01007080078125 }, { "epoch": 0.18, "learning_rate": 0.00019983450188772794, "lm_loss": 0.01080322265625, "loss": 0.0098, "step": 448, "total_loss": 0.01080322265625 }, { "epoch": 0.18, "learning_rate": 0.00019983376243728047, "lm_loss": 0.0079345703125, "loss": 0.0113, "step": 449, "total_loss": 0.0079345703125 }, { "epoch": 0.18, "learning_rate": 0.00019983302133994417, "lm_loss": 0.0091552734375, "loss": 0.0113, "step": 450, "total_loss": 0.0091552734375 }, { "epoch": 0.18, "learning_rate": 0.0001998322785957313, "lm_loss": 0.0059814453125, "loss": 0.0088, "step": 451, "total_loss": 0.0059814453125 }, { "epoch": 0.18, "learning_rate": 0.00019983153420465407, "lm_loss": 0.01123046875, "loss": 0.0102, "step": 452, "total_loss": 0.01123046875 }, { "epoch": 0.19, "learning_rate": 0.00019983078816672483, "lm_loss": 0.01226806640625, "loss": 0.0105, "step": 453, "total_loss": 0.01226806640625 }, { "epoch": 0.19, "learning_rate": 0.00019983004048195583, "lm_loss": 0.005645751953125, "loss": 0.0123, "step": 454, "total_loss": 0.005645751953125 }, { "epoch": 0.19, "learning_rate": 0.0001998292911503594, "lm_loss": 0.0194091796875, "loss": 0.0108, "step": 455, "total_loss": 0.0194091796875 }, { "epoch": 0.19, "learning_rate": 0.00019982854017194795, "lm_loss": 0.0242919921875, "loss": 0.0132, "step": 456, "total_loss": 0.0242919921875 }, { "epoch": 0.19, "learning_rate": 0.00019982778754673382, "lm_loss": 0.0216064453125, "loss": 0.0095, "step": 457, "total_loss": 0.0216064453125 }, { "epoch": 0.19, "learning_rate": 0.00019982703327472946, "lm_loss": 0.01434326171875, "loss": 0.012, "step": 458, "total_loss": 0.01434326171875 }, { "epoch": 0.19, "learning_rate": 0.0001998262773559473, "lm_loss": 0.006134033203125, "loss": 0.0097, "step": 459, "total_loss": 0.006134033203125 }, { "epoch": 0.19, "learning_rate": 0.00019982551979039981, "lm_loss": 0.0113525390625, "loss": 0.013, "step": 460, "total_loss": 0.0113525390625 }, { "epoch": 0.19, "learning_rate": 0.0001998247605780995, "lm_loss": 0.01239013671875, "loss": 0.0094, "step": 461, "total_loss": 0.01239013671875 }, { "epoch": 0.19, "learning_rate": 0.00019982399971905883, "lm_loss": 0.0076904296875, "loss": 0.0092, "step": 462, "total_loss": 0.0076904296875 }, { "epoch": 0.19, "learning_rate": 0.00019982323721329045, "lm_loss": 0.0107421875, "loss": 0.009, "step": 463, "total_loss": 0.0107421875 }, { "epoch": 0.19, "learning_rate": 0.00019982247306080686, "lm_loss": 0.005462646484375, "loss": 0.0089, "step": 464, "total_loss": 0.005462646484375 }, { "epoch": 0.19, "learning_rate": 0.0001998217072616207, "lm_loss": 0.01513671875, "loss": 0.0129, "step": 465, "total_loss": 0.01513671875 }, { "epoch": 0.19, "learning_rate": 0.00019982093981574462, "lm_loss": 0.0079345703125, "loss": 0.0116, "step": 466, "total_loss": 0.0079345703125 }, { "epoch": 0.19, "learning_rate": 0.00019982017072319125, "lm_loss": 0.009521484375, "loss": 0.012, "step": 467, "total_loss": 0.009521484375 }, { "epoch": 0.19, "learning_rate": 0.00019981939998397325, "lm_loss": 0.019287109375, "loss": 0.0111, "step": 468, "total_loss": 0.019287109375 }, { "epoch": 0.19, "learning_rate": 0.0001998186275981034, "lm_loss": 0.006317138671875, "loss": 0.0096, "step": 469, "total_loss": 0.006317138671875 }, { "epoch": 0.19, "learning_rate": 0.00019981785356559443, "lm_loss": 0.00909423828125, "loss": 0.0093, "step": 470, "total_loss": 0.00909423828125 }, { "epoch": 0.19, "learning_rate": 0.00019981707788645906, "lm_loss": 0.0078125, "loss": 0.0085, "step": 471, "total_loss": 0.0078125 }, { "epoch": 0.19, "learning_rate": 0.0001998163005607101, "lm_loss": 0.01068115234375, "loss": 0.0112, "step": 472, "total_loss": 0.01068115234375 }, { "epoch": 0.19, "learning_rate": 0.00019981552158836044, "lm_loss": 0.006500244140625, "loss": 0.011, "step": 473, "total_loss": 0.006500244140625 }, { "epoch": 0.19, "learning_rate": 0.00019981474096942284, "lm_loss": 0.006988525390625, "loss": 0.0097, "step": 474, "total_loss": 0.006988525390625 }, { "epoch": 0.19, "learning_rate": 0.00019981395870391025, "lm_loss": 0.01031494140625, "loss": 0.011, "step": 475, "total_loss": 0.01031494140625 }, { "epoch": 0.19, "learning_rate": 0.0001998131747918355, "lm_loss": 0.005401611328125, "loss": 0.0097, "step": 476, "total_loss": 0.005401611328125 }, { "epoch": 0.2, "learning_rate": 0.00019981238923321158, "lm_loss": 0.00921630859375, "loss": 0.0106, "step": 477, "total_loss": 0.00921630859375 }, { "epoch": 0.2, "learning_rate": 0.00019981160202805142, "lm_loss": 0.00848388671875, "loss": 0.0085, "step": 478, "total_loss": 0.00848388671875 }, { "epoch": 0.2, "learning_rate": 0.00019981081317636804, "lm_loss": 0.01226806640625, "loss": 0.0128, "step": 479, "total_loss": 0.01226806640625 }, { "epoch": 0.2, "learning_rate": 0.00019981002267817444, "lm_loss": 0.00872802734375, "loss": 0.0099, "step": 480, "total_loss": 0.00872802734375 }, { "epoch": 0.2, "learning_rate": 0.00019980923053348364, "lm_loss": 0.00799560546875, "loss": 0.0109, "step": 481, "total_loss": 0.00799560546875 }, { "epoch": 0.2, "learning_rate": 0.00019980843674230868, "lm_loss": 0.0096435546875, "loss": 0.0108, "step": 482, "total_loss": 0.0096435546875 }, { "epoch": 0.2, "learning_rate": 0.00019980764130466273, "lm_loss": 0.01361083984375, "loss": 0.0123, "step": 483, "total_loss": 0.01361083984375 }, { "epoch": 0.2, "learning_rate": 0.00019980684422055887, "lm_loss": 0.007598876953125, "loss": 0.0091, "step": 484, "total_loss": 0.007598876953125 }, { "epoch": 0.2, "learning_rate": 0.00019980604549001024, "lm_loss": 0.007476806640625, "loss": 0.0093, "step": 485, "total_loss": 0.007476806640625 }, { "epoch": 0.2, "learning_rate": 0.00019980524511303008, "lm_loss": 0.0162353515625, "loss": 0.0087, "step": 486, "total_loss": 0.0162353515625 }, { "epoch": 0.2, "learning_rate": 0.0001998044430896315, "lm_loss": 0.007354736328125, "loss": 0.0107, "step": 487, "total_loss": 0.007354736328125 }, { "epoch": 0.2, "learning_rate": 0.00019980363941982777, "lm_loss": 0.0064697265625, "loss": 0.0092, "step": 488, "total_loss": 0.0064697265625 }, { "epoch": 0.2, "learning_rate": 0.00019980283410363215, "lm_loss": 0.01055908203125, "loss": 0.0102, "step": 489, "total_loss": 0.01055908203125 }, { "epoch": 0.2, "learning_rate": 0.00019980202714105794, "lm_loss": 0.0084228515625, "loss": 0.0081, "step": 490, "total_loss": 0.0084228515625 }, { "epoch": 0.2, "learning_rate": 0.00019980121853211842, "lm_loss": 0.006591796875, "loss": 0.0111, "step": 491, "total_loss": 0.006591796875 }, { "epoch": 0.2, "learning_rate": 0.00019980040827682696, "lm_loss": 0.00604248046875, "loss": 0.0119, "step": 492, "total_loss": 0.00604248046875 }, { "epoch": 0.2, "learning_rate": 0.00019979959637519688, "lm_loss": 0.00994873046875, "loss": 0.0089, "step": 493, "total_loss": 0.00994873046875 }, { "epoch": 0.2, "learning_rate": 0.00019979878282724167, "lm_loss": 0.012939453125, "loss": 0.0098, "step": 494, "total_loss": 0.012939453125 }, { "epoch": 0.2, "learning_rate": 0.00019979796763297465, "lm_loss": 0.0125732421875, "loss": 0.01, "step": 495, "total_loss": 0.0125732421875 }, { "epoch": 0.2, "learning_rate": 0.00019979715079240933, "lm_loss": 0.0079345703125, "loss": 0.0081, "step": 496, "total_loss": 0.0079345703125 }, { "epoch": 0.2, "learning_rate": 0.00019979633230555912, "lm_loss": 0.01092529296875, "loss": 0.009, "step": 497, "total_loss": 0.01092529296875 }, { "epoch": 0.2, "learning_rate": 0.00019979551217243758, "lm_loss": 0.0172119140625, "loss": 0.0103, "step": 498, "total_loss": 0.0172119140625 }, { "epoch": 0.2, "learning_rate": 0.00019979469039305823, "lm_loss": 0.01275634765625, "loss": 0.0092, "step": 499, "total_loss": 0.01275634765625 }, { "epoch": 0.2, "learning_rate": 0.00019979386696743459, "lm_loss": 0.0120849609375, "loss": 0.0086, "step": 500, "total_loss": 0.0120849609375 }, { "epoch": 0.2, "eval_lm_loss": 0.011539776809513569, "eval_loss": 0.012045402079820633, "eval_runtime": 44.0051, "eval_samples_per_second": 22.725, "eval_steps_per_second": 0.205, "eval_total_loss": 0.011539776809513569, "lm_loss": 0.00072479248046875, "step": 500, "total_loss": 0.00072479248046875 }, { "epoch": 0.2, "learning_rate": 0.0001997930418955803, "lm_loss": 0.00897216796875, "loss": 0.0106, "step": 501, "total_loss": 0.00897216796875 }, { "epoch": 0.21, "learning_rate": 0.00019979221517750894, "lm_loss": 0.01422119140625, "loss": 0.0122, "step": 502, "total_loss": 0.01422119140625 }, { "epoch": 0.21, "learning_rate": 0.00019979138681323414, "lm_loss": 0.00537109375, "loss": 0.0088, "step": 503, "total_loss": 0.00537109375 }, { "epoch": 0.21, "learning_rate": 0.00019979055680276954, "lm_loss": 0.01287841796875, "loss": 0.0108, "step": 504, "total_loss": 0.01287841796875 }, { "epoch": 0.21, "learning_rate": 0.00019978972514612893, "lm_loss": 0.01531982421875, "loss": 0.0132, "step": 505, "total_loss": 0.01531982421875 }, { "epoch": 0.21, "learning_rate": 0.00019978889184332592, "lm_loss": 0.00750732421875, "loss": 0.0116, "step": 506, "total_loss": 0.00750732421875 }, { "epoch": 0.21, "learning_rate": 0.0001997880568943743, "lm_loss": 0.006744384765625, "loss": 0.0091, "step": 507, "total_loss": 0.006744384765625 }, { "epoch": 0.21, "learning_rate": 0.00019978722029928786, "lm_loss": 0.007293701171875, "loss": 0.0102, "step": 508, "total_loss": 0.007293701171875 }, { "epoch": 0.21, "learning_rate": 0.0001997863820580804, "lm_loss": 0.00872802734375, "loss": 0.0097, "step": 509, "total_loss": 0.00872802734375 }, { "epoch": 0.21, "learning_rate": 0.00019978554217076573, "lm_loss": 0.007110595703125, "loss": 0.0088, "step": 510, "total_loss": 0.007110595703125 }, { "epoch": 0.21, "learning_rate": 0.00019978470063735767, "lm_loss": 0.00799560546875, "loss": 0.0099, "step": 511, "total_loss": 0.00799560546875 }, { "epoch": 0.21, "learning_rate": 0.00019978385745787018, "lm_loss": 0.01708984375, "loss": 0.0115, "step": 512, "total_loss": 0.01708984375 }, { "epoch": 0.21, "learning_rate": 0.0001997830126323171, "lm_loss": 0.007568359375, "loss": 0.0084, "step": 513, "total_loss": 0.007568359375 }, { "epoch": 0.21, "learning_rate": 0.00019978216616071243, "lm_loss": 0.00860595703125, "loss": 0.0108, "step": 514, "total_loss": 0.00860595703125 }, { "epoch": 0.21, "learning_rate": 0.0001997813180430701, "lm_loss": 0.009765625, "loss": 0.0082, "step": 515, "total_loss": 0.009765625 }, { "epoch": 0.21, "learning_rate": 0.0001997804682794041, "lm_loss": 0.0242919921875, "loss": 0.0119, "step": 516, "total_loss": 0.0242919921875 }, { "epoch": 0.21, "learning_rate": 0.00019977961686972842, "lm_loss": 0.01025390625, "loss": 0.0091, "step": 517, "total_loss": 0.01025390625 }, { "epoch": 0.21, "learning_rate": 0.00019977876381405713, "lm_loss": 0.011962890625, "loss": 0.0083, "step": 518, "total_loss": 0.011962890625 }, { "epoch": 0.21, "learning_rate": 0.00019977790911240436, "lm_loss": 0.01129150390625, "loss": 0.0109, "step": 519, "total_loss": 0.01129150390625 }, { "epoch": 0.21, "learning_rate": 0.0001997770527647841, "lm_loss": 0.0054931640625, "loss": 0.0091, "step": 520, "total_loss": 0.0054931640625 }, { "epoch": 0.21, "learning_rate": 0.00019977619477121056, "lm_loss": 0.0111083984375, "loss": 0.0101, "step": 521, "total_loss": 0.0111083984375 }, { "epoch": 0.21, "learning_rate": 0.00019977533513169786, "lm_loss": 0.005950927734375, "loss": 0.0089, "step": 522, "total_loss": 0.005950927734375 }, { "epoch": 0.21, "learning_rate": 0.0001997744738462602, "lm_loss": 0.006744384765625, "loss": 0.0088, "step": 523, "total_loss": 0.006744384765625 }, { "epoch": 0.21, "learning_rate": 0.00019977361091491177, "lm_loss": 0.01446533203125, "loss": 0.0112, "step": 524, "total_loss": 0.01446533203125 }, { "epoch": 0.21, "learning_rate": 0.0001997727463376668, "lm_loss": 0.012451171875, "loss": 0.0101, "step": 525, "total_loss": 0.012451171875 }, { "epoch": 0.22, "learning_rate": 0.00019977188011453958, "lm_loss": 0.0036773681640625, "loss": 0.0086, "step": 526, "total_loss": 0.0036773681640625 }, { "epoch": 0.22, "learning_rate": 0.00019977101224554438, "lm_loss": 0.01519775390625, "loss": 0.0119, "step": 527, "total_loss": 0.01519775390625 }, { "epoch": 0.22, "learning_rate": 0.0001997701427306955, "lm_loss": 0.0103759765625, "loss": 0.0096, "step": 528, "total_loss": 0.0103759765625 }, { "epoch": 0.22, "learning_rate": 0.00019976927157000734, "lm_loss": 0.006988525390625, "loss": 0.0084, "step": 529, "total_loss": 0.006988525390625 }, { "epoch": 0.22, "learning_rate": 0.0001997683987634942, "lm_loss": 0.01239013671875, "loss": 0.0101, "step": 530, "total_loss": 0.01239013671875 }, { "epoch": 0.22, "learning_rate": 0.00019976752431117052, "lm_loss": 0.003204345703125, "loss": 0.0127, "step": 531, "total_loss": 0.003204345703125 }, { "epoch": 0.22, "learning_rate": 0.00019976664821305068, "lm_loss": 0.01513671875, "loss": 0.0096, "step": 532, "total_loss": 0.01513671875 }, { "epoch": 0.22, "learning_rate": 0.00019976577046914922, "lm_loss": 0.012939453125, "loss": 0.0099, "step": 533, "total_loss": 0.012939453125 }, { "epoch": 0.22, "learning_rate": 0.00019976489107948058, "lm_loss": 0.0040283203125, "loss": 0.0107, "step": 534, "total_loss": 0.0040283203125 }, { "epoch": 0.22, "learning_rate": 0.0001997640100440592, "lm_loss": 0.0123291015625, "loss": 0.011, "step": 535, "total_loss": 0.0123291015625 }, { "epoch": 0.22, "learning_rate": 0.0001997631273628997, "lm_loss": 0.01239013671875, "loss": 0.0088, "step": 536, "total_loss": 0.01239013671875 }, { "epoch": 0.22, "learning_rate": 0.00019976224303601663, "lm_loss": 0.00799560546875, "loss": 0.009, "step": 537, "total_loss": 0.00799560546875 }, { "epoch": 0.22, "learning_rate": 0.00019976135706342455, "lm_loss": 0.01953125, "loss": 0.0126, "step": 538, "total_loss": 0.01953125 }, { "epoch": 0.22, "learning_rate": 0.00019976046944513803, "lm_loss": 0.01068115234375, "loss": 0.01, "step": 539, "total_loss": 0.01068115234375 }, { "epoch": 0.22, "learning_rate": 0.00019975958018117183, "lm_loss": 0.0087890625, "loss": 0.011, "step": 540, "total_loss": 0.0087890625 }, { "epoch": 0.22, "learning_rate": 0.00019975868927154053, "lm_loss": 0.00830078125, "loss": 0.0116, "step": 541, "total_loss": 0.00830078125 }, { "epoch": 0.22, "learning_rate": 0.00019975779671625886, "lm_loss": 0.00927734375, "loss": 0.0119, "step": 542, "total_loss": 0.00927734375 }, { "epoch": 0.22, "learning_rate": 0.00019975690251534153, "lm_loss": 0.01055908203125, "loss": 0.0092, "step": 543, "total_loss": 0.01055908203125 }, { "epoch": 0.22, "learning_rate": 0.00019975600666880333, "lm_loss": 0.010009765625, "loss": 0.0091, "step": 544, "total_loss": 0.010009765625 }, { "epoch": 0.22, "learning_rate": 0.00019975510917665895, "lm_loss": 0.006195068359375, "loss": 0.0125, "step": 545, "total_loss": 0.006195068359375 }, { "epoch": 0.22, "learning_rate": 0.0001997542100389233, "lm_loss": 0.01153564453125, "loss": 0.0099, "step": 546, "total_loss": 0.01153564453125 }, { "epoch": 0.22, "learning_rate": 0.00019975330925561113, "lm_loss": 0.0111083984375, "loss": 0.0118, "step": 547, "total_loss": 0.0111083984375 }, { "epoch": 0.22, "learning_rate": 0.00019975240682673735, "lm_loss": 0.0025787353515625, "loss": 0.009, "step": 548, "total_loss": 0.0025787353515625 }, { "epoch": 0.22, "learning_rate": 0.00019975150275231682, "lm_loss": 0.006683349609375, "loss": 0.0084, "step": 549, "total_loss": 0.006683349609375 }, { "epoch": 0.22, "learning_rate": 0.00019975059703236447, "lm_loss": 0.0101318359375, "loss": 0.0088, "step": 550, "total_loss": 0.0101318359375 }, { "epoch": 0.23, "learning_rate": 0.00019974968966689525, "lm_loss": 0.00543212890625, "loss": 0.0108, "step": 551, "total_loss": 0.00543212890625 }, { "epoch": 0.23, "learning_rate": 0.00019974878065592407, "lm_loss": 0.01104736328125, "loss": 0.008, "step": 552, "total_loss": 0.01104736328125 }, { "epoch": 0.23, "learning_rate": 0.000199747869999466, "lm_loss": 0.01092529296875, "loss": 0.01, "step": 553, "total_loss": 0.01092529296875 }, { "epoch": 0.23, "learning_rate": 0.00019974695769753602, "lm_loss": 0.01611328125, "loss": 0.0119, "step": 554, "total_loss": 0.01611328125 }, { "epoch": 0.23, "learning_rate": 0.0001997460437501492, "lm_loss": 0.005523681640625, "loss": 0.0124, "step": 555, "total_loss": 0.005523681640625 }, { "epoch": 0.23, "learning_rate": 0.00019974512815732062, "lm_loss": 0.0037841796875, "loss": 0.0089, "step": 556, "total_loss": 0.0037841796875 }, { "epoch": 0.23, "learning_rate": 0.00019974421091906535, "lm_loss": 0.00836181640625, "loss": 0.01, "step": 557, "total_loss": 0.00836181640625 }, { "epoch": 0.23, "learning_rate": 0.00019974329203539855, "lm_loss": 0.012939453125, "loss": 0.0114, "step": 558, "total_loss": 0.012939453125 }, { "epoch": 0.23, "learning_rate": 0.00019974237150633534, "lm_loss": 0.01202392578125, "loss": 0.014, "step": 559, "total_loss": 0.01202392578125 }, { "epoch": 0.23, "learning_rate": 0.00019974144933189097, "lm_loss": 0.007354736328125, "loss": 0.0102, "step": 560, "total_loss": 0.007354736328125 }, { "epoch": 0.23, "learning_rate": 0.0001997405255120806, "lm_loss": 0.00750732421875, "loss": 0.0115, "step": 561, "total_loss": 0.00750732421875 }, { "epoch": 0.23, "learning_rate": 0.0001997396000469195, "lm_loss": 0.002471923828125, "loss": 0.0098, "step": 562, "total_loss": 0.002471923828125 }, { "epoch": 0.23, "learning_rate": 0.00019973867293642293, "lm_loss": 0.0064697265625, "loss": 0.0084, "step": 563, "total_loss": 0.0064697265625 }, { "epoch": 0.23, "learning_rate": 0.00019973774418060618, "lm_loss": 0.00787353515625, "loss": 0.0111, "step": 564, "total_loss": 0.00787353515625 }, { "epoch": 0.23, "learning_rate": 0.00019973681377948455, "lm_loss": 0.0107421875, "loss": 0.0095, "step": 565, "total_loss": 0.0107421875 }, { "epoch": 0.23, "learning_rate": 0.0001997358817330734, "lm_loss": 0.01141357421875, "loss": 0.0108, "step": 566, "total_loss": 0.01141357421875 }, { "epoch": 0.23, "learning_rate": 0.00019973494804138815, "lm_loss": 0.0089111328125, "loss": 0.0097, "step": 567, "total_loss": 0.0089111328125 }, { "epoch": 0.23, "learning_rate": 0.00019973401270444413, "lm_loss": 0.00787353515625, "loss": 0.0086, "step": 568, "total_loss": 0.00787353515625 }, { "epoch": 0.23, "learning_rate": 0.00019973307572225681, "lm_loss": 0.005645751953125, "loss": 0.0093, "step": 569, "total_loss": 0.005645751953125 }, { "epoch": 0.23, "learning_rate": 0.00019973213709484168, "lm_loss": 0.0125732421875, "loss": 0.0121, "step": 570, "total_loss": 0.0125732421875 }, { "epoch": 0.23, "learning_rate": 0.00019973119682221416, "lm_loss": 0.01031494140625, "loss": 0.0081, "step": 571, "total_loss": 0.01031494140625 }, { "epoch": 0.23, "learning_rate": 0.00019973025490438976, "lm_loss": 0.01019287109375, "loss": 0.0091, "step": 572, "total_loss": 0.01019287109375 }, { "epoch": 0.23, "learning_rate": 0.00019972931134138407, "lm_loss": 0.00927734375, "loss": 0.0104, "step": 573, "total_loss": 0.00927734375 }, { "epoch": 0.23, "learning_rate": 0.00019972836613321265, "lm_loss": 0.00775146484375, "loss": 0.0097, "step": 574, "total_loss": 0.00775146484375 }, { "epoch": 0.24, "learning_rate": 0.00019972741927989102, "lm_loss": 0.0142822265625, "loss": 0.011, "step": 575, "total_loss": 0.0142822265625 }, { "epoch": 0.24, "learning_rate": 0.0001997264707814349, "lm_loss": 0.0047607421875, "loss": 0.0096, "step": 576, "total_loss": 0.0047607421875 }, { "epoch": 0.24, "learning_rate": 0.00019972552063785988, "lm_loss": 0.004791259765625, "loss": 0.0107, "step": 577, "total_loss": 0.004791259765625 }, { "epoch": 0.24, "learning_rate": 0.00019972456884918163, "lm_loss": 0.0118408203125, "loss": 0.01, "step": 578, "total_loss": 0.0118408203125 }, { "epoch": 0.24, "learning_rate": 0.00019972361541541588, "lm_loss": 0.01611328125, "loss": 0.009, "step": 579, "total_loss": 0.01611328125 }, { "epoch": 0.24, "learning_rate": 0.00019972266033657833, "lm_loss": 0.006744384765625, "loss": 0.0084, "step": 580, "total_loss": 0.006744384765625 }, { "epoch": 0.24, "learning_rate": 0.00019972170361268475, "lm_loss": 0.01385498046875, "loss": 0.0126, "step": 581, "total_loss": 0.01385498046875 }, { "epoch": 0.24, "learning_rate": 0.00019972074524375087, "lm_loss": 0.005645751953125, "loss": 0.0105, "step": 582, "total_loss": 0.005645751953125 }, { "epoch": 0.24, "learning_rate": 0.00019971978522979262, "lm_loss": 0.01104736328125, "loss": 0.0096, "step": 583, "total_loss": 0.01104736328125 }, { "epoch": 0.24, "learning_rate": 0.00019971882357082575, "lm_loss": 0.011474609375, "loss": 0.0103, "step": 584, "total_loss": 0.011474609375 }, { "epoch": 0.24, "learning_rate": 0.00019971786026686612, "lm_loss": 0.00616455078125, "loss": 0.0085, "step": 585, "total_loss": 0.00616455078125 }, { "epoch": 0.24, "learning_rate": 0.00019971689531792967, "lm_loss": 0.00762939453125, "loss": 0.0108, "step": 586, "total_loss": 0.00762939453125 }, { "epoch": 0.24, "learning_rate": 0.00019971592872403227, "lm_loss": 0.01263427734375, "loss": 0.0096, "step": 587, "total_loss": 0.01263427734375 }, { "epoch": 0.24, "learning_rate": 0.00019971496048518987, "lm_loss": 0.0084228515625, "loss": 0.0105, "step": 588, "total_loss": 0.0084228515625 }, { "epoch": 0.24, "learning_rate": 0.00019971399060141848, "lm_loss": 0.0081787109375, "loss": 0.0112, "step": 589, "total_loss": 0.0081787109375 }, { "epoch": 0.24, "learning_rate": 0.00019971301907273405, "lm_loss": 0.005462646484375, "loss": 0.0125, "step": 590, "total_loss": 0.005462646484375 }, { "epoch": 0.24, "learning_rate": 0.00019971204589915267, "lm_loss": 0.015869140625, "loss": 0.0112, "step": 591, "total_loss": 0.015869140625 }, { "epoch": 0.24, "learning_rate": 0.00019971107108069034, "lm_loss": 0.012939453125, "loss": 0.0094, "step": 592, "total_loss": 0.012939453125 }, { "epoch": 0.24, "learning_rate": 0.00019971009461736315, "lm_loss": 0.01153564453125, "loss": 0.0097, "step": 593, "total_loss": 0.01153564453125 }, { "epoch": 0.24, "learning_rate": 0.0001997091165091872, "lm_loss": 0.0098876953125, "loss": 0.0103, "step": 594, "total_loss": 0.0098876953125 }, { "epoch": 0.24, "learning_rate": 0.00019970813675617867, "lm_loss": 0.0078125, "loss": 0.0092, "step": 595, "total_loss": 0.0078125 }, { "epoch": 0.24, "learning_rate": 0.00019970715535835368, "lm_loss": 0.0074462890625, "loss": 0.0084, "step": 596, "total_loss": 0.0074462890625 }, { "epoch": 0.24, "learning_rate": 0.00019970617231572843, "lm_loss": 0.00933837890625, "loss": 0.0087, "step": 597, "total_loss": 0.00933837890625 }, { "epoch": 0.24, "learning_rate": 0.00019970518762831913, "lm_loss": 0.004425048828125, "loss": 0.0101, "step": 598, "total_loss": 0.004425048828125 }, { "epoch": 0.24, "learning_rate": 0.00019970420129614204, "lm_loss": 0.00811767578125, "loss": 0.0083, "step": 599, "total_loss": 0.00811767578125 }, { "epoch": 0.25, "learning_rate": 0.00019970321331921342, "lm_loss": 0.0087890625, "loss": 0.0099, "step": 600, "total_loss": 0.0087890625 }, { "epoch": 0.25, "eval_lm_loss": 0.0111524797976017, "eval_loss": 0.011608276516199112, "eval_runtime": 44.0118, "eval_samples_per_second": 22.721, "eval_steps_per_second": 0.204, "eval_total_loss": 0.0111524797976017, "lm_loss": 0.00151824951171875, "step": 600, "total_loss": 0.00151824951171875 }, { "epoch": 0.25, "learning_rate": 0.0001997022236975496, "lm_loss": 0.00909423828125, "loss": 0.0102, "step": 601, "total_loss": 0.00909423828125 }, { "epoch": 0.25, "learning_rate": 0.00019970123243116686, "lm_loss": 0.0037078857421875, "loss": 0.0086, "step": 602, "total_loss": 0.0037078857421875 }, { "epoch": 0.25, "learning_rate": 0.00019970023952008153, "lm_loss": 0.01708984375, "loss": 0.0113, "step": 603, "total_loss": 0.01708984375 }, { "epoch": 0.25, "learning_rate": 0.00019969924496431008, "lm_loss": 0.00628662109375, "loss": 0.0098, "step": 604, "total_loss": 0.00628662109375 }, { "epoch": 0.25, "learning_rate": 0.00019969824876386885, "lm_loss": 0.006561279296875, "loss": 0.0084, "step": 605, "total_loss": 0.006561279296875 }, { "epoch": 0.25, "learning_rate": 0.00019969725091877427, "lm_loss": 0.0186767578125, "loss": 0.0117, "step": 606, "total_loss": 0.0186767578125 }, { "epoch": 0.25, "learning_rate": 0.00019969625142904284, "lm_loss": 0.0155029296875, "loss": 0.0103, "step": 607, "total_loss": 0.0155029296875 }, { "epoch": 0.25, "learning_rate": 0.00019969525029469104, "lm_loss": 0.009765625, "loss": 0.0117, "step": 608, "total_loss": 0.009765625 }, { "epoch": 0.25, "learning_rate": 0.00019969424751573533, "lm_loss": 0.0074462890625, "loss": 0.0084, "step": 609, "total_loss": 0.0074462890625 }, { "epoch": 0.25, "learning_rate": 0.00019969324309219232, "lm_loss": 0.0128173828125, "loss": 0.0099, "step": 610, "total_loss": 0.0128173828125 }, { "epoch": 0.25, "learning_rate": 0.00019969223702407854, "lm_loss": 0.019775390625, "loss": 0.0111, "step": 611, "total_loss": 0.019775390625 }, { "epoch": 0.25, "learning_rate": 0.0001996912293114106, "lm_loss": 0.0142822265625, "loss": 0.011, "step": 612, "total_loss": 0.0142822265625 }, { "epoch": 0.25, "learning_rate": 0.00019969021995420514, "lm_loss": 0.003387451171875, "loss": 0.0093, "step": 613, "total_loss": 0.003387451171875 }, { "epoch": 0.25, "learning_rate": 0.0001996892089524788, "lm_loss": 0.014404296875, "loss": 0.0081, "step": 614, "total_loss": 0.014404296875 }, { "epoch": 0.25, "learning_rate": 0.00019968819630624822, "lm_loss": 0.00390625, "loss": 0.009, "step": 615, "total_loss": 0.00390625 }, { "epoch": 0.25, "learning_rate": 0.00019968718201553016, "lm_loss": 0.01043701171875, "loss": 0.0105, "step": 616, "total_loss": 0.01043701171875 }, { "epoch": 0.25, "learning_rate": 0.0001996861660803413, "lm_loss": 0.01263427734375, "loss": 0.0116, "step": 617, "total_loss": 0.01263427734375 }, { "epoch": 0.25, "learning_rate": 0.00019968514850069846, "lm_loss": 0.0125732421875, "loss": 0.0109, "step": 618, "total_loss": 0.0125732421875 }, { "epoch": 0.25, "learning_rate": 0.00019968412927661838, "lm_loss": 0.00701904296875, "loss": 0.0087, "step": 619, "total_loss": 0.00701904296875 }, { "epoch": 0.25, "learning_rate": 0.00019968310840811788, "lm_loss": 0.005859375, "loss": 0.0095, "step": 620, "total_loss": 0.005859375 }, { "epoch": 0.25, "learning_rate": 0.0001996820858952138, "lm_loss": 0.01239013671875, "loss": 0.0102, "step": 621, "total_loss": 0.01239013671875 }, { "epoch": 0.25, "learning_rate": 0.00019968106173792304, "lm_loss": 0.00927734375, "loss": 0.0109, "step": 622, "total_loss": 0.00927734375 }, { "epoch": 0.25, "learning_rate": 0.0001996800359362625, "lm_loss": 0.01116943359375, "loss": 0.0098, "step": 623, "total_loss": 0.01116943359375 }, { "epoch": 0.26, "learning_rate": 0.00019967900849024903, "lm_loss": 0.01031494140625, "loss": 0.011, "step": 624, "total_loss": 0.01031494140625 }, { "epoch": 0.26, "learning_rate": 0.0001996779793998996, "lm_loss": 0.019287109375, "loss": 0.0112, "step": 625, "total_loss": 0.019287109375 }, { "epoch": 0.26, "learning_rate": 0.00019967694866523125, "lm_loss": 0.01434326171875, "loss": 0.0112, "step": 626, "total_loss": 0.01434326171875 }, { "epoch": 0.26, "learning_rate": 0.0001996759162862609, "lm_loss": 0.014404296875, "loss": 0.0096, "step": 627, "total_loss": 0.014404296875 }, { "epoch": 0.26, "learning_rate": 0.00019967488226300568, "lm_loss": 0.0238037109375, "loss": 0.0119, "step": 628, "total_loss": 0.0238037109375 }, { "epoch": 0.26, "learning_rate": 0.00019967384659548256, "lm_loss": 0.0164794921875, "loss": 0.0089, "step": 629, "total_loss": 0.0164794921875 }, { "epoch": 0.26, "learning_rate": 0.00019967280928370867, "lm_loss": 0.0078125, "loss": 0.0104, "step": 630, "total_loss": 0.0078125 }, { "epoch": 0.26, "learning_rate": 0.00019967177032770107, "lm_loss": 0.01385498046875, "loss": 0.0092, "step": 631, "total_loss": 0.01385498046875 }, { "epoch": 0.26, "learning_rate": 0.00019967072972747695, "lm_loss": 0.00732421875, "loss": 0.0106, "step": 632, "total_loss": 0.00732421875 }, { "epoch": 0.26, "learning_rate": 0.00019966968748305345, "lm_loss": 0.0106201171875, "loss": 0.0114, "step": 633, "total_loss": 0.0106201171875 }, { "epoch": 0.26, "learning_rate": 0.0001996686435944478, "lm_loss": 0.0032501220703125, "loss": 0.011, "step": 634, "total_loss": 0.0032501220703125 }, { "epoch": 0.26, "learning_rate": 0.00019966759806167717, "lm_loss": 0.005615234375, "loss": 0.0087, "step": 635, "total_loss": 0.005615234375 }, { "epoch": 0.26, "learning_rate": 0.00019966655088475883, "lm_loss": 0.01708984375, "loss": 0.0082, "step": 636, "total_loss": 0.01708984375 }, { "epoch": 0.26, "learning_rate": 0.0001996655020637101, "lm_loss": 0.01275634765625, "loss": 0.0097, "step": 637, "total_loss": 0.01275634765625 }, { "epoch": 0.26, "learning_rate": 0.0001996644515985482, "lm_loss": 0.012451171875, "loss": 0.011, "step": 638, "total_loss": 0.012451171875 }, { "epoch": 0.26, "learning_rate": 0.0001996633994892905, "lm_loss": 0.01275634765625, "loss": 0.0098, "step": 639, "total_loss": 0.01275634765625 }, { "epoch": 0.26, "learning_rate": 0.00019966234573595435, "lm_loss": 0.003204345703125, "loss": 0.0095, "step": 640, "total_loss": 0.003204345703125 }, { "epoch": 0.26, "learning_rate": 0.00019966129033855714, "lm_loss": 0.016357421875, "loss": 0.0142, "step": 641, "total_loss": 0.016357421875 }, { "epoch": 0.26, "learning_rate": 0.00019966023329711627, "lm_loss": 0.01312255859375, "loss": 0.0104, "step": 642, "total_loss": 0.01312255859375 }, { "epoch": 0.26, "learning_rate": 0.00019965917461164918, "lm_loss": 0.0118408203125, "loss": 0.0089, "step": 643, "total_loss": 0.0118408203125 }, { "epoch": 0.26, "learning_rate": 0.00019965811428217337, "lm_loss": 0.005401611328125, "loss": 0.0092, "step": 644, "total_loss": 0.005401611328125 }, { "epoch": 0.26, "learning_rate": 0.00019965705230870625, "lm_loss": 0.005340576171875, "loss": 0.0093, "step": 645, "total_loss": 0.005340576171875 }, { "epoch": 0.26, "learning_rate": 0.00019965598869126544, "lm_loss": 0.00872802734375, "loss": 0.011, "step": 646, "total_loss": 0.00872802734375 }, { "epoch": 0.26, "learning_rate": 0.0001996549234298684, "lm_loss": 0.017333984375, "loss": 0.0093, "step": 647, "total_loss": 0.017333984375 }, { "epoch": 0.26, "learning_rate": 0.00019965385652453272, "lm_loss": 0.007232666015625, "loss": 0.0099, "step": 648, "total_loss": 0.007232666015625 }, { "epoch": 0.27, "learning_rate": 0.00019965278797527604, "lm_loss": 0.01141357421875, "loss": 0.0093, "step": 649, "total_loss": 0.01141357421875 }, { "epoch": 0.27, "learning_rate": 0.000199651717782116, "lm_loss": 0.00946044921875, "loss": 0.0114, "step": 650, "total_loss": 0.00946044921875 }, { "epoch": 0.27, "learning_rate": 0.00019965064594507014, "lm_loss": 0.0159912109375, "loss": 0.0107, "step": 651, "total_loss": 0.0159912109375 }, { "epoch": 0.27, "learning_rate": 0.00019964957246415628, "lm_loss": 0.01422119140625, "loss": 0.0119, "step": 652, "total_loss": 0.01422119140625 }, { "epoch": 0.27, "learning_rate": 0.00019964849733939205, "lm_loss": 0.01544189453125, "loss": 0.0115, "step": 653, "total_loss": 0.01544189453125 }, { "epoch": 0.27, "learning_rate": 0.00019964742057079518, "lm_loss": 0.00506591796875, "loss": 0.0093, "step": 654, "total_loss": 0.00506591796875 }, { "epoch": 0.27, "learning_rate": 0.0001996463421583835, "lm_loss": 0.01171875, "loss": 0.0104, "step": 655, "total_loss": 0.01171875 }, { "epoch": 0.27, "learning_rate": 0.0001996452621021747, "lm_loss": 0.0020294189453125, "loss": 0.008, "step": 656, "total_loss": 0.0020294189453125 }, { "epoch": 0.27, "learning_rate": 0.0001996441804021867, "lm_loss": 0.0031585693359375, "loss": 0.0094, "step": 657, "total_loss": 0.0031585693359375 }, { "epoch": 0.27, "learning_rate": 0.00019964309705843727, "lm_loss": 0.0098876953125, "loss": 0.0094, "step": 658, "total_loss": 0.0098876953125 }, { "epoch": 0.27, "learning_rate": 0.0001996420120709443, "lm_loss": 0.00909423828125, "loss": 0.0093, "step": 659, "total_loss": 0.00909423828125 }, { "epoch": 0.27, "learning_rate": 0.00019964092543972575, "lm_loss": 0.007781982421875, "loss": 0.009, "step": 660, "total_loss": 0.007781982421875 }, { "epoch": 0.27, "learning_rate": 0.00019963983716479944, "lm_loss": 0.009521484375, "loss": 0.0088, "step": 661, "total_loss": 0.009521484375 }, { "epoch": 0.27, "learning_rate": 0.00019963874724618336, "lm_loss": 0.006927490234375, "loss": 0.0091, "step": 662, "total_loss": 0.006927490234375 }, { "epoch": 0.27, "learning_rate": 0.00019963765568389557, "lm_loss": 0.01123046875, "loss": 0.01, "step": 663, "total_loss": 0.01123046875 }, { "epoch": 0.27, "learning_rate": 0.00019963656247795397, "lm_loss": 0.01055908203125, "loss": 0.0124, "step": 664, "total_loss": 0.01055908203125 }, { "epoch": 0.27, "learning_rate": 0.00019963546762837662, "lm_loss": 0.01446533203125, "loss": 0.012, "step": 665, "total_loss": 0.01446533203125 }, { "epoch": 0.27, "learning_rate": 0.0001996343711351816, "lm_loss": 0.0036163330078125, "loss": 0.0107, "step": 666, "total_loss": 0.0036163330078125 }, { "epoch": 0.27, "learning_rate": 0.000199633272998387, "lm_loss": 0.007232666015625, "loss": 0.0102, "step": 667, "total_loss": 0.007232666015625 }, { "epoch": 0.27, "learning_rate": 0.00019963217321801094, "lm_loss": 0.0185546875, "loss": 0.0104, "step": 668, "total_loss": 0.0185546875 }, { "epoch": 0.27, "learning_rate": 0.00019963107179407156, "lm_loss": 0.006683349609375, "loss": 0.0101, "step": 669, "total_loss": 0.006683349609375 }, { "epoch": 0.27, "learning_rate": 0.000199629968726587, "lm_loss": 0.010986328125, "loss": 0.0098, "step": 670, "total_loss": 0.010986328125 }, { "epoch": 0.27, "learning_rate": 0.00019962886401557548, "lm_loss": 0.01104736328125, "loss": 0.0091, "step": 671, "total_loss": 0.01104736328125 }, { "epoch": 0.27, "learning_rate": 0.00019962775766105522, "lm_loss": 0.00628662109375, "loss": 0.01, "step": 672, "total_loss": 0.00628662109375 }, { "epoch": 0.28, "learning_rate": 0.00019962664966304447, "lm_loss": 0.01007080078125, "loss": 0.0118, "step": 673, "total_loss": 0.01007080078125 }, { "epoch": 0.28, "learning_rate": 0.0001996255400215615, "lm_loss": 0.01025390625, "loss": 0.0119, "step": 674, "total_loss": 0.01025390625 }, { "epoch": 0.28, "learning_rate": 0.00019962442873662465, "lm_loss": 0.0074462890625, "loss": 0.0102, "step": 675, "total_loss": 0.0074462890625 }, { "epoch": 0.28, "learning_rate": 0.00019962331580825223, "lm_loss": 0.00653076171875, "loss": 0.0092, "step": 676, "total_loss": 0.00653076171875 }, { "epoch": 0.28, "learning_rate": 0.00019962220123646255, "lm_loss": 0.01171875, "loss": 0.0114, "step": 677, "total_loss": 0.01171875 }, { "epoch": 0.28, "learning_rate": 0.00019962108502127407, "lm_loss": 0.0096435546875, "loss": 0.0088, "step": 678, "total_loss": 0.0096435546875 }, { "epoch": 0.28, "learning_rate": 0.0001996199671627052, "lm_loss": 0.01239013671875, "loss": 0.0107, "step": 679, "total_loss": 0.01239013671875 }, { "epoch": 0.28, "learning_rate": 0.00019961884766077432, "lm_loss": 0.004058837890625, "loss": 0.0102, "step": 680, "total_loss": 0.004058837890625 }, { "epoch": 0.28, "learning_rate": 0.00019961772651549994, "lm_loss": 0.006317138671875, "loss": 0.0099, "step": 681, "total_loss": 0.006317138671875 }, { "epoch": 0.28, "learning_rate": 0.00019961660372690054, "lm_loss": 0.00946044921875, "loss": 0.0107, "step": 682, "total_loss": 0.00946044921875 }, { "epoch": 0.28, "learning_rate": 0.00019961547929499466, "lm_loss": 0.005706787109375, "loss": 0.0094, "step": 683, "total_loss": 0.005706787109375 }, { "epoch": 0.28, "learning_rate": 0.00019961435321980085, "lm_loss": 0.0047607421875, "loss": 0.0077, "step": 684, "total_loss": 0.0047607421875 }, { "epoch": 0.28, "learning_rate": 0.00019961322550133764, "lm_loss": 0.0101318359375, "loss": 0.0096, "step": 685, "total_loss": 0.0101318359375 }, { "epoch": 0.28, "learning_rate": 0.00019961209613962374, "lm_loss": 0.01531982421875, "loss": 0.0085, "step": 686, "total_loss": 0.01531982421875 }, { "epoch": 0.28, "learning_rate": 0.00019961096513467763, "lm_loss": 0.0072021484375, "loss": 0.0123, "step": 687, "total_loss": 0.0072021484375 }, { "epoch": 0.28, "learning_rate": 0.0001996098324865181, "lm_loss": 0.009033203125, "loss": 0.0089, "step": 688, "total_loss": 0.009033203125 }, { "epoch": 0.28, "learning_rate": 0.00019960869819516376, "lm_loss": 0.01080322265625, "loss": 0.01, "step": 689, "total_loss": 0.01080322265625 }, { "epoch": 0.28, "learning_rate": 0.00019960756226063332, "lm_loss": 0.00531005859375, "loss": 0.0109, "step": 690, "total_loss": 0.00531005859375 }, { "epoch": 0.28, "learning_rate": 0.00019960642468294556, "lm_loss": 0.006591796875, "loss": 0.0096, "step": 691, "total_loss": 0.006591796875 }, { "epoch": 0.28, "learning_rate": 0.00019960528546211922, "lm_loss": 0.00482177734375, "loss": 0.0091, "step": 692, "total_loss": 0.00482177734375 }, { "epoch": 0.28, "learning_rate": 0.00019960414459817312, "lm_loss": 0.0107421875, "loss": 0.0082, "step": 693, "total_loss": 0.0107421875 }, { "epoch": 0.28, "learning_rate": 0.00019960300209112607, "lm_loss": 0.00360107421875, "loss": 0.0112, "step": 694, "total_loss": 0.00360107421875 }, { "epoch": 0.28, "learning_rate": 0.00019960185794099685, "lm_loss": 0.006011962890625, "loss": 0.0105, "step": 695, "total_loss": 0.006011962890625 }, { "epoch": 0.28, "learning_rate": 0.00019960071214780443, "lm_loss": 0.009521484375, "loss": 0.0113, "step": 696, "total_loss": 0.009521484375 }, { "epoch": 0.28, "learning_rate": 0.0001995995647115677, "lm_loss": 0.0108642578125, "loss": 0.0089, "step": 697, "total_loss": 0.0108642578125 }, { "epoch": 0.29, "learning_rate": 0.0001995984156323055, "lm_loss": 0.01263427734375, "loss": 0.0098, "step": 698, "total_loss": 0.01263427734375 }, { "epoch": 0.29, "learning_rate": 0.00019959726491003687, "lm_loss": 0.00958251953125, "loss": 0.0081, "step": 699, "total_loss": 0.00958251953125 }, { "epoch": 0.29, "learning_rate": 0.00019959611254478078, "lm_loss": 0.006561279296875, "loss": 0.0094, "step": 700, "total_loss": 0.006561279296875 }, { "epoch": 0.29, "eval_lm_loss": 0.01146012730896473, "eval_loss": 0.011966399848461151, "eval_runtime": 43.9553, "eval_samples_per_second": 22.75, "eval_steps_per_second": 0.205, "eval_total_loss": 0.01146012730896473, "lm_loss": 0.00103759765625, "step": 700, "total_loss": 0.00103759765625 }, { "epoch": 0.29, "learning_rate": 0.00019959495853655624, "lm_loss": 0.01141357421875, "loss": 0.0105, "step": 701, "total_loss": 0.01141357421875 }, { "epoch": 0.29, "learning_rate": 0.00019959380288538223, "lm_loss": 0.008544921875, "loss": 0.0111, "step": 702, "total_loss": 0.008544921875 }, { "epoch": 0.29, "learning_rate": 0.0001995926455912779, "lm_loss": 0.01373291015625, "loss": 0.0101, "step": 703, "total_loss": 0.01373291015625 }, { "epoch": 0.29, "learning_rate": 0.0001995914866542623, "lm_loss": 0.01141357421875, "loss": 0.0088, "step": 704, "total_loss": 0.01141357421875 }, { "epoch": 0.29, "learning_rate": 0.00019959032607435453, "lm_loss": 0.0089111328125, "loss": 0.0097, "step": 705, "total_loss": 0.0089111328125 }, { "epoch": 0.29, "learning_rate": 0.00019958916385157376, "lm_loss": 0.007598876953125, "loss": 0.0088, "step": 706, "total_loss": 0.007598876953125 }, { "epoch": 0.29, "learning_rate": 0.00019958799998593917, "lm_loss": 0.00823974609375, "loss": 0.0102, "step": 707, "total_loss": 0.00823974609375 }, { "epoch": 0.29, "learning_rate": 0.00019958683447746992, "lm_loss": 0.0096435546875, "loss": 0.0089, "step": 708, "total_loss": 0.0096435546875 }, { "epoch": 0.29, "learning_rate": 0.00019958566732618529, "lm_loss": 0.00640869140625, "loss": 0.0076, "step": 709, "total_loss": 0.00640869140625 }, { "epoch": 0.29, "learning_rate": 0.00019958449853210448, "lm_loss": 0.0107421875, "loss": 0.0102, "step": 710, "total_loss": 0.0107421875 }, { "epoch": 0.29, "learning_rate": 0.00019958332809524683, "lm_loss": 0.00933837890625, "loss": 0.0085, "step": 711, "total_loss": 0.00933837890625 }, { "epoch": 0.29, "learning_rate": 0.00019958215601563158, "lm_loss": 0.00823974609375, "loss": 0.0093, "step": 712, "total_loss": 0.00823974609375 }, { "epoch": 0.29, "learning_rate": 0.00019958098229327814, "lm_loss": 0.00897216796875, "loss": 0.0097, "step": 713, "total_loss": 0.00897216796875 }, { "epoch": 0.29, "learning_rate": 0.00019957980692820578, "lm_loss": 0.00811767578125, "loss": 0.0095, "step": 714, "total_loss": 0.00811767578125 }, { "epoch": 0.29, "learning_rate": 0.000199578629920434, "lm_loss": 0.0203857421875, "loss": 0.0118, "step": 715, "total_loss": 0.0203857421875 }, { "epoch": 0.29, "learning_rate": 0.00019957745126998212, "lm_loss": 0.007415771484375, "loss": 0.0099, "step": 716, "total_loss": 0.007415771484375 }, { "epoch": 0.29, "learning_rate": 0.00019957627097686963, "lm_loss": 0.007049560546875, "loss": 0.0073, "step": 717, "total_loss": 0.007049560546875 }, { "epoch": 0.29, "learning_rate": 0.00019957508904111596, "lm_loss": 0.01708984375, "loss": 0.0085, "step": 718, "total_loss": 0.01708984375 }, { "epoch": 0.29, "learning_rate": 0.0001995739054627407, "lm_loss": 0.01904296875, "loss": 0.0108, "step": 719, "total_loss": 0.01904296875 }, { "epoch": 0.29, "learning_rate": 0.00019957272024176328, "lm_loss": 0.005584716796875, "loss": 0.0115, "step": 720, "total_loss": 0.005584716796875 }, { "epoch": 0.29, "learning_rate": 0.0001995715333782033, "lm_loss": 0.0145263671875, "loss": 0.0139, "step": 721, "total_loss": 0.0145263671875 }, { "epoch": 0.3, "learning_rate": 0.00019957034487208032, "lm_loss": 0.0093994140625, "loss": 0.0103, "step": 722, "total_loss": 0.0093994140625 }, { "epoch": 0.3, "learning_rate": 0.00019956915472341393, "lm_loss": 0.01556396484375, "loss": 0.0111, "step": 723, "total_loss": 0.01556396484375 }, { "epoch": 0.3, "learning_rate": 0.0001995679629322238, "lm_loss": 0.017333984375, "loss": 0.0113, "step": 724, "total_loss": 0.017333984375 }, { "epoch": 0.3, "learning_rate": 0.00019956676949852957, "lm_loss": 0.0169677734375, "loss": 0.0103, "step": 725, "total_loss": 0.0169677734375 }, { "epoch": 0.3, "learning_rate": 0.00019956557442235093, "lm_loss": 0.01324462890625, "loss": 0.0104, "step": 726, "total_loss": 0.01324462890625 }, { "epoch": 0.3, "learning_rate": 0.0001995643777037076, "lm_loss": 0.0113525390625, "loss": 0.0112, "step": 727, "total_loss": 0.0113525390625 }, { "epoch": 0.3, "learning_rate": 0.00019956317934261933, "lm_loss": 0.00946044921875, "loss": 0.0106, "step": 728, "total_loss": 0.00946044921875 }, { "epoch": 0.3, "learning_rate": 0.00019956197933910583, "lm_loss": 0.0086669921875, "loss": 0.0111, "step": 729, "total_loss": 0.0086669921875 }, { "epoch": 0.3, "learning_rate": 0.00019956077769318697, "lm_loss": 0.0106201171875, "loss": 0.009, "step": 730, "total_loss": 0.0106201171875 }, { "epoch": 0.3, "learning_rate": 0.00019955957440488255, "lm_loss": 0.01123046875, "loss": 0.0102, "step": 731, "total_loss": 0.01123046875 }, { "epoch": 0.3, "learning_rate": 0.00019955836947421242, "lm_loss": 0.006805419921875, "loss": 0.0108, "step": 732, "total_loss": 0.006805419921875 }, { "epoch": 0.3, "learning_rate": 0.00019955716290119644, "lm_loss": 0.01177978515625, "loss": 0.0097, "step": 733, "total_loss": 0.01177978515625 }, { "epoch": 0.3, "learning_rate": 0.0001995559546858545, "lm_loss": 0.01092529296875, "loss": 0.0105, "step": 734, "total_loss": 0.01092529296875 }, { "epoch": 0.3, "learning_rate": 0.0001995547448282066, "lm_loss": 0.01190185546875, "loss": 0.0091, "step": 735, "total_loss": 0.01190185546875 }, { "epoch": 0.3, "learning_rate": 0.00019955353332827262, "lm_loss": 0.01031494140625, "loss": 0.0105, "step": 736, "total_loss": 0.01031494140625 }, { "epoch": 0.3, "learning_rate": 0.0001995523201860726, "lm_loss": 0.0130615234375, "loss": 0.0118, "step": 737, "total_loss": 0.0130615234375 }, { "epoch": 0.3, "learning_rate": 0.0001995511054016265, "lm_loss": 0.0096435546875, "loss": 0.008, "step": 738, "total_loss": 0.0096435546875 }, { "epoch": 0.3, "learning_rate": 0.00019954988897495442, "lm_loss": 0.006317138671875, "loss": 0.0089, "step": 739, "total_loss": 0.006317138671875 }, { "epoch": 0.3, "learning_rate": 0.00019954867090607638, "lm_loss": 0.0068359375, "loss": 0.0095, "step": 740, "total_loss": 0.0068359375 }, { "epoch": 0.3, "learning_rate": 0.00019954745119501254, "lm_loss": 0.0084228515625, "loss": 0.0101, "step": 741, "total_loss": 0.0084228515625 }, { "epoch": 0.3, "learning_rate": 0.0001995462298417829, "lm_loss": 0.003997802734375, "loss": 0.0094, "step": 742, "total_loss": 0.003997802734375 }, { "epoch": 0.3, "learning_rate": 0.00019954500684640772, "lm_loss": 0.01275634765625, "loss": 0.0128, "step": 743, "total_loss": 0.01275634765625 }, { "epoch": 0.3, "learning_rate": 0.0001995437822089071, "lm_loss": 0.0106201171875, "loss": 0.0086, "step": 744, "total_loss": 0.0106201171875 }, { "epoch": 0.3, "learning_rate": 0.00019954255592930133, "lm_loss": 0.0093994140625, "loss": 0.0082, "step": 745, "total_loss": 0.0093994140625 }, { "epoch": 0.3, "learning_rate": 0.00019954132800761057, "lm_loss": 0.006927490234375, "loss": 0.0083, "step": 746, "total_loss": 0.006927490234375 }, { "epoch": 0.31, "learning_rate": 0.00019954009844385507, "lm_loss": 0.01068115234375, "loss": 0.0111, "step": 747, "total_loss": 0.01068115234375 }, { "epoch": 0.31, "learning_rate": 0.00019953886723805513, "lm_loss": 0.005157470703125, "loss": 0.0086, "step": 748, "total_loss": 0.005157470703125 }, { "epoch": 0.31, "learning_rate": 0.00019953763439023109, "lm_loss": 0.015380859375, "loss": 0.0111, "step": 749, "total_loss": 0.015380859375 }, { "epoch": 0.31, "learning_rate": 0.00019953639990040323, "lm_loss": 0.00787353515625, "loss": 0.0086, "step": 750, "total_loss": 0.00787353515625 }, { "epoch": 0.31, "learning_rate": 0.00019953516376859198, "lm_loss": 0.0108642578125, "loss": 0.011, "step": 751, "total_loss": 0.0108642578125 }, { "epoch": 0.31, "learning_rate": 0.0001995339259948177, "lm_loss": 0.01513671875, "loss": 0.0096, "step": 752, "total_loss": 0.01513671875 }, { "epoch": 0.31, "learning_rate": 0.00019953268657910077, "lm_loss": 0.004730224609375, "loss": 0.0104, "step": 753, "total_loss": 0.004730224609375 }, { "epoch": 0.31, "learning_rate": 0.00019953144552146173, "lm_loss": 0.011962890625, "loss": 0.0092, "step": 754, "total_loss": 0.011962890625 }, { "epoch": 0.31, "learning_rate": 0.00019953020282192094, "lm_loss": 0.01141357421875, "loss": 0.0086, "step": 755, "total_loss": 0.01141357421875 }, { "epoch": 0.31, "learning_rate": 0.000199528958480499, "lm_loss": 0.016357421875, "loss": 0.0091, "step": 756, "total_loss": 0.016357421875 }, { "epoch": 0.31, "learning_rate": 0.00019952771249721635, "lm_loss": 0.00604248046875, "loss": 0.0099, "step": 757, "total_loss": 0.00604248046875 }, { "epoch": 0.31, "learning_rate": 0.00019952646487209362, "lm_loss": 0.01361083984375, "loss": 0.0115, "step": 758, "total_loss": 0.01361083984375 }, { "epoch": 0.31, "learning_rate": 0.00019952521560515137, "lm_loss": 0.00628662109375, "loss": 0.0101, "step": 759, "total_loss": 0.00628662109375 }, { "epoch": 0.31, "learning_rate": 0.00019952396469641016, "lm_loss": 0.01165771484375, "loss": 0.0102, "step": 760, "total_loss": 0.01165771484375 }, { "epoch": 0.31, "learning_rate": 0.0001995227121458907, "lm_loss": 0.0130615234375, "loss": 0.009, "step": 761, "total_loss": 0.0130615234375 }, { "epoch": 0.31, "learning_rate": 0.0001995214579536136, "lm_loss": 0.01177978515625, "loss": 0.0087, "step": 762, "total_loss": 0.01177978515625 }, { "epoch": 0.31, "learning_rate": 0.00019952020211959953, "lm_loss": 0.0103759765625, "loss": 0.0112, "step": 763, "total_loss": 0.0103759765625 }, { "epoch": 0.31, "learning_rate": 0.00019951894464386927, "lm_loss": 0.01031494140625, "loss": 0.0088, "step": 764, "total_loss": 0.01031494140625 }, { "epoch": 0.31, "learning_rate": 0.00019951768552644355, "lm_loss": 0.004852294921875, "loss": 0.0106, "step": 765, "total_loss": 0.004852294921875 }, { "epoch": 0.31, "learning_rate": 0.0001995164247673431, "lm_loss": 0.01806640625, "loss": 0.0093, "step": 766, "total_loss": 0.01806640625 }, { "epoch": 0.31, "learning_rate": 0.00019951516236658872, "lm_loss": 0.0152587890625, "loss": 0.0127, "step": 767, "total_loss": 0.0152587890625 }, { "epoch": 0.31, "learning_rate": 0.0001995138983242013, "lm_loss": 0.007293701171875, "loss": 0.008, "step": 768, "total_loss": 0.007293701171875 }, { "epoch": 0.31, "learning_rate": 0.00019951263264020167, "lm_loss": 0.00457763671875, "loss": 0.0089, "step": 769, "total_loss": 0.00457763671875 }, { "epoch": 0.31, "learning_rate": 0.00019951136531461066, "lm_loss": 0.00823974609375, "loss": 0.0112, "step": 770, "total_loss": 0.00823974609375 }, { "epoch": 0.32, "learning_rate": 0.00019951009634744922, "lm_loss": 0.0120849609375, "loss": 0.01, "step": 771, "total_loss": 0.0120849609375 }, { "epoch": 0.32, "learning_rate": 0.00019950882573873822, "lm_loss": 0.0107421875, "loss": 0.0103, "step": 772, "total_loss": 0.0107421875 }, { "epoch": 0.32, "learning_rate": 0.0001995075534884987, "lm_loss": 0.01202392578125, "loss": 0.0108, "step": 773, "total_loss": 0.01202392578125 }, { "epoch": 0.32, "learning_rate": 0.00019950627959675163, "lm_loss": 0.01043701171875, "loss": 0.0093, "step": 774, "total_loss": 0.01043701171875 }, { "epoch": 0.32, "learning_rate": 0.00019950500406351802, "lm_loss": 0.0166015625, "loss": 0.0117, "step": 775, "total_loss": 0.0166015625 }, { "epoch": 0.32, "learning_rate": 0.00019950372688881887, "lm_loss": 0.01043701171875, "loss": 0.0093, "step": 776, "total_loss": 0.01043701171875 }, { "epoch": 0.32, "learning_rate": 0.00019950244807267532, "lm_loss": 0.01019287109375, "loss": 0.0101, "step": 777, "total_loss": 0.01019287109375 }, { "epoch": 0.32, "learning_rate": 0.0001995011676151084, "lm_loss": 0.0087890625, "loss": 0.0083, "step": 778, "total_loss": 0.0087890625 }, { "epoch": 0.32, "learning_rate": 0.00019949988551613924, "lm_loss": 0.01422119140625, "loss": 0.0099, "step": 779, "total_loss": 0.01422119140625 }, { "epoch": 0.32, "learning_rate": 0.00019949860177578902, "lm_loss": 0.01141357421875, "loss": 0.0091, "step": 780, "total_loss": 0.01141357421875 }, { "epoch": 0.32, "learning_rate": 0.00019949731639407894, "lm_loss": 0.0084228515625, "loss": 0.0109, "step": 781, "total_loss": 0.0084228515625 }, { "epoch": 0.32, "learning_rate": 0.00019949602937103015, "lm_loss": 0.0147705078125, "loss": 0.009, "step": 782, "total_loss": 0.0147705078125 }, { "epoch": 0.32, "learning_rate": 0.0001994947407066639, "lm_loss": 0.006317138671875, "loss": 0.0114, "step": 783, "total_loss": 0.006317138671875 }, { "epoch": 0.32, "learning_rate": 0.00019949345040100146, "lm_loss": 0.0027313232421875, "loss": 0.0111, "step": 784, "total_loss": 0.0027313232421875 }, { "epoch": 0.32, "learning_rate": 0.0001994921584540641, "lm_loss": 0.0091552734375, "loss": 0.0087, "step": 785, "total_loss": 0.0091552734375 }, { "epoch": 0.32, "learning_rate": 0.00019949086486587315, "lm_loss": 0.00341796875, "loss": 0.0089, "step": 786, "total_loss": 0.00341796875 }, { "epoch": 0.32, "learning_rate": 0.00019948956963644992, "lm_loss": 0.00506591796875, "loss": 0.0081, "step": 787, "total_loss": 0.00506591796875 }, { "epoch": 0.32, "learning_rate": 0.0001994882727658158, "lm_loss": 0.01171875, "loss": 0.0108, "step": 788, "total_loss": 0.01171875 }, { "epoch": 0.32, "learning_rate": 0.00019948697425399217, "lm_loss": 0.00982666015625, "loss": 0.0097, "step": 789, "total_loss": 0.00982666015625 }, { "epoch": 0.32, "learning_rate": 0.00019948567410100045, "lm_loss": 0.00994873046875, "loss": 0.0089, "step": 790, "total_loss": 0.00994873046875 }, { "epoch": 0.32, "learning_rate": 0.0001994843723068621, "lm_loss": 0.01177978515625, "loss": 0.0124, "step": 791, "total_loss": 0.01177978515625 }, { "epoch": 0.32, "learning_rate": 0.0001994830688715986, "lm_loss": 0.01312255859375, "loss": 0.01, "step": 792, "total_loss": 0.01312255859375 }, { "epoch": 0.32, "learning_rate": 0.00019948176379523146, "lm_loss": 0.012451171875, "loss": 0.01, "step": 793, "total_loss": 0.012451171875 }, { "epoch": 0.32, "learning_rate": 0.00019948045707778218, "lm_loss": 0.0115966796875, "loss": 0.0098, "step": 794, "total_loss": 0.0115966796875 }, { "epoch": 0.33, "learning_rate": 0.00019947914871927232, "lm_loss": 0.0191650390625, "loss": 0.0091, "step": 795, "total_loss": 0.0191650390625 }, { "epoch": 0.33, "learning_rate": 0.00019947783871972346, "lm_loss": 0.0062255859375, "loss": 0.0093, "step": 796, "total_loss": 0.0062255859375 }, { "epoch": 0.33, "learning_rate": 0.00019947652707915723, "lm_loss": 0.01220703125, "loss": 0.0117, "step": 797, "total_loss": 0.01220703125 }, { "epoch": 0.33, "learning_rate": 0.00019947521379759525, "lm_loss": 0.005279541015625, "loss": 0.0092, "step": 798, "total_loss": 0.005279541015625 }, { "epoch": 0.33, "learning_rate": 0.00019947389887505922, "lm_loss": 0.0091552734375, "loss": 0.0091, "step": 799, "total_loss": 0.0091552734375 }, { "epoch": 0.33, "learning_rate": 0.00019947258231157078, "lm_loss": 0.01220703125, "loss": 0.0094, "step": 800, "total_loss": 0.01220703125 }, { "epoch": 0.33, "eval_lm_loss": 0.010947369039058685, "eval_loss": 0.011480826884508133, "eval_runtime": 43.9248, "eval_samples_per_second": 22.766, "eval_steps_per_second": 0.205, "eval_total_loss": 0.010947369039058685, "lm_loss": 0.00130462646484375, "step": 800, "total_loss": 0.00130462646484375 }, { "epoch": 0.33, "learning_rate": 0.00019947126410715168, "lm_loss": 0.006591796875, "loss": 0.0111, "step": 801, "total_loss": 0.006591796875 }, { "epoch": 0.33, "learning_rate": 0.00019946994426182365, "lm_loss": 0.0091552734375, "loss": 0.0129, "step": 802, "total_loss": 0.0091552734375 }, { "epoch": 0.33, "learning_rate": 0.0001994686227756085, "lm_loss": 0.01239013671875, "loss": 0.0091, "step": 803, "total_loss": 0.01239013671875 }, { "epoch": 0.33, "learning_rate": 0.00019946729964852796, "lm_loss": 0.00860595703125, "loss": 0.0102, "step": 804, "total_loss": 0.00860595703125 }, { "epoch": 0.33, "learning_rate": 0.0001994659748806039, "lm_loss": 0.0184326171875, "loss": 0.0099, "step": 805, "total_loss": 0.0184326171875 }, { "epoch": 0.33, "learning_rate": 0.0001994646484718582, "lm_loss": 0.00848388671875, "loss": 0.0116, "step": 806, "total_loss": 0.00848388671875 }, { "epoch": 0.33, "learning_rate": 0.0001994633204223127, "lm_loss": 0.011962890625, "loss": 0.0086, "step": 807, "total_loss": 0.011962890625 }, { "epoch": 0.33, "learning_rate": 0.0001994619907319893, "lm_loss": 0.00592041015625, "loss": 0.0091, "step": 808, "total_loss": 0.00592041015625 }, { "epoch": 0.33, "learning_rate": 0.00019946065940090998, "lm_loss": 0.007080078125, "loss": 0.0072, "step": 809, "total_loss": 0.007080078125 }, { "epoch": 0.33, "learning_rate": 0.00019945932642909667, "lm_loss": 0.0185546875, "loss": 0.0103, "step": 810, "total_loss": 0.0185546875 }, { "epoch": 0.33, "learning_rate": 0.00019945799181657133, "lm_loss": 0.01275634765625, "loss": 0.0113, "step": 811, "total_loss": 0.01275634765625 }, { "epoch": 0.33, "learning_rate": 0.00019945665556335605, "lm_loss": 0.003936767578125, "loss": 0.0097, "step": 812, "total_loss": 0.003936767578125 }, { "epoch": 0.33, "learning_rate": 0.0001994553176694728, "lm_loss": 0.00738525390625, "loss": 0.0082, "step": 813, "total_loss": 0.00738525390625 }, { "epoch": 0.33, "learning_rate": 0.00019945397813494374, "lm_loss": 0.01171875, "loss": 0.01, "step": 814, "total_loss": 0.01171875 }, { "epoch": 0.33, "learning_rate": 0.00019945263695979087, "lm_loss": 0.00823974609375, "loss": 0.0098, "step": 815, "total_loss": 0.00823974609375 }, { "epoch": 0.33, "learning_rate": 0.00019945129414403637, "lm_loss": 0.01116943359375, "loss": 0.0091, "step": 816, "total_loss": 0.01116943359375 }, { "epoch": 0.33, "learning_rate": 0.00019944994968770237, "lm_loss": 0.006591796875, "loss": 0.0073, "step": 817, "total_loss": 0.006591796875 }, { "epoch": 0.33, "learning_rate": 0.00019944860359081106, "lm_loss": 0.0137939453125, "loss": 0.0082, "step": 818, "total_loss": 0.0137939453125 }, { "epoch": 0.33, "learning_rate": 0.00019944725585338465, "lm_loss": 0.0101318359375, "loss": 0.0103, "step": 819, "total_loss": 0.0101318359375 }, { "epoch": 0.34, "learning_rate": 0.00019944590647544535, "lm_loss": 0.006439208984375, "loss": 0.0089, "step": 820, "total_loss": 0.006439208984375 }, { "epoch": 0.34, "learning_rate": 0.00019944455545701546, "lm_loss": 0.005279541015625, "loss": 0.0081, "step": 821, "total_loss": 0.005279541015625 }, { "epoch": 0.34, "learning_rate": 0.00019944320279811724, "lm_loss": 0.0096435546875, "loss": 0.0094, "step": 822, "total_loss": 0.0096435546875 }, { "epoch": 0.34, "learning_rate": 0.00019944184849877297, "lm_loss": 0.00836181640625, "loss": 0.0095, "step": 823, "total_loss": 0.00836181640625 }, { "epoch": 0.34, "learning_rate": 0.00019944049255900506, "lm_loss": 0.00537109375, "loss": 0.0092, "step": 824, "total_loss": 0.00537109375 }, { "epoch": 0.34, "learning_rate": 0.00019943913497883582, "lm_loss": 0.00787353515625, "loss": 0.0094, "step": 825, "total_loss": 0.00787353515625 }, { "epoch": 0.34, "learning_rate": 0.0001994377757582877, "lm_loss": 0.00811767578125, "loss": 0.008, "step": 826, "total_loss": 0.00811767578125 }, { "epoch": 0.34, "learning_rate": 0.00019943641489738307, "lm_loss": 0.0062255859375, "loss": 0.0103, "step": 827, "total_loss": 0.0062255859375 }, { "epoch": 0.34, "learning_rate": 0.0001994350523961444, "lm_loss": 0.00714111328125, "loss": 0.0097, "step": 828, "total_loss": 0.00714111328125 }, { "epoch": 0.34, "learning_rate": 0.00019943368825459419, "lm_loss": 0.003753662109375, "loss": 0.0083, "step": 829, "total_loss": 0.003753662109375 }, { "epoch": 0.34, "learning_rate": 0.0001994323224727549, "lm_loss": 0.010009765625, "loss": 0.0098, "step": 830, "total_loss": 0.010009765625 }, { "epoch": 0.34, "learning_rate": 0.00019943095505064909, "lm_loss": 0.0120849609375, "loss": 0.0092, "step": 831, "total_loss": 0.0120849609375 }, { "epoch": 0.34, "learning_rate": 0.0001994295859882993, "lm_loss": 0.00970458984375, "loss": 0.0095, "step": 832, "total_loss": 0.00970458984375 }, { "epoch": 0.34, "learning_rate": 0.00019942821528572813, "lm_loss": 0.005218505859375, "loss": 0.0085, "step": 833, "total_loss": 0.005218505859375 }, { "epoch": 0.34, "learning_rate": 0.00019942684294295816, "lm_loss": 0.00860595703125, "loss": 0.0074, "step": 834, "total_loss": 0.00860595703125 }, { "epoch": 0.34, "learning_rate": 0.00019942546896001208, "lm_loss": 0.01104736328125, "loss": 0.012, "step": 835, "total_loss": 0.01104736328125 }, { "epoch": 0.34, "learning_rate": 0.00019942409333691252, "lm_loss": 0.005950927734375, "loss": 0.0089, "step": 836, "total_loss": 0.005950927734375 }, { "epoch": 0.34, "learning_rate": 0.0001994227160736822, "lm_loss": 0.0078125, "loss": 0.0081, "step": 837, "total_loss": 0.0078125 }, { "epoch": 0.34, "learning_rate": 0.00019942133717034378, "lm_loss": 0.01031494140625, "loss": 0.0115, "step": 838, "total_loss": 0.01031494140625 }, { "epoch": 0.34, "learning_rate": 0.0001994199566269201, "lm_loss": 0.00958251953125, "loss": 0.0089, "step": 839, "total_loss": 0.00958251953125 }, { "epoch": 0.34, "learning_rate": 0.00019941857444343383, "lm_loss": 0.0045166015625, "loss": 0.0087, "step": 840, "total_loss": 0.0045166015625 }, { "epoch": 0.34, "learning_rate": 0.00019941719061990787, "lm_loss": 0.00701904296875, "loss": 0.012, "step": 841, "total_loss": 0.00701904296875 }, { "epoch": 0.34, "learning_rate": 0.00019941580515636496, "lm_loss": 0.0142822265625, "loss": 0.0108, "step": 842, "total_loss": 0.0142822265625 }, { "epoch": 0.34, "learning_rate": 0.000199414418052828, "lm_loss": 0.01007080078125, "loss": 0.0092, "step": 843, "total_loss": 0.01007080078125 }, { "epoch": 0.35, "learning_rate": 0.0001994130293093199, "lm_loss": 0.006561279296875, "loss": 0.0101, "step": 844, "total_loss": 0.006561279296875 }, { "epoch": 0.35, "learning_rate": 0.00019941163892586353, "lm_loss": 0.01177978515625, "loss": 0.01, "step": 845, "total_loss": 0.01177978515625 }, { "epoch": 0.35, "learning_rate": 0.0001994102469024818, "lm_loss": 0.01312255859375, "loss": 0.0101, "step": 846, "total_loss": 0.01312255859375 }, { "epoch": 0.35, "learning_rate": 0.00019940885323919773, "lm_loss": 0.0126953125, "loss": 0.0099, "step": 847, "total_loss": 0.0126953125 }, { "epoch": 0.35, "learning_rate": 0.00019940745793603428, "lm_loss": 0.016357421875, "loss": 0.0104, "step": 848, "total_loss": 0.016357421875 }, { "epoch": 0.35, "learning_rate": 0.0001994060609930145, "lm_loss": 0.0072021484375, "loss": 0.0098, "step": 849, "total_loss": 0.0072021484375 }, { "epoch": 0.35, "learning_rate": 0.00019940466241016137, "lm_loss": 0.007659912109375, "loss": 0.0087, "step": 850, "total_loss": 0.007659912109375 }, { "epoch": 0.35, "learning_rate": 0.00019940326218749802, "lm_loss": 0.01214599609375, "loss": 0.0095, "step": 851, "total_loss": 0.01214599609375 }, { "epoch": 0.35, "learning_rate": 0.00019940186032504751, "lm_loss": 0.00750732421875, "loss": 0.0119, "step": 852, "total_loss": 0.00750732421875 }, { "epoch": 0.35, "learning_rate": 0.000199400456822833, "lm_loss": 0.01092529296875, "loss": 0.0098, "step": 853, "total_loss": 0.01092529296875 }, { "epoch": 0.35, "learning_rate": 0.00019939905168087763, "lm_loss": 0.0108642578125, "loss": 0.0087, "step": 854, "total_loss": 0.0108642578125 }, { "epoch": 0.35, "learning_rate": 0.00019939764489920457, "lm_loss": 0.00787353515625, "loss": 0.0082, "step": 855, "total_loss": 0.00787353515625 }, { "epoch": 0.35, "learning_rate": 0.00019939623647783705, "lm_loss": 0.01129150390625, "loss": 0.0105, "step": 856, "total_loss": 0.01129150390625 }, { "epoch": 0.35, "learning_rate": 0.00019939482641679826, "lm_loss": 0.0045166015625, "loss": 0.0098, "step": 857, "total_loss": 0.0045166015625 }, { "epoch": 0.35, "learning_rate": 0.0001993934147161115, "lm_loss": 0.01336669921875, "loss": 0.0097, "step": 858, "total_loss": 0.01336669921875 }, { "epoch": 0.35, "learning_rate": 0.00019939200137580003, "lm_loss": 0.006500244140625, "loss": 0.0092, "step": 859, "total_loss": 0.006500244140625 }, { "epoch": 0.35, "learning_rate": 0.0001993905863958872, "lm_loss": 0.0084228515625, "loss": 0.0086, "step": 860, "total_loss": 0.0084228515625 }, { "epoch": 0.35, "learning_rate": 0.00019938916977639632, "lm_loss": 0.0040283203125, "loss": 0.0113, "step": 861, "total_loss": 0.0040283203125 }, { "epoch": 0.35, "learning_rate": 0.00019938775151735077, "lm_loss": 0.013916015625, "loss": 0.0085, "step": 862, "total_loss": 0.013916015625 }, { "epoch": 0.35, "learning_rate": 0.000199386331618774, "lm_loss": 0.00958251953125, "loss": 0.0102, "step": 863, "total_loss": 0.00958251953125 }, { "epoch": 0.35, "learning_rate": 0.00019938491008068932, "lm_loss": 0.006378173828125, "loss": 0.0096, "step": 864, "total_loss": 0.006378173828125 }, { "epoch": 0.35, "learning_rate": 0.00019938348690312022, "lm_loss": 0.01373291015625, "loss": 0.0089, "step": 865, "total_loss": 0.01373291015625 }, { "epoch": 0.35, "learning_rate": 0.00019938206208609024, "lm_loss": 0.01019287109375, "loss": 0.0108, "step": 866, "total_loss": 0.01019287109375 }, { "epoch": 0.35, "learning_rate": 0.00019938063562962283, "lm_loss": 0.00885009765625, "loss": 0.009, "step": 867, "total_loss": 0.00885009765625 }, { "epoch": 0.35, "learning_rate": 0.00019937920753374156, "lm_loss": 0.010498046875, "loss": 0.0107, "step": 868, "total_loss": 0.010498046875 }, { "epoch": 0.36, "learning_rate": 0.0001993777777984699, "lm_loss": 0.0167236328125, "loss": 0.0096, "step": 869, "total_loss": 0.0167236328125 }, { "epoch": 0.36, "learning_rate": 0.00019937634642383157, "lm_loss": 0.0150146484375, "loss": 0.0099, "step": 870, "total_loss": 0.0150146484375 }, { "epoch": 0.36, "learning_rate": 0.00019937491340985005, "lm_loss": 0.01324462890625, "loss": 0.0094, "step": 871, "total_loss": 0.01324462890625 }, { "epoch": 0.36, "learning_rate": 0.00019937347875654908, "lm_loss": 0.01068115234375, "loss": 0.0092, "step": 872, "total_loss": 0.01068115234375 }, { "epoch": 0.36, "learning_rate": 0.00019937204246395226, "lm_loss": 0.0146484375, "loss": 0.0104, "step": 873, "total_loss": 0.0146484375 }, { "epoch": 0.36, "learning_rate": 0.00019937060453208334, "lm_loss": 0.00518798828125, "loss": 0.0104, "step": 874, "total_loss": 0.00518798828125 }, { "epoch": 0.36, "learning_rate": 0.00019936916496096597, "lm_loss": 0.01043701171875, "loss": 0.0083, "step": 875, "total_loss": 0.01043701171875 }, { "epoch": 0.36, "learning_rate": 0.00019936772375062396, "lm_loss": 0.0120849609375, "loss": 0.0101, "step": 876, "total_loss": 0.0120849609375 }, { "epoch": 0.36, "learning_rate": 0.00019936628090108104, "lm_loss": 0.0103759765625, "loss": 0.0091, "step": 877, "total_loss": 0.0103759765625 }, { "epoch": 0.36, "learning_rate": 0.00019936483641236105, "lm_loss": 0.0140380859375, "loss": 0.0095, "step": 878, "total_loss": 0.0140380859375 }, { "epoch": 0.36, "learning_rate": 0.00019936339028448781, "lm_loss": 0.0113525390625, "loss": 0.0089, "step": 879, "total_loss": 0.0113525390625 }, { "epoch": 0.36, "learning_rate": 0.00019936194251748518, "lm_loss": 0.0167236328125, "loss": 0.0091, "step": 880, "total_loss": 0.0167236328125 }, { "epoch": 0.36, "learning_rate": 0.000199360493111377, "lm_loss": 0.007415771484375, "loss": 0.0103, "step": 881, "total_loss": 0.007415771484375 }, { "epoch": 0.36, "learning_rate": 0.0001993590420661872, "lm_loss": 0.00836181640625, "loss": 0.0106, "step": 882, "total_loss": 0.00836181640625 }, { "epoch": 0.36, "learning_rate": 0.00019935758938193976, "lm_loss": 0.005950927734375, "loss": 0.0078, "step": 883, "total_loss": 0.005950927734375 }, { "epoch": 0.36, "learning_rate": 0.00019935613505865857, "lm_loss": 0.0089111328125, "loss": 0.0075, "step": 884, "total_loss": 0.0089111328125 }, { "epoch": 0.36, "learning_rate": 0.0001993546790963677, "lm_loss": 0.00872802734375, "loss": 0.0123, "step": 885, "total_loss": 0.00872802734375 }, { "epoch": 0.36, "learning_rate": 0.00019935322149509114, "lm_loss": 0.0054931640625, "loss": 0.0091, "step": 886, "total_loss": 0.0054931640625 }, { "epoch": 0.36, "learning_rate": 0.00019935176225485292, "lm_loss": 0.00970458984375, "loss": 0.0083, "step": 887, "total_loss": 0.00970458984375 }, { "epoch": 0.36, "learning_rate": 0.00019935030137567707, "lm_loss": 0.0123291015625, "loss": 0.0093, "step": 888, "total_loss": 0.0123291015625 }, { "epoch": 0.36, "learning_rate": 0.00019934883885758777, "lm_loss": 0.005767822265625, "loss": 0.0097, "step": 889, "total_loss": 0.005767822265625 }, { "epoch": 0.36, "learning_rate": 0.0001993473747006091, "lm_loss": 0.01409912109375, "loss": 0.0098, "step": 890, "total_loss": 0.01409912109375 }, { "epoch": 0.36, "learning_rate": 0.00019934590890476525, "lm_loss": 0.013671875, "loss": 0.0075, "step": 891, "total_loss": 0.013671875 }, { "epoch": 0.36, "learning_rate": 0.00019934444147008036, "lm_loss": 0.00579833984375, "loss": 0.0087, "step": 892, "total_loss": 0.00579833984375 }, { "epoch": 0.37, "learning_rate": 0.00019934297239657865, "lm_loss": 0.015380859375, "loss": 0.0092, "step": 893, "total_loss": 0.015380859375 }, { "epoch": 0.37, "learning_rate": 0.00019934150168428438, "lm_loss": 0.00714111328125, "loss": 0.0098, "step": 894, "total_loss": 0.00714111328125 }, { "epoch": 0.37, "learning_rate": 0.00019934002933322175, "lm_loss": 0.01348876953125, "loss": 0.0084, "step": 895, "total_loss": 0.01348876953125 }, { "epoch": 0.37, "learning_rate": 0.00019933855534341513, "lm_loss": 0.00750732421875, "loss": 0.009, "step": 896, "total_loss": 0.00750732421875 }, { "epoch": 0.37, "learning_rate": 0.00019933707971488875, "lm_loss": 0.00958251953125, "loss": 0.0102, "step": 897, "total_loss": 0.00958251953125 }, { "epoch": 0.37, "learning_rate": 0.00019933560244766703, "lm_loss": 0.0166015625, "loss": 0.0115, "step": 898, "total_loss": 0.0166015625 }, { "epoch": 0.37, "learning_rate": 0.00019933412354177426, "lm_loss": 0.00592041015625, "loss": 0.0086, "step": 899, "total_loss": 0.00592041015625 }, { "epoch": 0.37, "learning_rate": 0.00019933264299723495, "lm_loss": 0.01202392578125, "loss": 0.0101, "step": 900, "total_loss": 0.01202392578125 }, { "epoch": 0.37, "eval_lm_loss": 0.010967607609927654, "eval_loss": 0.011497192084789276, "eval_runtime": 43.9954, "eval_samples_per_second": 22.73, "eval_steps_per_second": 0.205, "eval_total_loss": 0.010967607609927654, "lm_loss": 0.000972747802734375, "step": 900, "total_loss": 0.000972747802734375 }, { "epoch": 0.37, "learning_rate": 0.00019933116081407342, "lm_loss": 0.01031494140625, "loss": 0.0111, "step": 901, "total_loss": 0.01031494140625 }, { "epoch": 0.37, "learning_rate": 0.00019932967699231414, "lm_loss": 0.0101318359375, "loss": 0.0092, "step": 902, "total_loss": 0.0101318359375 }, { "epoch": 0.37, "learning_rate": 0.0001993281915319816, "lm_loss": 0.00689697265625, "loss": 0.0111, "step": 903, "total_loss": 0.00689697265625 }, { "epoch": 0.37, "learning_rate": 0.00019932670443310033, "lm_loss": 0.0115966796875, "loss": 0.0087, "step": 904, "total_loss": 0.0115966796875 }, { "epoch": 0.37, "learning_rate": 0.00019932521569569485, "lm_loss": 0.006500244140625, "loss": 0.0109, "step": 905, "total_loss": 0.006500244140625 }, { "epoch": 0.37, "learning_rate": 0.00019932372531978966, "lm_loss": 0.00872802734375, "loss": 0.0102, "step": 906, "total_loss": 0.00872802734375 }, { "epoch": 0.37, "learning_rate": 0.00019932223330540946, "lm_loss": 0.0089111328125, "loss": 0.0095, "step": 907, "total_loss": 0.0089111328125 }, { "epoch": 0.37, "learning_rate": 0.00019932073965257872, "lm_loss": 0.012939453125, "loss": 0.0102, "step": 908, "total_loss": 0.012939453125 }, { "epoch": 0.37, "learning_rate": 0.0001993192443613222, "lm_loss": 0.0111083984375, "loss": 0.0092, "step": 909, "total_loss": 0.0111083984375 }, { "epoch": 0.37, "learning_rate": 0.00019931774743166454, "lm_loss": 0.0029754638671875, "loss": 0.012, "step": 910, "total_loss": 0.0029754638671875 }, { "epoch": 0.37, "learning_rate": 0.0001993162488636304, "lm_loss": 0.007049560546875, "loss": 0.0091, "step": 911, "total_loss": 0.007049560546875 }, { "epoch": 0.37, "learning_rate": 0.00019931474865724448, "lm_loss": 0.0130615234375, "loss": 0.0098, "step": 912, "total_loss": 0.0130615234375 }, { "epoch": 0.37, "learning_rate": 0.0001993132468125316, "lm_loss": 0.01043701171875, "loss": 0.01, "step": 913, "total_loss": 0.01043701171875 }, { "epoch": 0.37, "learning_rate": 0.00019931174332951649, "lm_loss": 0.0089111328125, "loss": 0.0099, "step": 914, "total_loss": 0.0089111328125 }, { "epoch": 0.37, "learning_rate": 0.00019931023820822397, "lm_loss": 0.0068359375, "loss": 0.0083, "step": 915, "total_loss": 0.0068359375 }, { "epoch": 0.37, "learning_rate": 0.00019930873144867887, "lm_loss": 0.00738525390625, "loss": 0.0093, "step": 916, "total_loss": 0.00738525390625 }, { "epoch": 0.37, "learning_rate": 0.000199307223050906, "lm_loss": 0.012451171875, "loss": 0.0104, "step": 917, "total_loss": 0.012451171875 }, { "epoch": 0.38, "learning_rate": 0.00019930571301493032, "lm_loss": 0.0126953125, "loss": 0.0102, "step": 918, "total_loss": 0.0126953125 }, { "epoch": 0.38, "learning_rate": 0.0001993042013407767, "lm_loss": 0.00872802734375, "loss": 0.0094, "step": 919, "total_loss": 0.00872802734375 }, { "epoch": 0.38, "learning_rate": 0.00019930268802847005, "lm_loss": 0.020263671875, "loss": 0.0103, "step": 920, "total_loss": 0.020263671875 }, { "epoch": 0.38, "learning_rate": 0.00019930117307803536, "lm_loss": 0.005126953125, "loss": 0.0095, "step": 921, "total_loss": 0.005126953125 }, { "epoch": 0.38, "learning_rate": 0.00019929965648949764, "lm_loss": 0.0103759765625, "loss": 0.0087, "step": 922, "total_loss": 0.0103759765625 }, { "epoch": 0.38, "learning_rate": 0.00019929813826288187, "lm_loss": 0.0135498046875, "loss": 0.008, "step": 923, "total_loss": 0.0135498046875 }, { "epoch": 0.38, "learning_rate": 0.00019929661839821315, "lm_loss": 0.00433349609375, "loss": 0.0088, "step": 924, "total_loss": 0.00433349609375 }, { "epoch": 0.38, "learning_rate": 0.0001992950968955165, "lm_loss": 0.015625, "loss": 0.0106, "step": 925, "total_loss": 0.015625 }, { "epoch": 0.38, "learning_rate": 0.00019929357375481703, "lm_loss": 0.01361083984375, "loss": 0.0097, "step": 926, "total_loss": 0.01361083984375 }, { "epoch": 0.38, "learning_rate": 0.00019929204897613988, "lm_loss": 0.005401611328125, "loss": 0.0086, "step": 927, "total_loss": 0.005401611328125 }, { "epoch": 0.38, "learning_rate": 0.0001992905225595102, "lm_loss": 0.0031890869140625, "loss": 0.0097, "step": 928, "total_loss": 0.0031890869140625 }, { "epoch": 0.38, "learning_rate": 0.00019928899450495315, "lm_loss": 0.009521484375, "loss": 0.01, "step": 929, "total_loss": 0.009521484375 }, { "epoch": 0.38, "learning_rate": 0.00019928746481249397, "lm_loss": 0.0130615234375, "loss": 0.0092, "step": 930, "total_loss": 0.0130615234375 }, { "epoch": 0.38, "learning_rate": 0.00019928593348215786, "lm_loss": 0.00579833984375, "loss": 0.0105, "step": 931, "total_loss": 0.00579833984375 }, { "epoch": 0.38, "learning_rate": 0.0001992844005139701, "lm_loss": 0.00787353515625, "loss": 0.0076, "step": 932, "total_loss": 0.00787353515625 }, { "epoch": 0.38, "learning_rate": 0.00019928286590795602, "lm_loss": 0.01409912109375, "loss": 0.01, "step": 933, "total_loss": 0.01409912109375 }, { "epoch": 0.38, "learning_rate": 0.00019928132966414084, "lm_loss": 0.00616455078125, "loss": 0.0088, "step": 934, "total_loss": 0.00616455078125 }, { "epoch": 0.38, "learning_rate": 0.00019927979178255, "lm_loss": 0.005584716796875, "loss": 0.0102, "step": 935, "total_loss": 0.005584716796875 }, { "epoch": 0.38, "learning_rate": 0.00019927825226320877, "lm_loss": 0.01202392578125, "loss": 0.0077, "step": 936, "total_loss": 0.01202392578125 }, { "epoch": 0.38, "learning_rate": 0.00019927671110614266, "lm_loss": 0.0068359375, "loss": 0.008, "step": 937, "total_loss": 0.0068359375 }, { "epoch": 0.38, "learning_rate": 0.000199275168311377, "lm_loss": 0.01129150390625, "loss": 0.0079, "step": 938, "total_loss": 0.01129150390625 }, { "epoch": 0.38, "learning_rate": 0.00019927362387893725, "lm_loss": 0.01361083984375, "loss": 0.0108, "step": 939, "total_loss": 0.01361083984375 }, { "epoch": 0.38, "learning_rate": 0.00019927207780884895, "lm_loss": 0.00775146484375, "loss": 0.0086, "step": 940, "total_loss": 0.00775146484375 }, { "epoch": 0.38, "learning_rate": 0.00019927053010113758, "lm_loss": 0.00958251953125, "loss": 0.0089, "step": 941, "total_loss": 0.00958251953125 }, { "epoch": 0.39, "learning_rate": 0.00019926898075582863, "lm_loss": 0.00299072265625, "loss": 0.0086, "step": 942, "total_loss": 0.00299072265625 }, { "epoch": 0.39, "learning_rate": 0.0001992674297729477, "lm_loss": 0.00445556640625, "loss": 0.0095, "step": 943, "total_loss": 0.00445556640625 }, { "epoch": 0.39, "learning_rate": 0.00019926587715252033, "lm_loss": 0.01025390625, "loss": 0.01, "step": 944, "total_loss": 0.01025390625 }, { "epoch": 0.39, "learning_rate": 0.0001992643228945722, "lm_loss": 0.00848388671875, "loss": 0.01, "step": 945, "total_loss": 0.00848388671875 }, { "epoch": 0.39, "learning_rate": 0.0001992627669991289, "lm_loss": 0.00628662109375, "loss": 0.0099, "step": 946, "total_loss": 0.00628662109375 }, { "epoch": 0.39, "learning_rate": 0.00019926120946621613, "lm_loss": 0.021240234375, "loss": 0.0098, "step": 947, "total_loss": 0.021240234375 }, { "epoch": 0.39, "learning_rate": 0.00019925965029585956, "lm_loss": 0.01318359375, "loss": 0.008, "step": 948, "total_loss": 0.01318359375 }, { "epoch": 0.39, "learning_rate": 0.0001992580894880849, "lm_loss": 0.009033203125, "loss": 0.0088, "step": 949, "total_loss": 0.009033203125 }, { "epoch": 0.39, "learning_rate": 0.0001992565270429179, "lm_loss": 0.0264892578125, "loss": 0.0101, "step": 950, "total_loss": 0.0264892578125 }, { "epoch": 0.39, "learning_rate": 0.00019925496296038437, "lm_loss": 0.011962890625, "loss": 0.0089, "step": 951, "total_loss": 0.011962890625 }, { "epoch": 0.39, "learning_rate": 0.00019925339724051008, "lm_loss": 0.01336669921875, "loss": 0.0095, "step": 952, "total_loss": 0.01336669921875 }, { "epoch": 0.39, "learning_rate": 0.0001992518298833209, "lm_loss": 0.00933837890625, "loss": 0.009, "step": 953, "total_loss": 0.00933837890625 }, { "epoch": 0.39, "learning_rate": 0.0001992502608888426, "lm_loss": 0.006866455078125, "loss": 0.0095, "step": 954, "total_loss": 0.006866455078125 }, { "epoch": 0.39, "learning_rate": 0.00019924869025710114, "lm_loss": 0.0086669921875, "loss": 0.0083, "step": 955, "total_loss": 0.0086669921875 }, { "epoch": 0.39, "learning_rate": 0.00019924711798812242, "lm_loss": 0.004119873046875, "loss": 0.0098, "step": 956, "total_loss": 0.004119873046875 }, { "epoch": 0.39, "learning_rate": 0.00019924554408193233, "lm_loss": 0.018798828125, "loss": 0.0088, "step": 957, "total_loss": 0.018798828125 }, { "epoch": 0.39, "learning_rate": 0.00019924396853855688, "lm_loss": 0.01116943359375, "loss": 0.0083, "step": 958, "total_loss": 0.01116943359375 }, { "epoch": 0.39, "learning_rate": 0.00019924239135802205, "lm_loss": 0.0069580078125, "loss": 0.0089, "step": 959, "total_loss": 0.0069580078125 }, { "epoch": 0.39, "learning_rate": 0.00019924081254035384, "lm_loss": 0.01177978515625, "loss": 0.0118, "step": 960, "total_loss": 0.01177978515625 }, { "epoch": 0.39, "learning_rate": 0.00019923923208557833, "lm_loss": 0.00958251953125, "loss": 0.0087, "step": 961, "total_loss": 0.00958251953125 }, { "epoch": 0.39, "learning_rate": 0.00019923764999372153, "lm_loss": 0.014404296875, "loss": 0.0108, "step": 962, "total_loss": 0.014404296875 }, { "epoch": 0.39, "learning_rate": 0.00019923606626480963, "lm_loss": 0.0034027099609375, "loss": 0.0092, "step": 963, "total_loss": 0.0034027099609375 }, { "epoch": 0.39, "learning_rate": 0.0001992344808988687, "lm_loss": 0.00531005859375, "loss": 0.0085, "step": 964, "total_loss": 0.00531005859375 }, { "epoch": 0.39, "learning_rate": 0.00019923289389592487, "lm_loss": 0.0028839111328125, "loss": 0.0088, "step": 965, "total_loss": 0.0028839111328125 }, { "epoch": 0.39, "learning_rate": 0.00019923130525600436, "lm_loss": 0.0145263671875, "loss": 0.0107, "step": 966, "total_loss": 0.0145263671875 }, { "epoch": 0.4, "learning_rate": 0.00019922971497913334, "lm_loss": 0.0084228515625, "loss": 0.0094, "step": 967, "total_loss": 0.0084228515625 }, { "epoch": 0.4, "learning_rate": 0.00019922812306533808, "lm_loss": 0.006103515625, "loss": 0.0088, "step": 968, "total_loss": 0.006103515625 }, { "epoch": 0.4, "learning_rate": 0.00019922652951464485, "lm_loss": 0.0029296875, "loss": 0.0084, "step": 969, "total_loss": 0.0029296875 }, { "epoch": 0.4, "learning_rate": 0.00019922493432707989, "lm_loss": 0.019287109375, "loss": 0.0091, "step": 970, "total_loss": 0.019287109375 }, { "epoch": 0.4, "learning_rate": 0.00019922333750266958, "lm_loss": 0.007781982421875, "loss": 0.0091, "step": 971, "total_loss": 0.007781982421875 }, { "epoch": 0.4, "learning_rate": 0.00019922173904144018, "lm_loss": 0.0167236328125, "loss": 0.0106, "step": 972, "total_loss": 0.0167236328125 }, { "epoch": 0.4, "learning_rate": 0.0001992201389434181, "lm_loss": 0.0120849609375, "loss": 0.0093, "step": 973, "total_loss": 0.0120849609375 }, { "epoch": 0.4, "learning_rate": 0.00019921853720862974, "lm_loss": 0.004241943359375, "loss": 0.0068, "step": 974, "total_loss": 0.004241943359375 }, { "epoch": 0.4, "learning_rate": 0.00019921693383710153, "lm_loss": 0.0093994140625, "loss": 0.0086, "step": 975, "total_loss": 0.0093994140625 }, { "epoch": 0.4, "learning_rate": 0.0001992153288288599, "lm_loss": 0.01123046875, "loss": 0.0089, "step": 976, "total_loss": 0.01123046875 }, { "epoch": 0.4, "learning_rate": 0.00019921372218393134, "lm_loss": 0.00628662109375, "loss": 0.009, "step": 977, "total_loss": 0.00628662109375 }, { "epoch": 0.4, "learning_rate": 0.00019921211390234235, "lm_loss": 0.009033203125, "loss": 0.0099, "step": 978, "total_loss": 0.009033203125 }, { "epoch": 0.4, "learning_rate": 0.00019921050398411948, "lm_loss": 0.0133056640625, "loss": 0.0106, "step": 979, "total_loss": 0.0133056640625 }, { "epoch": 0.4, "learning_rate": 0.00019920889242928925, "lm_loss": 0.00616455078125, "loss": 0.0092, "step": 980, "total_loss": 0.00616455078125 }, { "epoch": 0.4, "learning_rate": 0.00019920727923787825, "lm_loss": 0.012939453125, "loss": 0.007, "step": 981, "total_loss": 0.012939453125 }, { "epoch": 0.4, "learning_rate": 0.00019920566440991312, "lm_loss": 0.01025390625, "loss": 0.0084, "step": 982, "total_loss": 0.01025390625 }, { "epoch": 0.4, "learning_rate": 0.00019920404794542047, "lm_loss": 0.019287109375, "loss": 0.0102, "step": 983, "total_loss": 0.019287109375 }, { "epoch": 0.4, "learning_rate": 0.000199202429844427, "lm_loss": 0.0166015625, "loss": 0.0104, "step": 984, "total_loss": 0.0166015625 }, { "epoch": 0.4, "learning_rate": 0.00019920081010695937, "lm_loss": 0.005340576171875, "loss": 0.008, "step": 985, "total_loss": 0.005340576171875 }, { "epoch": 0.4, "learning_rate": 0.0001991991887330443, "lm_loss": 0.00982666015625, "loss": 0.0078, "step": 986, "total_loss": 0.00982666015625 }, { "epoch": 0.4, "learning_rate": 0.00019919756572270856, "lm_loss": 0.010009765625, "loss": 0.0111, "step": 987, "total_loss": 0.010009765625 }, { "epoch": 0.4, "learning_rate": 0.0001991959410759789, "lm_loss": 0.006134033203125, "loss": 0.0089, "step": 988, "total_loss": 0.006134033203125 }, { "epoch": 0.4, "learning_rate": 0.00019919431479288214, "lm_loss": 0.00927734375, "loss": 0.0095, "step": 989, "total_loss": 0.00927734375 }, { "epoch": 0.4, "learning_rate": 0.0001991926868734451, "lm_loss": 0.01446533203125, "loss": 0.0099, "step": 990, "total_loss": 0.01446533203125 }, { "epoch": 0.41, "learning_rate": 0.00019919105731769463, "lm_loss": 0.00787353515625, "loss": 0.0088, "step": 991, "total_loss": 0.00787353515625 }, { "epoch": 0.41, "learning_rate": 0.00019918942612565763, "lm_loss": 0.00872802734375, "loss": 0.0084, "step": 992, "total_loss": 0.00872802734375 }, { "epoch": 0.41, "learning_rate": 0.00019918779329736096, "lm_loss": 0.00677490234375, "loss": 0.0085, "step": 993, "total_loss": 0.00677490234375 }, { "epoch": 0.41, "learning_rate": 0.00019918615883283164, "lm_loss": 0.00946044921875, "loss": 0.0094, "step": 994, "total_loss": 0.00946044921875 }, { "epoch": 0.41, "learning_rate": 0.00019918452273209655, "lm_loss": 0.00714111328125, "loss": 0.008, "step": 995, "total_loss": 0.00714111328125 }, { "epoch": 0.41, "learning_rate": 0.00019918288499518272, "lm_loss": 0.00885009765625, "loss": 0.0086, "step": 996, "total_loss": 0.00885009765625 }, { "epoch": 0.41, "learning_rate": 0.00019918124562211714, "lm_loss": 0.00860595703125, "loss": 0.0099, "step": 997, "total_loss": 0.00860595703125 }, { "epoch": 0.41, "learning_rate": 0.00019917960461292693, "lm_loss": 0.0115966796875, "loss": 0.0086, "step": 998, "total_loss": 0.0115966796875 }, { "epoch": 0.41, "learning_rate": 0.00019917796196763904, "lm_loss": 0.01025390625, "loss": 0.0105, "step": 999, "total_loss": 0.01025390625 }, { "epoch": 0.41, "learning_rate": 0.00019917631768628068, "lm_loss": 0.00860595703125, "loss": 0.009, "step": 1000, "total_loss": 0.00860595703125 }, { "epoch": 0.41, "eval_lm_loss": 0.01070873811841011, "eval_loss": 0.011119727976620197, "eval_runtime": 43.981, "eval_samples_per_second": 22.737, "eval_steps_per_second": 0.205, "eval_total_loss": 0.01070873811841011, "lm_loss": 0.00102996826171875, "step": 1000, "total_loss": 0.00102996826171875 }, { "epoch": 0.41, "learning_rate": 0.0001991746717688789, "lm_loss": 0.005706787109375, "loss": 0.01, "step": 1001, "total_loss": 0.005706787109375 }, { "epoch": 0.41, "learning_rate": 0.00019917302421546088, "lm_loss": 0.0086669921875, "loss": 0.0103, "step": 1002, "total_loss": 0.0086669921875 }, { "epoch": 0.41, "learning_rate": 0.00019917137502605377, "lm_loss": 0.007476806640625, "loss": 0.0076, "step": 1003, "total_loss": 0.007476806640625 }, { "epoch": 0.41, "learning_rate": 0.00019916972420068482, "lm_loss": 0.007293701171875, "loss": 0.0085, "step": 1004, "total_loss": 0.007293701171875 }, { "epoch": 0.41, "learning_rate": 0.00019916807173938126, "lm_loss": 0.0098876953125, "loss": 0.0077, "step": 1005, "total_loss": 0.0098876953125 }, { "epoch": 0.41, "learning_rate": 0.00019916641764217033, "lm_loss": 0.00604248046875, "loss": 0.0098, "step": 1006, "total_loss": 0.00604248046875 }, { "epoch": 0.41, "learning_rate": 0.00019916476190907928, "lm_loss": 0.01531982421875, "loss": 0.0091, "step": 1007, "total_loss": 0.01531982421875 }, { "epoch": 0.41, "learning_rate": 0.0001991631045401355, "lm_loss": 0.01214599609375, "loss": 0.0094, "step": 1008, "total_loss": 0.01214599609375 }, { "epoch": 0.41, "learning_rate": 0.0001991614455353663, "lm_loss": 0.004638671875, "loss": 0.0084, "step": 1009, "total_loss": 0.004638671875 }, { "epoch": 0.41, "learning_rate": 0.00019915978489479903, "lm_loss": 0.00927734375, "loss": 0.009, "step": 1010, "total_loss": 0.00927734375 }, { "epoch": 0.41, "learning_rate": 0.0001991581226184611, "lm_loss": 0.0093994140625, "loss": 0.0088, "step": 1011, "total_loss": 0.0093994140625 }, { "epoch": 0.41, "learning_rate": 0.00019915645870637993, "lm_loss": 0.01220703125, "loss": 0.0094, "step": 1012, "total_loss": 0.01220703125 }, { "epoch": 0.41, "learning_rate": 0.00019915479315858297, "lm_loss": 0.0106201171875, "loss": 0.0091, "step": 1013, "total_loss": 0.0106201171875 }, { "epoch": 0.41, "learning_rate": 0.00019915312597509766, "lm_loss": 0.018310546875, "loss": 0.0106, "step": 1014, "total_loss": 0.018310546875 }, { "epoch": 0.41, "learning_rate": 0.00019915145715595153, "lm_loss": 0.00927734375, "loss": 0.0092, "step": 1015, "total_loss": 0.00927734375 }, { "epoch": 0.42, "learning_rate": 0.00019914978670117214, "lm_loss": 0.00836181640625, "loss": 0.0094, "step": 1016, "total_loss": 0.00836181640625 }, { "epoch": 0.42, "learning_rate": 0.00019914811461078702, "lm_loss": 0.0089111328125, "loss": 0.0078, "step": 1017, "total_loss": 0.0089111328125 }, { "epoch": 0.42, "learning_rate": 0.00019914644088482375, "lm_loss": 0.01025390625, "loss": 0.0097, "step": 1018, "total_loss": 0.01025390625 }, { "epoch": 0.42, "learning_rate": 0.00019914476552330995, "lm_loss": 0.0098876953125, "loss": 0.0103, "step": 1019, "total_loss": 0.0098876953125 }, { "epoch": 0.42, "learning_rate": 0.00019914308852627324, "lm_loss": 0.006256103515625, "loss": 0.0098, "step": 1020, "total_loss": 0.006256103515625 }, { "epoch": 0.42, "learning_rate": 0.00019914140989374127, "lm_loss": 0.0120849609375, "loss": 0.0102, "step": 1021, "total_loss": 0.0120849609375 }, { "epoch": 0.42, "learning_rate": 0.0001991397296257418, "lm_loss": 0.018798828125, "loss": 0.0099, "step": 1022, "total_loss": 0.018798828125 }, { "epoch": 0.42, "learning_rate": 0.00019913804772230247, "lm_loss": 0.0120849609375, "loss": 0.0109, "step": 1023, "total_loss": 0.0120849609375 }, { "epoch": 0.42, "learning_rate": 0.00019913636418345107, "lm_loss": 0.00787353515625, "loss": 0.0098, "step": 1024, "total_loss": 0.00787353515625 }, { "epoch": 0.42, "learning_rate": 0.00019913467900921537, "lm_loss": 0.0208740234375, "loss": 0.0089, "step": 1025, "total_loss": 0.0208740234375 }, { "epoch": 0.42, "learning_rate": 0.00019913299219962314, "lm_loss": 0.004486083984375, "loss": 0.0077, "step": 1026, "total_loss": 0.004486083984375 }, { "epoch": 0.42, "learning_rate": 0.00019913130375470223, "lm_loss": 0.01348876953125, "loss": 0.0096, "step": 1027, "total_loss": 0.01348876953125 }, { "epoch": 0.42, "learning_rate": 0.0001991296136744805, "lm_loss": 0.01318359375, "loss": 0.0068, "step": 1028, "total_loss": 0.01318359375 }, { "epoch": 0.42, "learning_rate": 0.0001991279219589858, "lm_loss": 0.00848388671875, "loss": 0.0086, "step": 1029, "total_loss": 0.00848388671875 }, { "epoch": 0.42, "learning_rate": 0.00019912622860824607, "lm_loss": 0.01068115234375, "loss": 0.0091, "step": 1030, "total_loss": 0.01068115234375 }, { "epoch": 0.42, "learning_rate": 0.00019912453362228925, "lm_loss": 0.0096435546875, "loss": 0.0112, "step": 1031, "total_loss": 0.0096435546875 }, { "epoch": 0.42, "learning_rate": 0.00019912283700114324, "lm_loss": 0.0087890625, "loss": 0.0102, "step": 1032, "total_loss": 0.0087890625 }, { "epoch": 0.42, "learning_rate": 0.0001991211387448361, "lm_loss": 0.009765625, "loss": 0.0087, "step": 1033, "total_loss": 0.009765625 }, { "epoch": 0.42, "learning_rate": 0.0001991194388533958, "lm_loss": 0.00494384765625, "loss": 0.0086, "step": 1034, "total_loss": 0.00494384765625 }, { "epoch": 0.42, "learning_rate": 0.0001991177373268504, "lm_loss": 0.007568359375, "loss": 0.0093, "step": 1035, "total_loss": 0.007568359375 }, { "epoch": 0.42, "learning_rate": 0.00019911603416522796, "lm_loss": 0.01513671875, "loss": 0.0084, "step": 1036, "total_loss": 0.01513671875 }, { "epoch": 0.42, "learning_rate": 0.00019911432936855662, "lm_loss": 0.01251220703125, "loss": 0.0105, "step": 1037, "total_loss": 0.01251220703125 }, { "epoch": 0.42, "learning_rate": 0.0001991126229368644, "lm_loss": 0.00946044921875, "loss": 0.009, "step": 1038, "total_loss": 0.00946044921875 }, { "epoch": 0.42, "learning_rate": 0.00019911091487017957, "lm_loss": 0.01007080078125, "loss": 0.0065, "step": 1039, "total_loss": 0.01007080078125 }, { "epoch": 0.43, "learning_rate": 0.00019910920516853024, "lm_loss": 0.00848388671875, "loss": 0.0097, "step": 1040, "total_loss": 0.00848388671875 }, { "epoch": 0.43, "learning_rate": 0.0001991074938319446, "lm_loss": 0.0162353515625, "loss": 0.0094, "step": 1041, "total_loss": 0.0162353515625 }, { "epoch": 0.43, "learning_rate": 0.00019910578086045094, "lm_loss": 0.00799560546875, "loss": 0.0083, "step": 1042, "total_loss": 0.00799560546875 }, { "epoch": 0.43, "learning_rate": 0.00019910406625407744, "lm_loss": 0.013671875, "loss": 0.009, "step": 1043, "total_loss": 0.013671875 }, { "epoch": 0.43, "learning_rate": 0.00019910235001285246, "lm_loss": 0.006866455078125, "loss": 0.0078, "step": 1044, "total_loss": 0.006866455078125 }, { "epoch": 0.43, "learning_rate": 0.00019910063213680426, "lm_loss": 0.006988525390625, "loss": 0.0072, "step": 1045, "total_loss": 0.006988525390625 }, { "epoch": 0.43, "learning_rate": 0.00019909891262596122, "lm_loss": 0.01104736328125, "loss": 0.0099, "step": 1046, "total_loss": 0.01104736328125 }, { "epoch": 0.43, "learning_rate": 0.00019909719148035167, "lm_loss": 0.012451171875, "loss": 0.0079, "step": 1047, "total_loss": 0.012451171875 }, { "epoch": 0.43, "learning_rate": 0.00019909546870000398, "lm_loss": 0.0196533203125, "loss": 0.0096, "step": 1048, "total_loss": 0.0196533203125 }, { "epoch": 0.43, "learning_rate": 0.00019909374428494668, "lm_loss": 0.006866455078125, "loss": 0.0111, "step": 1049, "total_loss": 0.006866455078125 }, { "epoch": 0.43, "learning_rate": 0.0001990920182352081, "lm_loss": 0.019287109375, "loss": 0.011, "step": 1050, "total_loss": 0.019287109375 }, { "epoch": 0.43, "learning_rate": 0.00019909029055081674, "lm_loss": 0.006439208984375, "loss": 0.0077, "step": 1051, "total_loss": 0.006439208984375 }, { "epoch": 0.43, "learning_rate": 0.00019908856123180115, "lm_loss": 0.00933837890625, "loss": 0.008, "step": 1052, "total_loss": 0.00933837890625 }, { "epoch": 0.43, "learning_rate": 0.0001990868302781898, "lm_loss": 0.007171630859375, "loss": 0.0109, "step": 1053, "total_loss": 0.007171630859375 }, { "epoch": 0.43, "learning_rate": 0.00019908509769001127, "lm_loss": 0.0027923583984375, "loss": 0.0089, "step": 1054, "total_loss": 0.0027923583984375 }, { "epoch": 0.43, "learning_rate": 0.00019908336346729412, "lm_loss": 0.003814697265625, "loss": 0.0101, "step": 1055, "total_loss": 0.003814697265625 }, { "epoch": 0.43, "learning_rate": 0.000199081627610067, "lm_loss": 0.01171875, "loss": 0.0118, "step": 1056, "total_loss": 0.01171875 }, { "epoch": 0.43, "learning_rate": 0.00019907989011835856, "lm_loss": 0.024658203125, "loss": 0.0096, "step": 1057, "total_loss": 0.024658203125 }, { "epoch": 0.43, "learning_rate": 0.00019907815099219736, "lm_loss": 0.020751953125, "loss": 0.0077, "step": 1058, "total_loss": 0.020751953125 }, { "epoch": 0.43, "learning_rate": 0.00019907641023161218, "lm_loss": 0.007049560546875, "loss": 0.0083, "step": 1059, "total_loss": 0.007049560546875 }, { "epoch": 0.43, "learning_rate": 0.00019907466783663174, "lm_loss": 0.00811767578125, "loss": 0.011, "step": 1060, "total_loss": 0.00811767578125 }, { "epoch": 0.43, "learning_rate": 0.00019907292380728472, "lm_loss": 0.0079345703125, "loss": 0.0218, "step": 1061, "total_loss": 0.0079345703125 }, { "epoch": 0.43, "learning_rate": 0.00019907117814359993, "lm_loss": 0.0074462890625, "loss": 0.0098, "step": 1062, "total_loss": 0.0074462890625 }, { "epoch": 0.43, "learning_rate": 0.00019906943084560617, "lm_loss": 0.00811767578125, "loss": 0.0091, "step": 1063, "total_loss": 0.00811767578125 }, { "epoch": 0.43, "learning_rate": 0.00019906768191333223, "lm_loss": 0.006744384765625, "loss": 0.0086, "step": 1064, "total_loss": 0.006744384765625 }, { "epoch": 0.44, "learning_rate": 0.00019906593134680697, "lm_loss": 0.00787353515625, "loss": 0.0098, "step": 1065, "total_loss": 0.00787353515625 }, { "epoch": 0.44, "learning_rate": 0.00019906417914605933, "lm_loss": 0.0079345703125, "loss": 0.0104, "step": 1066, "total_loss": 0.0079345703125 }, { "epoch": 0.44, "learning_rate": 0.00019906242531111815, "lm_loss": 0.01116943359375, "loss": 0.0088, "step": 1067, "total_loss": 0.01116943359375 }, { "epoch": 0.44, "learning_rate": 0.00019906066984201238, "lm_loss": 0.004608154296875, "loss": 0.0088, "step": 1068, "total_loss": 0.004608154296875 }, { "epoch": 0.44, "learning_rate": 0.000199058912738771, "lm_loss": 0.01348876953125, "loss": 0.0103, "step": 1069, "total_loss": 0.01348876953125 }, { "epoch": 0.44, "learning_rate": 0.0001990571540014229, "lm_loss": 0.00823974609375, "loss": 0.0102, "step": 1070, "total_loss": 0.00823974609375 }, { "epoch": 0.44, "learning_rate": 0.00019905539362999723, "lm_loss": 0.00836181640625, "loss": 0.0077, "step": 1071, "total_loss": 0.00836181640625 }, { "epoch": 0.44, "learning_rate": 0.00019905363162452298, "lm_loss": 0.0078125, "loss": 0.0089, "step": 1072, "total_loss": 0.0078125 }, { "epoch": 0.44, "learning_rate": 0.00019905186798502915, "lm_loss": 0.0198974609375, "loss": 0.0104, "step": 1073, "total_loss": 0.0198974609375 }, { "epoch": 0.44, "learning_rate": 0.0001990501027115449, "lm_loss": 0.007354736328125, "loss": 0.0098, "step": 1074, "total_loss": 0.007354736328125 }, { "epoch": 0.44, "learning_rate": 0.00019904833580409935, "lm_loss": 0.006927490234375, "loss": 0.0073, "step": 1075, "total_loss": 0.006927490234375 }, { "epoch": 0.44, "learning_rate": 0.00019904656726272166, "lm_loss": 0.009521484375, "loss": 0.0079, "step": 1076, "total_loss": 0.009521484375 }, { "epoch": 0.44, "learning_rate": 0.00019904479708744095, "lm_loss": 0.01318359375, "loss": 0.0078, "step": 1077, "total_loss": 0.01318359375 }, { "epoch": 0.44, "learning_rate": 0.00019904302527828645, "lm_loss": 0.006256103515625, "loss": 0.0109, "step": 1078, "total_loss": 0.006256103515625 }, { "epoch": 0.44, "learning_rate": 0.00019904125183528739, "lm_loss": 0.01300048828125, "loss": 0.0092, "step": 1079, "total_loss": 0.01300048828125 }, { "epoch": 0.44, "learning_rate": 0.00019903947675847304, "lm_loss": 0.00933837890625, "loss": 0.0085, "step": 1080, "total_loss": 0.00933837890625 }, { "epoch": 0.44, "learning_rate": 0.00019903770004787264, "lm_loss": 0.0128173828125, "loss": 0.0113, "step": 1081, "total_loss": 0.0128173828125 }, { "epoch": 0.44, "learning_rate": 0.00019903592170351553, "lm_loss": 0.00732421875, "loss": 0.0092, "step": 1082, "total_loss": 0.00732421875 }, { "epoch": 0.44, "learning_rate": 0.00019903414172543104, "lm_loss": 0.0159912109375, "loss": 0.0107, "step": 1083, "total_loss": 0.0159912109375 }, { "epoch": 0.44, "learning_rate": 0.00019903236011364854, "lm_loss": 0.00555419921875, "loss": 0.0074, "step": 1084, "total_loss": 0.00555419921875 }, { "epoch": 0.44, "learning_rate": 0.00019903057686819744, "lm_loss": 0.0086669921875, "loss": 0.012, "step": 1085, "total_loss": 0.0086669921875 }, { "epoch": 0.44, "learning_rate": 0.0001990287919891071, "lm_loss": 0.01055908203125, "loss": 0.0104, "step": 1086, "total_loss": 0.01055908203125 }, { "epoch": 0.44, "learning_rate": 0.000199027005476407, "lm_loss": 0.00567626953125, "loss": 0.0104, "step": 1087, "total_loss": 0.00567626953125 }, { "epoch": 0.44, "learning_rate": 0.0001990252173301266, "lm_loss": 0.0108642578125, "loss": 0.012, "step": 1088, "total_loss": 0.0108642578125 }, { "epoch": 0.45, "learning_rate": 0.00019902342755029543, "lm_loss": 0.01336669921875, "loss": 0.0109, "step": 1089, "total_loss": 0.01336669921875 }, { "epoch": 0.45, "learning_rate": 0.00019902163613694294, "lm_loss": 0.01483154296875, "loss": 0.0083, "step": 1090, "total_loss": 0.01483154296875 }, { "epoch": 0.45, "learning_rate": 0.00019901984309009878, "lm_loss": 0.004302978515625, "loss": 0.0097, "step": 1091, "total_loss": 0.004302978515625 }, { "epoch": 0.45, "learning_rate": 0.00019901804840979245, "lm_loss": 0.01214599609375, "loss": 0.0086, "step": 1092, "total_loss": 0.01214599609375 }, { "epoch": 0.45, "learning_rate": 0.00019901625209605358, "lm_loss": 0.02587890625, "loss": 0.0099, "step": 1093, "total_loss": 0.02587890625 }, { "epoch": 0.45, "learning_rate": 0.00019901445414891185, "lm_loss": 0.00994873046875, "loss": 0.0086, "step": 1094, "total_loss": 0.00994873046875 }, { "epoch": 0.45, "learning_rate": 0.00019901265456839684, "lm_loss": 0.003082275390625, "loss": 0.0091, "step": 1095, "total_loss": 0.003082275390625 }, { "epoch": 0.45, "learning_rate": 0.00019901085335453827, "lm_loss": 0.0057373046875, "loss": 0.0108, "step": 1096, "total_loss": 0.0057373046875 }, { "epoch": 0.45, "learning_rate": 0.00019900905050736589, "lm_loss": 0.0128173828125, "loss": 0.0079, "step": 1097, "total_loss": 0.0128173828125 }, { "epoch": 0.45, "learning_rate": 0.0001990072460269094, "lm_loss": 0.0084228515625, "loss": 0.0079, "step": 1098, "total_loss": 0.0084228515625 }, { "epoch": 0.45, "learning_rate": 0.00019900543991319854, "lm_loss": 0.00860595703125, "loss": 0.0095, "step": 1099, "total_loss": 0.00860595703125 }, { "epoch": 0.45, "learning_rate": 0.00019900363216626316, "lm_loss": 0.0089111328125, "loss": 0.0103, "step": 1100, "total_loss": 0.0089111328125 }, { "epoch": 0.45, "eval_lm_loss": 0.01112065464258194, "eval_loss": 0.011493873782455921, "eval_runtime": 44.0156, "eval_samples_per_second": 22.719, "eval_steps_per_second": 0.204, "eval_total_loss": 0.01112065464258194, "lm_loss": 0.000705718994140625, "step": 1100, "total_loss": 0.000705718994140625 }, { "epoch": 0.45, "learning_rate": 0.00019900182278613306, "lm_loss": 0.00921630859375, "loss": 0.0099, "step": 1101, "total_loss": 0.00921630859375 }, { "epoch": 0.45, "learning_rate": 0.0001990000117728381, "lm_loss": 0.0089111328125, "loss": 0.0082, "step": 1102, "total_loss": 0.0089111328125 }, { "epoch": 0.45, "learning_rate": 0.00019899819912640813, "lm_loss": 0.006805419921875, "loss": 0.0084, "step": 1103, "total_loss": 0.006805419921875 }, { "epoch": 0.45, "learning_rate": 0.00019899638484687304, "lm_loss": 0.0057373046875, "loss": 0.0096, "step": 1104, "total_loss": 0.0057373046875 }, { "epoch": 0.45, "learning_rate": 0.00019899456893426282, "lm_loss": 0.01104736328125, "loss": 0.0092, "step": 1105, "total_loss": 0.01104736328125 }, { "epoch": 0.45, "learning_rate": 0.0001989927513886074, "lm_loss": 0.0216064453125, "loss": 0.0131, "step": 1106, "total_loss": 0.0216064453125 }, { "epoch": 0.45, "learning_rate": 0.00019899093220993673, "lm_loss": 0.0126953125, "loss": 0.0084, "step": 1107, "total_loss": 0.0126953125 }, { "epoch": 0.45, "learning_rate": 0.00019898911139828082, "lm_loss": 0.0067138671875, "loss": 0.0071, "step": 1108, "total_loss": 0.0067138671875 }, { "epoch": 0.45, "learning_rate": 0.00019898728895366975, "lm_loss": 0.004364013671875, "loss": 0.0088, "step": 1109, "total_loss": 0.004364013671875 }, { "epoch": 0.45, "learning_rate": 0.00019898546487613357, "lm_loss": 0.0067138671875, "loss": 0.0093, "step": 1110, "total_loss": 0.0067138671875 }, { "epoch": 0.45, "learning_rate": 0.00019898363916570234, "lm_loss": 0.0028228759765625, "loss": 0.009, "step": 1111, "total_loss": 0.0028228759765625 }, { "epoch": 0.45, "learning_rate": 0.00019898181182240624, "lm_loss": 0.00982666015625, "loss": 0.0091, "step": 1112, "total_loss": 0.00982666015625 }, { "epoch": 0.46, "learning_rate": 0.00019897998284627532, "lm_loss": 0.003326416015625, "loss": 0.0085, "step": 1113, "total_loss": 0.003326416015625 }, { "epoch": 0.46, "learning_rate": 0.00019897815223733983, "lm_loss": 0.01239013671875, "loss": 0.0103, "step": 1114, "total_loss": 0.01239013671875 }, { "epoch": 0.46, "learning_rate": 0.00019897631999562994, "lm_loss": 0.01275634765625, "loss": 0.0082, "step": 1115, "total_loss": 0.01275634765625 }, { "epoch": 0.46, "learning_rate": 0.00019897448612117593, "lm_loss": 0.01446533203125, "loss": 0.0099, "step": 1116, "total_loss": 0.01446533203125 }, { "epoch": 0.46, "learning_rate": 0.00019897265061400793, "lm_loss": 0.0078125, "loss": 0.0073, "step": 1117, "total_loss": 0.0078125 }, { "epoch": 0.46, "learning_rate": 0.00019897081347415634, "lm_loss": 0.0133056640625, "loss": 0.0075, "step": 1118, "total_loss": 0.0133056640625 }, { "epoch": 0.46, "learning_rate": 0.0001989689747016514, "lm_loss": 0.008056640625, "loss": 0.0104, "step": 1119, "total_loss": 0.008056640625 }, { "epoch": 0.46, "learning_rate": 0.00019896713429652345, "lm_loss": 0.00738525390625, "loss": 0.0106, "step": 1120, "total_loss": 0.00738525390625 }, { "epoch": 0.46, "learning_rate": 0.00019896529225880288, "lm_loss": 0.007720947265625, "loss": 0.0083, "step": 1121, "total_loss": 0.007720947265625 }, { "epoch": 0.46, "learning_rate": 0.00019896344858852003, "lm_loss": 0.01220703125, "loss": 0.0099, "step": 1122, "total_loss": 0.01220703125 }, { "epoch": 0.46, "learning_rate": 0.00019896160328570538, "lm_loss": 0.0106201171875, "loss": 0.0083, "step": 1123, "total_loss": 0.0106201171875 }, { "epoch": 0.46, "learning_rate": 0.0001989597563503893, "lm_loss": 0.007476806640625, "loss": 0.0121, "step": 1124, "total_loss": 0.007476806640625 }, { "epoch": 0.46, "learning_rate": 0.0001989579077826023, "lm_loss": 0.004241943359375, "loss": 0.0098, "step": 1125, "total_loss": 0.004241943359375 }, { "epoch": 0.46, "learning_rate": 0.00019895605758237485, "lm_loss": 0.00714111328125, "loss": 0.0088, "step": 1126, "total_loss": 0.00714111328125 }, { "epoch": 0.46, "learning_rate": 0.0001989542057497375, "lm_loss": 0.007110595703125, "loss": 0.012, "step": 1127, "total_loss": 0.007110595703125 }, { "epoch": 0.46, "learning_rate": 0.00019895235228472078, "lm_loss": 0.00823974609375, "loss": 0.0103, "step": 1128, "total_loss": 0.00823974609375 }, { "epoch": 0.46, "learning_rate": 0.00019895049718735526, "lm_loss": 0.00787353515625, "loss": 0.0086, "step": 1129, "total_loss": 0.00787353515625 }, { "epoch": 0.46, "learning_rate": 0.00019894864045767158, "lm_loss": 0.00543212890625, "loss": 0.0089, "step": 1130, "total_loss": 0.00543212890625 }, { "epoch": 0.46, "learning_rate": 0.00019894678209570027, "lm_loss": 0.00799560546875, "loss": 0.0085, "step": 1131, "total_loss": 0.00799560546875 }, { "epoch": 0.46, "learning_rate": 0.00019894492210147213, "lm_loss": 0.01092529296875, "loss": 0.0097, "step": 1132, "total_loss": 0.01092529296875 }, { "epoch": 0.46, "learning_rate": 0.00019894306047501772, "lm_loss": 0.0091552734375, "loss": 0.0095, "step": 1133, "total_loss": 0.0091552734375 }, { "epoch": 0.46, "learning_rate": 0.00019894119721636783, "lm_loss": 0.0103759765625, "loss": 0.0111, "step": 1134, "total_loss": 0.0103759765625 }, { "epoch": 0.46, "learning_rate": 0.00019893933232555314, "lm_loss": 0.00408935546875, "loss": 0.0089, "step": 1135, "total_loss": 0.00408935546875 }, { "epoch": 0.46, "learning_rate": 0.00019893746580260446, "lm_loss": 0.0098876953125, "loss": 0.0086, "step": 1136, "total_loss": 0.0098876953125 }, { "epoch": 0.46, "learning_rate": 0.00019893559764755256, "lm_loss": 0.004730224609375, "loss": 0.0104, "step": 1137, "total_loss": 0.004730224609375 }, { "epoch": 0.47, "learning_rate": 0.00019893372786042827, "lm_loss": 0.0186767578125, "loss": 0.0103, "step": 1138, "total_loss": 0.0186767578125 }, { "epoch": 0.47, "learning_rate": 0.0001989318564412624, "lm_loss": 0.003936767578125, "loss": 0.0085, "step": 1139, "total_loss": 0.003936767578125 }, { "epoch": 0.47, "learning_rate": 0.00019892998339008587, "lm_loss": 0.00750732421875, "loss": 0.011, "step": 1140, "total_loss": 0.00750732421875 }, { "epoch": 0.47, "learning_rate": 0.00019892810870692952, "lm_loss": 0.0098876953125, "loss": 0.0093, "step": 1141, "total_loss": 0.0098876953125 }, { "epoch": 0.47, "learning_rate": 0.0001989262323918243, "lm_loss": 0.00799560546875, "loss": 0.0079, "step": 1142, "total_loss": 0.00799560546875 }, { "epoch": 0.47, "learning_rate": 0.0001989243544448012, "lm_loss": 0.0089111328125, "loss": 0.0085, "step": 1143, "total_loss": 0.0089111328125 }, { "epoch": 0.47, "learning_rate": 0.00019892247486589116, "lm_loss": 0.0086669921875, "loss": 0.0116, "step": 1144, "total_loss": 0.0086669921875 }, { "epoch": 0.47, "learning_rate": 0.00019892059365512522, "lm_loss": 0.010009765625, "loss": 0.0076, "step": 1145, "total_loss": 0.010009765625 }, { "epoch": 0.47, "learning_rate": 0.00019891871081253434, "lm_loss": 0.0111083984375, "loss": 0.0072, "step": 1146, "total_loss": 0.0111083984375 }, { "epoch": 0.47, "learning_rate": 0.00019891682633814964, "lm_loss": 0.00653076171875, "loss": 0.0082, "step": 1147, "total_loss": 0.00653076171875 }, { "epoch": 0.47, "learning_rate": 0.00019891494023200224, "lm_loss": 0.01507568359375, "loss": 0.0117, "step": 1148, "total_loss": 0.01507568359375 }, { "epoch": 0.47, "learning_rate": 0.00019891305249412317, "lm_loss": 0.00872802734375, "loss": 0.0073, "step": 1149, "total_loss": 0.00872802734375 }, { "epoch": 0.47, "learning_rate": 0.00019891116312454362, "lm_loss": 0.0155029296875, "loss": 0.0086, "step": 1150, "total_loss": 0.0155029296875 }, { "epoch": 0.47, "learning_rate": 0.00019890927212329475, "lm_loss": 0.01336669921875, "loss": 0.01, "step": 1151, "total_loss": 0.01336669921875 }, { "epoch": 0.47, "learning_rate": 0.00019890737949040775, "lm_loss": 0.004791259765625, "loss": 0.0068, "step": 1152, "total_loss": 0.004791259765625 }, { "epoch": 0.47, "learning_rate": 0.00019890548522591385, "lm_loss": 0.006622314453125, "loss": 0.0087, "step": 1153, "total_loss": 0.006622314453125 }, { "epoch": 0.47, "learning_rate": 0.0001989035893298443, "lm_loss": 0.0108642578125, "loss": 0.0074, "step": 1154, "total_loss": 0.0108642578125 }, { "epoch": 0.47, "learning_rate": 0.00019890169180223033, "lm_loss": 0.004180908203125, "loss": 0.0074, "step": 1155, "total_loss": 0.004180908203125 }, { "epoch": 0.47, "learning_rate": 0.0001988997926431033, "lm_loss": 0.00628662109375, "loss": 0.0084, "step": 1156, "total_loss": 0.00628662109375 }, { "epoch": 0.47, "learning_rate": 0.00019889789185249451, "lm_loss": 0.00823974609375, "loss": 0.0108, "step": 1157, "total_loss": 0.00823974609375 }, { "epoch": 0.47, "learning_rate": 0.00019889598943043535, "lm_loss": 0.004608154296875, "loss": 0.0098, "step": 1158, "total_loss": 0.004608154296875 }, { "epoch": 0.47, "learning_rate": 0.00019889408537695716, "lm_loss": 0.0069580078125, "loss": 0.0075, "step": 1159, "total_loss": 0.0069580078125 }, { "epoch": 0.47, "learning_rate": 0.00019889217969209139, "lm_loss": 0.0155029296875, "loss": 0.0093, "step": 1160, "total_loss": 0.0155029296875 }, { "epoch": 0.47, "learning_rate": 0.0001988902723758694, "lm_loss": 0.014892578125, "loss": 0.0097, "step": 1161, "total_loss": 0.014892578125 }, { "epoch": 0.48, "learning_rate": 0.00019888836342832277, "lm_loss": 0.007049560546875, "loss": 0.0093, "step": 1162, "total_loss": 0.007049560546875 }, { "epoch": 0.48, "learning_rate": 0.00019888645284948288, "lm_loss": 0.0177001953125, "loss": 0.0089, "step": 1163, "total_loss": 0.0177001953125 }, { "epoch": 0.48, "learning_rate": 0.00019888454063938133, "lm_loss": 0.005096435546875, "loss": 0.0083, "step": 1164, "total_loss": 0.005096435546875 }, { "epoch": 0.48, "learning_rate": 0.0001988826267980496, "lm_loss": 0.0031890869140625, "loss": 0.0097, "step": 1165, "total_loss": 0.0031890869140625 }, { "epoch": 0.48, "learning_rate": 0.00019888071132551934, "lm_loss": 0.00885009765625, "loss": 0.0086, "step": 1166, "total_loss": 0.00885009765625 }, { "epoch": 0.48, "learning_rate": 0.00019887879422182206, "lm_loss": 0.006591796875, "loss": 0.0084, "step": 1167, "total_loss": 0.006591796875 }, { "epoch": 0.48, "learning_rate": 0.00019887687548698942, "lm_loss": 0.009521484375, "loss": 0.0099, "step": 1168, "total_loss": 0.009521484375 }, { "epoch": 0.48, "learning_rate": 0.0001988749551210531, "lm_loss": 0.0086669921875, "loss": 0.0087, "step": 1169, "total_loss": 0.0086669921875 }, { "epoch": 0.48, "learning_rate": 0.00019887303312404476, "lm_loss": 0.00787353515625, "loss": 0.009, "step": 1170, "total_loss": 0.00787353515625 }, { "epoch": 0.48, "learning_rate": 0.00019887110949599608, "lm_loss": 0.0034027099609375, "loss": 0.0076, "step": 1171, "total_loss": 0.0034027099609375 }, { "epoch": 0.48, "learning_rate": 0.0001988691842369388, "lm_loss": 0.0067138671875, "loss": 0.01, "step": 1172, "total_loss": 0.0067138671875 }, { "epoch": 0.48, "learning_rate": 0.00019886725734690472, "lm_loss": 0.0045166015625, "loss": 0.0076, "step": 1173, "total_loss": 0.0045166015625 }, { "epoch": 0.48, "learning_rate": 0.0001988653288259256, "lm_loss": 0.0137939453125, "loss": 0.0084, "step": 1174, "total_loss": 0.0137939453125 }, { "epoch": 0.48, "learning_rate": 0.00019886339867403326, "lm_loss": 0.007293701171875, "loss": 0.0093, "step": 1175, "total_loss": 0.007293701171875 }, { "epoch": 0.48, "learning_rate": 0.00019886146689125953, "lm_loss": 0.01434326171875, "loss": 0.0117, "step": 1176, "total_loss": 0.01434326171875 }, { "epoch": 0.48, "learning_rate": 0.0001988595334776363, "lm_loss": 0.006500244140625, "loss": 0.009, "step": 1177, "total_loss": 0.006500244140625 }, { "epoch": 0.48, "learning_rate": 0.00019885759843319542, "lm_loss": 0.017822265625, "loss": 0.0086, "step": 1178, "total_loss": 0.017822265625 }, { "epoch": 0.48, "learning_rate": 0.00019885566175796882, "lm_loss": 0.007293701171875, "loss": 0.0084, "step": 1179, "total_loss": 0.007293701171875 }, { "epoch": 0.48, "learning_rate": 0.0001988537234519885, "lm_loss": 0.00445556640625, "loss": 0.0083, "step": 1180, "total_loss": 0.00445556640625 }, { "epoch": 0.48, "learning_rate": 0.00019885178351528642, "lm_loss": 0.00787353515625, "loss": 0.0091, "step": 1181, "total_loss": 0.00787353515625 }, { "epoch": 0.48, "learning_rate": 0.0001988498419478945, "lm_loss": 0.0164794921875, "loss": 0.0083, "step": 1182, "total_loss": 0.0164794921875 }, { "epoch": 0.48, "learning_rate": 0.00019884789874984485, "lm_loss": 0.00921630859375, "loss": 0.0078, "step": 1183, "total_loss": 0.00921630859375 }, { "epoch": 0.48, "learning_rate": 0.00019884595392116955, "lm_loss": 0.01300048828125, "loss": 0.0092, "step": 1184, "total_loss": 0.01300048828125 }, { "epoch": 0.48, "learning_rate": 0.00019884400746190059, "lm_loss": 0.0054931640625, "loss": 0.0079, "step": 1185, "total_loss": 0.0054931640625 }, { "epoch": 0.48, "learning_rate": 0.00019884205937207016, "lm_loss": 0.00836181640625, "loss": 0.011, "step": 1186, "total_loss": 0.00836181640625 }, { "epoch": 0.49, "learning_rate": 0.00019884010965171033, "lm_loss": 0.01239013671875, "loss": 0.0093, "step": 1187, "total_loss": 0.01239013671875 }, { "epoch": 0.49, "learning_rate": 0.00019883815830085334, "lm_loss": 0.0081787109375, "loss": 0.0081, "step": 1188, "total_loss": 0.0081787109375 }, { "epoch": 0.49, "learning_rate": 0.00019883620531953132, "lm_loss": 0.00689697265625, "loss": 0.0092, "step": 1189, "total_loss": 0.00689697265625 }, { "epoch": 0.49, "learning_rate": 0.0001988342507077765, "lm_loss": 0.01214599609375, "loss": 0.008, "step": 1190, "total_loss": 0.01214599609375 }, { "epoch": 0.49, "learning_rate": 0.00019883229446562113, "lm_loss": 0.003936767578125, "loss": 0.0081, "step": 1191, "total_loss": 0.003936767578125 }, { "epoch": 0.49, "learning_rate": 0.0001988303365930975, "lm_loss": 0.01104736328125, "loss": 0.0088, "step": 1192, "total_loss": 0.01104736328125 }, { "epoch": 0.49, "learning_rate": 0.00019882837709023785, "lm_loss": 0.0078125, "loss": 0.007, "step": 1193, "total_loss": 0.0078125 }, { "epoch": 0.49, "learning_rate": 0.00019882641595707457, "lm_loss": 0.01220703125, "loss": 0.0089, "step": 1194, "total_loss": 0.01220703125 }, { "epoch": 0.49, "learning_rate": 0.00019882445319363998, "lm_loss": 0.00909423828125, "loss": 0.0087, "step": 1195, "total_loss": 0.00909423828125 }, { "epoch": 0.49, "learning_rate": 0.00019882248879996643, "lm_loss": 0.0093994140625, "loss": 0.0098, "step": 1196, "total_loss": 0.0093994140625 }, { "epoch": 0.49, "learning_rate": 0.0001988205227760864, "lm_loss": 0.00830078125, "loss": 0.0088, "step": 1197, "total_loss": 0.00830078125 }, { "epoch": 0.49, "learning_rate": 0.00019881855512203224, "lm_loss": 0.003326416015625, "loss": 0.0078, "step": 1198, "total_loss": 0.003326416015625 }, { "epoch": 0.49, "learning_rate": 0.00019881658583783648, "lm_loss": 0.0050048828125, "loss": 0.0111, "step": 1199, "total_loss": 0.0050048828125 }, { "epoch": 0.49, "learning_rate": 0.00019881461492353157, "lm_loss": 0.01190185546875, "loss": 0.0103, "step": 1200, "total_loss": 0.01190185546875 }, { "epoch": 0.49, "eval_lm_loss": 0.010570013895630836, "eval_loss": 0.011091976426541805, "eval_runtime": 44.0922, "eval_samples_per_second": 22.68, "eval_steps_per_second": 0.204, "eval_total_loss": 0.010570013895630836, "lm_loss": 0.0012359619140625, "step": 1200, "total_loss": 0.0012359619140625 }, { "epoch": 0.49, "learning_rate": 0.00019881264237915002, "lm_loss": 0.007537841796875, "loss": 0.0077, "step": 1201, "total_loss": 0.007537841796875 }, { "epoch": 0.49, "learning_rate": 0.00019881066820472438, "lm_loss": 0.007232666015625, "loss": 0.0084, "step": 1202, "total_loss": 0.007232666015625 }, { "epoch": 0.49, "learning_rate": 0.00019880869240028722, "lm_loss": 0.005279541015625, "loss": 0.0094, "step": 1203, "total_loss": 0.005279541015625 }, { "epoch": 0.49, "learning_rate": 0.0001988067149658711, "lm_loss": 0.0035247802734375, "loss": 0.0089, "step": 1204, "total_loss": 0.0035247802734375 }, { "epoch": 0.49, "learning_rate": 0.00019880473590150867, "lm_loss": 0.0098876953125, "loss": 0.0075, "step": 1205, "total_loss": 0.0098876953125 }, { "epoch": 0.49, "learning_rate": 0.0001988027552072326, "lm_loss": 0.00421142578125, "loss": 0.0109, "step": 1206, "total_loss": 0.00421142578125 }, { "epoch": 0.49, "learning_rate": 0.00019880077288307553, "lm_loss": 0.0166015625, "loss": 0.0087, "step": 1207, "total_loss": 0.0166015625 }, { "epoch": 0.49, "learning_rate": 0.00019879878892907016, "lm_loss": 0.0096435546875, "loss": 0.0101, "step": 1208, "total_loss": 0.0096435546875 }, { "epoch": 0.49, "learning_rate": 0.00019879680334524924, "lm_loss": 0.01544189453125, "loss": 0.0094, "step": 1209, "total_loss": 0.01544189453125 }, { "epoch": 0.49, "learning_rate": 0.0001987948161316455, "lm_loss": 0.00701904296875, "loss": 0.0085, "step": 1210, "total_loss": 0.00701904296875 }, { "epoch": 0.5, "learning_rate": 0.00019879282728829174, "lm_loss": 0.0087890625, "loss": 0.0082, "step": 1211, "total_loss": 0.0087890625 }, { "epoch": 0.5, "learning_rate": 0.00019879083681522075, "lm_loss": 0.01116943359375, "loss": 0.0103, "step": 1212, "total_loss": 0.01116943359375 }, { "epoch": 0.5, "learning_rate": 0.0001987888447124654, "lm_loss": 0.006195068359375, "loss": 0.0093, "step": 1213, "total_loss": 0.006195068359375 }, { "epoch": 0.5, "learning_rate": 0.0001987868509800585, "lm_loss": 0.007568359375, "loss": 0.0087, "step": 1214, "total_loss": 0.007568359375 }, { "epoch": 0.5, "learning_rate": 0.000198784855618033, "lm_loss": 0.0164794921875, "loss": 0.0096, "step": 1215, "total_loss": 0.0164794921875 }, { "epoch": 0.5, "learning_rate": 0.00019878285862642178, "lm_loss": 0.007354736328125, "loss": 0.0087, "step": 1216, "total_loss": 0.007354736328125 }, { "epoch": 0.5, "learning_rate": 0.00019878086000525778, "lm_loss": 0.0167236328125, "loss": 0.0093, "step": 1217, "total_loss": 0.0167236328125 }, { "epoch": 0.5, "learning_rate": 0.00019877885975457397, "lm_loss": 0.00811767578125, "loss": 0.0078, "step": 1218, "total_loss": 0.00811767578125 }, { "epoch": 0.5, "learning_rate": 0.00019877685787440336, "lm_loss": 0.004974365234375, "loss": 0.0083, "step": 1219, "total_loss": 0.004974365234375 }, { "epoch": 0.5, "learning_rate": 0.000198774854364779, "lm_loss": 0.0189208984375, "loss": 0.01, "step": 1220, "total_loss": 0.0189208984375 }, { "epoch": 0.5, "learning_rate": 0.00019877284922573387, "lm_loss": 0.005828857421875, "loss": 0.009, "step": 1221, "total_loss": 0.005828857421875 }, { "epoch": 0.5, "learning_rate": 0.0001987708424573011, "lm_loss": 0.004241943359375, "loss": 0.0096, "step": 1222, "total_loss": 0.004241943359375 }, { "epoch": 0.5, "learning_rate": 0.00019876883405951377, "lm_loss": 0.00714111328125, "loss": 0.0092, "step": 1223, "total_loss": 0.00714111328125 }, { "epoch": 0.5, "learning_rate": 0.00019876682403240504, "lm_loss": 0.003173828125, "loss": 0.0071, "step": 1224, "total_loss": 0.003173828125 }, { "epoch": 0.5, "learning_rate": 0.00019876481237600805, "lm_loss": 0.0150146484375, "loss": 0.0096, "step": 1225, "total_loss": 0.0150146484375 }, { "epoch": 0.5, "learning_rate": 0.00019876279909035598, "lm_loss": 0.00909423828125, "loss": 0.01, "step": 1226, "total_loss": 0.00909423828125 }, { "epoch": 0.5, "learning_rate": 0.00019876078417548206, "lm_loss": 0.01165771484375, "loss": 0.0094, "step": 1227, "total_loss": 0.01165771484375 }, { "epoch": 0.5, "learning_rate": 0.0001987587676314195, "lm_loss": 0.00482177734375, "loss": 0.0079, "step": 1228, "total_loss": 0.00482177734375 }, { "epoch": 0.5, "learning_rate": 0.00019875674945820157, "lm_loss": 0.00823974609375, "loss": 0.0093, "step": 1229, "total_loss": 0.00823974609375 }, { "epoch": 0.5, "learning_rate": 0.00019875472965586157, "lm_loss": 0.01287841796875, "loss": 0.0105, "step": 1230, "total_loss": 0.01287841796875 }, { "epoch": 0.5, "learning_rate": 0.00019875270822443286, "lm_loss": 0.015869140625, "loss": 0.0109, "step": 1231, "total_loss": 0.015869140625 }, { "epoch": 0.5, "learning_rate": 0.00019875068516394872, "lm_loss": 0.0125732421875, "loss": 0.0086, "step": 1232, "total_loss": 0.0125732421875 }, { "epoch": 0.5, "learning_rate": 0.00019874866047444255, "lm_loss": 0.01708984375, "loss": 0.0084, "step": 1233, "total_loss": 0.01708984375 }, { "epoch": 0.5, "learning_rate": 0.00019874663415594778, "lm_loss": 0.01251220703125, "loss": 0.0105, "step": 1234, "total_loss": 0.01251220703125 }, { "epoch": 0.5, "learning_rate": 0.0001987446062084978, "lm_loss": 0.004669189453125, "loss": 0.0087, "step": 1235, "total_loss": 0.004669189453125 }, { "epoch": 0.51, "learning_rate": 0.00019874257663212604, "lm_loss": 0.00494384765625, "loss": 0.0078, "step": 1236, "total_loss": 0.00494384765625 }, { "epoch": 0.51, "learning_rate": 0.00019874054542686604, "lm_loss": 0.00958251953125, "loss": 0.0088, "step": 1237, "total_loss": 0.00958251953125 }, { "epoch": 0.51, "learning_rate": 0.00019873851259275127, "lm_loss": 0.0118408203125, "loss": 0.0078, "step": 1238, "total_loss": 0.0118408203125 }, { "epoch": 0.51, "learning_rate": 0.00019873647812981528, "lm_loss": 0.00909423828125, "loss": 0.0106, "step": 1239, "total_loss": 0.00909423828125 }, { "epoch": 0.51, "learning_rate": 0.00019873444203809165, "lm_loss": 0.00946044921875, "loss": 0.0088, "step": 1240, "total_loss": 0.00946044921875 }, { "epoch": 0.51, "learning_rate": 0.00019873240431761392, "lm_loss": 0.01263427734375, "loss": 0.0094, "step": 1241, "total_loss": 0.01263427734375 }, { "epoch": 0.51, "learning_rate": 0.0001987303649684157, "lm_loss": 0.00604248046875, "loss": 0.0095, "step": 1242, "total_loss": 0.00604248046875 }, { "epoch": 0.51, "learning_rate": 0.0001987283239905307, "lm_loss": 0.007080078125, "loss": 0.0089, "step": 1243, "total_loss": 0.007080078125 }, { "epoch": 0.51, "learning_rate": 0.00019872628138399253, "lm_loss": 0.008544921875, "loss": 0.0079, "step": 1244, "total_loss": 0.008544921875 }, { "epoch": 0.51, "learning_rate": 0.00019872423714883492, "lm_loss": 0.005462646484375, "loss": 0.0088, "step": 1245, "total_loss": 0.005462646484375 }, { "epoch": 0.51, "learning_rate": 0.00019872219128509158, "lm_loss": 0.0050048828125, "loss": 0.0089, "step": 1246, "total_loss": 0.0050048828125 }, { "epoch": 0.51, "learning_rate": 0.00019872014379279624, "lm_loss": 0.0113525390625, "loss": 0.0086, "step": 1247, "total_loss": 0.0113525390625 }, { "epoch": 0.51, "learning_rate": 0.0001987180946719827, "lm_loss": 0.01214599609375, "loss": 0.0079, "step": 1248, "total_loss": 0.01214599609375 }, { "epoch": 0.51, "learning_rate": 0.00019871604392268476, "lm_loss": 0.0093994140625, "loss": 0.009, "step": 1249, "total_loss": 0.0093994140625 }, { "epoch": 0.51, "learning_rate": 0.0001987139915449362, "lm_loss": 0.004608154296875, "loss": 0.0071, "step": 1250, "total_loss": 0.004608154296875 }, { "epoch": 0.51, "learning_rate": 0.00019871193753877097, "lm_loss": 0.01312255859375, "loss": 0.0079, "step": 1251, "total_loss": 0.01312255859375 }, { "epoch": 0.51, "learning_rate": 0.00019870988190422292, "lm_loss": 0.003082275390625, "loss": 0.009, "step": 1252, "total_loss": 0.003082275390625 }, { "epoch": 0.51, "learning_rate": 0.0001987078246413259, "lm_loss": 0.0184326171875, "loss": 0.0094, "step": 1253, "total_loss": 0.0184326171875 }, { "epoch": 0.51, "learning_rate": 0.0001987057657501139, "lm_loss": 0.0103759765625, "loss": 0.0092, "step": 1254, "total_loss": 0.0103759765625 }, { "epoch": 0.51, "learning_rate": 0.0001987037052306209, "lm_loss": 0.00994873046875, "loss": 0.0102, "step": 1255, "total_loss": 0.00994873046875 }, { "epoch": 0.51, "learning_rate": 0.00019870164308288083, "lm_loss": 0.00592041015625, "loss": 0.0094, "step": 1256, "total_loss": 0.00592041015625 }, { "epoch": 0.51, "learning_rate": 0.0001986995793069278, "lm_loss": 0.004852294921875, "loss": 0.0073, "step": 1257, "total_loss": 0.004852294921875 }, { "epoch": 0.51, "learning_rate": 0.00019869751390279576, "lm_loss": 0.00921630859375, "loss": 0.0089, "step": 1258, "total_loss": 0.00921630859375 }, { "epoch": 0.51, "learning_rate": 0.00019869544687051884, "lm_loss": 0.00787353515625, "loss": 0.0069, "step": 1259, "total_loss": 0.00787353515625 }, { "epoch": 0.52, "learning_rate": 0.0001986933782101311, "lm_loss": 0.0072021484375, "loss": 0.0126, "step": 1260, "total_loss": 0.0072021484375 }, { "epoch": 0.52, "learning_rate": 0.0001986913079216667, "lm_loss": 0.01019287109375, "loss": 0.0092, "step": 1261, "total_loss": 0.01019287109375 }, { "epoch": 0.52, "learning_rate": 0.00019868923600515977, "lm_loss": 0.0034637451171875, "loss": 0.0092, "step": 1262, "total_loss": 0.0034637451171875 }, { "epoch": 0.52, "learning_rate": 0.0001986871624606445, "lm_loss": 0.02001953125, "loss": 0.0084, "step": 1263, "total_loss": 0.02001953125 }, { "epoch": 0.52, "learning_rate": 0.00019868508728815512, "lm_loss": 0.017822265625, "loss": 0.011, "step": 1264, "total_loss": 0.017822265625 }, { "epoch": 0.52, "learning_rate": 0.0001986830104877258, "lm_loss": 0.00848388671875, "loss": 0.0068, "step": 1265, "total_loss": 0.00848388671875 }, { "epoch": 0.52, "learning_rate": 0.00019868093205939084, "lm_loss": 0.0023956298828125, "loss": 0.0075, "step": 1266, "total_loss": 0.0023956298828125 }, { "epoch": 0.52, "learning_rate": 0.0001986788520031845, "lm_loss": 0.0025177001953125, "loss": 0.0078, "step": 1267, "total_loss": 0.0025177001953125 }, { "epoch": 0.52, "learning_rate": 0.00019867677031914116, "lm_loss": 0.00872802734375, "loss": 0.0086, "step": 1268, "total_loss": 0.00872802734375 }, { "epoch": 0.52, "learning_rate": 0.00019867468700729508, "lm_loss": 0.0036773681640625, "loss": 0.0081, "step": 1269, "total_loss": 0.0036773681640625 }, { "epoch": 0.52, "learning_rate": 0.00019867260206768067, "lm_loss": 0.01043701171875, "loss": 0.0081, "step": 1270, "total_loss": 0.01043701171875 }, { "epoch": 0.52, "learning_rate": 0.00019867051550033233, "lm_loss": 0.00885009765625, "loss": 0.0102, "step": 1271, "total_loss": 0.00885009765625 }, { "epoch": 0.52, "learning_rate": 0.00019866842730528446, "lm_loss": 0.008056640625, "loss": 0.0084, "step": 1272, "total_loss": 0.008056640625 }, { "epoch": 0.52, "learning_rate": 0.0001986663374825715, "lm_loss": 0.00714111328125, "loss": 0.0112, "step": 1273, "total_loss": 0.00714111328125 }, { "epoch": 0.52, "learning_rate": 0.00019866424603222796, "lm_loss": 0.00604248046875, "loss": 0.0092, "step": 1274, "total_loss": 0.00604248046875 }, { "epoch": 0.52, "learning_rate": 0.0001986621529542883, "lm_loss": 0.00848388671875, "loss": 0.0089, "step": 1275, "total_loss": 0.00848388671875 }, { "epoch": 0.52, "learning_rate": 0.00019866005824878706, "lm_loss": 0.01251220703125, "loss": 0.0093, "step": 1276, "total_loss": 0.01251220703125 }, { "epoch": 0.52, "learning_rate": 0.00019865796191575883, "lm_loss": 0.0050048828125, "loss": 0.0076, "step": 1277, "total_loss": 0.0050048828125 }, { "epoch": 0.52, "learning_rate": 0.00019865586395523814, "lm_loss": 0.00970458984375, "loss": 0.0105, "step": 1278, "total_loss": 0.00970458984375 }, { "epoch": 0.52, "learning_rate": 0.00019865376436725962, "lm_loss": 0.013427734375, "loss": 0.0096, "step": 1279, "total_loss": 0.013427734375 }, { "epoch": 0.52, "learning_rate": 0.00019865166315185794, "lm_loss": 0.00787353515625, "loss": 0.0092, "step": 1280, "total_loss": 0.00787353515625 }, { "epoch": 0.52, "learning_rate": 0.00019864956030906769, "lm_loss": 0.007171630859375, "loss": 0.0082, "step": 1281, "total_loss": 0.007171630859375 }, { "epoch": 0.52, "learning_rate": 0.0001986474558389236, "lm_loss": 0.013916015625, "loss": 0.0095, "step": 1282, "total_loss": 0.013916015625 }, { "epoch": 0.52, "learning_rate": 0.0001986453497414604, "lm_loss": 0.003997802734375, "loss": 0.011, "step": 1283, "total_loss": 0.003997802734375 }, { "epoch": 0.52, "learning_rate": 0.00019864324201671282, "lm_loss": 0.01239013671875, "loss": 0.0092, "step": 1284, "total_loss": 0.01239013671875 }, { "epoch": 0.53, "learning_rate": 0.00019864113266471564, "lm_loss": 0.00982666015625, "loss": 0.0095, "step": 1285, "total_loss": 0.00982666015625 }, { "epoch": 0.53, "learning_rate": 0.00019863902168550365, "lm_loss": 0.0142822265625, "loss": 0.0095, "step": 1286, "total_loss": 0.0142822265625 }, { "epoch": 0.53, "learning_rate": 0.00019863690907911163, "lm_loss": 0.004364013671875, "loss": 0.0076, "step": 1287, "total_loss": 0.004364013671875 }, { "epoch": 0.53, "learning_rate": 0.00019863479484557445, "lm_loss": 0.01104736328125, "loss": 0.0094, "step": 1288, "total_loss": 0.01104736328125 }, { "epoch": 0.53, "learning_rate": 0.00019863267898492706, "lm_loss": 0.016357421875, "loss": 0.0132, "step": 1289, "total_loss": 0.016357421875 }, { "epoch": 0.53, "learning_rate": 0.0001986305614972043, "lm_loss": 0.00823974609375, "loss": 0.0089, "step": 1290, "total_loss": 0.00823974609375 }, { "epoch": 0.53, "learning_rate": 0.00019862844238244107, "lm_loss": 0.02685546875, "loss": 0.0097, "step": 1291, "total_loss": 0.02685546875 }, { "epoch": 0.53, "learning_rate": 0.0001986263216406724, "lm_loss": 0.0078125, "loss": 0.01, "step": 1292, "total_loss": 0.0078125 }, { "epoch": 0.53, "learning_rate": 0.00019862419927193324, "lm_loss": 0.00665283203125, "loss": 0.0077, "step": 1293, "total_loss": 0.00665283203125 }, { "epoch": 0.53, "learning_rate": 0.0001986220752762586, "lm_loss": 0.00946044921875, "loss": 0.0118, "step": 1294, "total_loss": 0.00946044921875 }, { "epoch": 0.53, "learning_rate": 0.00019861994965368355, "lm_loss": 0.00811767578125, "loss": 0.0081, "step": 1295, "total_loss": 0.00811767578125 }, { "epoch": 0.53, "learning_rate": 0.00019861782240424307, "lm_loss": 0.006561279296875, "loss": 0.009, "step": 1296, "total_loss": 0.006561279296875 }, { "epoch": 0.53, "learning_rate": 0.00019861569352797232, "lm_loss": 0.01068115234375, "loss": 0.0095, "step": 1297, "total_loss": 0.01068115234375 }, { "epoch": 0.53, "learning_rate": 0.00019861356302490645, "lm_loss": 0.0084228515625, "loss": 0.0094, "step": 1298, "total_loss": 0.0084228515625 }, { "epoch": 0.53, "learning_rate": 0.0001986114308950805, "lm_loss": 0.01171875, "loss": 0.0074, "step": 1299, "total_loss": 0.01171875 }, { "epoch": 0.53, "learning_rate": 0.00019860929713852975, "lm_loss": 0.008544921875, "loss": 0.0099, "step": 1300, "total_loss": 0.008544921875 }, { "epoch": 0.53, "eval_lm_loss": 0.010627499781548977, "eval_loss": 0.011026458814740181, "eval_runtime": 44.281, "eval_samples_per_second": 22.583, "eval_steps_per_second": 0.203, "eval_total_loss": 0.010627499781548977, "lm_loss": 0.000827789306640625, "step": 1300, "total_loss": 0.000827789306640625 }, { "epoch": 0.53, "learning_rate": 0.00019860716175528932, "lm_loss": 0.006103515625, "loss": 0.0107, "step": 1301, "total_loss": 0.006103515625 }, { "epoch": 0.53, "learning_rate": 0.0001986050247453945, "lm_loss": 0.009521484375, "loss": 0.009, "step": 1302, "total_loss": 0.009521484375 }, { "epoch": 0.53, "learning_rate": 0.0001986028861088805, "lm_loss": 0.006988525390625, "loss": 0.0081, "step": 1303, "total_loss": 0.006988525390625 }, { "epoch": 0.53, "learning_rate": 0.0001986007458457826, "lm_loss": 0.0036468505859375, "loss": 0.0096, "step": 1304, "total_loss": 0.0036468505859375 }, { "epoch": 0.53, "learning_rate": 0.00019859860395613613, "lm_loss": 0.0048828125, "loss": 0.0095, "step": 1305, "total_loss": 0.0048828125 }, { "epoch": 0.53, "learning_rate": 0.00019859646043997642, "lm_loss": 0.00286865234375, "loss": 0.008, "step": 1306, "total_loss": 0.00286865234375 }, { "epoch": 0.53, "learning_rate": 0.00019859431529733882, "lm_loss": 0.01239013671875, "loss": 0.0083, "step": 1307, "total_loss": 0.01239013671875 }, { "epoch": 0.53, "learning_rate": 0.0001985921685282587, "lm_loss": 0.007476806640625, "loss": 0.0089, "step": 1308, "total_loss": 0.007476806640625 }, { "epoch": 0.54, "learning_rate": 0.00019859002013277146, "lm_loss": 0.01116943359375, "loss": 0.0074, "step": 1309, "total_loss": 0.01116943359375 }, { "epoch": 0.54, "learning_rate": 0.00019858787011091262, "lm_loss": 0.0037994384765625, "loss": 0.0083, "step": 1310, "total_loss": 0.0037994384765625 }, { "epoch": 0.54, "learning_rate": 0.0001985857184627176, "lm_loss": 0.00537109375, "loss": 0.008, "step": 1311, "total_loss": 0.00537109375 }, { "epoch": 0.54, "learning_rate": 0.00019858356518822189, "lm_loss": 0.0048828125, "loss": 0.0082, "step": 1312, "total_loss": 0.0048828125 }, { "epoch": 0.54, "learning_rate": 0.000198581410287461, "lm_loss": 0.007080078125, "loss": 0.0082, "step": 1313, "total_loss": 0.007080078125 }, { "epoch": 0.54, "learning_rate": 0.00019857925376047048, "lm_loss": 0.008544921875, "loss": 0.0083, "step": 1314, "total_loss": 0.008544921875 }, { "epoch": 0.54, "learning_rate": 0.00019857709560728597, "lm_loss": 0.01458740234375, "loss": 0.0111, "step": 1315, "total_loss": 0.01458740234375 }, { "epoch": 0.54, "learning_rate": 0.00019857493582794297, "lm_loss": 0.00445556640625, "loss": 0.0081, "step": 1316, "total_loss": 0.00445556640625 }, { "epoch": 0.54, "learning_rate": 0.0001985727744224772, "lm_loss": 0.0030670166015625, "loss": 0.0087, "step": 1317, "total_loss": 0.0030670166015625 }, { "epoch": 0.54, "learning_rate": 0.00019857061139092423, "lm_loss": 0.0034332275390625, "loss": 0.007, "step": 1318, "total_loss": 0.0034332275390625 }, { "epoch": 0.54, "learning_rate": 0.0001985684467333198, "lm_loss": 0.007720947265625, "loss": 0.0073, "step": 1319, "total_loss": 0.007720947265625 }, { "epoch": 0.54, "learning_rate": 0.00019856628044969963, "lm_loss": 0.0087890625, "loss": 0.009, "step": 1320, "total_loss": 0.0087890625 }, { "epoch": 0.54, "learning_rate": 0.0001985641125400994, "lm_loss": 0.005035400390625, "loss": 0.0092, "step": 1321, "total_loss": 0.005035400390625 }, { "epoch": 0.54, "learning_rate": 0.0001985619430045549, "lm_loss": 0.010009765625, "loss": 0.0104, "step": 1322, "total_loss": 0.010009765625 }, { "epoch": 0.54, "learning_rate": 0.00019855977184310195, "lm_loss": 0.0137939453125, "loss": 0.0107, "step": 1323, "total_loss": 0.0137939453125 }, { "epoch": 0.54, "learning_rate": 0.0001985575990557763, "lm_loss": 0.0167236328125, "loss": 0.0092, "step": 1324, "total_loss": 0.0167236328125 }, { "epoch": 0.54, "learning_rate": 0.00019855542464261385, "lm_loss": 0.0177001953125, "loss": 0.0086, "step": 1325, "total_loss": 0.0177001953125 }, { "epoch": 0.54, "learning_rate": 0.00019855324860365046, "lm_loss": 0.0137939453125, "loss": 0.0104, "step": 1326, "total_loss": 0.0137939453125 }, { "epoch": 0.54, "learning_rate": 0.000198551070938922, "lm_loss": 0.01104736328125, "loss": 0.0114, "step": 1327, "total_loss": 0.01104736328125 }, { "epoch": 0.54, "learning_rate": 0.00019854889164846442, "lm_loss": 0.004425048828125, "loss": 0.0087, "step": 1328, "total_loss": 0.004425048828125 }, { "epoch": 0.54, "learning_rate": 0.00019854671073231365, "lm_loss": 0.006378173828125, "loss": 0.0086, "step": 1329, "total_loss": 0.006378173828125 }, { "epoch": 0.54, "learning_rate": 0.0001985445281905057, "lm_loss": 0.00482177734375, "loss": 0.0085, "step": 1330, "total_loss": 0.00482177734375 }, { "epoch": 0.54, "learning_rate": 0.00019854234402307653, "lm_loss": 0.0030059814453125, "loss": 0.0084, "step": 1331, "total_loss": 0.0030059814453125 }, { "epoch": 0.54, "learning_rate": 0.0001985401582300622, "lm_loss": 0.0172119140625, "loss": 0.0111, "step": 1332, "total_loss": 0.0172119140625 }, { "epoch": 0.54, "learning_rate": 0.00019853797081149878, "lm_loss": 0.01275634765625, "loss": 0.0069, "step": 1333, "total_loss": 0.01275634765625 }, { "epoch": 0.55, "learning_rate": 0.0001985357817674223, "lm_loss": 0.0128173828125, "loss": 0.011, "step": 1334, "total_loss": 0.0128173828125 }, { "epoch": 0.55, "learning_rate": 0.0001985335910978689, "lm_loss": 0.00604248046875, "loss": 0.0091, "step": 1335, "total_loss": 0.00604248046875 }, { "epoch": 0.55, "learning_rate": 0.00019853139880287476, "lm_loss": 0.0145263671875, "loss": 0.0096, "step": 1336, "total_loss": 0.0145263671875 }, { "epoch": 0.55, "learning_rate": 0.00019852920488247597, "lm_loss": 0.0106201171875, "loss": 0.0097, "step": 1337, "total_loss": 0.0106201171875 }, { "epoch": 0.55, "learning_rate": 0.00019852700933670878, "lm_loss": 0.0068359375, "loss": 0.0085, "step": 1338, "total_loss": 0.0068359375 }, { "epoch": 0.55, "learning_rate": 0.00019852481216560942, "lm_loss": 0.00830078125, "loss": 0.0083, "step": 1339, "total_loss": 0.00830078125 }, { "epoch": 0.55, "learning_rate": 0.00019852261336921406, "lm_loss": 0.00927734375, "loss": 0.0085, "step": 1340, "total_loss": 0.00927734375 }, { "epoch": 0.55, "learning_rate": 0.000198520412947559, "lm_loss": 0.0086669921875, "loss": 0.0073, "step": 1341, "total_loss": 0.0086669921875 }, { "epoch": 0.55, "learning_rate": 0.00019851821090068059, "lm_loss": 0.00787353515625, "loss": 0.0088, "step": 1342, "total_loss": 0.00787353515625 }, { "epoch": 0.55, "learning_rate": 0.0001985160072286151, "lm_loss": 0.00921630859375, "loss": 0.0101, "step": 1343, "total_loss": 0.00921630859375 }, { "epoch": 0.55, "learning_rate": 0.00019851380193139893, "lm_loss": 0.01495361328125, "loss": 0.0088, "step": 1344, "total_loss": 0.01495361328125 }, { "epoch": 0.55, "learning_rate": 0.0001985115950090684, "lm_loss": 0.007598876953125, "loss": 0.0078, "step": 1345, "total_loss": 0.007598876953125 }, { "epoch": 0.55, "learning_rate": 0.00019850938646165996, "lm_loss": 0.00921630859375, "loss": 0.0099, "step": 1346, "total_loss": 0.00921630859375 }, { "epoch": 0.55, "learning_rate": 0.00019850717628921004, "lm_loss": 0.005859375, "loss": 0.0101, "step": 1347, "total_loss": 0.005859375 }, { "epoch": 0.55, "learning_rate": 0.00019850496449175508, "lm_loss": 0.0031890869140625, "loss": 0.0068, "step": 1348, "total_loss": 0.0031890869140625 }, { "epoch": 0.55, "learning_rate": 0.00019850275106933157, "lm_loss": 0.0057373046875, "loss": 0.011, "step": 1349, "total_loss": 0.0057373046875 }, { "epoch": 0.55, "learning_rate": 0.00019850053602197605, "lm_loss": 0.00726318359375, "loss": 0.0078, "step": 1350, "total_loss": 0.00726318359375 }, { "epoch": 0.55, "learning_rate": 0.00019849831934972502, "lm_loss": 0.007080078125, "loss": 0.0105, "step": 1351, "total_loss": 0.007080078125 }, { "epoch": 0.55, "learning_rate": 0.00019849610105261508, "lm_loss": 0.0038604736328125, "loss": 0.0089, "step": 1352, "total_loss": 0.0038604736328125 }, { "epoch": 0.55, "learning_rate": 0.00019849388113068282, "lm_loss": 0.003997802734375, "loss": 0.0096, "step": 1353, "total_loss": 0.003997802734375 }, { "epoch": 0.55, "learning_rate": 0.00019849165958396483, "lm_loss": 0.01007080078125, "loss": 0.0096, "step": 1354, "total_loss": 0.01007080078125 }, { "epoch": 0.55, "learning_rate": 0.0001984894364124978, "lm_loss": 0.008056640625, "loss": 0.0096, "step": 1355, "total_loss": 0.008056640625 }, { "epoch": 0.55, "learning_rate": 0.00019848721161631837, "lm_loss": 0.005401611328125, "loss": 0.01, "step": 1356, "total_loss": 0.005401611328125 }, { "epoch": 0.55, "learning_rate": 0.00019848498519546323, "lm_loss": 0.0037994384765625, "loss": 0.008, "step": 1357, "total_loss": 0.0037994384765625 }, { "epoch": 0.56, "learning_rate": 0.00019848275714996915, "lm_loss": 0.00823974609375, "loss": 0.0106, "step": 1358, "total_loss": 0.00823974609375 }, { "epoch": 0.56, "learning_rate": 0.0001984805274798729, "lm_loss": 0.00933837890625, "loss": 0.0088, "step": 1359, "total_loss": 0.00933837890625 }, { "epoch": 0.56, "learning_rate": 0.00019847829618521118, "lm_loss": 0.004974365234375, "loss": 0.0098, "step": 1360, "total_loss": 0.004974365234375 }, { "epoch": 0.56, "learning_rate": 0.00019847606326602087, "lm_loss": 0.00970458984375, "loss": 0.0091, "step": 1361, "total_loss": 0.00970458984375 }, { "epoch": 0.56, "learning_rate": 0.00019847382872233875, "lm_loss": 0.005828857421875, "loss": 0.0068, "step": 1362, "total_loss": 0.005828857421875 }, { "epoch": 0.56, "learning_rate": 0.00019847159255420173, "lm_loss": 0.0029144287109375, "loss": 0.0099, "step": 1363, "total_loss": 0.0029144287109375 }, { "epoch": 0.56, "learning_rate": 0.0001984693547616467, "lm_loss": 0.00762939453125, "loss": 0.0072, "step": 1364, "total_loss": 0.00762939453125 }, { "epoch": 0.56, "learning_rate": 0.00019846711534471053, "lm_loss": 0.0147705078125, "loss": 0.0106, "step": 1365, "total_loss": 0.0147705078125 }, { "epoch": 0.56, "learning_rate": 0.00019846487430343022, "lm_loss": 0.0057373046875, "loss": 0.0097, "step": 1366, "total_loss": 0.0057373046875 }, { "epoch": 0.56, "learning_rate": 0.00019846263163784267, "lm_loss": 0.004638671875, "loss": 0.0077, "step": 1367, "total_loss": 0.004638671875 }, { "epoch": 0.56, "learning_rate": 0.00019846038734798493, "lm_loss": 0.0029449462890625, "loss": 0.0078, "step": 1368, "total_loss": 0.0029449462890625 }, { "epoch": 0.56, "learning_rate": 0.00019845814143389402, "lm_loss": 0.013427734375, "loss": 0.0068, "step": 1369, "total_loss": 0.013427734375 }, { "epoch": 0.56, "learning_rate": 0.00019845589389560695, "lm_loss": 0.01519775390625, "loss": 0.0112, "step": 1370, "total_loss": 0.01519775390625 }, { "epoch": 0.56, "learning_rate": 0.0001984536447331608, "lm_loss": 0.005462646484375, "loss": 0.0093, "step": 1371, "total_loss": 0.005462646484375 }, { "epoch": 0.56, "learning_rate": 0.00019845139394659276, "lm_loss": 0.007049560546875, "loss": 0.0099, "step": 1372, "total_loss": 0.007049560546875 }, { "epoch": 0.56, "learning_rate": 0.00019844914153593983, "lm_loss": 0.017822265625, "loss": 0.011, "step": 1373, "total_loss": 0.017822265625 }, { "epoch": 0.56, "learning_rate": 0.00019844688750123926, "lm_loss": 0.00885009765625, "loss": 0.008, "step": 1374, "total_loss": 0.00885009765625 }, { "epoch": 0.56, "learning_rate": 0.0001984446318425282, "lm_loss": 0.005889892578125, "loss": 0.0108, "step": 1375, "total_loss": 0.005889892578125 }, { "epoch": 0.56, "learning_rate": 0.00019844237455984387, "lm_loss": 0.0086669921875, "loss": 0.0087, "step": 1376, "total_loss": 0.0086669921875 }, { "epoch": 0.56, "learning_rate": 0.0001984401156532235, "lm_loss": 0.01531982421875, "loss": 0.0099, "step": 1377, "total_loss": 0.01531982421875 }, { "epoch": 0.56, "learning_rate": 0.00019843785512270432, "lm_loss": 0.0059814453125, "loss": 0.0073, "step": 1378, "total_loss": 0.0059814453125 }, { "epoch": 0.56, "learning_rate": 0.00019843559296832367, "lm_loss": 0.01141357421875, "loss": 0.007, "step": 1379, "total_loss": 0.01141357421875 }, { "epoch": 0.56, "learning_rate": 0.00019843332919011888, "lm_loss": 0.0052490234375, "loss": 0.0065, "step": 1380, "total_loss": 0.0052490234375 }, { "epoch": 0.56, "learning_rate": 0.00019843106378812723, "lm_loss": 0.006988525390625, "loss": 0.0071, "step": 1381, "total_loss": 0.006988525390625 }, { "epoch": 0.57, "learning_rate": 0.00019842879676238614, "lm_loss": 0.0084228515625, "loss": 0.0074, "step": 1382, "total_loss": 0.0084228515625 }, { "epoch": 0.57, "learning_rate": 0.000198426528112933, "lm_loss": 0.00787353515625, "loss": 0.0097, "step": 1383, "total_loss": 0.00787353515625 }, { "epoch": 0.57, "learning_rate": 0.00019842425783980522, "lm_loss": 0.007659912109375, "loss": 0.0108, "step": 1384, "total_loss": 0.007659912109375 }, { "epoch": 0.57, "learning_rate": 0.00019842198594304024, "lm_loss": 0.00872802734375, "loss": 0.0081, "step": 1385, "total_loss": 0.00872802734375 }, { "epoch": 0.57, "learning_rate": 0.0001984197124226756, "lm_loss": 0.0054931640625, "loss": 0.0085, "step": 1386, "total_loss": 0.0054931640625 }, { "epoch": 0.57, "learning_rate": 0.00019841743727874871, "lm_loss": 0.0076904296875, "loss": 0.0094, "step": 1387, "total_loss": 0.0076904296875 }, { "epoch": 0.57, "learning_rate": 0.0001984151605112972, "lm_loss": 0.0072021484375, "loss": 0.0096, "step": 1388, "total_loss": 0.0072021484375 }, { "epoch": 0.57, "learning_rate": 0.00019841288212035853, "lm_loss": 0.0048828125, "loss": 0.0078, "step": 1389, "total_loss": 0.0048828125 }, { "epoch": 0.57, "learning_rate": 0.0001984106021059704, "lm_loss": 0.007598876953125, "loss": 0.0087, "step": 1390, "total_loss": 0.007598876953125 }, { "epoch": 0.57, "learning_rate": 0.0001984083204681703, "lm_loss": 0.01239013671875, "loss": 0.0092, "step": 1391, "total_loss": 0.01239013671875 }, { "epoch": 0.57, "learning_rate": 0.00019840603720699596, "lm_loss": 0.004730224609375, "loss": 0.0111, "step": 1392, "total_loss": 0.004730224609375 }, { "epoch": 0.57, "learning_rate": 0.000198403752322485, "lm_loss": 0.0050048828125, "loss": 0.0073, "step": 1393, "total_loss": 0.0050048828125 }, { "epoch": 0.57, "learning_rate": 0.00019840146581467512, "lm_loss": 0.0098876953125, "loss": 0.0082, "step": 1394, "total_loss": 0.0098876953125 }, { "epoch": 0.57, "learning_rate": 0.00019839917768360406, "lm_loss": 0.005615234375, "loss": 0.0089, "step": 1395, "total_loss": 0.005615234375 }, { "epoch": 0.57, "learning_rate": 0.00019839688792930952, "lm_loss": 0.008544921875, "loss": 0.0072, "step": 1396, "total_loss": 0.008544921875 }, { "epoch": 0.57, "learning_rate": 0.00019839459655182932, "lm_loss": 0.00775146484375, "loss": 0.0109, "step": 1397, "total_loss": 0.00775146484375 }, { "epoch": 0.57, "learning_rate": 0.00019839230355120125, "lm_loss": 0.0037841796875, "loss": 0.0084, "step": 1398, "total_loss": 0.0037841796875 }, { "epoch": 0.57, "learning_rate": 0.0001983900089274631, "lm_loss": 0.01055908203125, "loss": 0.0099, "step": 1399, "total_loss": 0.01055908203125 }, { "epoch": 0.57, "learning_rate": 0.00019838771268065278, "lm_loss": 0.00830078125, "loss": 0.0089, "step": 1400, "total_loss": 0.00830078125 }, { "epoch": 0.57, "eval_lm_loss": 0.010626722127199173, "eval_loss": 0.010994529351592064, "eval_runtime": 43.9509, "eval_samples_per_second": 22.753, "eval_steps_per_second": 0.205, "eval_total_loss": 0.010626722127199173, "lm_loss": 0.00112152099609375, "step": 1400, "total_loss": 0.00112152099609375 }, { "epoch": 0.57, "learning_rate": 0.00019838541481080813, "lm_loss": 0.00958251953125, "loss": 0.0083, "step": 1401, "total_loss": 0.00958251953125 }, { "epoch": 0.57, "learning_rate": 0.00019838311531796706, "lm_loss": 0.00885009765625, "loss": 0.0094, "step": 1402, "total_loss": 0.00885009765625 }, { "epoch": 0.57, "learning_rate": 0.00019838081420216753, "lm_loss": 0.00933837890625, "loss": 0.0073, "step": 1403, "total_loss": 0.00933837890625 }, { "epoch": 0.57, "learning_rate": 0.00019837851146344744, "lm_loss": 0.01336669921875, "loss": 0.0091, "step": 1404, "total_loss": 0.01336669921875 }, { "epoch": 0.57, "learning_rate": 0.00019837620710184481, "lm_loss": 0.0052490234375, "loss": 0.0085, "step": 1405, "total_loss": 0.0052490234375 }, { "epoch": 0.57, "learning_rate": 0.0001983739011173977, "lm_loss": 0.0135498046875, "loss": 0.0084, "step": 1406, "total_loss": 0.0135498046875 }, { "epoch": 0.58, "learning_rate": 0.00019837159351014408, "lm_loss": 0.00848388671875, "loss": 0.0084, "step": 1407, "total_loss": 0.00848388671875 }, { "epoch": 0.58, "learning_rate": 0.00019836928428012204, "lm_loss": 0.0125732421875, "loss": 0.0072, "step": 1408, "total_loss": 0.0125732421875 }, { "epoch": 0.58, "learning_rate": 0.00019836697342736972, "lm_loss": 0.0087890625, "loss": 0.0092, "step": 1409, "total_loss": 0.0087890625 }, { "epoch": 0.58, "learning_rate": 0.0001983646609519252, "lm_loss": 0.0078125, "loss": 0.0076, "step": 1410, "total_loss": 0.0078125 }, { "epoch": 0.58, "learning_rate": 0.00019836234685382658, "lm_loss": 0.0079345703125, "loss": 0.0079, "step": 1411, "total_loss": 0.0079345703125 }, { "epoch": 0.58, "learning_rate": 0.0001983600311331121, "lm_loss": 0.00811767578125, "loss": 0.0082, "step": 1412, "total_loss": 0.00811767578125 }, { "epoch": 0.58, "learning_rate": 0.00019835771378981995, "lm_loss": 0.0050048828125, "loss": 0.0077, "step": 1413, "total_loss": 0.0050048828125 }, { "epoch": 0.58, "learning_rate": 0.00019835539482398836, "lm_loss": 0.01116943359375, "loss": 0.0084, "step": 1414, "total_loss": 0.01116943359375 }, { "epoch": 0.58, "learning_rate": 0.00019835307423565554, "lm_loss": 0.01177978515625, "loss": 0.0086, "step": 1415, "total_loss": 0.01177978515625 }, { "epoch": 0.58, "learning_rate": 0.00019835075202485983, "lm_loss": 0.004638671875, "loss": 0.0092, "step": 1416, "total_loss": 0.004638671875 }, { "epoch": 0.58, "learning_rate": 0.0001983484281916395, "lm_loss": 0.00360107421875, "loss": 0.0109, "step": 1417, "total_loss": 0.00360107421875 }, { "epoch": 0.58, "learning_rate": 0.00019834610273603294, "lm_loss": 0.003509521484375, "loss": 0.009, "step": 1418, "total_loss": 0.003509521484375 }, { "epoch": 0.58, "learning_rate": 0.00019834377565807842, "lm_loss": 0.005035400390625, "loss": 0.0101, "step": 1419, "total_loss": 0.005035400390625 }, { "epoch": 0.58, "learning_rate": 0.0001983414469578144, "lm_loss": 0.00921630859375, "loss": 0.0097, "step": 1420, "total_loss": 0.00921630859375 }, { "epoch": 0.58, "learning_rate": 0.00019833911663527927, "lm_loss": 0.005615234375, "loss": 0.0082, "step": 1421, "total_loss": 0.005615234375 }, { "epoch": 0.58, "learning_rate": 0.00019833678469051145, "lm_loss": 0.0137939453125, "loss": 0.0104, "step": 1422, "total_loss": 0.0137939453125 }, { "epoch": 0.58, "learning_rate": 0.00019833445112354948, "lm_loss": 0.006500244140625, "loss": 0.0106, "step": 1423, "total_loss": 0.006500244140625 }, { "epoch": 0.58, "learning_rate": 0.0001983321159344318, "lm_loss": 0.00677490234375, "loss": 0.0084, "step": 1424, "total_loss": 0.00677490234375 }, { "epoch": 0.58, "learning_rate": 0.0001983297791231969, "lm_loss": 0.00567626953125, "loss": 0.0093, "step": 1425, "total_loss": 0.00567626953125 }, { "epoch": 0.58, "learning_rate": 0.00019832744068988342, "lm_loss": 0.0096435546875, "loss": 0.0093, "step": 1426, "total_loss": 0.0096435546875 }, { "epoch": 0.58, "learning_rate": 0.00019832510063452983, "lm_loss": 0.010498046875, "loss": 0.0091, "step": 1427, "total_loss": 0.010498046875 }, { "epoch": 0.58, "learning_rate": 0.00019832275895717482, "lm_loss": 0.009033203125, "loss": 0.0078, "step": 1428, "total_loss": 0.009033203125 }, { "epoch": 0.58, "learning_rate": 0.000198320415657857, "lm_loss": 0.0120849609375, "loss": 0.0099, "step": 1429, "total_loss": 0.0120849609375 }, { "epoch": 0.58, "learning_rate": 0.00019831807073661498, "lm_loss": 0.00156402587890625, "loss": 0.0075, "step": 1430, "total_loss": 0.00156402587890625 }, { "epoch": 0.59, "learning_rate": 0.00019831572419348747, "lm_loss": 0.010498046875, "loss": 0.0076, "step": 1431, "total_loss": 0.010498046875 }, { "epoch": 0.59, "learning_rate": 0.00019831337602851324, "lm_loss": 0.01495361328125, "loss": 0.0089, "step": 1432, "total_loss": 0.01495361328125 }, { "epoch": 0.59, "learning_rate": 0.00019831102624173094, "lm_loss": 0.00994873046875, "loss": 0.0099, "step": 1433, "total_loss": 0.00994873046875 }, { "epoch": 0.59, "learning_rate": 0.0001983086748331793, "lm_loss": 0.005950927734375, "loss": 0.008, "step": 1434, "total_loss": 0.005950927734375 }, { "epoch": 0.59, "learning_rate": 0.00019830632180289725, "lm_loss": 0.0078125, "loss": 0.0099, "step": 1435, "total_loss": 0.0078125 }, { "epoch": 0.59, "learning_rate": 0.0001983039671509235, "lm_loss": 0.006561279296875, "loss": 0.0101, "step": 1436, "total_loss": 0.006561279296875 }, { "epoch": 0.59, "learning_rate": 0.00019830161087729692, "lm_loss": 0.00775146484375, "loss": 0.0095, "step": 1437, "total_loss": 0.00775146484375 }, { "epoch": 0.59, "learning_rate": 0.00019829925298205637, "lm_loss": 0.01007080078125, "loss": 0.01, "step": 1438, "total_loss": 0.01007080078125 }, { "epoch": 0.59, "learning_rate": 0.0001982968934652408, "lm_loss": 0.007293701171875, "loss": 0.0082, "step": 1439, "total_loss": 0.007293701171875 }, { "epoch": 0.59, "learning_rate": 0.00019829453232688907, "lm_loss": 0.006378173828125, "loss": 0.0086, "step": 1440, "total_loss": 0.006378173828125 }, { "epoch": 0.59, "learning_rate": 0.00019829216956704013, "lm_loss": 0.012939453125, "loss": 0.0085, "step": 1441, "total_loss": 0.012939453125 }, { "epoch": 0.59, "learning_rate": 0.000198289805185733, "lm_loss": 0.0130615234375, "loss": 0.0096, "step": 1442, "total_loss": 0.0130615234375 }, { "epoch": 0.59, "learning_rate": 0.00019828743918300665, "lm_loss": 0.006103515625, "loss": 0.0082, "step": 1443, "total_loss": 0.006103515625 }, { "epoch": 0.59, "learning_rate": 0.00019828507155890014, "lm_loss": 0.00714111328125, "loss": 0.0081, "step": 1444, "total_loss": 0.00714111328125 }, { "epoch": 0.59, "learning_rate": 0.0001982827023134525, "lm_loss": 0.00494384765625, "loss": 0.0085, "step": 1445, "total_loss": 0.00494384765625 }, { "epoch": 0.59, "learning_rate": 0.00019828033144670283, "lm_loss": 0.0086669921875, "loss": 0.0098, "step": 1446, "total_loss": 0.0086669921875 }, { "epoch": 0.59, "learning_rate": 0.0001982779589586902, "lm_loss": 0.0067138671875, "loss": 0.0092, "step": 1447, "total_loss": 0.0067138671875 }, { "epoch": 0.59, "learning_rate": 0.0001982755848494538, "lm_loss": 0.01214599609375, "loss": 0.0099, "step": 1448, "total_loss": 0.01214599609375 }, { "epoch": 0.59, "learning_rate": 0.00019827320911903278, "lm_loss": 0.004425048828125, "loss": 0.0087, "step": 1449, "total_loss": 0.004425048828125 }, { "epoch": 0.59, "learning_rate": 0.00019827083176746633, "lm_loss": 0.01031494140625, "loss": 0.0105, "step": 1450, "total_loss": 0.01031494140625 }, { "epoch": 0.59, "learning_rate": 0.00019826845279479364, "lm_loss": 0.01177978515625, "loss": 0.0091, "step": 1451, "total_loss": 0.01177978515625 }, { "epoch": 0.59, "learning_rate": 0.000198266072201054, "lm_loss": 0.01080322265625, "loss": 0.0083, "step": 1452, "total_loss": 0.01080322265625 }, { "epoch": 0.59, "learning_rate": 0.00019826368998628665, "lm_loss": 0.002685546875, "loss": 0.01, "step": 1453, "total_loss": 0.002685546875 }, { "epoch": 0.59, "learning_rate": 0.0001982613061505309, "lm_loss": 0.008544921875, "loss": 0.0092, "step": 1454, "total_loss": 0.008544921875 }, { "epoch": 0.59, "learning_rate": 0.00019825892069382607, "lm_loss": 0.01116943359375, "loss": 0.0092, "step": 1455, "total_loss": 0.01116943359375 }, { "epoch": 0.6, "learning_rate": 0.00019825653361621147, "lm_loss": 0.0157470703125, "loss": 0.0091, "step": 1456, "total_loss": 0.0157470703125 }, { "epoch": 0.6, "learning_rate": 0.00019825414491772658, "lm_loss": 0.00469970703125, "loss": 0.0083, "step": 1457, "total_loss": 0.00469970703125 }, { "epoch": 0.6, "learning_rate": 0.00019825175459841073, "lm_loss": 0.01300048828125, "loss": 0.0083, "step": 1458, "total_loss": 0.01300048828125 }, { "epoch": 0.6, "learning_rate": 0.00019824936265830335, "lm_loss": 0.0032806396484375, "loss": 0.0074, "step": 1459, "total_loss": 0.0032806396484375 }, { "epoch": 0.6, "learning_rate": 0.0001982469690974439, "lm_loss": 0.01019287109375, "loss": 0.0082, "step": 1460, "total_loss": 0.01019287109375 }, { "epoch": 0.6, "learning_rate": 0.00019824457391587192, "lm_loss": 0.01123046875, "loss": 0.0098, "step": 1461, "total_loss": 0.01123046875 }, { "epoch": 0.6, "learning_rate": 0.00019824217711362688, "lm_loss": 0.01214599609375, "loss": 0.007, "step": 1462, "total_loss": 0.01214599609375 }, { "epoch": 0.6, "learning_rate": 0.0001982397786907483, "lm_loss": 0.0030670166015625, "loss": 0.0069, "step": 1463, "total_loss": 0.0030670166015625 }, { "epoch": 0.6, "learning_rate": 0.00019823737864727576, "lm_loss": 0.007415771484375, "loss": 0.0087, "step": 1464, "total_loss": 0.007415771484375 }, { "epoch": 0.6, "learning_rate": 0.00019823497698324887, "lm_loss": 0.005096435546875, "loss": 0.0099, "step": 1465, "total_loss": 0.005096435546875 }, { "epoch": 0.6, "learning_rate": 0.00019823257369870723, "lm_loss": 0.00830078125, "loss": 0.0092, "step": 1466, "total_loss": 0.00830078125 }, { "epoch": 0.6, "learning_rate": 0.00019823016879369048, "lm_loss": 0.005615234375, "loss": 0.0098, "step": 1467, "total_loss": 0.005615234375 }, { "epoch": 0.6, "learning_rate": 0.0001982277622682383, "lm_loss": 0.005401611328125, "loss": 0.0074, "step": 1468, "total_loss": 0.005401611328125 }, { "epoch": 0.6, "learning_rate": 0.0001982253541223904, "lm_loss": 0.0146484375, "loss": 0.0093, "step": 1469, "total_loss": 0.0146484375 }, { "epoch": 0.6, "learning_rate": 0.0001982229443561865, "lm_loss": 0.0059814453125, "loss": 0.0095, "step": 1470, "total_loss": 0.0059814453125 }, { "epoch": 0.6, "learning_rate": 0.00019822053296966635, "lm_loss": 0.007080078125, "loss": 0.0093, "step": 1471, "total_loss": 0.007080078125 }, { "epoch": 0.6, "learning_rate": 0.0001982181199628697, "lm_loss": 0.0024871826171875, "loss": 0.0088, "step": 1472, "total_loss": 0.0024871826171875 }, { "epoch": 0.6, "learning_rate": 0.0001982157053358364, "lm_loss": 0.012451171875, "loss": 0.0107, "step": 1473, "total_loss": 0.012451171875 }, { "epoch": 0.6, "learning_rate": 0.00019821328908860628, "lm_loss": 0.0067138671875, "loss": 0.0073, "step": 1474, "total_loss": 0.0067138671875 }, { "epoch": 0.6, "learning_rate": 0.00019821087122121918, "lm_loss": 0.010498046875, "loss": 0.0088, "step": 1475, "total_loss": 0.010498046875 }, { "epoch": 0.6, "learning_rate": 0.00019820845173371498, "lm_loss": 0.0091552734375, "loss": 0.0072, "step": 1476, "total_loss": 0.0091552734375 }, { "epoch": 0.6, "learning_rate": 0.0001982060306261336, "lm_loss": 0.006744384765625, "loss": 0.0082, "step": 1477, "total_loss": 0.006744384765625 }, { "epoch": 0.6, "learning_rate": 0.00019820360789851499, "lm_loss": 0.01239013671875, "loss": 0.008, "step": 1478, "total_loss": 0.01239013671875 }, { "epoch": 0.6, "learning_rate": 0.00019820118355089915, "lm_loss": 0.01080322265625, "loss": 0.0079, "step": 1479, "total_loss": 0.01080322265625 }, { "epoch": 0.61, "learning_rate": 0.00019819875758332595, "lm_loss": 0.0079345703125, "loss": 0.01, "step": 1480, "total_loss": 0.0079345703125 }, { "epoch": 0.61, "learning_rate": 0.00019819632999583555, "lm_loss": 0.01116943359375, "loss": 0.0094, "step": 1481, "total_loss": 0.01116943359375 }, { "epoch": 0.61, "learning_rate": 0.00019819390078846792, "lm_loss": 0.00823974609375, "loss": 0.0098, "step": 1482, "total_loss": 0.00823974609375 }, { "epoch": 0.61, "learning_rate": 0.00019819146996126317, "lm_loss": 0.01165771484375, "loss": 0.0087, "step": 1483, "total_loss": 0.01165771484375 }, { "epoch": 0.61, "learning_rate": 0.00019818903751426135, "lm_loss": 0.0076904296875, "loss": 0.0081, "step": 1484, "total_loss": 0.0076904296875 }, { "epoch": 0.61, "learning_rate": 0.00019818660344750262, "lm_loss": 0.00170135498046875, "loss": 0.0098, "step": 1485, "total_loss": 0.00170135498046875 }, { "epoch": 0.61, "learning_rate": 0.00019818416776102714, "lm_loss": 0.00982666015625, "loss": 0.009, "step": 1486, "total_loss": 0.00982666015625 }, { "epoch": 0.61, "learning_rate": 0.00019818173045487507, "lm_loss": 0.00445556640625, "loss": 0.0079, "step": 1487, "total_loss": 0.00445556640625 }, { "epoch": 0.61, "learning_rate": 0.00019817929152908664, "lm_loss": 0.00506591796875, "loss": 0.0081, "step": 1488, "total_loss": 0.00506591796875 }, { "epoch": 0.61, "learning_rate": 0.00019817685098370204, "lm_loss": 0.01312255859375, "loss": 0.0102, "step": 1489, "total_loss": 0.01312255859375 }, { "epoch": 0.61, "learning_rate": 0.0001981744088187616, "lm_loss": 0.00567626953125, "loss": 0.0071, "step": 1490, "total_loss": 0.00567626953125 }, { "epoch": 0.61, "learning_rate": 0.00019817196503430556, "lm_loss": 0.021728515625, "loss": 0.009, "step": 1491, "total_loss": 0.021728515625 }, { "epoch": 0.61, "learning_rate": 0.0001981695196303742, "lm_loss": 0.01519775390625, "loss": 0.0075, "step": 1492, "total_loss": 0.01519775390625 }, { "epoch": 0.61, "learning_rate": 0.00019816707260700793, "lm_loss": 0.009765625, "loss": 0.0087, "step": 1493, "total_loss": 0.009765625 }, { "epoch": 0.61, "learning_rate": 0.00019816462396424707, "lm_loss": 0.01068115234375, "loss": 0.0111, "step": 1494, "total_loss": 0.01068115234375 }, { "epoch": 0.61, "learning_rate": 0.00019816217370213207, "lm_loss": 0.013427734375, "loss": 0.0088, "step": 1495, "total_loss": 0.013427734375 }, { "epoch": 0.61, "learning_rate": 0.00019815972182070328, "lm_loss": 0.003814697265625, "loss": 0.0099, "step": 1496, "total_loss": 0.003814697265625 }, { "epoch": 0.61, "learning_rate": 0.00019815726832000117, "lm_loss": 0.004669189453125, "loss": 0.0102, "step": 1497, "total_loss": 0.004669189453125 }, { "epoch": 0.61, "learning_rate": 0.0001981548132000662, "lm_loss": 0.00897216796875, "loss": 0.0091, "step": 1498, "total_loss": 0.00897216796875 }, { "epoch": 0.61, "learning_rate": 0.00019815235646093894, "lm_loss": 0.00482177734375, "loss": 0.008, "step": 1499, "total_loss": 0.00482177734375 }, { "epoch": 0.61, "learning_rate": 0.00019814989810265984, "lm_loss": 0.00933837890625, "loss": 0.0106, "step": 1500, "total_loss": 0.00933837890625 }, { "epoch": 0.61, "eval_lm_loss": 0.010354571975767612, "eval_loss": 0.010629158467054367, "eval_runtime": 43.9429, "eval_samples_per_second": 22.757, "eval_steps_per_second": 0.205, "eval_total_loss": 0.010354571975767612, "lm_loss": 0.00171661376953125, "step": 1500, "total_loss": 0.00171661376953125 }, { "epoch": 0.61, "learning_rate": 0.00019814743812526952, "lm_loss": 0.00811767578125, "loss": 0.0099, "step": 1501, "total_loss": 0.00811767578125 }, { "epoch": 0.61, "learning_rate": 0.0001981449765288085, "lm_loss": 0.01080322265625, "loss": 0.0095, "step": 1502, "total_loss": 0.01080322265625 }, { "epoch": 0.61, "learning_rate": 0.0001981425133133174, "lm_loss": 0.0101318359375, "loss": 0.0084, "step": 1503, "total_loss": 0.0101318359375 }, { "epoch": 0.61, "learning_rate": 0.00019814004847883685, "lm_loss": 0.0084228515625, "loss": 0.0069, "step": 1504, "total_loss": 0.0084228515625 }, { "epoch": 0.62, "learning_rate": 0.00019813758202540756, "lm_loss": 0.01385498046875, "loss": 0.0105, "step": 1505, "total_loss": 0.01385498046875 }, { "epoch": 0.62, "learning_rate": 0.00019813511395307015, "lm_loss": 0.007537841796875, "loss": 0.0091, "step": 1506, "total_loss": 0.007537841796875 }, { "epoch": 0.62, "learning_rate": 0.0001981326442618654, "lm_loss": 0.00799560546875, "loss": 0.0084, "step": 1507, "total_loss": 0.00799560546875 }, { "epoch": 0.62, "learning_rate": 0.00019813017295183398, "lm_loss": 0.0103759765625, "loss": 0.0094, "step": 1508, "total_loss": 0.0103759765625 }, { "epoch": 0.62, "learning_rate": 0.00019812770002301672, "lm_loss": 0.0089111328125, "loss": 0.0089, "step": 1509, "total_loss": 0.0089111328125 }, { "epoch": 0.62, "learning_rate": 0.00019812522547545437, "lm_loss": 0.006927490234375, "loss": 0.0078, "step": 1510, "total_loss": 0.006927490234375 }, { "epoch": 0.62, "learning_rate": 0.0001981227493091878, "lm_loss": 0.00433349609375, "loss": 0.0077, "step": 1511, "total_loss": 0.00433349609375 }, { "epoch": 0.62, "learning_rate": 0.0001981202715242578, "lm_loss": 0.009765625, "loss": 0.0095, "step": 1512, "total_loss": 0.009765625 }, { "epoch": 0.62, "learning_rate": 0.0001981177921207053, "lm_loss": 0.007293701171875, "loss": 0.0086, "step": 1513, "total_loss": 0.007293701171875 }, { "epoch": 0.62, "learning_rate": 0.00019811531109857112, "lm_loss": 0.00439453125, "loss": 0.0075, "step": 1514, "total_loss": 0.00439453125 }, { "epoch": 0.62, "learning_rate": 0.0001981128284578963, "lm_loss": 0.01129150390625, "loss": 0.0073, "step": 1515, "total_loss": 0.01129150390625 }, { "epoch": 0.62, "learning_rate": 0.0001981103441987217, "lm_loss": 0.0169677734375, "loss": 0.0083, "step": 1516, "total_loss": 0.0169677734375 }, { "epoch": 0.62, "learning_rate": 0.00019810785832108832, "lm_loss": 0.011474609375, "loss": 0.0102, "step": 1517, "total_loss": 0.011474609375 }, { "epoch": 0.62, "learning_rate": 0.00019810537082503725, "lm_loss": 0.00457763671875, "loss": 0.0084, "step": 1518, "total_loss": 0.00457763671875 }, { "epoch": 0.62, "learning_rate": 0.0001981028817106094, "lm_loss": 0.00665283203125, "loss": 0.0085, "step": 1519, "total_loss": 0.00665283203125 }, { "epoch": 0.62, "learning_rate": 0.00019810039097784593, "lm_loss": 0.005859375, "loss": 0.009, "step": 1520, "total_loss": 0.005859375 }, { "epoch": 0.62, "learning_rate": 0.00019809789862678786, "lm_loss": 0.006866455078125, "loss": 0.007, "step": 1521, "total_loss": 0.006866455078125 }, { "epoch": 0.62, "learning_rate": 0.0001980954046574763, "lm_loss": 0.00787353515625, "loss": 0.0091, "step": 1522, "total_loss": 0.00787353515625 }, { "epoch": 0.62, "learning_rate": 0.00019809290906995245, "lm_loss": 0.00909423828125, "loss": 0.0086, "step": 1523, "total_loss": 0.00909423828125 }, { "epoch": 0.62, "learning_rate": 0.00019809041186425748, "lm_loss": 0.0062255859375, "loss": 0.0078, "step": 1524, "total_loss": 0.0062255859375 }, { "epoch": 0.62, "learning_rate": 0.00019808791304043251, "lm_loss": 0.008544921875, "loss": 0.0118, "step": 1525, "total_loss": 0.008544921875 }, { "epoch": 0.62, "learning_rate": 0.00019808541259851885, "lm_loss": 0.010986328125, "loss": 0.0083, "step": 1526, "total_loss": 0.010986328125 }, { "epoch": 0.62, "learning_rate": 0.00019808291053855767, "lm_loss": 0.00799560546875, "loss": 0.0095, "step": 1527, "total_loss": 0.00799560546875 }, { "epoch": 0.62, "learning_rate": 0.00019808040686059033, "lm_loss": 0.00738525390625, "loss": 0.0083, "step": 1528, "total_loss": 0.00738525390625 }, { "epoch": 0.63, "learning_rate": 0.000198077901564658, "lm_loss": 0.0042724609375, "loss": 0.0089, "step": 1529, "total_loss": 0.0042724609375 }, { "epoch": 0.63, "learning_rate": 0.0001980753946508022, "lm_loss": 0.005706787109375, "loss": 0.0079, "step": 1530, "total_loss": 0.005706787109375 }, { "epoch": 0.63, "learning_rate": 0.00019807288611906407, "lm_loss": 0.00823974609375, "loss": 0.0099, "step": 1531, "total_loss": 0.00823974609375 }, { "epoch": 0.63, "learning_rate": 0.00019807037596948518, "lm_loss": 0.005340576171875, "loss": 0.0089, "step": 1532, "total_loss": 0.005340576171875 }, { "epoch": 0.63, "learning_rate": 0.00019806786420210678, "lm_loss": 0.00982666015625, "loss": 0.0093, "step": 1533, "total_loss": 0.00982666015625 }, { "epoch": 0.63, "learning_rate": 0.00019806535081697043, "lm_loss": 0.006866455078125, "loss": 0.0071, "step": 1534, "total_loss": 0.006866455078125 }, { "epoch": 0.63, "learning_rate": 0.0001980628358141175, "lm_loss": 0.014892578125, "loss": 0.0085, "step": 1535, "total_loss": 0.014892578125 }, { "epoch": 0.63, "learning_rate": 0.00019806031919358957, "lm_loss": 0.0079345703125, "loss": 0.0074, "step": 1536, "total_loss": 0.0079345703125 }, { "epoch": 0.63, "learning_rate": 0.00019805780095542806, "lm_loss": 0.004241943359375, "loss": 0.0064, "step": 1537, "total_loss": 0.004241943359375 }, { "epoch": 0.63, "learning_rate": 0.00019805528109967458, "lm_loss": 0.0019989013671875, "loss": 0.0128, "step": 1538, "total_loss": 0.0019989013671875 }, { "epoch": 0.63, "learning_rate": 0.00019805275962637064, "lm_loss": 0.009765625, "loss": 0.0094, "step": 1539, "total_loss": 0.009765625 }, { "epoch": 0.63, "learning_rate": 0.0001980502365355579, "lm_loss": 0.00958251953125, "loss": 0.0075, "step": 1540, "total_loss": 0.00958251953125 }, { "epoch": 0.63, "learning_rate": 0.00019804771182727792, "lm_loss": 0.00775146484375, "loss": 0.0084, "step": 1541, "total_loss": 0.00775146484375 }, { "epoch": 0.63, "learning_rate": 0.00019804518550157243, "lm_loss": 0.00927734375, "loss": 0.0085, "step": 1542, "total_loss": 0.00927734375 }, { "epoch": 0.63, "learning_rate": 0.000198042657558483, "lm_loss": 0.00347900390625, "loss": 0.0077, "step": 1543, "total_loss": 0.00347900390625 }, { "epoch": 0.63, "learning_rate": 0.00019804012799805142, "lm_loss": 0.0096435546875, "loss": 0.0092, "step": 1544, "total_loss": 0.0096435546875 }, { "epoch": 0.63, "learning_rate": 0.0001980375968203194, "lm_loss": 0.006591796875, "loss": 0.0086, "step": 1545, "total_loss": 0.006591796875 }, { "epoch": 0.63, "learning_rate": 0.00019803506402532866, "lm_loss": 0.0059814453125, "loss": 0.0072, "step": 1546, "total_loss": 0.0059814453125 }, { "epoch": 0.63, "learning_rate": 0.00019803252961312097, "lm_loss": 0.00506591796875, "loss": 0.009, "step": 1547, "total_loss": 0.00506591796875 }, { "epoch": 0.63, "learning_rate": 0.0001980299935837382, "lm_loss": 0.00946044921875, "loss": 0.0084, "step": 1548, "total_loss": 0.00946044921875 }, { "epoch": 0.63, "learning_rate": 0.00019802745593722217, "lm_loss": 0.01141357421875, "loss": 0.0081, "step": 1549, "total_loss": 0.01141357421875 }, { "epoch": 0.63, "learning_rate": 0.0001980249166736147, "lm_loss": 0.007476806640625, "loss": 0.0075, "step": 1550, "total_loss": 0.007476806640625 }, { "epoch": 0.63, "learning_rate": 0.00019802237579295771, "lm_loss": 0.006866455078125, "loss": 0.0087, "step": 1551, "total_loss": 0.006866455078125 }, { "epoch": 0.63, "learning_rate": 0.0001980198332952931, "lm_loss": 0.01025390625, "loss": 0.0074, "step": 1552, "total_loss": 0.01025390625 }, { "epoch": 0.63, "learning_rate": 0.00019801728918066283, "lm_loss": 0.005523681640625, "loss": 0.007, "step": 1553, "total_loss": 0.005523681640625 }, { "epoch": 0.64, "learning_rate": 0.00019801474344910885, "lm_loss": 0.00811767578125, "loss": 0.0089, "step": 1554, "total_loss": 0.00811767578125 }, { "epoch": 0.64, "learning_rate": 0.0001980121961006732, "lm_loss": 0.011474609375, "loss": 0.007, "step": 1555, "total_loss": 0.011474609375 }, { "epoch": 0.64, "learning_rate": 0.00019800964713539783, "lm_loss": 0.007415771484375, "loss": 0.008, "step": 1556, "total_loss": 0.007415771484375 }, { "epoch": 0.64, "learning_rate": 0.00019800709655332487, "lm_loss": 0.010498046875, "loss": 0.0079, "step": 1557, "total_loss": 0.010498046875 }, { "epoch": 0.64, "learning_rate": 0.00019800454435449632, "lm_loss": 0.010498046875, "loss": 0.0094, "step": 1558, "total_loss": 0.010498046875 }, { "epoch": 0.64, "learning_rate": 0.0001980019905389543, "lm_loss": 0.007354736328125, "loss": 0.0084, "step": 1559, "total_loss": 0.007354736328125 }, { "epoch": 0.64, "learning_rate": 0.00019799943510674097, "lm_loss": 0.00567626953125, "loss": 0.0078, "step": 1560, "total_loss": 0.00567626953125 }, { "epoch": 0.64, "learning_rate": 0.00019799687805789845, "lm_loss": 0.0107421875, "loss": 0.0113, "step": 1561, "total_loss": 0.0107421875 }, { "epoch": 0.64, "learning_rate": 0.00019799431939246896, "lm_loss": 0.0087890625, "loss": 0.0083, "step": 1562, "total_loss": 0.0087890625 }, { "epoch": 0.64, "learning_rate": 0.0001979917591104947, "lm_loss": 0.0030059814453125, "loss": 0.01, "step": 1563, "total_loss": 0.0030059814453125 }, { "epoch": 0.64, "learning_rate": 0.00019798919721201785, "lm_loss": 0.005859375, "loss": 0.011, "step": 1564, "total_loss": 0.005859375 }, { "epoch": 0.64, "learning_rate": 0.00019798663369708074, "lm_loss": 0.004791259765625, "loss": 0.008, "step": 1565, "total_loss": 0.004791259765625 }, { "epoch": 0.64, "learning_rate": 0.00019798406856572561, "lm_loss": 0.004791259765625, "loss": 0.0088, "step": 1566, "total_loss": 0.004791259765625 }, { "epoch": 0.64, "learning_rate": 0.00019798150181799483, "lm_loss": 0.01025390625, "loss": 0.0074, "step": 1567, "total_loss": 0.01025390625 }, { "epoch": 0.64, "learning_rate": 0.00019797893345393074, "lm_loss": 0.00173187255859375, "loss": 0.0086, "step": 1568, "total_loss": 0.00173187255859375 }, { "epoch": 0.64, "learning_rate": 0.0001979763634735756, "lm_loss": 0.017333984375, "loss": 0.0077, "step": 1569, "total_loss": 0.017333984375 }, { "epoch": 0.64, "learning_rate": 0.00019797379187697192, "lm_loss": 0.012939453125, "loss": 0.0112, "step": 1570, "total_loss": 0.012939453125 }, { "epoch": 0.64, "learning_rate": 0.0001979712186641621, "lm_loss": 0.01806640625, "loss": 0.0104, "step": 1571, "total_loss": 0.01806640625 }, { "epoch": 0.64, "learning_rate": 0.00019796864383518856, "lm_loss": 0.0098876953125, "loss": 0.0103, "step": 1572, "total_loss": 0.0098876953125 }, { "epoch": 0.64, "learning_rate": 0.00019796606739009377, "lm_loss": 0.0081787109375, "loss": 0.0075, "step": 1573, "total_loss": 0.0081787109375 }, { "epoch": 0.64, "learning_rate": 0.00019796348932892027, "lm_loss": 0.009765625, "loss": 0.0069, "step": 1574, "total_loss": 0.009765625 }, { "epoch": 0.64, "learning_rate": 0.00019796090965171057, "lm_loss": 0.006317138671875, "loss": 0.0091, "step": 1575, "total_loss": 0.006317138671875 }, { "epoch": 0.64, "learning_rate": 0.00019795832835850722, "lm_loss": 0.00335693359375, "loss": 0.0074, "step": 1576, "total_loss": 0.00335693359375 }, { "epoch": 0.64, "learning_rate": 0.0001979557454493528, "lm_loss": 0.007080078125, "loss": 0.0081, "step": 1577, "total_loss": 0.007080078125 }, { "epoch": 0.65, "learning_rate": 0.00019795316092428996, "lm_loss": 0.0040283203125, "loss": 0.0091, "step": 1578, "total_loss": 0.0040283203125 }, { "epoch": 0.65, "learning_rate": 0.00019795057478336125, "lm_loss": 0.00872802734375, "loss": 0.0086, "step": 1579, "total_loss": 0.00872802734375 }, { "epoch": 0.65, "learning_rate": 0.00019794798702660942, "lm_loss": 0.00799560546875, "loss": 0.0073, "step": 1580, "total_loss": 0.00799560546875 }, { "epoch": 0.65, "learning_rate": 0.00019794539765407708, "lm_loss": 0.0079345703125, "loss": 0.0075, "step": 1581, "total_loss": 0.0079345703125 }, { "epoch": 0.65, "learning_rate": 0.000197942806665807, "lm_loss": 0.00848388671875, "loss": 0.0094, "step": 1582, "total_loss": 0.00848388671875 }, { "epoch": 0.65, "learning_rate": 0.00019794021406184195, "lm_loss": 0.006622314453125, "loss": 0.0082, "step": 1583, "total_loss": 0.006622314453125 }, { "epoch": 0.65, "learning_rate": 0.0001979376198422246, "lm_loss": 0.00173187255859375, "loss": 0.0084, "step": 1584, "total_loss": 0.00173187255859375 }, { "epoch": 0.65, "learning_rate": 0.00019793502400699783, "lm_loss": 0.00848388671875, "loss": 0.0082, "step": 1585, "total_loss": 0.00848388671875 }, { "epoch": 0.65, "learning_rate": 0.00019793242655620445, "lm_loss": 0.00921630859375, "loss": 0.0095, "step": 1586, "total_loss": 0.00921630859375 }, { "epoch": 0.65, "learning_rate": 0.00019792982748988728, "lm_loss": 0.013671875, "loss": 0.0105, "step": 1587, "total_loss": 0.013671875 }, { "epoch": 0.65, "learning_rate": 0.00019792722680808916, "lm_loss": 0.00799560546875, "loss": 0.0072, "step": 1588, "total_loss": 0.00799560546875 }, { "epoch": 0.65, "learning_rate": 0.00019792462451085306, "lm_loss": 0.0125732421875, "loss": 0.0096, "step": 1589, "total_loss": 0.0125732421875 }, { "epoch": 0.65, "learning_rate": 0.00019792202059822192, "lm_loss": 0.00396728515625, "loss": 0.008, "step": 1590, "total_loss": 0.00396728515625 }, { "epoch": 0.65, "learning_rate": 0.00019791941507023864, "lm_loss": 0.005126953125, "loss": 0.009, "step": 1591, "total_loss": 0.005126953125 }, { "epoch": 0.65, "learning_rate": 0.00019791680792694623, "lm_loss": 0.01141357421875, "loss": 0.0091, "step": 1592, "total_loss": 0.01141357421875 }, { "epoch": 0.65, "learning_rate": 0.00019791419916838766, "lm_loss": 0.007568359375, "loss": 0.0091, "step": 1593, "total_loss": 0.007568359375 }, { "epoch": 0.65, "learning_rate": 0.000197911588794606, "lm_loss": 0.0081787109375, "loss": 0.0098, "step": 1594, "total_loss": 0.0081787109375 }, { "epoch": 0.65, "learning_rate": 0.00019790897680564432, "lm_loss": 0.00909423828125, "loss": 0.0095, "step": 1595, "total_loss": 0.00909423828125 }, { "epoch": 0.65, "learning_rate": 0.0001979063632015457, "lm_loss": 0.0159912109375, "loss": 0.0085, "step": 1596, "total_loss": 0.0159912109375 }, { "epoch": 0.65, "learning_rate": 0.00019790374798235325, "lm_loss": 0.00872802734375, "loss": 0.0085, "step": 1597, "total_loss": 0.00872802734375 }, { "epoch": 0.65, "learning_rate": 0.00019790113114811011, "lm_loss": 0.01031494140625, "loss": 0.0081, "step": 1598, "total_loss": 0.01031494140625 }, { "epoch": 0.65, "learning_rate": 0.00019789851269885945, "lm_loss": 0.00970458984375, "loss": 0.0068, "step": 1599, "total_loss": 0.00970458984375 }, { "epoch": 0.65, "learning_rate": 0.00019789589263464447, "lm_loss": 0.0107421875, "loss": 0.0091, "step": 1600, "total_loss": 0.0107421875 }, { "epoch": 0.65, "eval_lm_loss": 0.00995130930095911, "eval_loss": 0.010327949188649654, "eval_runtime": 43.9584, "eval_samples_per_second": 22.749, "eval_steps_per_second": 0.205, "eval_total_loss": 0.00995130930095911, "lm_loss": 0.00160980224609375, "step": 1600, "total_loss": 0.00160980224609375 }, { "epoch": 0.65, "learning_rate": 0.00019789327095550837, "lm_loss": 0.004638671875, "loss": 0.0076, "step": 1601, "total_loss": 0.004638671875 }, { "epoch": 0.65, "learning_rate": 0.0001978906476614944, "lm_loss": 0.0111083984375, "loss": 0.0077, "step": 1602, "total_loss": 0.0111083984375 }, { "epoch": 0.66, "learning_rate": 0.0001978880227526459, "lm_loss": 0.0107421875, "loss": 0.0097, "step": 1603, "total_loss": 0.0107421875 }, { "epoch": 0.66, "learning_rate": 0.00019788539622900606, "lm_loss": 0.022216796875, "loss": 0.0093, "step": 1604, "total_loss": 0.022216796875 }, { "epoch": 0.66, "learning_rate": 0.0001978827680906183, "lm_loss": 0.01239013671875, "loss": 0.009, "step": 1605, "total_loss": 0.01239013671875 }, { "epoch": 0.66, "learning_rate": 0.00019788013833752595, "lm_loss": 0.01068115234375, "loss": 0.0072, "step": 1606, "total_loss": 0.01068115234375 }, { "epoch": 0.66, "learning_rate": 0.00019787750696977234, "lm_loss": 0.0079345703125, "loss": 0.0079, "step": 1607, "total_loss": 0.0079345703125 }, { "epoch": 0.66, "learning_rate": 0.00019787487398740096, "lm_loss": 0.00506591796875, "loss": 0.0083, "step": 1608, "total_loss": 0.00506591796875 }, { "epoch": 0.66, "learning_rate": 0.00019787223939045522, "lm_loss": 0.0023040771484375, "loss": 0.0089, "step": 1609, "total_loss": 0.0023040771484375 }, { "epoch": 0.66, "learning_rate": 0.00019786960317897854, "lm_loss": 0.00872802734375, "loss": 0.0119, "step": 1610, "total_loss": 0.00872802734375 }, { "epoch": 0.66, "learning_rate": 0.00019786696535301442, "lm_loss": 0.006622314453125, "loss": 0.0119, "step": 1611, "total_loss": 0.006622314453125 }, { "epoch": 0.66, "learning_rate": 0.0001978643259126064, "lm_loss": 0.00469970703125, "loss": 0.0069, "step": 1612, "total_loss": 0.00469970703125 }, { "epoch": 0.66, "learning_rate": 0.00019786168485779802, "lm_loss": 0.01007080078125, "loss": 0.0073, "step": 1613, "total_loss": 0.01007080078125 }, { "epoch": 0.66, "learning_rate": 0.0001978590421886328, "lm_loss": 0.00750732421875, "loss": 0.0103, "step": 1614, "total_loss": 0.00750732421875 }, { "epoch": 0.66, "learning_rate": 0.00019785639790515443, "lm_loss": 0.01385498046875, "loss": 0.0097, "step": 1615, "total_loss": 0.01385498046875 }, { "epoch": 0.66, "learning_rate": 0.00019785375200740646, "lm_loss": 0.01019287109375, "loss": 0.0095, "step": 1616, "total_loss": 0.01019287109375 }, { "epoch": 0.66, "learning_rate": 0.00019785110449543254, "lm_loss": 0.004852294921875, "loss": 0.0093, "step": 1617, "total_loss": 0.004852294921875 }, { "epoch": 0.66, "learning_rate": 0.00019784845536927636, "lm_loss": 0.00634765625, "loss": 0.0092, "step": 1618, "total_loss": 0.00634765625 }, { "epoch": 0.66, "learning_rate": 0.0001978458046289816, "lm_loss": 0.009521484375, "loss": 0.0073, "step": 1619, "total_loss": 0.009521484375 }, { "epoch": 0.66, "learning_rate": 0.000197843152274592, "lm_loss": 0.01251220703125, "loss": 0.0073, "step": 1620, "total_loss": 0.01251220703125 }, { "epoch": 0.66, "learning_rate": 0.00019784049830615135, "lm_loss": 0.01312255859375, "loss": 0.0093, "step": 1621, "total_loss": 0.01312255859375 }, { "epoch": 0.66, "learning_rate": 0.00019783784272370337, "lm_loss": 0.005767822265625, "loss": 0.008, "step": 1622, "total_loss": 0.005767822265625 }, { "epoch": 0.66, "learning_rate": 0.00019783518552729193, "lm_loss": 0.006256103515625, "loss": 0.0086, "step": 1623, "total_loss": 0.006256103515625 }, { "epoch": 0.66, "learning_rate": 0.00019783252671696076, "lm_loss": 0.01214599609375, "loss": 0.01, "step": 1624, "total_loss": 0.01214599609375 }, { "epoch": 0.66, "learning_rate": 0.00019782986629275385, "lm_loss": 0.0087890625, "loss": 0.0102, "step": 1625, "total_loss": 0.0087890625 }, { "epoch": 0.66, "learning_rate": 0.000197827204254715, "lm_loss": 0.00469970703125, "loss": 0.0083, "step": 1626, "total_loss": 0.00469970703125 }, { "epoch": 0.67, "learning_rate": 0.00019782454060288815, "lm_loss": 0.00933837890625, "loss": 0.0063, "step": 1627, "total_loss": 0.00933837890625 }, { "epoch": 0.67, "learning_rate": 0.00019782187533731727, "lm_loss": 0.007720947265625, "loss": 0.008, "step": 1628, "total_loss": 0.007720947265625 }, { "epoch": 0.67, "learning_rate": 0.00019781920845804627, "lm_loss": 0.0068359375, "loss": 0.0107, "step": 1629, "total_loss": 0.0068359375 }, { "epoch": 0.67, "learning_rate": 0.00019781653996511919, "lm_loss": 0.003448486328125, "loss": 0.0088, "step": 1630, "total_loss": 0.003448486328125 }, { "epoch": 0.67, "learning_rate": 0.00019781386985858, "lm_loss": 0.0108642578125, "loss": 0.0079, "step": 1631, "total_loss": 0.0108642578125 }, { "epoch": 0.67, "learning_rate": 0.0001978111981384728, "lm_loss": 0.01220703125, "loss": 0.0098, "step": 1632, "total_loss": 0.01220703125 }, { "epoch": 0.67, "learning_rate": 0.00019780852480484165, "lm_loss": 0.00555419921875, "loss": 0.0066, "step": 1633, "total_loss": 0.00555419921875 }, { "epoch": 0.67, "learning_rate": 0.00019780584985773064, "lm_loss": 0.004150390625, "loss": 0.0098, "step": 1634, "total_loss": 0.004150390625 }, { "epoch": 0.67, "learning_rate": 0.00019780317329718387, "lm_loss": 0.005126953125, "loss": 0.0082, "step": 1635, "total_loss": 0.005126953125 }, { "epoch": 0.67, "learning_rate": 0.00019780049512324555, "lm_loss": 0.007568359375, "loss": 0.0075, "step": 1636, "total_loss": 0.007568359375 }, { "epoch": 0.67, "learning_rate": 0.00019779781533595982, "lm_loss": 0.01055908203125, "loss": 0.0082, "step": 1637, "total_loss": 0.01055908203125 }, { "epoch": 0.67, "learning_rate": 0.00019779513393537092, "lm_loss": 0.00823974609375, "loss": 0.0072, "step": 1638, "total_loss": 0.00823974609375 }, { "epoch": 0.67, "learning_rate": 0.00019779245092152304, "lm_loss": 0.00457763671875, "loss": 0.0101, "step": 1639, "total_loss": 0.00457763671875 }, { "epoch": 0.67, "learning_rate": 0.00019778976629446048, "lm_loss": 0.0042724609375, "loss": 0.0079, "step": 1640, "total_loss": 0.0042724609375 }, { "epoch": 0.67, "learning_rate": 0.0001977870800542275, "lm_loss": 0.00799560546875, "loss": 0.0089, "step": 1641, "total_loss": 0.00799560546875 }, { "epoch": 0.67, "learning_rate": 0.0001977843922008684, "lm_loss": 0.007354736328125, "loss": 0.0087, "step": 1642, "total_loss": 0.007354736328125 }, { "epoch": 0.67, "learning_rate": 0.00019778170273442757, "lm_loss": 0.01025390625, "loss": 0.0075, "step": 1643, "total_loss": 0.01025390625 }, { "epoch": 0.67, "learning_rate": 0.00019777901165494934, "lm_loss": 0.01177978515625, "loss": 0.0091, "step": 1644, "total_loss": 0.01177978515625 }, { "epoch": 0.67, "learning_rate": 0.00019777631896247813, "lm_loss": 0.00174713134765625, "loss": 0.0072, "step": 1645, "total_loss": 0.00174713134765625 }, { "epoch": 0.67, "learning_rate": 0.0001977736246570583, "lm_loss": 0.004974365234375, "loss": 0.0094, "step": 1646, "total_loss": 0.004974365234375 }, { "epoch": 0.67, "learning_rate": 0.00019777092873873438, "lm_loss": 0.0093994140625, "loss": 0.0106, "step": 1647, "total_loss": 0.0093994140625 }, { "epoch": 0.67, "learning_rate": 0.00019776823120755077, "lm_loss": 0.00787353515625, "loss": 0.0081, "step": 1648, "total_loss": 0.00787353515625 }, { "epoch": 0.67, "learning_rate": 0.000197765532063552, "lm_loss": 0.0022125244140625, "loss": 0.0087, "step": 1649, "total_loss": 0.0022125244140625 }, { "epoch": 0.67, "learning_rate": 0.00019776283130678262, "lm_loss": 0.01031494140625, "loss": 0.0085, "step": 1650, "total_loss": 0.01031494140625 }, { "epoch": 0.67, "learning_rate": 0.0001977601289372871, "lm_loss": 0.0093994140625, "loss": 0.0071, "step": 1651, "total_loss": 0.0093994140625 }, { "epoch": 0.68, "learning_rate": 0.00019775742495511015, "lm_loss": 0.01422119140625, "loss": 0.0086, "step": 1652, "total_loss": 0.01422119140625 }, { "epoch": 0.68, "learning_rate": 0.00019775471936029623, "lm_loss": 0.001617431640625, "loss": 0.0077, "step": 1653, "total_loss": 0.001617431640625 }, { "epoch": 0.68, "learning_rate": 0.00019775201215289006, "lm_loss": 0.0089111328125, "loss": 0.009, "step": 1654, "total_loss": 0.0089111328125 }, { "epoch": 0.68, "learning_rate": 0.00019774930333293632, "lm_loss": 0.0045166015625, "loss": 0.0073, "step": 1655, "total_loss": 0.0045166015625 }, { "epoch": 0.68, "learning_rate": 0.00019774659290047963, "lm_loss": 0.005889892578125, "loss": 0.0081, "step": 1656, "total_loss": 0.005889892578125 }, { "epoch": 0.68, "learning_rate": 0.00019774388085556474, "lm_loss": 0.00921630859375, "loss": 0.0097, "step": 1657, "total_loss": 0.00921630859375 }, { "epoch": 0.68, "learning_rate": 0.00019774116719823638, "lm_loss": 0.00537109375, "loss": 0.0082, "step": 1658, "total_loss": 0.00537109375 }, { "epoch": 0.68, "learning_rate": 0.00019773845192853933, "lm_loss": 0.013427734375, "loss": 0.0096, "step": 1659, "total_loss": 0.013427734375 }, { "epoch": 0.68, "learning_rate": 0.00019773573504651832, "lm_loss": 0.005706787109375, "loss": 0.0096, "step": 1660, "total_loss": 0.005706787109375 }, { "epoch": 0.68, "learning_rate": 0.00019773301655221824, "lm_loss": 0.0147705078125, "loss": 0.0098, "step": 1661, "total_loss": 0.0147705078125 }, { "epoch": 0.68, "learning_rate": 0.00019773029644568394, "lm_loss": 0.0169677734375, "loss": 0.0112, "step": 1662, "total_loss": 0.0169677734375 }, { "epoch": 0.68, "learning_rate": 0.00019772757472696022, "lm_loss": 0.010009765625, "loss": 0.0088, "step": 1663, "total_loss": 0.010009765625 }, { "epoch": 0.68, "learning_rate": 0.00019772485139609203, "lm_loss": 0.00689697265625, "loss": 0.009, "step": 1664, "total_loss": 0.00689697265625 }, { "epoch": 0.68, "learning_rate": 0.0001977221264531243, "lm_loss": 0.0081787109375, "loss": 0.0081, "step": 1665, "total_loss": 0.0081787109375 }, { "epoch": 0.68, "learning_rate": 0.00019771939989810195, "lm_loss": 0.00579833984375, "loss": 0.0078, "step": 1666, "total_loss": 0.00579833984375 }, { "epoch": 0.68, "learning_rate": 0.00019771667173106998, "lm_loss": 0.00787353515625, "loss": 0.0066, "step": 1667, "total_loss": 0.00787353515625 }, { "epoch": 0.68, "learning_rate": 0.0001977139419520734, "lm_loss": 0.01123046875, "loss": 0.0078, "step": 1668, "total_loss": 0.01123046875 }, { "epoch": 0.68, "learning_rate": 0.0001977112105611572, "lm_loss": 0.00634765625, "loss": 0.0072, "step": 1669, "total_loss": 0.00634765625 }, { "epoch": 0.68, "learning_rate": 0.0001977084775583665, "lm_loss": 0.010009765625, "loss": 0.0083, "step": 1670, "total_loss": 0.010009765625 }, { "epoch": 0.68, "learning_rate": 0.00019770574294374632, "lm_loss": 0.00531005859375, "loss": 0.0088, "step": 1671, "total_loss": 0.00531005859375 }, { "epoch": 0.68, "learning_rate": 0.00019770300671734185, "lm_loss": 0.0211181640625, "loss": 0.0086, "step": 1672, "total_loss": 0.0211181640625 }, { "epoch": 0.68, "learning_rate": 0.00019770026887919816, "lm_loss": 0.0216064453125, "loss": 0.0118, "step": 1673, "total_loss": 0.0216064453125 }, { "epoch": 0.68, "learning_rate": 0.00019769752942936042, "lm_loss": 0.0115966796875, "loss": 0.0113, "step": 1674, "total_loss": 0.0115966796875 }, { "epoch": 0.68, "learning_rate": 0.00019769478836787385, "lm_loss": 0.01043701171875, "loss": 0.0084, "step": 1675, "total_loss": 0.01043701171875 }, { "epoch": 0.69, "learning_rate": 0.00019769204569478367, "lm_loss": 0.0052490234375, "loss": 0.0096, "step": 1676, "total_loss": 0.0052490234375 }, { "epoch": 0.69, "learning_rate": 0.00019768930141013507, "lm_loss": 0.0126953125, "loss": 0.0099, "step": 1677, "total_loss": 0.0126953125 }, { "epoch": 0.69, "learning_rate": 0.0001976865555139734, "lm_loss": 0.0068359375, "loss": 0.0076, "step": 1678, "total_loss": 0.0068359375 }, { "epoch": 0.69, "learning_rate": 0.0001976838080063439, "lm_loss": 0.007598876953125, "loss": 0.0098, "step": 1679, "total_loss": 0.007598876953125 }, { "epoch": 0.69, "learning_rate": 0.0001976810588872919, "lm_loss": 0.005950927734375, "loss": 0.0085, "step": 1680, "total_loss": 0.005950927734375 }, { "epoch": 0.69, "learning_rate": 0.00019767830815686278, "lm_loss": 0.0086669921875, "loss": 0.0065, "step": 1681, "total_loss": 0.0086669921875 }, { "epoch": 0.69, "learning_rate": 0.00019767555581510186, "lm_loss": 0.01416015625, "loss": 0.0109, "step": 1682, "total_loss": 0.01416015625 }, { "epoch": 0.69, "learning_rate": 0.00019767280186205462, "lm_loss": 0.0137939453125, "loss": 0.0088, "step": 1683, "total_loss": 0.0137939453125 }, { "epoch": 0.69, "learning_rate": 0.0001976700462977664, "lm_loss": 0.00787353515625, "loss": 0.0088, "step": 1684, "total_loss": 0.00787353515625 }, { "epoch": 0.69, "learning_rate": 0.00019766728912228273, "lm_loss": 0.0103759765625, "loss": 0.0072, "step": 1685, "total_loss": 0.0103759765625 }, { "epoch": 0.69, "learning_rate": 0.0001976645303356491, "lm_loss": 0.00140380859375, "loss": 0.0112, "step": 1686, "total_loss": 0.00140380859375 }, { "epoch": 0.69, "learning_rate": 0.000197661769937911, "lm_loss": 0.00848388671875, "loss": 0.0067, "step": 1687, "total_loss": 0.00848388671875 }, { "epoch": 0.69, "learning_rate": 0.0001976590079291139, "lm_loss": 0.00482177734375, "loss": 0.0087, "step": 1688, "total_loss": 0.00482177734375 }, { "epoch": 0.69, "learning_rate": 0.00019765624430930344, "lm_loss": 0.004058837890625, "loss": 0.0075, "step": 1689, "total_loss": 0.004058837890625 }, { "epoch": 0.69, "learning_rate": 0.0001976534790785252, "lm_loss": 0.00677490234375, "loss": 0.0082, "step": 1690, "total_loss": 0.00677490234375 }, { "epoch": 0.69, "learning_rate": 0.00019765071223682478, "lm_loss": 0.005950927734375, "loss": 0.0069, "step": 1691, "total_loss": 0.005950927734375 }, { "epoch": 0.69, "learning_rate": 0.00019764794378424782, "lm_loss": 0.00994873046875, "loss": 0.0087, "step": 1692, "total_loss": 0.00994873046875 }, { "epoch": 0.69, "learning_rate": 0.00019764517372084002, "lm_loss": 0.00872802734375, "loss": 0.0068, "step": 1693, "total_loss": 0.00872802734375 }, { "epoch": 0.69, "learning_rate": 0.00019764240204664702, "lm_loss": 0.00970458984375, "loss": 0.0086, "step": 1694, "total_loss": 0.00970458984375 }, { "epoch": 0.69, "learning_rate": 0.0001976396287617146, "lm_loss": 0.003814697265625, "loss": 0.0089, "step": 1695, "total_loss": 0.003814697265625 }, { "epoch": 0.69, "learning_rate": 0.0001976368538660885, "lm_loss": 0.01226806640625, "loss": 0.01, "step": 1696, "total_loss": 0.01226806640625 }, { "epoch": 0.69, "learning_rate": 0.00019763407735981444, "lm_loss": 0.005859375, "loss": 0.0082, "step": 1697, "total_loss": 0.005859375 }, { "epoch": 0.69, "learning_rate": 0.0001976312992429383, "lm_loss": 0.00830078125, "loss": 0.0103, "step": 1698, "total_loss": 0.00830078125 }, { "epoch": 0.69, "learning_rate": 0.00019762851951550582, "lm_loss": 0.0057373046875, "loss": 0.0078, "step": 1699, "total_loss": 0.0057373046875 }, { "epoch": 0.7, "learning_rate": 0.00019762573817756293, "lm_loss": 0.0115966796875, "loss": 0.0094, "step": 1700, "total_loss": 0.0115966796875 }, { "epoch": 0.7, "eval_lm_loss": 0.01012678537517786, "eval_loss": 0.01052780169993639, "eval_runtime": 44.5508, "eval_samples_per_second": 22.446, "eval_steps_per_second": 0.202, "eval_total_loss": 0.01012678537517786, "lm_loss": 0.00080108642578125, "step": 1700, "total_loss": 0.00080108642578125 }, { "epoch": 0.7, "learning_rate": 0.00019762295522915549, "lm_loss": 0.00909423828125, "loss": 0.0085, "step": 1701, "total_loss": 0.00909423828125 }, { "epoch": 0.7, "learning_rate": 0.00019762017067032943, "lm_loss": 0.00396728515625, "loss": 0.0063, "step": 1702, "total_loss": 0.00396728515625 }, { "epoch": 0.7, "learning_rate": 0.00019761738450113062, "lm_loss": 0.0118408203125, "loss": 0.0082, "step": 1703, "total_loss": 0.0118408203125 }, { "epoch": 0.7, "learning_rate": 0.00019761459672160508, "lm_loss": 0.006439208984375, "loss": 0.0086, "step": 1704, "total_loss": 0.006439208984375 }, { "epoch": 0.7, "learning_rate": 0.0001976118073317988, "lm_loss": 0.009033203125, "loss": 0.0085, "step": 1705, "total_loss": 0.009033203125 }, { "epoch": 0.7, "learning_rate": 0.00019760901633175774, "lm_loss": 0.0115966796875, "loss": 0.0092, "step": 1706, "total_loss": 0.0115966796875 }, { "epoch": 0.7, "learning_rate": 0.000197606223721528, "lm_loss": 0.00982666015625, "loss": 0.0074, "step": 1707, "total_loss": 0.00982666015625 }, { "epoch": 0.7, "learning_rate": 0.00019760342950115561, "lm_loss": 0.01373291015625, "loss": 0.0092, "step": 1708, "total_loss": 0.01373291015625 }, { "epoch": 0.7, "learning_rate": 0.00019760063367068667, "lm_loss": 0.0057373046875, "loss": 0.0078, "step": 1709, "total_loss": 0.0057373046875 }, { "epoch": 0.7, "learning_rate": 0.00019759783623016732, "lm_loss": 0.002593994140625, "loss": 0.0061, "step": 1710, "total_loss": 0.002593994140625 }, { "epoch": 0.7, "learning_rate": 0.0001975950371796437, "lm_loss": 0.00921630859375, "loss": 0.0084, "step": 1711, "total_loss": 0.00921630859375 }, { "epoch": 0.7, "learning_rate": 0.000197592236519162, "lm_loss": 0.0120849609375, "loss": 0.0089, "step": 1712, "total_loss": 0.0120849609375 }, { "epoch": 0.7, "learning_rate": 0.0001975894342487684, "lm_loss": 0.00518798828125, "loss": 0.0067, "step": 1713, "total_loss": 0.00518798828125 }, { "epoch": 0.7, "learning_rate": 0.00019758663036850908, "lm_loss": 0.004974365234375, "loss": 0.0085, "step": 1714, "total_loss": 0.004974365234375 }, { "epoch": 0.7, "learning_rate": 0.00019758382487843038, "lm_loss": 0.0096435546875, "loss": 0.0071, "step": 1715, "total_loss": 0.0096435546875 }, { "epoch": 0.7, "learning_rate": 0.00019758101777857853, "lm_loss": 0.012939453125, "loss": 0.009, "step": 1716, "total_loss": 0.012939453125 }, { "epoch": 0.7, "learning_rate": 0.00019757820906899986, "lm_loss": 0.01116943359375, "loss": 0.0102, "step": 1717, "total_loss": 0.01116943359375 }, { "epoch": 0.7, "learning_rate": 0.00019757539874974068, "lm_loss": 0.00714111328125, "loss": 0.0094, "step": 1718, "total_loss": 0.00714111328125 }, { "epoch": 0.7, "learning_rate": 0.0001975725868208474, "lm_loss": 0.00537109375, "loss": 0.0086, "step": 1719, "total_loss": 0.00537109375 }, { "epoch": 0.7, "learning_rate": 0.0001975697732823663, "lm_loss": 0.01220703125, "loss": 0.01, "step": 1720, "total_loss": 0.01220703125 }, { "epoch": 0.7, "learning_rate": 0.0001975669581343439, "lm_loss": 0.00360107421875, "loss": 0.0091, "step": 1721, "total_loss": 0.00360107421875 }, { "epoch": 0.7, "learning_rate": 0.00019756414137682665, "lm_loss": 0.010986328125, "loss": 0.0103, "step": 1722, "total_loss": 0.010986328125 }, { "epoch": 0.7, "learning_rate": 0.0001975613230098609, "lm_loss": 0.00732421875, "loss": 0.01, "step": 1723, "total_loss": 0.00732421875 }, { "epoch": 0.7, "learning_rate": 0.00019755850303349322, "lm_loss": 0.021728515625, "loss": 0.0082, "step": 1724, "total_loss": 0.021728515625 }, { "epoch": 0.71, "learning_rate": 0.00019755568144777015, "lm_loss": 0.006683349609375, "loss": 0.0078, "step": 1725, "total_loss": 0.006683349609375 }, { "epoch": 0.71, "learning_rate": 0.00019755285825273817, "lm_loss": 0.0087890625, "loss": 0.007, "step": 1726, "total_loss": 0.0087890625 }, { "epoch": 0.71, "learning_rate": 0.0001975500334484439, "lm_loss": 0.01287841796875, "loss": 0.0091, "step": 1727, "total_loss": 0.01287841796875 }, { "epoch": 0.71, "learning_rate": 0.0001975472070349339, "lm_loss": 0.004241943359375, "loss": 0.0067, "step": 1728, "total_loss": 0.004241943359375 }, { "epoch": 0.71, "learning_rate": 0.00019754437901225487, "lm_loss": 0.0040283203125, "loss": 0.0083, "step": 1729, "total_loss": 0.0040283203125 }, { "epoch": 0.71, "learning_rate": 0.0001975415493804534, "lm_loss": 0.0059814453125, "loss": 0.0085, "step": 1730, "total_loss": 0.0059814453125 }, { "epoch": 0.71, "learning_rate": 0.00019753871813957616, "lm_loss": 0.007720947265625, "loss": 0.0094, "step": 1731, "total_loss": 0.007720947265625 }, { "epoch": 0.71, "learning_rate": 0.0001975358852896699, "lm_loss": 0.007476806640625, "loss": 0.0077, "step": 1732, "total_loss": 0.007476806640625 }, { "epoch": 0.71, "learning_rate": 0.00019753305083078132, "lm_loss": 0.01116943359375, "loss": 0.0086, "step": 1733, "total_loss": 0.01116943359375 }, { "epoch": 0.71, "learning_rate": 0.0001975302147629572, "lm_loss": 0.0064697265625, "loss": 0.0071, "step": 1734, "total_loss": 0.0064697265625 }, { "epoch": 0.71, "learning_rate": 0.0001975273770862443, "lm_loss": 0.01129150390625, "loss": 0.0108, "step": 1735, "total_loss": 0.01129150390625 }, { "epoch": 0.71, "learning_rate": 0.00019752453780068945, "lm_loss": 0.0069580078125, "loss": 0.0093, "step": 1736, "total_loss": 0.0069580078125 }, { "epoch": 0.71, "learning_rate": 0.00019752169690633945, "lm_loss": 0.005889892578125, "loss": 0.0062, "step": 1737, "total_loss": 0.005889892578125 }, { "epoch": 0.71, "learning_rate": 0.00019751885440324123, "lm_loss": 0.0133056640625, "loss": 0.0099, "step": 1738, "total_loss": 0.0133056640625 }, { "epoch": 0.71, "learning_rate": 0.00019751601029144162, "lm_loss": 0.004180908203125, "loss": 0.0095, "step": 1739, "total_loss": 0.004180908203125 }, { "epoch": 0.71, "learning_rate": 0.00019751316457098755, "lm_loss": 0.0089111328125, "loss": 0.0108, "step": 1740, "total_loss": 0.0089111328125 }, { "epoch": 0.71, "learning_rate": 0.00019751031724192603, "lm_loss": 0.005218505859375, "loss": 0.0088, "step": 1741, "total_loss": 0.005218505859375 }, { "epoch": 0.71, "learning_rate": 0.00019750746830430395, "lm_loss": 0.006378173828125, "loss": 0.0081, "step": 1742, "total_loss": 0.006378173828125 }, { "epoch": 0.71, "learning_rate": 0.00019750461775816833, "lm_loss": 0.01025390625, "loss": 0.0077, "step": 1743, "total_loss": 0.01025390625 }, { "epoch": 0.71, "learning_rate": 0.0001975017656035662, "lm_loss": 0.005401611328125, "loss": 0.0099, "step": 1744, "total_loss": 0.005401611328125 }, { "epoch": 0.71, "learning_rate": 0.0001974989118405446, "lm_loss": 0.006195068359375, "loss": 0.0085, "step": 1745, "total_loss": 0.006195068359375 }, { "epoch": 0.71, "learning_rate": 0.00019749605646915064, "lm_loss": 0.0034332275390625, "loss": 0.0069, "step": 1746, "total_loss": 0.0034332275390625 }, { "epoch": 0.71, "learning_rate": 0.0001974931994894314, "lm_loss": 0.006591796875, "loss": 0.0079, "step": 1747, "total_loss": 0.006591796875 }, { "epoch": 0.71, "learning_rate": 0.00019749034090143396, "lm_loss": 0.0050048828125, "loss": 0.0069, "step": 1748, "total_loss": 0.0050048828125 }, { "epoch": 0.72, "learning_rate": 0.00019748748070520556, "lm_loss": 0.005523681640625, "loss": 0.0078, "step": 1749, "total_loss": 0.005523681640625 }, { "epoch": 0.72, "learning_rate": 0.00019748461890079332, "lm_loss": 0.01513671875, "loss": 0.01, "step": 1750, "total_loss": 0.01513671875 }, { "epoch": 0.72, "learning_rate": 0.0001974817554882445, "lm_loss": 0.00982666015625, "loss": 0.0101, "step": 1751, "total_loss": 0.00982666015625 }, { "epoch": 0.72, "learning_rate": 0.0001974788904676063, "lm_loss": 0.01092529296875, "loss": 0.0077, "step": 1752, "total_loss": 0.01092529296875 }, { "epoch": 0.72, "learning_rate": 0.00019747602383892598, "lm_loss": 0.01007080078125, "loss": 0.0103, "step": 1753, "total_loss": 0.01007080078125 }, { "epoch": 0.72, "learning_rate": 0.00019747315560225088, "lm_loss": 0.0018768310546875, "loss": 0.0085, "step": 1754, "total_loss": 0.0018768310546875 }, { "epoch": 0.72, "learning_rate": 0.00019747028575762824, "lm_loss": 0.0084228515625, "loss": 0.0096, "step": 1755, "total_loss": 0.0084228515625 }, { "epoch": 0.72, "learning_rate": 0.00019746741430510544, "lm_loss": 0.0106201171875, "loss": 0.009, "step": 1756, "total_loss": 0.0106201171875 }, { "epoch": 0.72, "learning_rate": 0.0001974645412447299, "lm_loss": 0.00396728515625, "loss": 0.0101, "step": 1757, "total_loss": 0.00396728515625 }, { "epoch": 0.72, "learning_rate": 0.0001974616665765489, "lm_loss": 0.0106201171875, "loss": 0.0072, "step": 1758, "total_loss": 0.0106201171875 }, { "epoch": 0.72, "learning_rate": 0.00019745879030060994, "lm_loss": 0.007049560546875, "loss": 0.0086, "step": 1759, "total_loss": 0.007049560546875 }, { "epoch": 0.72, "learning_rate": 0.00019745591241696045, "lm_loss": 0.005462646484375, "loss": 0.0079, "step": 1760, "total_loss": 0.005462646484375 }, { "epoch": 0.72, "learning_rate": 0.0001974530329256479, "lm_loss": 0.0091552734375, "loss": 0.0095, "step": 1761, "total_loss": 0.0091552734375 }, { "epoch": 0.72, "learning_rate": 0.00019745015182671985, "lm_loss": 0.0146484375, "loss": 0.01, "step": 1762, "total_loss": 0.0146484375 }, { "epoch": 0.72, "learning_rate": 0.00019744726912022373, "lm_loss": 0.003570556640625, "loss": 0.0084, "step": 1763, "total_loss": 0.003570556640625 }, { "epoch": 0.72, "learning_rate": 0.00019744438480620712, "lm_loss": 0.0098876953125, "loss": 0.0099, "step": 1764, "total_loss": 0.0098876953125 }, { "epoch": 0.72, "learning_rate": 0.00019744149888471766, "lm_loss": 0.01031494140625, "loss": 0.0096, "step": 1765, "total_loss": 0.01031494140625 }, { "epoch": 0.72, "learning_rate": 0.0001974386113558029, "lm_loss": 0.010498046875, "loss": 0.0086, "step": 1766, "total_loss": 0.010498046875 }, { "epoch": 0.72, "learning_rate": 0.0001974357222195105, "lm_loss": 0.01385498046875, "loss": 0.0087, "step": 1767, "total_loss": 0.01385498046875 }, { "epoch": 0.72, "learning_rate": 0.0001974328314758881, "lm_loss": 0.00616455078125, "loss": 0.0068, "step": 1768, "total_loss": 0.00616455078125 }, { "epoch": 0.72, "learning_rate": 0.00019742993912498337, "lm_loss": 0.00439453125, "loss": 0.0076, "step": 1769, "total_loss": 0.00439453125 }, { "epoch": 0.72, "learning_rate": 0.0001974270451668441, "lm_loss": 0.006378173828125, "loss": 0.0076, "step": 1770, "total_loss": 0.006378173828125 }, { "epoch": 0.72, "learning_rate": 0.00019742414960151794, "lm_loss": 0.00897216796875, "loss": 0.0087, "step": 1771, "total_loss": 0.00897216796875 }, { "epoch": 0.72, "learning_rate": 0.0001974212524290527, "lm_loss": 0.00677490234375, "loss": 0.0073, "step": 1772, "total_loss": 0.00677490234375 }, { "epoch": 0.72, "learning_rate": 0.0001974183536494962, "lm_loss": 0.00634765625, "loss": 0.0082, "step": 1773, "total_loss": 0.00634765625 }, { "epoch": 0.73, "learning_rate": 0.00019741545326289618, "lm_loss": 0.010498046875, "loss": 0.0078, "step": 1774, "total_loss": 0.010498046875 }, { "epoch": 0.73, "learning_rate": 0.00019741255126930058, "lm_loss": 0.00750732421875, "loss": 0.0093, "step": 1775, "total_loss": 0.00750732421875 }, { "epoch": 0.73, "learning_rate": 0.0001974096476687572, "lm_loss": 0.0115966796875, "loss": 0.0101, "step": 1776, "total_loss": 0.0115966796875 }, { "epoch": 0.73, "learning_rate": 0.00019740674246131396, "lm_loss": 0.0078125, "loss": 0.0087, "step": 1777, "total_loss": 0.0078125 }, { "epoch": 0.73, "learning_rate": 0.0001974038356470188, "lm_loss": 0.004638671875, "loss": 0.0068, "step": 1778, "total_loss": 0.004638671875 }, { "epoch": 0.73, "learning_rate": 0.00019740092722591966, "lm_loss": 0.0120849609375, "loss": 0.0088, "step": 1779, "total_loss": 0.0120849609375 }, { "epoch": 0.73, "learning_rate": 0.0001973980171980645, "lm_loss": 0.0137939453125, "loss": 0.0096, "step": 1780, "total_loss": 0.0137939453125 }, { "epoch": 0.73, "learning_rate": 0.00019739510556350136, "lm_loss": 0.007415771484375, "loss": 0.0069, "step": 1781, "total_loss": 0.007415771484375 }, { "epoch": 0.73, "learning_rate": 0.00019739219232227826, "lm_loss": 0.01080322265625, "loss": 0.008, "step": 1782, "total_loss": 0.01080322265625 }, { "epoch": 0.73, "learning_rate": 0.00019738927747444323, "lm_loss": 0.01226806640625, "loss": 0.0082, "step": 1783, "total_loss": 0.01226806640625 }, { "epoch": 0.73, "learning_rate": 0.00019738636102004442, "lm_loss": 0.00506591796875, "loss": 0.0097, "step": 1784, "total_loss": 0.00506591796875 }, { "epoch": 0.73, "learning_rate": 0.00019738344295912988, "lm_loss": 0.00653076171875, "loss": 0.0089, "step": 1785, "total_loss": 0.00653076171875 }, { "epoch": 0.73, "learning_rate": 0.00019738052329174775, "lm_loss": 0.00732421875, "loss": 0.01, "step": 1786, "total_loss": 0.00732421875 }, { "epoch": 0.73, "learning_rate": 0.00019737760201794623, "lm_loss": 0.004119873046875, "loss": 0.0065, "step": 1787, "total_loss": 0.004119873046875 }, { "epoch": 0.73, "learning_rate": 0.00019737467913777347, "lm_loss": 0.007293701171875, "loss": 0.007, "step": 1788, "total_loss": 0.007293701171875 }, { "epoch": 0.73, "learning_rate": 0.0001973717546512777, "lm_loss": 0.007049560546875, "loss": 0.0077, "step": 1789, "total_loss": 0.007049560546875 }, { "epoch": 0.73, "learning_rate": 0.0001973688285585072, "lm_loss": 0.006927490234375, "loss": 0.0074, "step": 1790, "total_loss": 0.006927490234375 }, { "epoch": 0.73, "learning_rate": 0.00019736590085951016, "lm_loss": 0.01263427734375, "loss": 0.0088, "step": 1791, "total_loss": 0.01263427734375 }, { "epoch": 0.73, "learning_rate": 0.00019736297155433495, "lm_loss": 0.0078125, "loss": 0.0086, "step": 1792, "total_loss": 0.0078125 }, { "epoch": 0.73, "learning_rate": 0.00019736004064302987, "lm_loss": 0.004241943359375, "loss": 0.0075, "step": 1793, "total_loss": 0.004241943359375 }, { "epoch": 0.73, "learning_rate": 0.00019735710812564326, "lm_loss": 0.01397705078125, "loss": 0.0086, "step": 1794, "total_loss": 0.01397705078125 }, { "epoch": 0.73, "learning_rate": 0.00019735417400222353, "lm_loss": 0.0035858154296875, "loss": 0.0096, "step": 1795, "total_loss": 0.0035858154296875 }, { "epoch": 0.73, "learning_rate": 0.000197351238272819, "lm_loss": 0.006988525390625, "loss": 0.0074, "step": 1796, "total_loss": 0.006988525390625 }, { "epoch": 0.73, "learning_rate": 0.0001973483009374782, "lm_loss": 0.011962890625, "loss": 0.0093, "step": 1797, "total_loss": 0.011962890625 }, { "epoch": 0.74, "learning_rate": 0.00019734536199624955, "lm_loss": 0.01190185546875, "loss": 0.0091, "step": 1798, "total_loss": 0.01190185546875 }, { "epoch": 0.74, "learning_rate": 0.00019734242144918147, "lm_loss": 0.0025634765625, "loss": 0.0086, "step": 1799, "total_loss": 0.0025634765625 }, { "epoch": 0.74, "learning_rate": 0.00019733947929632254, "lm_loss": 0.0185546875, "loss": 0.0096, "step": 1800, "total_loss": 0.0185546875 }, { "epoch": 0.74, "eval_lm_loss": 0.010203767567873001, "eval_loss": 0.010550899431109428, "eval_runtime": 43.9434, "eval_samples_per_second": 22.757, "eval_steps_per_second": 0.205, "eval_total_loss": 0.010203767567873001, "lm_loss": 0.00128936767578125, "step": 1800, "total_loss": 0.00128936767578125 }, { "epoch": 0.74, "learning_rate": 0.00019733653553772126, "lm_loss": 0.010009765625, "loss": 0.0092, "step": 1801, "total_loss": 0.010009765625 }, { "epoch": 0.74, "learning_rate": 0.00019733359017342622, "lm_loss": 0.01483154296875, "loss": 0.0092, "step": 1802, "total_loss": 0.01483154296875 }, { "epoch": 0.74, "learning_rate": 0.000197330643203486, "lm_loss": 0.00946044921875, "loss": 0.0069, "step": 1803, "total_loss": 0.00946044921875 }, { "epoch": 0.74, "learning_rate": 0.0001973276946279492, "lm_loss": 0.01007080078125, "loss": 0.0091, "step": 1804, "total_loss": 0.01007080078125 }, { "epoch": 0.74, "learning_rate": 0.00019732474444686447, "lm_loss": 0.008056640625, "loss": 0.0083, "step": 1805, "total_loss": 0.008056640625 }, { "epoch": 0.74, "learning_rate": 0.00019732179266028046, "lm_loss": 0.006744384765625, "loss": 0.0083, "step": 1806, "total_loss": 0.006744384765625 }, { "epoch": 0.74, "learning_rate": 0.0001973188392682459, "lm_loss": 0.00787353515625, "loss": 0.0091, "step": 1807, "total_loss": 0.00787353515625 }, { "epoch": 0.74, "learning_rate": 0.00019731588427080948, "lm_loss": 0.00634765625, "loss": 0.0059, "step": 1808, "total_loss": 0.00634765625 }, { "epoch": 0.74, "learning_rate": 0.00019731292766801995, "lm_loss": 0.005889892578125, "loss": 0.006, "step": 1809, "total_loss": 0.005889892578125 }, { "epoch": 0.74, "learning_rate": 0.00019730996945992605, "lm_loss": 0.00677490234375, "loss": 0.0084, "step": 1810, "total_loss": 0.00677490234375 }, { "epoch": 0.74, "learning_rate": 0.00019730700964657664, "lm_loss": 0.00823974609375, "loss": 0.009, "step": 1811, "total_loss": 0.00823974609375 }, { "epoch": 0.74, "learning_rate": 0.00019730404822802054, "lm_loss": 0.0086669921875, "loss": 0.0088, "step": 1812, "total_loss": 0.0086669921875 }, { "epoch": 0.74, "learning_rate": 0.0001973010852043066, "lm_loss": 0.003082275390625, "loss": 0.0088, "step": 1813, "total_loss": 0.003082275390625 }, { "epoch": 0.74, "learning_rate": 0.00019729812057548367, "lm_loss": 0.006103515625, "loss": 0.008, "step": 1814, "total_loss": 0.006103515625 }, { "epoch": 0.74, "learning_rate": 0.00019729515434160064, "lm_loss": 0.004791259765625, "loss": 0.0088, "step": 1815, "total_loss": 0.004791259765625 }, { "epoch": 0.74, "learning_rate": 0.00019729218650270652, "lm_loss": 0.006378173828125, "loss": 0.0075, "step": 1816, "total_loss": 0.006378173828125 }, { "epoch": 0.74, "learning_rate": 0.00019728921705885018, "lm_loss": 0.007537841796875, "loss": 0.0079, "step": 1817, "total_loss": 0.007537841796875 }, { "epoch": 0.74, "learning_rate": 0.00019728624601008066, "lm_loss": 0.005340576171875, "loss": 0.0082, "step": 1818, "total_loss": 0.005340576171875 }, { "epoch": 0.74, "learning_rate": 0.00019728327335644694, "lm_loss": 0.0135498046875, "loss": 0.0086, "step": 1819, "total_loss": 0.0135498046875 }, { "epoch": 0.74, "learning_rate": 0.0001972802990979981, "lm_loss": 0.006134033203125, "loss": 0.0094, "step": 1820, "total_loss": 0.006134033203125 }, { "epoch": 0.74, "learning_rate": 0.00019727732323478314, "lm_loss": 0.01025390625, "loss": 0.0093, "step": 1821, "total_loss": 0.01025390625 }, { "epoch": 0.74, "learning_rate": 0.0001972743457668512, "lm_loss": 0.00787353515625, "loss": 0.0088, "step": 1822, "total_loss": 0.00787353515625 }, { "epoch": 0.75, "learning_rate": 0.00019727136669425144, "lm_loss": 0.007171630859375, "loss": 0.009, "step": 1823, "total_loss": 0.007171630859375 }, { "epoch": 0.75, "learning_rate": 0.00019726838601703288, "lm_loss": 0.01239013671875, "loss": 0.0079, "step": 1824, "total_loss": 0.01239013671875 }, { "epoch": 0.75, "learning_rate": 0.0001972654037352448, "lm_loss": 0.005523681640625, "loss": 0.0083, "step": 1825, "total_loss": 0.005523681640625 }, { "epoch": 0.75, "learning_rate": 0.00019726241984893635, "lm_loss": 0.01287841796875, "loss": 0.0104, "step": 1826, "total_loss": 0.01287841796875 }, { "epoch": 0.75, "learning_rate": 0.00019725943435815676, "lm_loss": 0.00970458984375, "loss": 0.0076, "step": 1827, "total_loss": 0.00970458984375 }, { "epoch": 0.75, "learning_rate": 0.00019725644726295528, "lm_loss": 0.01220703125, "loss": 0.0084, "step": 1828, "total_loss": 0.01220703125 }, { "epoch": 0.75, "learning_rate": 0.00019725345856338119, "lm_loss": 0.0098876953125, "loss": 0.0091, "step": 1829, "total_loss": 0.0098876953125 }, { "epoch": 0.75, "learning_rate": 0.00019725046825948376, "lm_loss": 0.0064697265625, "loss": 0.0078, "step": 1830, "total_loss": 0.0064697265625 }, { "epoch": 0.75, "learning_rate": 0.00019724747635131235, "lm_loss": 0.003875732421875, "loss": 0.0087, "step": 1831, "total_loss": 0.003875732421875 }, { "epoch": 0.75, "learning_rate": 0.00019724448283891635, "lm_loss": 0.01031494140625, "loss": 0.0078, "step": 1832, "total_loss": 0.01031494140625 }, { "epoch": 0.75, "learning_rate": 0.00019724148772234509, "lm_loss": 0.006256103515625, "loss": 0.0084, "step": 1833, "total_loss": 0.006256103515625 }, { "epoch": 0.75, "learning_rate": 0.00019723849100164795, "lm_loss": 0.0050048828125, "loss": 0.0077, "step": 1834, "total_loss": 0.0050048828125 }, { "epoch": 0.75, "learning_rate": 0.00019723549267687445, "lm_loss": 0.006591796875, "loss": 0.0088, "step": 1835, "total_loss": 0.006591796875 }, { "epoch": 0.75, "learning_rate": 0.00019723249274807402, "lm_loss": 0.00421142578125, "loss": 0.0091, "step": 1836, "total_loss": 0.00421142578125 }, { "epoch": 0.75, "learning_rate": 0.0001972294912152961, "lm_loss": 0.00946044921875, "loss": 0.0086, "step": 1837, "total_loss": 0.00946044921875 }, { "epoch": 0.75, "learning_rate": 0.00019722648807859026, "lm_loss": 0.006500244140625, "loss": 0.0072, "step": 1838, "total_loss": 0.006500244140625 }, { "epoch": 0.75, "learning_rate": 0.00019722348333800602, "lm_loss": 0.006439208984375, "loss": 0.0079, "step": 1839, "total_loss": 0.006439208984375 }, { "epoch": 0.75, "learning_rate": 0.00019722047699359294, "lm_loss": 0.004730224609375, "loss": 0.0082, "step": 1840, "total_loss": 0.004730224609375 }, { "epoch": 0.75, "learning_rate": 0.00019721746904540063, "lm_loss": 0.005859375, "loss": 0.0095, "step": 1841, "total_loss": 0.005859375 }, { "epoch": 0.75, "learning_rate": 0.0001972144594934787, "lm_loss": 0.0020294189453125, "loss": 0.0072, "step": 1842, "total_loss": 0.0020294189453125 }, { "epoch": 0.75, "learning_rate": 0.0001972114483378768, "lm_loss": 0.0107421875, "loss": 0.0087, "step": 1843, "total_loss": 0.0107421875 }, { "epoch": 0.75, "learning_rate": 0.00019720843557864462, "lm_loss": 0.0169677734375, "loss": 0.0087, "step": 1844, "total_loss": 0.0169677734375 }, { "epoch": 0.75, "learning_rate": 0.00019720542121583182, "lm_loss": 0.006103515625, "loss": 0.0079, "step": 1845, "total_loss": 0.006103515625 }, { "epoch": 0.75, "learning_rate": 0.00019720240524948816, "lm_loss": 0.01318359375, "loss": 0.0087, "step": 1846, "total_loss": 0.01318359375 }, { "epoch": 0.76, "learning_rate": 0.00019719938767966336, "lm_loss": 0.006683349609375, "loss": 0.0068, "step": 1847, "total_loss": 0.006683349609375 }, { "epoch": 0.76, "learning_rate": 0.0001971963685064072, "lm_loss": 0.01239013671875, "loss": 0.0094, "step": 1848, "total_loss": 0.01239013671875 }, { "epoch": 0.76, "learning_rate": 0.00019719334772976956, "lm_loss": 0.01190185546875, "loss": 0.0076, "step": 1849, "total_loss": 0.01190185546875 }, { "epoch": 0.76, "learning_rate": 0.00019719032534980016, "lm_loss": 0.00616455078125, "loss": 0.0093, "step": 1850, "total_loss": 0.00616455078125 }, { "epoch": 0.76, "learning_rate": 0.00019718730136654894, "lm_loss": 0.0128173828125, "loss": 0.0097, "step": 1851, "total_loss": 0.0128173828125 }, { "epoch": 0.76, "learning_rate": 0.00019718427578006576, "lm_loss": 0.00347900390625, "loss": 0.007, "step": 1852, "total_loss": 0.00347900390625 }, { "epoch": 0.76, "learning_rate": 0.00019718124859040054, "lm_loss": 0.0159912109375, "loss": 0.0091, "step": 1853, "total_loss": 0.0159912109375 }, { "epoch": 0.76, "learning_rate": 0.0001971782197976032, "lm_loss": 0.0032196044921875, "loss": 0.0077, "step": 1854, "total_loss": 0.0032196044921875 }, { "epoch": 0.76, "learning_rate": 0.00019717518940172367, "lm_loss": 0.00994873046875, "loss": 0.0077, "step": 1855, "total_loss": 0.00994873046875 }, { "epoch": 0.76, "learning_rate": 0.00019717215740281202, "lm_loss": 0.01434326171875, "loss": 0.0085, "step": 1856, "total_loss": 0.01434326171875 }, { "epoch": 0.76, "learning_rate": 0.00019716912380091822, "lm_loss": 0.0034637451171875, "loss": 0.0072, "step": 1857, "total_loss": 0.0034637451171875 }, { "epoch": 0.76, "learning_rate": 0.00019716608859609232, "lm_loss": 0.01129150390625, "loss": 0.007, "step": 1858, "total_loss": 0.01129150390625 }, { "epoch": 0.76, "learning_rate": 0.00019716305178838435, "lm_loss": 0.004058837890625, "loss": 0.0096, "step": 1859, "total_loss": 0.004058837890625 }, { "epoch": 0.76, "learning_rate": 0.0001971600133778445, "lm_loss": 0.01361083984375, "loss": 0.0082, "step": 1860, "total_loss": 0.01361083984375 }, { "epoch": 0.76, "learning_rate": 0.00019715697336452283, "lm_loss": 0.00836181640625, "loss": 0.0065, "step": 1861, "total_loss": 0.00836181640625 }, { "epoch": 0.76, "learning_rate": 0.0001971539317484695, "lm_loss": 0.01068115234375, "loss": 0.0077, "step": 1862, "total_loss": 0.01068115234375 }, { "epoch": 0.76, "learning_rate": 0.00019715088852973467, "lm_loss": 0.01116943359375, "loss": 0.0073, "step": 1863, "total_loss": 0.01116943359375 }, { "epoch": 0.76, "learning_rate": 0.00019714784370836855, "lm_loss": 0.0033721923828125, "loss": 0.0076, "step": 1864, "total_loss": 0.0033721923828125 }, { "epoch": 0.76, "learning_rate": 0.00019714479728442138, "lm_loss": 0.00714111328125, "loss": 0.0081, "step": 1865, "total_loss": 0.00714111328125 }, { "epoch": 0.76, "learning_rate": 0.0001971417492579434, "lm_loss": 0.007232666015625, "loss": 0.0057, "step": 1866, "total_loss": 0.007232666015625 }, { "epoch": 0.76, "learning_rate": 0.00019713869962898492, "lm_loss": 0.00469970703125, "loss": 0.0094, "step": 1867, "total_loss": 0.00469970703125 }, { "epoch": 0.76, "learning_rate": 0.00019713564839759622, "lm_loss": 0.005645751953125, "loss": 0.0115, "step": 1868, "total_loss": 0.005645751953125 }, { "epoch": 0.76, "learning_rate": 0.00019713259556382765, "lm_loss": 0.005126953125, "loss": 0.0079, "step": 1869, "total_loss": 0.005126953125 }, { "epoch": 0.76, "learning_rate": 0.00019712954112772956, "lm_loss": 0.00482177734375, "loss": 0.0082, "step": 1870, "total_loss": 0.00482177734375 }, { "epoch": 0.76, "learning_rate": 0.0001971264850893523, "lm_loss": 0.007232666015625, "loss": 0.0084, "step": 1871, "total_loss": 0.007232666015625 }, { "epoch": 0.77, "learning_rate": 0.00019712342744874635, "lm_loss": 0.0076904296875, "loss": 0.0078, "step": 1872, "total_loss": 0.0076904296875 }, { "epoch": 0.77, "learning_rate": 0.00019712036820596215, "lm_loss": 0.00579833984375, "loss": 0.0074, "step": 1873, "total_loss": 0.00579833984375 }, { "epoch": 0.77, "learning_rate": 0.00019711730736105007, "lm_loss": 0.0030670166015625, "loss": 0.0101, "step": 1874, "total_loss": 0.0030670166015625 }, { "epoch": 0.77, "learning_rate": 0.0001971142449140607, "lm_loss": 0.0115966796875, "loss": 0.0086, "step": 1875, "total_loss": 0.0115966796875 }, { "epoch": 0.77, "learning_rate": 0.00019711118086504457, "lm_loss": 0.0155029296875, "loss": 0.0077, "step": 1876, "total_loss": 0.0155029296875 }, { "epoch": 0.77, "learning_rate": 0.00019710811521405215, "lm_loss": 0.00628662109375, "loss": 0.0081, "step": 1877, "total_loss": 0.00628662109375 }, { "epoch": 0.77, "learning_rate": 0.00019710504796113405, "lm_loss": 0.008056640625, "loss": 0.0107, "step": 1878, "total_loss": 0.008056640625 }, { "epoch": 0.77, "learning_rate": 0.00019710197910634088, "lm_loss": 0.006683349609375, "loss": 0.0104, "step": 1879, "total_loss": 0.006683349609375 }, { "epoch": 0.77, "learning_rate": 0.00019709890864972322, "lm_loss": 0.0096435546875, "loss": 0.0091, "step": 1880, "total_loss": 0.0096435546875 }, { "epoch": 0.77, "learning_rate": 0.00019709583659133174, "lm_loss": 0.0184326171875, "loss": 0.0116, "step": 1881, "total_loss": 0.0184326171875 }, { "epoch": 0.77, "learning_rate": 0.0001970927629312172, "lm_loss": 0.005615234375, "loss": 0.0087, "step": 1882, "total_loss": 0.005615234375 }, { "epoch": 0.77, "learning_rate": 0.00019708968766943017, "lm_loss": 0.00640869140625, "loss": 0.0086, "step": 1883, "total_loss": 0.00640869140625 }, { "epoch": 0.77, "learning_rate": 0.00019708661080602147, "lm_loss": 0.00738525390625, "loss": 0.0083, "step": 1884, "total_loss": 0.00738525390625 }, { "epoch": 0.77, "learning_rate": 0.00019708353234104182, "lm_loss": 0.01055908203125, "loss": 0.0078, "step": 1885, "total_loss": 0.01055908203125 }, { "epoch": 0.77, "learning_rate": 0.000197080452274542, "lm_loss": 0.00860595703125, "loss": 0.0089, "step": 1886, "total_loss": 0.00860595703125 }, { "epoch": 0.77, "learning_rate": 0.00019707737060657287, "lm_loss": 0.01116943359375, "loss": 0.0086, "step": 1887, "total_loss": 0.01116943359375 }, { "epoch": 0.77, "learning_rate": 0.0001970742873371852, "lm_loss": 0.0081787109375, "loss": 0.0099, "step": 1888, "total_loss": 0.0081787109375 }, { "epoch": 0.77, "learning_rate": 0.0001970712024664299, "lm_loss": 0.003143310546875, "loss": 0.0082, "step": 1889, "total_loss": 0.003143310546875 }, { "epoch": 0.77, "learning_rate": 0.00019706811599435784, "lm_loss": 0.0057373046875, "loss": 0.0093, "step": 1890, "total_loss": 0.0057373046875 }, { "epoch": 0.77, "learning_rate": 0.00019706502792101992, "lm_loss": 0.004913330078125, "loss": 0.0072, "step": 1891, "total_loss": 0.004913330078125 }, { "epoch": 0.77, "learning_rate": 0.0001970619382464671, "lm_loss": 0.007659912109375, "loss": 0.0072, "step": 1892, "total_loss": 0.007659912109375 }, { "epoch": 0.77, "learning_rate": 0.00019705884697075035, "lm_loss": 0.00469970703125, "loss": 0.0065, "step": 1893, "total_loss": 0.00469970703125 }, { "epoch": 0.77, "learning_rate": 0.00019705575409392068, "lm_loss": 0.00616455078125, "loss": 0.0075, "step": 1894, "total_loss": 0.00616455078125 }, { "epoch": 0.77, "learning_rate": 0.00019705265961602906, "lm_loss": 0.004638671875, "loss": 0.0079, "step": 1895, "total_loss": 0.004638671875 }, { "epoch": 0.78, "learning_rate": 0.0001970495635371266, "lm_loss": 0.00274658203125, "loss": 0.0074, "step": 1896, "total_loss": 0.00274658203125 }, { "epoch": 0.78, "learning_rate": 0.00019704646585726434, "lm_loss": 0.00909423828125, "loss": 0.0086, "step": 1897, "total_loss": 0.00909423828125 }, { "epoch": 0.78, "learning_rate": 0.00019704336657649335, "lm_loss": 0.005279541015625, "loss": 0.0093, "step": 1898, "total_loss": 0.005279541015625 }, { "epoch": 0.78, "learning_rate": 0.00019704026569486484, "lm_loss": 0.01031494140625, "loss": 0.0071, "step": 1899, "total_loss": 0.01031494140625 }, { "epoch": 0.78, "learning_rate": 0.00019703716321242988, "lm_loss": 0.0022735595703125, "loss": 0.0075, "step": 1900, "total_loss": 0.0022735595703125 }, { "epoch": 0.78, "eval_lm_loss": 0.010396833531558514, "eval_loss": 0.010781650431454182, "eval_runtime": 43.9963, "eval_samples_per_second": 22.729, "eval_steps_per_second": 0.205, "eval_total_loss": 0.010396833531558514, "lm_loss": 0.0005950927734375, "step": 1900, "total_loss": 0.0005950927734375 }, { "epoch": 0.78, "learning_rate": 0.00019703405912923968, "lm_loss": 0.00750732421875, "loss": 0.0083, "step": 1901, "total_loss": 0.00750732421875 }, { "epoch": 0.78, "learning_rate": 0.0001970309534453455, "lm_loss": 0.01007080078125, "loss": 0.0086, "step": 1902, "total_loss": 0.01007080078125 }, { "epoch": 0.78, "learning_rate": 0.00019702784616079845, "lm_loss": 0.005828857421875, "loss": 0.0076, "step": 1903, "total_loss": 0.005828857421875 }, { "epoch": 0.78, "learning_rate": 0.00019702473727564992, "lm_loss": 0.0035552978515625, "loss": 0.0097, "step": 1904, "total_loss": 0.0035552978515625 }, { "epoch": 0.78, "learning_rate": 0.00019702162678995107, "lm_loss": 0.004669189453125, "loss": 0.0073, "step": 1905, "total_loss": 0.004669189453125 }, { "epoch": 0.78, "learning_rate": 0.00019701851470375334, "lm_loss": 0.01446533203125, "loss": 0.0099, "step": 1906, "total_loss": 0.01446533203125 }, { "epoch": 0.78, "learning_rate": 0.00019701540101710799, "lm_loss": 0.013427734375, "loss": 0.0089, "step": 1907, "total_loss": 0.013427734375 }, { "epoch": 0.78, "learning_rate": 0.0001970122857300664, "lm_loss": 0.007293701171875, "loss": 0.0063, "step": 1908, "total_loss": 0.007293701171875 }, { "epoch": 0.78, "learning_rate": 0.00019700916884267993, "lm_loss": 0.00616455078125, "loss": 0.0084, "step": 1909, "total_loss": 0.00616455078125 }, { "epoch": 0.78, "learning_rate": 0.00019700605035500007, "lm_loss": 0.005462646484375, "loss": 0.0065, "step": 1910, "total_loss": 0.005462646484375 }, { "epoch": 0.78, "learning_rate": 0.00019700293026707815, "lm_loss": 0.010498046875, "loss": 0.0088, "step": 1911, "total_loss": 0.010498046875 }, { "epoch": 0.78, "learning_rate": 0.00019699980857896578, "lm_loss": 0.0106201171875, "loss": 0.0093, "step": 1912, "total_loss": 0.0106201171875 }, { "epoch": 0.78, "learning_rate": 0.00019699668529071435, "lm_loss": 0.00567626953125, "loss": 0.0068, "step": 1913, "total_loss": 0.00567626953125 }, { "epoch": 0.78, "learning_rate": 0.00019699356040237545, "lm_loss": 0.00640869140625, "loss": 0.0072, "step": 1914, "total_loss": 0.00640869140625 }, { "epoch": 0.78, "learning_rate": 0.0001969904339140006, "lm_loss": 0.004486083984375, "loss": 0.0095, "step": 1915, "total_loss": 0.004486083984375 }, { "epoch": 0.78, "learning_rate": 0.00019698730582564134, "lm_loss": 0.006011962890625, "loss": 0.0076, "step": 1916, "total_loss": 0.006011962890625 }, { "epoch": 0.78, "learning_rate": 0.00019698417613734934, "lm_loss": 0.0025177001953125, "loss": 0.0087, "step": 1917, "total_loss": 0.0025177001953125 }, { "epoch": 0.78, "learning_rate": 0.00019698104484917617, "lm_loss": 0.00860595703125, "loss": 0.0079, "step": 1918, "total_loss": 0.00860595703125 }, { "epoch": 0.78, "learning_rate": 0.00019697791196117352, "lm_loss": 0.0123291015625, "loss": 0.0106, "step": 1919, "total_loss": 0.0123291015625 }, { "epoch": 0.78, "learning_rate": 0.00019697477747339305, "lm_loss": 0.018310546875, "loss": 0.0088, "step": 1920, "total_loss": 0.018310546875 }, { "epoch": 0.79, "learning_rate": 0.0001969716413858865, "lm_loss": 0.01025390625, "loss": 0.01, "step": 1921, "total_loss": 0.01025390625 }, { "epoch": 0.79, "learning_rate": 0.00019696850369870558, "lm_loss": 0.01043701171875, "loss": 0.0089, "step": 1922, "total_loss": 0.01043701171875 }, { "epoch": 0.79, "learning_rate": 0.000196965364411902, "lm_loss": 0.01177978515625, "loss": 0.0105, "step": 1923, "total_loss": 0.01177978515625 }, { "epoch": 0.79, "learning_rate": 0.00019696222352552764, "lm_loss": 0.0093994140625, "loss": 0.0073, "step": 1924, "total_loss": 0.0093994140625 }, { "epoch": 0.79, "learning_rate": 0.0001969590810396343, "lm_loss": 0.01190185546875, "loss": 0.0096, "step": 1925, "total_loss": 0.01190185546875 }, { "epoch": 0.79, "learning_rate": 0.00019695593695427376, "lm_loss": 0.0050048828125, "loss": 0.0082, "step": 1926, "total_loss": 0.0050048828125 }, { "epoch": 0.79, "learning_rate": 0.00019695279126949792, "lm_loss": 0.0115966796875, "loss": 0.01, "step": 1927, "total_loss": 0.0115966796875 }, { "epoch": 0.79, "learning_rate": 0.00019694964398535864, "lm_loss": 0.011474609375, "loss": 0.0071, "step": 1928, "total_loss": 0.011474609375 }, { "epoch": 0.79, "learning_rate": 0.0001969464951019079, "lm_loss": 0.00946044921875, "loss": 0.0086, "step": 1929, "total_loss": 0.00946044921875 }, { "epoch": 0.79, "learning_rate": 0.00019694334461919764, "lm_loss": 0.00860595703125, "loss": 0.0078, "step": 1930, "total_loss": 0.00860595703125 }, { "epoch": 0.79, "learning_rate": 0.00019694019253727976, "lm_loss": 0.0084228515625, "loss": 0.0073, "step": 1931, "total_loss": 0.0084228515625 }, { "epoch": 0.79, "learning_rate": 0.0001969370388562063, "lm_loss": 0.00787353515625, "loss": 0.0089, "step": 1932, "total_loss": 0.00787353515625 }, { "epoch": 0.79, "learning_rate": 0.0001969338835760293, "lm_loss": 0.0034637451171875, "loss": 0.0082, "step": 1933, "total_loss": 0.0034637451171875 }, { "epoch": 0.79, "learning_rate": 0.0001969307266968008, "lm_loss": 0.006622314453125, "loss": 0.0086, "step": 1934, "total_loss": 0.006622314453125 }, { "epoch": 0.79, "learning_rate": 0.00019692756821857287, "lm_loss": 0.0036163330078125, "loss": 0.0072, "step": 1935, "total_loss": 0.0036163330078125 }, { "epoch": 0.79, "learning_rate": 0.00019692440814139762, "lm_loss": 0.01165771484375, "loss": 0.0099, "step": 1936, "total_loss": 0.01165771484375 }, { "epoch": 0.79, "learning_rate": 0.00019692124646532717, "lm_loss": 0.007293701171875, "loss": 0.007, "step": 1937, "total_loss": 0.007293701171875 }, { "epoch": 0.79, "learning_rate": 0.0001969180831904137, "lm_loss": 0.00579833984375, "loss": 0.0077, "step": 1938, "total_loss": 0.00579833984375 }, { "epoch": 0.79, "learning_rate": 0.00019691491831670934, "lm_loss": 0.0185546875, "loss": 0.0086, "step": 1939, "total_loss": 0.0185546875 }, { "epoch": 0.79, "learning_rate": 0.00019691175184426634, "lm_loss": 0.0101318359375, "loss": 0.0087, "step": 1940, "total_loss": 0.0101318359375 }, { "epoch": 0.79, "learning_rate": 0.00019690858377313693, "lm_loss": 0.00860595703125, "loss": 0.0089, "step": 1941, "total_loss": 0.00860595703125 }, { "epoch": 0.79, "learning_rate": 0.0001969054141033734, "lm_loss": 0.0140380859375, "loss": 0.0087, "step": 1942, "total_loss": 0.0140380859375 }, { "epoch": 0.79, "learning_rate": 0.00019690224283502798, "lm_loss": 0.00701904296875, "loss": 0.0082, "step": 1943, "total_loss": 0.00701904296875 }, { "epoch": 0.79, "learning_rate": 0.00019689906996815302, "lm_loss": 0.006072998046875, "loss": 0.0065, "step": 1944, "total_loss": 0.006072998046875 }, { "epoch": 0.8, "learning_rate": 0.00019689589550280086, "lm_loss": 0.0034332275390625, "loss": 0.0073, "step": 1945, "total_loss": 0.0034332275390625 }, { "epoch": 0.8, "learning_rate": 0.00019689271943902384, "lm_loss": 0.013916015625, "loss": 0.0093, "step": 1946, "total_loss": 0.013916015625 }, { "epoch": 0.8, "learning_rate": 0.00019688954177687437, "lm_loss": 0.011474609375, "loss": 0.0088, "step": 1947, "total_loss": 0.011474609375 }, { "epoch": 0.8, "learning_rate": 0.0001968863625164049, "lm_loss": 0.0118408203125, "loss": 0.0097, "step": 1948, "total_loss": 0.0118408203125 }, { "epoch": 0.8, "learning_rate": 0.00019688318165766782, "lm_loss": 0.0030517578125, "loss": 0.0091, "step": 1949, "total_loss": 0.0030517578125 }, { "epoch": 0.8, "learning_rate": 0.00019687999920071564, "lm_loss": 0.007568359375, "loss": 0.01, "step": 1950, "total_loss": 0.007568359375 }, { "epoch": 0.8, "learning_rate": 0.00019687681514560086, "lm_loss": 0.012451171875, "loss": 0.0107, "step": 1951, "total_loss": 0.012451171875 }, { "epoch": 0.8, "learning_rate": 0.000196873629492376, "lm_loss": 0.007568359375, "loss": 0.0108, "step": 1952, "total_loss": 0.007568359375 }, { "epoch": 0.8, "learning_rate": 0.00019687044224109358, "lm_loss": 0.004364013671875, "loss": 0.0087, "step": 1953, "total_loss": 0.004364013671875 }, { "epoch": 0.8, "learning_rate": 0.00019686725339180622, "lm_loss": 0.004302978515625, "loss": 0.007, "step": 1954, "total_loss": 0.004302978515625 }, { "epoch": 0.8, "learning_rate": 0.00019686406294456651, "lm_loss": 0.00958251953125, "loss": 0.0107, "step": 1955, "total_loss": 0.00958251953125 }, { "epoch": 0.8, "learning_rate": 0.00019686087089942707, "lm_loss": 0.008544921875, "loss": 0.0074, "step": 1956, "total_loss": 0.008544921875 }, { "epoch": 0.8, "learning_rate": 0.0001968576772564406, "lm_loss": 0.005584716796875, "loss": 0.0089, "step": 1957, "total_loss": 0.005584716796875 }, { "epoch": 0.8, "learning_rate": 0.0001968544820156597, "lm_loss": 0.01007080078125, "loss": 0.0091, "step": 1958, "total_loss": 0.01007080078125 }, { "epoch": 0.8, "learning_rate": 0.00019685128517713717, "lm_loss": 0.01171875, "loss": 0.0085, "step": 1959, "total_loss": 0.01171875 }, { "epoch": 0.8, "learning_rate": 0.00019684808674092568, "lm_loss": 0.01214599609375, "loss": 0.0093, "step": 1960, "total_loss": 0.01214599609375 }, { "epoch": 0.8, "learning_rate": 0.00019684488670707802, "lm_loss": 0.00933837890625, "loss": 0.0081, "step": 1961, "total_loss": 0.00933837890625 }, { "epoch": 0.8, "learning_rate": 0.00019684168507564696, "lm_loss": 0.011962890625, "loss": 0.011, "step": 1962, "total_loss": 0.011962890625 }, { "epoch": 0.8, "learning_rate": 0.0001968384818466854, "lm_loss": 0.0079345703125, "loss": 0.0083, "step": 1963, "total_loss": 0.0079345703125 }, { "epoch": 0.8, "learning_rate": 0.00019683527702024604, "lm_loss": 0.01348876953125, "loss": 0.01, "step": 1964, "total_loss": 0.01348876953125 }, { "epoch": 0.8, "learning_rate": 0.00019683207059638185, "lm_loss": 0.013916015625, "loss": 0.0089, "step": 1965, "total_loss": 0.013916015625 }, { "epoch": 0.8, "learning_rate": 0.0001968288625751457, "lm_loss": 0.00537109375, "loss": 0.0077, "step": 1966, "total_loss": 0.00537109375 }, { "epoch": 0.8, "learning_rate": 0.0001968256529565905, "lm_loss": 0.0067138671875, "loss": 0.0092, "step": 1967, "total_loss": 0.0067138671875 }, { "epoch": 0.8, "learning_rate": 0.0001968224417407692, "lm_loss": 0.01068115234375, "loss": 0.0076, "step": 1968, "total_loss": 0.01068115234375 }, { "epoch": 0.8, "learning_rate": 0.00019681922892773477, "lm_loss": 0.0059814453125, "loss": 0.0068, "step": 1969, "total_loss": 0.0059814453125 }, { "epoch": 0.81, "learning_rate": 0.0001968160145175402, "lm_loss": 0.00592041015625, "loss": 0.0086, "step": 1970, "total_loss": 0.00592041015625 }, { "epoch": 0.81, "learning_rate": 0.00019681279851023855, "lm_loss": 0.005218505859375, "loss": 0.0075, "step": 1971, "total_loss": 0.005218505859375 }, { "epoch": 0.81, "learning_rate": 0.00019680958090588285, "lm_loss": 0.0086669921875, "loss": 0.0086, "step": 1972, "total_loss": 0.0086669921875 }, { "epoch": 0.81, "learning_rate": 0.0001968063617045262, "lm_loss": 0.00958251953125, "loss": 0.0099, "step": 1973, "total_loss": 0.00958251953125 }, { "epoch": 0.81, "learning_rate": 0.00019680314090622165, "lm_loss": 0.01483154296875, "loss": 0.008, "step": 1974, "total_loss": 0.01483154296875 }, { "epoch": 0.81, "learning_rate": 0.0001967999185110224, "lm_loss": 0.0164794921875, "loss": 0.0103, "step": 1975, "total_loss": 0.0164794921875 }, { "epoch": 0.81, "learning_rate": 0.00019679669451898156, "lm_loss": 0.0059814453125, "loss": 0.0082, "step": 1976, "total_loss": 0.0059814453125 }, { "epoch": 0.81, "learning_rate": 0.00019679346893015234, "lm_loss": 0.00811767578125, "loss": 0.0098, "step": 1977, "total_loss": 0.00811767578125 }, { "epoch": 0.81, "learning_rate": 0.00019679024174458793, "lm_loss": 0.003570556640625, "loss": 0.0077, "step": 1978, "total_loss": 0.003570556640625 }, { "epoch": 0.81, "learning_rate": 0.0001967870129623416, "lm_loss": 0.007659912109375, "loss": 0.0065, "step": 1979, "total_loss": 0.007659912109375 }, { "epoch": 0.81, "learning_rate": 0.00019678378258346655, "lm_loss": 0.004119873046875, "loss": 0.0075, "step": 1980, "total_loss": 0.004119873046875 }, { "epoch": 0.81, "learning_rate": 0.00019678055060801613, "lm_loss": 0.011962890625, "loss": 0.0082, "step": 1981, "total_loss": 0.011962890625 }, { "epoch": 0.81, "learning_rate": 0.00019677731703604363, "lm_loss": 0.0057373046875, "loss": 0.0066, "step": 1982, "total_loss": 0.0057373046875 }, { "epoch": 0.81, "learning_rate": 0.0001967740818676024, "lm_loss": 0.00433349609375, "loss": 0.0082, "step": 1983, "total_loss": 0.00433349609375 }, { "epoch": 0.81, "learning_rate": 0.00019677084510274581, "lm_loss": 0.01031494140625, "loss": 0.0078, "step": 1984, "total_loss": 0.01031494140625 }, { "epoch": 0.81, "learning_rate": 0.00019676760674152725, "lm_loss": 0.0140380859375, "loss": 0.0101, "step": 1985, "total_loss": 0.0140380859375 }, { "epoch": 0.81, "learning_rate": 0.00019676436678400013, "lm_loss": 0.0074462890625, "loss": 0.0074, "step": 1986, "total_loss": 0.0074462890625 }, { "epoch": 0.81, "learning_rate": 0.00019676112523021793, "lm_loss": 0.00665283203125, "loss": 0.0086, "step": 1987, "total_loss": 0.00665283203125 }, { "epoch": 0.81, "learning_rate": 0.0001967578820802341, "lm_loss": 0.008056640625, "loss": 0.0081, "step": 1988, "total_loss": 0.008056640625 }, { "epoch": 0.81, "learning_rate": 0.0001967546373341021, "lm_loss": 0.01708984375, "loss": 0.0086, "step": 1989, "total_loss": 0.01708984375 }, { "epoch": 0.81, "learning_rate": 0.00019675139099187554, "lm_loss": 0.0107421875, "loss": 0.0082, "step": 1990, "total_loss": 0.0107421875 }, { "epoch": 0.81, "learning_rate": 0.00019674814305360792, "lm_loss": 0.006011962890625, "loss": 0.0062, "step": 1991, "total_loss": 0.006011962890625 }, { "epoch": 0.81, "learning_rate": 0.00019674489351935283, "lm_loss": 0.0062255859375, "loss": 0.0073, "step": 1992, "total_loss": 0.0062255859375 }, { "epoch": 0.81, "learning_rate": 0.00019674164238916387, "lm_loss": 0.006134033203125, "loss": 0.0076, "step": 1993, "total_loss": 0.006134033203125 }, { "epoch": 0.82, "learning_rate": 0.0001967383896630947, "lm_loss": 0.01019287109375, "loss": 0.0086, "step": 1994, "total_loss": 0.01019287109375 }, { "epoch": 0.82, "learning_rate": 0.0001967351353411989, "lm_loss": 0.01263427734375, "loss": 0.0094, "step": 1995, "total_loss": 0.01263427734375 }, { "epoch": 0.82, "learning_rate": 0.00019673187942353024, "lm_loss": 0.007659912109375, "loss": 0.0084, "step": 1996, "total_loss": 0.007659912109375 }, { "epoch": 0.82, "learning_rate": 0.0001967286219101424, "lm_loss": 0.0106201171875, "loss": 0.0084, "step": 1997, "total_loss": 0.0106201171875 }, { "epoch": 0.82, "learning_rate": 0.00019672536280108914, "lm_loss": 0.00823974609375, "loss": 0.0092, "step": 1998, "total_loss": 0.00823974609375 }, { "epoch": 0.82, "learning_rate": 0.00019672210209642417, "lm_loss": 0.0067138671875, "loss": 0.0085, "step": 1999, "total_loss": 0.0067138671875 }, { "epoch": 0.82, "learning_rate": 0.0001967188397962013, "lm_loss": 0.00567626953125, "loss": 0.0073, "step": 2000, "total_loss": 0.00567626953125 }, { "epoch": 0.82, "eval_lm_loss": 0.009454338811337948, "eval_loss": 0.009905356913805008, "eval_runtime": 43.8772, "eval_samples_per_second": 22.791, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009454338811337948, "lm_loss": 0.00183868408203125, "step": 2000, "total_loss": 0.00183868408203125 }, { "epoch": 0.82, "learning_rate": 0.0001967155759004744, "lm_loss": 0.0111083984375, "loss": 0.0098, "step": 2001, "total_loss": 0.0111083984375 }, { "epoch": 0.82, "learning_rate": 0.0001967123104092972, "lm_loss": 0.01251220703125, "loss": 0.01, "step": 2002, "total_loss": 0.01251220703125 }, { "epoch": 0.82, "learning_rate": 0.0001967090433227237, "lm_loss": 0.004119873046875, "loss": 0.0094, "step": 2003, "total_loss": 0.004119873046875 }, { "epoch": 0.82, "learning_rate": 0.0001967057746408077, "lm_loss": 0.00439453125, "loss": 0.009, "step": 2004, "total_loss": 0.00439453125 }, { "epoch": 0.82, "learning_rate": 0.00019670250436360317, "lm_loss": 0.005096435546875, "loss": 0.0079, "step": 2005, "total_loss": 0.005096435546875 }, { "epoch": 0.82, "learning_rate": 0.000196699232491164, "lm_loss": 0.003692626953125, "loss": 0.0084, "step": 2006, "total_loss": 0.003692626953125 }, { "epoch": 0.82, "learning_rate": 0.00019669595902354424, "lm_loss": 0.00836181640625, "loss": 0.0078, "step": 2007, "total_loss": 0.00836181640625 }, { "epoch": 0.82, "learning_rate": 0.00019669268396079788, "lm_loss": 0.01025390625, "loss": 0.0094, "step": 2008, "total_loss": 0.01025390625 }, { "epoch": 0.82, "learning_rate": 0.0001966894073029789, "lm_loss": 0.01708984375, "loss": 0.0114, "step": 2009, "total_loss": 0.01708984375 }, { "epoch": 0.82, "learning_rate": 0.00019668612905014135, "lm_loss": 0.0118408203125, "loss": 0.0086, "step": 2010, "total_loss": 0.0118408203125 }, { "epoch": 0.82, "learning_rate": 0.00019668284920233933, "lm_loss": 0.004638671875, "loss": 0.0075, "step": 2011, "total_loss": 0.004638671875 }, { "epoch": 0.82, "learning_rate": 0.00019667956775962697, "lm_loss": 0.01171875, "loss": 0.0073, "step": 2012, "total_loss": 0.01171875 }, { "epoch": 0.82, "learning_rate": 0.00019667628472205836, "lm_loss": 0.00506591796875, "loss": 0.0086, "step": 2013, "total_loss": 0.00506591796875 }, { "epoch": 0.82, "learning_rate": 0.0001966730000896877, "lm_loss": 0.00970458984375, "loss": 0.0077, "step": 2014, "total_loss": 0.00970458984375 }, { "epoch": 0.82, "learning_rate": 0.0001966697138625691, "lm_loss": 0.00506591796875, "loss": 0.007, "step": 2015, "total_loss": 0.00506591796875 }, { "epoch": 0.82, "learning_rate": 0.00019666642604075686, "lm_loss": 0.0042724609375, "loss": 0.0091, "step": 2016, "total_loss": 0.0042724609375 }, { "epoch": 0.82, "learning_rate": 0.0001966631366243052, "lm_loss": 0.006805419921875, "loss": 0.0072, "step": 2017, "total_loss": 0.006805419921875 }, { "epoch": 0.83, "learning_rate": 0.00019665984561326833, "lm_loss": 0.01031494140625, "loss": 0.0076, "step": 2018, "total_loss": 0.01031494140625 }, { "epoch": 0.83, "learning_rate": 0.0001966565530077006, "lm_loss": 0.00787353515625, "loss": 0.0084, "step": 2019, "total_loss": 0.00787353515625 }, { "epoch": 0.83, "learning_rate": 0.00019665325880765627, "lm_loss": 0.01953125, "loss": 0.0084, "step": 2020, "total_loss": 0.01953125 }, { "epoch": 0.83, "learning_rate": 0.00019664996301318972, "lm_loss": 0.004547119140625, "loss": 0.0091, "step": 2021, "total_loss": 0.004547119140625 }, { "epoch": 0.83, "learning_rate": 0.0001966466656243553, "lm_loss": 0.002777099609375, "loss": 0.0086, "step": 2022, "total_loss": 0.002777099609375 }, { "epoch": 0.83, "learning_rate": 0.00019664336664120745, "lm_loss": 0.009033203125, "loss": 0.0104, "step": 2023, "total_loss": 0.009033203125 }, { "epoch": 0.83, "learning_rate": 0.0001966400660638005, "lm_loss": 0.010498046875, "loss": 0.0091, "step": 2024, "total_loss": 0.010498046875 }, { "epoch": 0.83, "learning_rate": 0.00019663676389218896, "lm_loss": 0.009521484375, "loss": 0.008, "step": 2025, "total_loss": 0.009521484375 }, { "epoch": 0.83, "learning_rate": 0.00019663346012642734, "lm_loss": 0.00482177734375, "loss": 0.0073, "step": 2026, "total_loss": 0.00482177734375 }, { "epoch": 0.83, "learning_rate": 0.00019663015476657005, "lm_loss": 0.006072998046875, "loss": 0.0086, "step": 2027, "total_loss": 0.006072998046875 }, { "epoch": 0.83, "learning_rate": 0.0001966268478126717, "lm_loss": 0.0125732421875, "loss": 0.0076, "step": 2028, "total_loss": 0.0125732421875 }, { "epoch": 0.83, "learning_rate": 0.0001966235392647868, "lm_loss": 0.00848388671875, "loss": 0.0098, "step": 2029, "total_loss": 0.00848388671875 }, { "epoch": 0.83, "learning_rate": 0.00019662022912296986, "lm_loss": 0.00921630859375, "loss": 0.0093, "step": 2030, "total_loss": 0.00921630859375 }, { "epoch": 0.83, "learning_rate": 0.00019661691738727564, "lm_loss": 0.00506591796875, "loss": 0.0079, "step": 2031, "total_loss": 0.00506591796875 }, { "epoch": 0.83, "learning_rate": 0.00019661360405775867, "lm_loss": 0.01080322265625, "loss": 0.0092, "step": 2032, "total_loss": 0.01080322265625 }, { "epoch": 0.83, "learning_rate": 0.0001966102891344736, "lm_loss": 0.003326416015625, "loss": 0.0101, "step": 2033, "total_loss": 0.003326416015625 }, { "epoch": 0.83, "learning_rate": 0.00019660697261747516, "lm_loss": 0.0057373046875, "loss": 0.0093, "step": 2034, "total_loss": 0.0057373046875 }, { "epoch": 0.83, "learning_rate": 0.00019660365450681805, "lm_loss": 0.008056640625, "loss": 0.0092, "step": 2035, "total_loss": 0.008056640625 }, { "epoch": 0.83, "learning_rate": 0.000196600334802557, "lm_loss": 0.0079345703125, "loss": 0.0087, "step": 2036, "total_loss": 0.0079345703125 }, { "epoch": 0.83, "learning_rate": 0.00019659701350474676, "lm_loss": 0.007049560546875, "loss": 0.0078, "step": 2037, "total_loss": 0.007049560546875 }, { "epoch": 0.83, "learning_rate": 0.00019659369061344212, "lm_loss": 0.0093994140625, "loss": 0.0074, "step": 2038, "total_loss": 0.0093994140625 }, { "epoch": 0.83, "learning_rate": 0.0001965903661286979, "lm_loss": 0.00787353515625, "loss": 0.0075, "step": 2039, "total_loss": 0.00787353515625 }, { "epoch": 0.83, "learning_rate": 0.00019658704005056895, "lm_loss": 0.007293701171875, "loss": 0.0092, "step": 2040, "total_loss": 0.007293701171875 }, { "epoch": 0.83, "learning_rate": 0.00019658371237911014, "lm_loss": 0.00933837890625, "loss": 0.0078, "step": 2041, "total_loss": 0.00933837890625 }, { "epoch": 0.83, "learning_rate": 0.0001965803831143764, "lm_loss": 0.0125732421875, "loss": 0.0082, "step": 2042, "total_loss": 0.0125732421875 }, { "epoch": 0.84, "learning_rate": 0.00019657705225642255, "lm_loss": 0.01312255859375, "loss": 0.0079, "step": 2043, "total_loss": 0.01312255859375 }, { "epoch": 0.84, "learning_rate": 0.00019657371980530362, "lm_loss": 0.01458740234375, "loss": 0.0094, "step": 2044, "total_loss": 0.01458740234375 }, { "epoch": 0.84, "learning_rate": 0.00019657038576107456, "lm_loss": 0.0184326171875, "loss": 0.0091, "step": 2045, "total_loss": 0.0184326171875 }, { "epoch": 0.84, "learning_rate": 0.00019656705012379038, "lm_loss": 0.00775146484375, "loss": 0.0098, "step": 2046, "total_loss": 0.00775146484375 }, { "epoch": 0.84, "learning_rate": 0.00019656371289350608, "lm_loss": 0.01214599609375, "loss": 0.0085, "step": 2047, "total_loss": 0.01214599609375 }, { "epoch": 0.84, "learning_rate": 0.00019656037407027674, "lm_loss": 0.00653076171875, "loss": 0.0062, "step": 2048, "total_loss": 0.00653076171875 }, { "epoch": 0.84, "learning_rate": 0.00019655703365415743, "lm_loss": 0.006988525390625, "loss": 0.009, "step": 2049, "total_loss": 0.006988525390625 }, { "epoch": 0.84, "learning_rate": 0.00019655369164520324, "lm_loss": 0.006866455078125, "loss": 0.0087, "step": 2050, "total_loss": 0.006866455078125 }, { "epoch": 0.84, "learning_rate": 0.0001965503480434693, "lm_loss": 0.0037841796875, "loss": 0.0096, "step": 2051, "total_loss": 0.0037841796875 }, { "epoch": 0.84, "learning_rate": 0.00019654700284901078, "lm_loss": 0.00762939453125, "loss": 0.0092, "step": 2052, "total_loss": 0.00762939453125 }, { "epoch": 0.84, "learning_rate": 0.00019654365606188289, "lm_loss": 0.004241943359375, "loss": 0.0069, "step": 2053, "total_loss": 0.004241943359375 }, { "epoch": 0.84, "learning_rate": 0.00019654030768214076, "lm_loss": 0.00982666015625, "loss": 0.0093, "step": 2054, "total_loss": 0.00982666015625 }, { "epoch": 0.84, "learning_rate": 0.0001965369577098397, "lm_loss": 0.00830078125, "loss": 0.0082, "step": 2055, "total_loss": 0.00830078125 }, { "epoch": 0.84, "learning_rate": 0.00019653360614503495, "lm_loss": 0.004974365234375, "loss": 0.0087, "step": 2056, "total_loss": 0.004974365234375 }, { "epoch": 0.84, "learning_rate": 0.0001965302529877818, "lm_loss": 0.00848388671875, "loss": 0.0077, "step": 2057, "total_loss": 0.00848388671875 }, { "epoch": 0.84, "learning_rate": 0.00019652689823813557, "lm_loss": 0.0069580078125, "loss": 0.008, "step": 2058, "total_loss": 0.0069580078125 }, { "epoch": 0.84, "learning_rate": 0.00019652354189615158, "lm_loss": 0.004913330078125, "loss": 0.0082, "step": 2059, "total_loss": 0.004913330078125 }, { "epoch": 0.84, "learning_rate": 0.00019652018396188522, "lm_loss": 0.0087890625, "loss": 0.0079, "step": 2060, "total_loss": 0.0087890625 }, { "epoch": 0.84, "learning_rate": 0.00019651682443539188, "lm_loss": 0.0096435546875, "loss": 0.0064, "step": 2061, "total_loss": 0.0096435546875 }, { "epoch": 0.84, "learning_rate": 0.00019651346331672696, "lm_loss": 0.00494384765625, "loss": 0.0091, "step": 2062, "total_loss": 0.00494384765625 }, { "epoch": 0.84, "learning_rate": 0.0001965101006059459, "lm_loss": 0.01348876953125, "loss": 0.0101, "step": 2063, "total_loss": 0.01348876953125 }, { "epoch": 0.84, "learning_rate": 0.00019650673630310425, "lm_loss": 0.00933837890625, "loss": 0.0086, "step": 2064, "total_loss": 0.00933837890625 }, { "epoch": 0.84, "learning_rate": 0.00019650337040825742, "lm_loss": 0.00689697265625, "loss": 0.0096, "step": 2065, "total_loss": 0.00689697265625 }, { "epoch": 0.84, "learning_rate": 0.00019650000292146093, "lm_loss": 0.0133056640625, "loss": 0.0097, "step": 2066, "total_loss": 0.0133056640625 }, { "epoch": 0.85, "learning_rate": 0.00019649663384277042, "lm_loss": 0.01409912109375, "loss": 0.0094, "step": 2067, "total_loss": 0.01409912109375 }, { "epoch": 0.85, "learning_rate": 0.00019649326317224138, "lm_loss": 0.0123291015625, "loss": 0.0098, "step": 2068, "total_loss": 0.0123291015625 }, { "epoch": 0.85, "learning_rate": 0.00019648989090992946, "lm_loss": 0.01055908203125, "loss": 0.0095, "step": 2069, "total_loss": 0.01055908203125 }, { "epoch": 0.85, "learning_rate": 0.00019648651705589027, "lm_loss": 0.011474609375, "loss": 0.008, "step": 2070, "total_loss": 0.011474609375 }, { "epoch": 0.85, "learning_rate": 0.0001964831416101795, "lm_loss": 0.0130615234375, "loss": 0.0098, "step": 2071, "total_loss": 0.0130615234375 }, { "epoch": 0.85, "learning_rate": 0.0001964797645728528, "lm_loss": 0.01214599609375, "loss": 0.0093, "step": 2072, "total_loss": 0.01214599609375 }, { "epoch": 0.85, "learning_rate": 0.00019647638594396587, "lm_loss": 0.00738525390625, "loss": 0.0084, "step": 2073, "total_loss": 0.00738525390625 }, { "epoch": 0.85, "learning_rate": 0.00019647300572357446, "lm_loss": 0.00933837890625, "loss": 0.0085, "step": 2074, "total_loss": 0.00933837890625 }, { "epoch": 0.85, "learning_rate": 0.00019646962391173432, "lm_loss": 0.00653076171875, "loss": 0.0071, "step": 2075, "total_loss": 0.00653076171875 }, { "epoch": 0.85, "learning_rate": 0.00019646624050850123, "lm_loss": 0.0027923583984375, "loss": 0.01, "step": 2076, "total_loss": 0.0027923583984375 }, { "epoch": 0.85, "learning_rate": 0.00019646285551393108, "lm_loss": 0.0045166015625, "loss": 0.0087, "step": 2077, "total_loss": 0.0045166015625 }, { "epoch": 0.85, "learning_rate": 0.0001964594689280796, "lm_loss": 0.0079345703125, "loss": 0.0087, "step": 2078, "total_loss": 0.0079345703125 }, { "epoch": 0.85, "learning_rate": 0.00019645608075100274, "lm_loss": 0.00946044921875, "loss": 0.0076, "step": 2079, "total_loss": 0.00946044921875 }, { "epoch": 0.85, "learning_rate": 0.00019645269098275637, "lm_loss": 0.013427734375, "loss": 0.0077, "step": 2080, "total_loss": 0.013427734375 }, { "epoch": 0.85, "learning_rate": 0.00019644929962339637, "lm_loss": 0.00141143798828125, "loss": 0.0069, "step": 2081, "total_loss": 0.00141143798828125 }, { "epoch": 0.85, "learning_rate": 0.00019644590667297873, "lm_loss": 0.01446533203125, "loss": 0.0083, "step": 2082, "total_loss": 0.01446533203125 }, { "epoch": 0.85, "learning_rate": 0.0001964425121315594, "lm_loss": 0.007537841796875, "loss": 0.0097, "step": 2083, "total_loss": 0.007537841796875 }, { "epoch": 0.85, "learning_rate": 0.00019643911599919437, "lm_loss": 0.0048828125, "loss": 0.0089, "step": 2084, "total_loss": 0.0048828125 }, { "epoch": 0.85, "learning_rate": 0.0001964357182759397, "lm_loss": 0.0028228759765625, "loss": 0.0079, "step": 2085, "total_loss": 0.0028228759765625 }, { "epoch": 0.85, "learning_rate": 0.00019643231896185142, "lm_loss": 0.01025390625, "loss": 0.0093, "step": 2086, "total_loss": 0.01025390625 }, { "epoch": 0.85, "learning_rate": 0.00019642891805698556, "lm_loss": 0.008056640625, "loss": 0.0102, "step": 2087, "total_loss": 0.008056640625 }, { "epoch": 0.85, "learning_rate": 0.0001964255155613983, "lm_loss": 0.007659912109375, "loss": 0.0078, "step": 2088, "total_loss": 0.007659912109375 }, { "epoch": 0.85, "learning_rate": 0.0001964221114751457, "lm_loss": 0.00311279296875, "loss": 0.0074, "step": 2089, "total_loss": 0.00311279296875 }, { "epoch": 0.85, "learning_rate": 0.00019641870579828398, "lm_loss": 0.010986328125, "loss": 0.0102, "step": 2090, "total_loss": 0.010986328125 }, { "epoch": 0.85, "learning_rate": 0.0001964152985308693, "lm_loss": 0.0048828125, "loss": 0.008, "step": 2091, "total_loss": 0.0048828125 }, { "epoch": 0.86, "learning_rate": 0.00019641188967295782, "lm_loss": 0.0091552734375, "loss": 0.0073, "step": 2092, "total_loss": 0.0091552734375 }, { "epoch": 0.86, "learning_rate": 0.00019640847922460585, "lm_loss": 0.01116943359375, "loss": 0.0087, "step": 2093, "total_loss": 0.01116943359375 }, { "epoch": 0.86, "learning_rate": 0.00019640506718586958, "lm_loss": 0.00390625, "loss": 0.0064, "step": 2094, "total_loss": 0.00390625 }, { "epoch": 0.86, "learning_rate": 0.00019640165355680535, "lm_loss": 0.005340576171875, "loss": 0.0089, "step": 2095, "total_loss": 0.005340576171875 }, { "epoch": 0.86, "learning_rate": 0.00019639823833746942, "lm_loss": 0.006011962890625, "loss": 0.0097, "step": 2096, "total_loss": 0.006011962890625 }, { "epoch": 0.86, "learning_rate": 0.00019639482152791817, "lm_loss": 0.0072021484375, "loss": 0.0078, "step": 2097, "total_loss": 0.0072021484375 }, { "epoch": 0.86, "learning_rate": 0.00019639140312820795, "lm_loss": 0.00994873046875, "loss": 0.0091, "step": 2098, "total_loss": 0.00994873046875 }, { "epoch": 0.86, "learning_rate": 0.00019638798313839515, "lm_loss": 0.004150390625, "loss": 0.0076, "step": 2099, "total_loss": 0.004150390625 }, { "epoch": 0.86, "learning_rate": 0.00019638456155853619, "lm_loss": 0.0057373046875, "loss": 0.0064, "step": 2100, "total_loss": 0.0057373046875 }, { "epoch": 0.86, "eval_lm_loss": 0.00981560442596674, "eval_loss": 0.010241355746984482, "eval_runtime": 43.9945, "eval_samples_per_second": 22.73, "eval_steps_per_second": 0.205, "eval_total_loss": 0.00981560442596674, "lm_loss": 0.000827789306640625, "step": 2100, "total_loss": 0.000827789306640625 }, { "epoch": 0.86, "learning_rate": 0.00019638113838868753, "lm_loss": 0.01141357421875, "loss": 0.0081, "step": 2101, "total_loss": 0.01141357421875 }, { "epoch": 0.86, "learning_rate": 0.0001963777136289056, "lm_loss": 0.010009765625, "loss": 0.0095, "step": 2102, "total_loss": 0.010009765625 }, { "epoch": 0.86, "learning_rate": 0.00019637428727924694, "lm_loss": 0.00616455078125, "loss": 0.0083, "step": 2103, "total_loss": 0.00616455078125 }, { "epoch": 0.86, "learning_rate": 0.00019637085933976804, "lm_loss": 0.0079345703125, "loss": 0.0086, "step": 2104, "total_loss": 0.0079345703125 }, { "epoch": 0.86, "learning_rate": 0.00019636742981052544, "lm_loss": 0.00994873046875, "loss": 0.0084, "step": 2105, "total_loss": 0.00994873046875 }, { "epoch": 0.86, "learning_rate": 0.00019636399869157577, "lm_loss": 0.0031585693359375, "loss": 0.0062, "step": 2106, "total_loss": 0.0031585693359375 }, { "epoch": 0.86, "learning_rate": 0.00019636056598297557, "lm_loss": 0.00457763671875, "loss": 0.0056, "step": 2107, "total_loss": 0.00457763671875 }, { "epoch": 0.86, "learning_rate": 0.00019635713168478152, "lm_loss": 0.006561279296875, "loss": 0.0076, "step": 2108, "total_loss": 0.006561279296875 }, { "epoch": 0.86, "learning_rate": 0.0001963536957970502, "lm_loss": 0.00933837890625, "loss": 0.0077, "step": 2109, "total_loss": 0.00933837890625 }, { "epoch": 0.86, "learning_rate": 0.00019635025831983834, "lm_loss": 0.010009765625, "loss": 0.0085, "step": 2110, "total_loss": 0.010009765625 }, { "epoch": 0.86, "learning_rate": 0.00019634681925320265, "lm_loss": 0.01080322265625, "loss": 0.0082, "step": 2111, "total_loss": 0.01080322265625 }, { "epoch": 0.86, "learning_rate": 0.0001963433785971999, "lm_loss": 0.0096435546875, "loss": 0.0081, "step": 2112, "total_loss": 0.0096435546875 }, { "epoch": 0.86, "learning_rate": 0.00019633993635188675, "lm_loss": 0.0130615234375, "loss": 0.0083, "step": 2113, "total_loss": 0.0130615234375 }, { "epoch": 0.86, "learning_rate": 0.00019633649251732006, "lm_loss": 0.0106201171875, "loss": 0.0083, "step": 2114, "total_loss": 0.0106201171875 }, { "epoch": 0.86, "learning_rate": 0.00019633304709355657, "lm_loss": 0.005615234375, "loss": 0.0071, "step": 2115, "total_loss": 0.005615234375 }, { "epoch": 0.87, "learning_rate": 0.0001963296000806532, "lm_loss": 0.00823974609375, "loss": 0.0084, "step": 2116, "total_loss": 0.00823974609375 }, { "epoch": 0.87, "learning_rate": 0.00019632615147866677, "lm_loss": 0.0079345703125, "loss": 0.0089, "step": 2117, "total_loss": 0.0079345703125 }, { "epoch": 0.87, "learning_rate": 0.00019632270128765416, "lm_loss": 0.00677490234375, "loss": 0.0084, "step": 2118, "total_loss": 0.00677490234375 }, { "epoch": 0.87, "learning_rate": 0.0001963192495076723, "lm_loss": 0.0050048828125, "loss": 0.0062, "step": 2119, "total_loss": 0.0050048828125 }, { "epoch": 0.87, "learning_rate": 0.0001963157961387782, "lm_loss": 0.026123046875, "loss": 0.0099, "step": 2120, "total_loss": 0.026123046875 }, { "epoch": 0.87, "learning_rate": 0.00019631234118102866, "lm_loss": 0.0067138671875, "loss": 0.0099, "step": 2121, "total_loss": 0.0067138671875 }, { "epoch": 0.87, "learning_rate": 0.0001963088846344808, "lm_loss": 0.00445556640625, "loss": 0.0081, "step": 2122, "total_loss": 0.00445556640625 }, { "epoch": 0.87, "learning_rate": 0.00019630542649919165, "lm_loss": 0.0093994140625, "loss": 0.0092, "step": 2123, "total_loss": 0.0093994140625 }, { "epoch": 0.87, "learning_rate": 0.0001963019667752182, "lm_loss": 0.003204345703125, "loss": 0.0079, "step": 2124, "total_loss": 0.003204345703125 }, { "epoch": 0.87, "learning_rate": 0.00019629850546261753, "lm_loss": 0.010498046875, "loss": 0.0082, "step": 2125, "total_loss": 0.010498046875 }, { "epoch": 0.87, "learning_rate": 0.00019629504256144675, "lm_loss": 0.01092529296875, "loss": 0.0096, "step": 2126, "total_loss": 0.01092529296875 }, { "epoch": 0.87, "learning_rate": 0.000196291578071763, "lm_loss": 0.00836181640625, "loss": 0.0101, "step": 2127, "total_loss": 0.00836181640625 }, { "epoch": 0.87, "learning_rate": 0.00019628811199362342, "lm_loss": 0.006561279296875, "loss": 0.0076, "step": 2128, "total_loss": 0.006561279296875 }, { "epoch": 0.87, "learning_rate": 0.00019628464432708517, "lm_loss": 0.00518798828125, "loss": 0.0075, "step": 2129, "total_loss": 0.00518798828125 }, { "epoch": 0.87, "learning_rate": 0.00019628117507220546, "lm_loss": 0.005706787109375, "loss": 0.008, "step": 2130, "total_loss": 0.005706787109375 }, { "epoch": 0.87, "learning_rate": 0.00019627770422904158, "lm_loss": 0.00567626953125, "loss": 0.0072, "step": 2131, "total_loss": 0.00567626953125 }, { "epoch": 0.87, "learning_rate": 0.0001962742317976507, "lm_loss": 0.00750732421875, "loss": 0.0079, "step": 2132, "total_loss": 0.00750732421875 }, { "epoch": 0.87, "learning_rate": 0.00019627075777809013, "lm_loss": 0.003662109375, "loss": 0.0089, "step": 2133, "total_loss": 0.003662109375 }, { "epoch": 0.87, "learning_rate": 0.00019626728217041718, "lm_loss": 0.00823974609375, "loss": 0.008, "step": 2134, "total_loss": 0.00823974609375 }, { "epoch": 0.87, "learning_rate": 0.00019626380497468922, "lm_loss": 0.00982666015625, "loss": 0.0087, "step": 2135, "total_loss": 0.00982666015625 }, { "epoch": 0.87, "learning_rate": 0.00019626032619096358, "lm_loss": 0.00335693359375, "loss": 0.0085, "step": 2136, "total_loss": 0.00335693359375 }, { "epoch": 0.87, "learning_rate": 0.00019625684581929762, "lm_loss": 0.00360107421875, "loss": 0.0068, "step": 2137, "total_loss": 0.00360107421875 }, { "epoch": 0.87, "learning_rate": 0.0001962533638597488, "lm_loss": 0.0079345703125, "loss": 0.0073, "step": 2138, "total_loss": 0.0079345703125 }, { "epoch": 0.87, "learning_rate": 0.00019624988031237455, "lm_loss": 0.01031494140625, "loss": 0.0071, "step": 2139, "total_loss": 0.01031494140625 }, { "epoch": 0.87, "learning_rate": 0.00019624639517723232, "lm_loss": 0.006378173828125, "loss": 0.0097, "step": 2140, "total_loss": 0.006378173828125 }, { "epoch": 0.88, "learning_rate": 0.00019624290845437958, "lm_loss": 0.00872802734375, "loss": 0.0075, "step": 2141, "total_loss": 0.00872802734375 }, { "epoch": 0.88, "learning_rate": 0.00019623942014387392, "lm_loss": 0.005584716796875, "loss": 0.0069, "step": 2142, "total_loss": 0.005584716796875 }, { "epoch": 0.88, "learning_rate": 0.0001962359302457728, "lm_loss": 0.01214599609375, "loss": 0.0091, "step": 2143, "total_loss": 0.01214599609375 }, { "epoch": 0.88, "learning_rate": 0.00019623243876013387, "lm_loss": 0.005828857421875, "loss": 0.0073, "step": 2144, "total_loss": 0.005828857421875 }, { "epoch": 0.88, "learning_rate": 0.00019622894568701465, "lm_loss": 0.0162353515625, "loss": 0.0087, "step": 2145, "total_loss": 0.0162353515625 }, { "epoch": 0.88, "learning_rate": 0.00019622545102647286, "lm_loss": 0.00994873046875, "loss": 0.0082, "step": 2146, "total_loss": 0.00994873046875 }, { "epoch": 0.88, "learning_rate": 0.00019622195477856603, "lm_loss": 0.0068359375, "loss": 0.0077, "step": 2147, "total_loss": 0.0068359375 }, { "epoch": 0.88, "learning_rate": 0.00019621845694335193, "lm_loss": 0.01300048828125, "loss": 0.008, "step": 2148, "total_loss": 0.01300048828125 }, { "epoch": 0.88, "learning_rate": 0.0001962149575208882, "lm_loss": 0.00958251953125, "loss": 0.0084, "step": 2149, "total_loss": 0.00958251953125 }, { "epoch": 0.88, "learning_rate": 0.0001962114565112326, "lm_loss": 0.0164794921875, "loss": 0.0094, "step": 2150, "total_loss": 0.0164794921875 }, { "epoch": 0.88, "learning_rate": 0.00019620795391444288, "lm_loss": 0.01422119140625, "loss": 0.0076, "step": 2151, "total_loss": 0.01422119140625 }, { "epoch": 0.88, "learning_rate": 0.00019620444973057684, "lm_loss": 0.01507568359375, "loss": 0.0073, "step": 2152, "total_loss": 0.01507568359375 }, { "epoch": 0.88, "learning_rate": 0.00019620094395969226, "lm_loss": 0.00408935546875, "loss": 0.0083, "step": 2153, "total_loss": 0.00408935546875 }, { "epoch": 0.88, "learning_rate": 0.00019619743660184694, "lm_loss": 0.01068115234375, "loss": 0.0097, "step": 2154, "total_loss": 0.01068115234375 }, { "epoch": 0.88, "learning_rate": 0.0001961939276570988, "lm_loss": 0.006317138671875, "loss": 0.008, "step": 2155, "total_loss": 0.006317138671875 }, { "epoch": 0.88, "learning_rate": 0.00019619041712550572, "lm_loss": 0.00958251953125, "loss": 0.0081, "step": 2156, "total_loss": 0.00958251953125 }, { "epoch": 0.88, "learning_rate": 0.00019618690500712558, "lm_loss": 0.00506591796875, "loss": 0.0062, "step": 2157, "total_loss": 0.00506591796875 }, { "epoch": 0.88, "learning_rate": 0.00019618339130201632, "lm_loss": 0.005523681640625, "loss": 0.0079, "step": 2158, "total_loss": 0.005523681640625 }, { "epoch": 0.88, "learning_rate": 0.0001961798760102359, "lm_loss": 0.006988525390625, "loss": 0.0081, "step": 2159, "total_loss": 0.006988525390625 }, { "epoch": 0.88, "learning_rate": 0.00019617635913184232, "lm_loss": 0.005889892578125, "loss": 0.0078, "step": 2160, "total_loss": 0.005889892578125 }, { "epoch": 0.88, "learning_rate": 0.0001961728406668936, "lm_loss": 0.002899169921875, "loss": 0.0073, "step": 2161, "total_loss": 0.002899169921875 }, { "epoch": 0.88, "learning_rate": 0.0001961693206154478, "lm_loss": 0.007171630859375, "loss": 0.0087, "step": 2162, "total_loss": 0.007171630859375 }, { "epoch": 0.88, "learning_rate": 0.00019616579897756292, "lm_loss": 0.010498046875, "loss": 0.0077, "step": 2163, "total_loss": 0.010498046875 }, { "epoch": 0.88, "learning_rate": 0.00019616227575329712, "lm_loss": 0.00726318359375, "loss": 0.0071, "step": 2164, "total_loss": 0.00726318359375 }, { "epoch": 0.89, "learning_rate": 0.0001961587509427085, "lm_loss": 0.005615234375, "loss": 0.0074, "step": 2165, "total_loss": 0.005615234375 }, { "epoch": 0.89, "learning_rate": 0.0001961552245458552, "lm_loss": 0.0174560546875, "loss": 0.0071, "step": 2166, "total_loss": 0.0174560546875 }, { "epoch": 0.89, "learning_rate": 0.0001961516965627954, "lm_loss": 0.01019287109375, "loss": 0.0083, "step": 2167, "total_loss": 0.01019287109375 }, { "epoch": 0.89, "learning_rate": 0.0001961481669935873, "lm_loss": 0.0081787109375, "loss": 0.0073, "step": 2168, "total_loss": 0.0081787109375 }, { "epoch": 0.89, "learning_rate": 0.00019614463583828915, "lm_loss": 0.00836181640625, "loss": 0.0102, "step": 2169, "total_loss": 0.00836181640625 }, { "epoch": 0.89, "learning_rate": 0.00019614110309695916, "lm_loss": 0.006134033203125, "loss": 0.0065, "step": 2170, "total_loss": 0.006134033203125 }, { "epoch": 0.89, "learning_rate": 0.00019613756876965557, "lm_loss": 0.00946044921875, "loss": 0.0087, "step": 2171, "total_loss": 0.00946044921875 }, { "epoch": 0.89, "learning_rate": 0.00019613403285643677, "lm_loss": 0.00897216796875, "loss": 0.0085, "step": 2172, "total_loss": 0.00897216796875 }, { "epoch": 0.89, "learning_rate": 0.00019613049535736107, "lm_loss": 0.004180908203125, "loss": 0.0098, "step": 2173, "total_loss": 0.004180908203125 }, { "epoch": 0.89, "learning_rate": 0.0001961269562724868, "lm_loss": 0.00506591796875, "loss": 0.0084, "step": 2174, "total_loss": 0.00506591796875 }, { "epoch": 0.89, "learning_rate": 0.0001961234156018723, "lm_loss": 0.01129150390625, "loss": 0.0102, "step": 2175, "total_loss": 0.01129150390625 }, { "epoch": 0.89, "learning_rate": 0.00019611987334557609, "lm_loss": 0.024169921875, "loss": 0.0094, "step": 2176, "total_loss": 0.024169921875 }, { "epoch": 0.89, "learning_rate": 0.00019611632950365651, "lm_loss": 0.007415771484375, "loss": 0.0081, "step": 2177, "total_loss": 0.007415771484375 }, { "epoch": 0.89, "learning_rate": 0.00019611278407617207, "lm_loss": 0.00665283203125, "loss": 0.0085, "step": 2178, "total_loss": 0.00665283203125 }, { "epoch": 0.89, "learning_rate": 0.00019610923706318124, "lm_loss": 0.005126953125, "loss": 0.0071, "step": 2179, "total_loss": 0.005126953125 }, { "epoch": 0.89, "learning_rate": 0.0001961056884647425, "lm_loss": 0.00653076171875, "loss": 0.0098, "step": 2180, "total_loss": 0.00653076171875 }, { "epoch": 0.89, "learning_rate": 0.00019610213828091447, "lm_loss": 0.0146484375, "loss": 0.0108, "step": 2181, "total_loss": 0.0146484375 }, { "epoch": 0.89, "learning_rate": 0.00019609858651175563, "lm_loss": 0.01129150390625, "loss": 0.0113, "step": 2182, "total_loss": 0.01129150390625 }, { "epoch": 0.89, "learning_rate": 0.00019609503315732462, "lm_loss": 0.00396728515625, "loss": 0.0063, "step": 2183, "total_loss": 0.00396728515625 }, { "epoch": 0.89, "learning_rate": 0.00019609147821768002, "lm_loss": 0.012451171875, "loss": 0.0084, "step": 2184, "total_loss": 0.012451171875 }, { "epoch": 0.89, "learning_rate": 0.00019608792169288053, "lm_loss": 0.01324462890625, "loss": 0.0087, "step": 2185, "total_loss": 0.01324462890625 }, { "epoch": 0.89, "learning_rate": 0.00019608436358298475, "lm_loss": 0.00787353515625, "loss": 0.008, "step": 2186, "total_loss": 0.00787353515625 }, { "epoch": 0.89, "learning_rate": 0.00019608080388805146, "lm_loss": 0.0123291015625, "loss": 0.0081, "step": 2187, "total_loss": 0.0123291015625 }, { "epoch": 0.89, "learning_rate": 0.0001960772426081393, "lm_loss": 0.01202392578125, "loss": 0.0093, "step": 2188, "total_loss": 0.01202392578125 }, { "epoch": 0.89, "learning_rate": 0.00019607367974330706, "lm_loss": 0.005889892578125, "loss": 0.0072, "step": 2189, "total_loss": 0.005889892578125 }, { "epoch": 0.9, "learning_rate": 0.0001960701152936135, "lm_loss": 0.010986328125, "loss": 0.0094, "step": 2190, "total_loss": 0.010986328125 }, { "epoch": 0.9, "learning_rate": 0.00019606654925911744, "lm_loss": 0.00885009765625, "loss": 0.0085, "step": 2191, "total_loss": 0.00885009765625 }, { "epoch": 0.9, "learning_rate": 0.0001960629816398777, "lm_loss": 0.01409912109375, "loss": 0.0086, "step": 2192, "total_loss": 0.01409912109375 }, { "epoch": 0.9, "learning_rate": 0.00019605941243595312, "lm_loss": 0.00408935546875, "loss": 0.0082, "step": 2193, "total_loss": 0.00408935546875 }, { "epoch": 0.9, "learning_rate": 0.0001960558416474026, "lm_loss": 0.00848388671875, "loss": 0.0101, "step": 2194, "total_loss": 0.00848388671875 }, { "epoch": 0.9, "learning_rate": 0.00019605226927428497, "lm_loss": 0.01312255859375, "loss": 0.0089, "step": 2195, "total_loss": 0.01312255859375 }, { "epoch": 0.9, "learning_rate": 0.00019604869531665925, "lm_loss": 0.01031494140625, "loss": 0.0104, "step": 2196, "total_loss": 0.01031494140625 }, { "epoch": 0.9, "learning_rate": 0.00019604511977458437, "lm_loss": 0.00726318359375, "loss": 0.0072, "step": 2197, "total_loss": 0.00726318359375 }, { "epoch": 0.9, "learning_rate": 0.00019604154264811933, "lm_loss": 0.004638671875, "loss": 0.006, "step": 2198, "total_loss": 0.004638671875 }, { "epoch": 0.9, "learning_rate": 0.0001960379639373231, "lm_loss": 0.006866455078125, "loss": 0.0078, "step": 2199, "total_loss": 0.006866455078125 }, { "epoch": 0.9, "learning_rate": 0.00019603438364225475, "lm_loss": 0.0093994140625, "loss": 0.008, "step": 2200, "total_loss": 0.0093994140625 }, { "epoch": 0.9, "eval_lm_loss": 0.009770958684384823, "eval_loss": 0.010218048468232155, "eval_runtime": 43.9063, "eval_samples_per_second": 22.776, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009770958684384823, "lm_loss": 0.0012969970703125, "step": 2200, "total_loss": 0.0012969970703125 }, { "epoch": 0.9, "learning_rate": 0.00019603080176297332, "lm_loss": 0.01300048828125, "loss": 0.0095, "step": 2201, "total_loss": 0.01300048828125 }, { "epoch": 0.9, "learning_rate": 0.00019602721829953788, "lm_loss": 0.0166015625, "loss": 0.0084, "step": 2202, "total_loss": 0.0166015625 }, { "epoch": 0.9, "learning_rate": 0.0001960236332520076, "lm_loss": 0.00787353515625, "loss": 0.0089, "step": 2203, "total_loss": 0.00787353515625 }, { "epoch": 0.9, "learning_rate": 0.0001960200466204416, "lm_loss": 0.004730224609375, "loss": 0.009, "step": 2204, "total_loss": 0.004730224609375 }, { "epoch": 0.9, "learning_rate": 0.000196016458404899, "lm_loss": 0.004425048828125, "loss": 0.0093, "step": 2205, "total_loss": 0.004425048828125 }, { "epoch": 0.9, "learning_rate": 0.00019601286860543903, "lm_loss": 0.0140380859375, "loss": 0.0083, "step": 2206, "total_loss": 0.0140380859375 }, { "epoch": 0.9, "learning_rate": 0.00019600927722212094, "lm_loss": 0.0135498046875, "loss": 0.0116, "step": 2207, "total_loss": 0.0135498046875 }, { "epoch": 0.9, "learning_rate": 0.00019600568425500389, "lm_loss": 0.0093994140625, "loss": 0.0079, "step": 2208, "total_loss": 0.0093994140625 }, { "epoch": 0.9, "learning_rate": 0.00019600208970414724, "lm_loss": 0.006195068359375, "loss": 0.0081, "step": 2209, "total_loss": 0.006195068359375 }, { "epoch": 0.9, "learning_rate": 0.00019599849356961025, "lm_loss": 0.00677490234375, "loss": 0.0076, "step": 2210, "total_loss": 0.00677490234375 }, { "epoch": 0.9, "learning_rate": 0.00019599489585145222, "lm_loss": 0.00982666015625, "loss": 0.0074, "step": 2211, "total_loss": 0.00982666015625 }, { "epoch": 0.9, "learning_rate": 0.00019599129654973254, "lm_loss": 0.004852294921875, "loss": 0.0076, "step": 2212, "total_loss": 0.004852294921875 }, { "epoch": 0.9, "learning_rate": 0.00019598769566451054, "lm_loss": 0.0057373046875, "loss": 0.0065, "step": 2213, "total_loss": 0.0057373046875 }, { "epoch": 0.91, "learning_rate": 0.00019598409319584567, "lm_loss": 0.00494384765625, "loss": 0.0081, "step": 2214, "total_loss": 0.00494384765625 }, { "epoch": 0.91, "learning_rate": 0.00019598048914379733, "lm_loss": 0.007781982421875, "loss": 0.0097, "step": 2215, "total_loss": 0.007781982421875 }, { "epoch": 0.91, "learning_rate": 0.00019597688350842494, "lm_loss": 0.00677490234375, "loss": 0.0075, "step": 2216, "total_loss": 0.00677490234375 }, { "epoch": 0.91, "learning_rate": 0.0001959732762897881, "lm_loss": 0.0093994140625, "loss": 0.0075, "step": 2217, "total_loss": 0.0093994140625 }, { "epoch": 0.91, "learning_rate": 0.00019596966748794615, "lm_loss": 0.011962890625, "loss": 0.0086, "step": 2218, "total_loss": 0.011962890625 }, { "epoch": 0.91, "learning_rate": 0.00019596605710295873, "lm_loss": 0.004119873046875, "loss": 0.0078, "step": 2219, "total_loss": 0.004119873046875 }, { "epoch": 0.91, "learning_rate": 0.00019596244513488538, "lm_loss": 0.005615234375, "loss": 0.0076, "step": 2220, "total_loss": 0.005615234375 }, { "epoch": 0.91, "learning_rate": 0.0001959588315837857, "lm_loss": 0.01190185546875, "loss": 0.0079, "step": 2221, "total_loss": 0.01190185546875 }, { "epoch": 0.91, "learning_rate": 0.00019595521644971925, "lm_loss": 0.0027618408203125, "loss": 0.0078, "step": 2222, "total_loss": 0.0027618408203125 }, { "epoch": 0.91, "learning_rate": 0.00019595159973274568, "lm_loss": 0.0038299560546875, "loss": 0.0083, "step": 2223, "total_loss": 0.0038299560546875 }, { "epoch": 0.91, "learning_rate": 0.00019594798143292467, "lm_loss": 0.007568359375, "loss": 0.0068, "step": 2224, "total_loss": 0.007568359375 }, { "epoch": 0.91, "learning_rate": 0.00019594436155031595, "lm_loss": 0.0034942626953125, "loss": 0.0084, "step": 2225, "total_loss": 0.0034942626953125 }, { "epoch": 0.91, "learning_rate": 0.00019594074008497914, "lm_loss": 0.0045166015625, "loss": 0.009, "step": 2226, "total_loss": 0.0045166015625 }, { "epoch": 0.91, "learning_rate": 0.00019593711703697406, "lm_loss": 0.004974365234375, "loss": 0.0082, "step": 2227, "total_loss": 0.004974365234375 }, { "epoch": 0.91, "learning_rate": 0.00019593349240636042, "lm_loss": 0.00836181640625, "loss": 0.0079, "step": 2228, "total_loss": 0.00836181640625 }, { "epoch": 0.91, "learning_rate": 0.00019592986619319807, "lm_loss": 0.00738525390625, "loss": 0.009, "step": 2229, "total_loss": 0.00738525390625 }, { "epoch": 0.91, "learning_rate": 0.0001959262383975468, "lm_loss": 0.007537841796875, "loss": 0.007, "step": 2230, "total_loss": 0.007537841796875 }, { "epoch": 0.91, "learning_rate": 0.0001959226090194664, "lm_loss": 0.006195068359375, "loss": 0.0085, "step": 2231, "total_loss": 0.006195068359375 }, { "epoch": 0.91, "learning_rate": 0.00019591897805901684, "lm_loss": 0.01348876953125, "loss": 0.0081, "step": 2232, "total_loss": 0.01348876953125 }, { "epoch": 0.91, "learning_rate": 0.00019591534551625798, "lm_loss": 0.01348876953125, "loss": 0.009, "step": 2233, "total_loss": 0.01348876953125 }, { "epoch": 0.91, "learning_rate": 0.00019591171139124973, "lm_loss": 0.003570556640625, "loss": 0.0088, "step": 2234, "total_loss": 0.003570556640625 }, { "epoch": 0.91, "learning_rate": 0.00019590807568405202, "lm_loss": 0.01171875, "loss": 0.0082, "step": 2235, "total_loss": 0.01171875 }, { "epoch": 0.91, "learning_rate": 0.0001959044383947249, "lm_loss": 0.00701904296875, "loss": 0.0088, "step": 2236, "total_loss": 0.00701904296875 }, { "epoch": 0.91, "learning_rate": 0.00019590079952332829, "lm_loss": 0.00537109375, "loss": 0.0086, "step": 2237, "total_loss": 0.00537109375 }, { "epoch": 0.91, "learning_rate": 0.00019589715906992224, "lm_loss": 0.009765625, "loss": 0.0079, "step": 2238, "total_loss": 0.009765625 }, { "epoch": 0.92, "learning_rate": 0.00019589351703456687, "lm_loss": 0.00836181640625, "loss": 0.0097, "step": 2239, "total_loss": 0.00836181640625 }, { "epoch": 0.92, "learning_rate": 0.00019588987341732217, "lm_loss": 0.005035400390625, "loss": 0.0085, "step": 2240, "total_loss": 0.005035400390625 }, { "epoch": 0.92, "learning_rate": 0.0001958862282182483, "lm_loss": 0.006927490234375, "loss": 0.0085, "step": 2241, "total_loss": 0.006927490234375 }, { "epoch": 0.92, "learning_rate": 0.00019588258143740536, "lm_loss": 0.00994873046875, "loss": 0.0068, "step": 2242, "total_loss": 0.00994873046875 }, { "epoch": 0.92, "learning_rate": 0.00019587893307485352, "lm_loss": 0.0036163330078125, "loss": 0.0096, "step": 2243, "total_loss": 0.0036163330078125 }, { "epoch": 0.92, "learning_rate": 0.00019587528313065296, "lm_loss": 0.01177978515625, "loss": 0.0074, "step": 2244, "total_loss": 0.01177978515625 }, { "epoch": 0.92, "learning_rate": 0.00019587163160486392, "lm_loss": 0.00518798828125, "loss": 0.0076, "step": 2245, "total_loss": 0.00518798828125 }, { "epoch": 0.92, "learning_rate": 0.00019586797849754662, "lm_loss": 0.006011962890625, "loss": 0.0066, "step": 2246, "total_loss": 0.006011962890625 }, { "epoch": 0.92, "learning_rate": 0.0001958643238087613, "lm_loss": 0.0093994140625, "loss": 0.0091, "step": 2247, "total_loss": 0.0093994140625 }, { "epoch": 0.92, "learning_rate": 0.00019586066753856827, "lm_loss": 0.01116943359375, "loss": 0.0098, "step": 2248, "total_loss": 0.01116943359375 }, { "epoch": 0.92, "learning_rate": 0.00019585700968702784, "lm_loss": 0.004852294921875, "loss": 0.0076, "step": 2249, "total_loss": 0.004852294921875 }, { "epoch": 0.92, "learning_rate": 0.00019585335025420038, "lm_loss": 0.017822265625, "loss": 0.0072, "step": 2250, "total_loss": 0.017822265625 }, { "epoch": 0.92, "learning_rate": 0.00019584968924014619, "lm_loss": 0.00634765625, "loss": 0.0095, "step": 2251, "total_loss": 0.00634765625 }, { "epoch": 0.92, "learning_rate": 0.0001958460266449257, "lm_loss": 0.00640869140625, "loss": 0.0085, "step": 2252, "total_loss": 0.00640869140625 }, { "epoch": 0.92, "learning_rate": 0.00019584236246859938, "lm_loss": 0.013427734375, "loss": 0.0085, "step": 2253, "total_loss": 0.013427734375 }, { "epoch": 0.92, "learning_rate": 0.0001958386967112276, "lm_loss": 0.004119873046875, "loss": 0.0081, "step": 2254, "total_loss": 0.004119873046875 }, { "epoch": 0.92, "learning_rate": 0.00019583502937287086, "lm_loss": 0.008056640625, "loss": 0.0084, "step": 2255, "total_loss": 0.008056640625 }, { "epoch": 0.92, "learning_rate": 0.00019583136045358965, "lm_loss": 0.006256103515625, "loss": 0.0102, "step": 2256, "total_loss": 0.006256103515625 }, { "epoch": 0.92, "learning_rate": 0.0001958276899534445, "lm_loss": 0.004791259765625, "loss": 0.0079, "step": 2257, "total_loss": 0.004791259765625 }, { "epoch": 0.92, "learning_rate": 0.00019582401787249598, "lm_loss": 0.01202392578125, "loss": 0.0107, "step": 2258, "total_loss": 0.01202392578125 }, { "epoch": 0.92, "learning_rate": 0.00019582034421080464, "lm_loss": 0.007232666015625, "loss": 0.0074, "step": 2259, "total_loss": 0.007232666015625 }, { "epoch": 0.92, "learning_rate": 0.0001958166689684311, "lm_loss": 0.0036773681640625, "loss": 0.0085, "step": 2260, "total_loss": 0.0036773681640625 }, { "epoch": 0.92, "learning_rate": 0.00019581299214543595, "lm_loss": 0.00933837890625, "loss": 0.0084, "step": 2261, "total_loss": 0.00933837890625 }, { "epoch": 0.92, "learning_rate": 0.00019580931374187988, "lm_loss": 0.0101318359375, "loss": 0.0079, "step": 2262, "total_loss": 0.0101318359375 }, { "epoch": 0.93, "learning_rate": 0.00019580563375782358, "lm_loss": 0.0069580078125, "loss": 0.008, "step": 2263, "total_loss": 0.0069580078125 }, { "epoch": 0.93, "learning_rate": 0.0001958019521933277, "lm_loss": 0.006439208984375, "loss": 0.0077, "step": 2264, "total_loss": 0.006439208984375 }, { "epoch": 0.93, "learning_rate": 0.00019579826904845302, "lm_loss": 0.00750732421875, "loss": 0.0066, "step": 2265, "total_loss": 0.00750732421875 }, { "epoch": 0.93, "learning_rate": 0.0001957945843232603, "lm_loss": 0.016357421875, "loss": 0.0094, "step": 2266, "total_loss": 0.016357421875 }, { "epoch": 0.93, "learning_rate": 0.0001957908980178103, "lm_loss": 0.004241943359375, "loss": 0.009, "step": 2267, "total_loss": 0.004241943359375 }, { "epoch": 0.93, "learning_rate": 0.00019578721013216384, "lm_loss": 0.005584716796875, "loss": 0.0087, "step": 2268, "total_loss": 0.005584716796875 }, { "epoch": 0.93, "learning_rate": 0.00019578352066638175, "lm_loss": 0.0125732421875, "loss": 0.0098, "step": 2269, "total_loss": 0.0125732421875 }, { "epoch": 0.93, "learning_rate": 0.00019577982962052493, "lm_loss": 0.0052490234375, "loss": 0.007, "step": 2270, "total_loss": 0.0052490234375 }, { "epoch": 0.93, "learning_rate": 0.00019577613699465422, "lm_loss": 0.00726318359375, "loss": 0.0088, "step": 2271, "total_loss": 0.00726318359375 }, { "epoch": 0.93, "learning_rate": 0.00019577244278883057, "lm_loss": 0.01275634765625, "loss": 0.0075, "step": 2272, "total_loss": 0.01275634765625 }, { "epoch": 0.93, "learning_rate": 0.00019576874700311488, "lm_loss": 0.005462646484375, "loss": 0.0083, "step": 2273, "total_loss": 0.005462646484375 }, { "epoch": 0.93, "learning_rate": 0.00019576504963756818, "lm_loss": 0.00927734375, "loss": 0.0086, "step": 2274, "total_loss": 0.00927734375 }, { "epoch": 0.93, "learning_rate": 0.00019576135069225136, "lm_loss": 0.006622314453125, "loss": 0.0069, "step": 2275, "total_loss": 0.006622314453125 }, { "epoch": 0.93, "learning_rate": 0.00019575765016722555, "lm_loss": 0.00421142578125, "loss": 0.0082, "step": 2276, "total_loss": 0.00421142578125 }, { "epoch": 0.93, "learning_rate": 0.00019575394806255173, "lm_loss": 0.005523681640625, "loss": 0.0089, "step": 2277, "total_loss": 0.005523681640625 }, { "epoch": 0.93, "learning_rate": 0.00019575024437829103, "lm_loss": 0.0057373046875, "loss": 0.0068, "step": 2278, "total_loss": 0.0057373046875 }, { "epoch": 0.93, "learning_rate": 0.00019574653911450446, "lm_loss": 0.01251220703125, "loss": 0.0079, "step": 2279, "total_loss": 0.01251220703125 }, { "epoch": 0.93, "learning_rate": 0.00019574283227125321, "lm_loss": 0.011474609375, "loss": 0.0096, "step": 2280, "total_loss": 0.011474609375 }, { "epoch": 0.93, "learning_rate": 0.00019573912384859843, "lm_loss": 0.01068115234375, "loss": 0.008, "step": 2281, "total_loss": 0.01068115234375 }, { "epoch": 0.93, "learning_rate": 0.00019573541384660124, "lm_loss": 0.005889892578125, "loss": 0.0073, "step": 2282, "total_loss": 0.005889892578125 }, { "epoch": 0.93, "learning_rate": 0.0001957317022653229, "lm_loss": 0.006256103515625, "loss": 0.0078, "step": 2283, "total_loss": 0.006256103515625 }, { "epoch": 0.93, "learning_rate": 0.00019572798910482458, "lm_loss": 0.0120849609375, "loss": 0.0079, "step": 2284, "total_loss": 0.0120849609375 }, { "epoch": 0.93, "learning_rate": 0.0001957242743651676, "lm_loss": 0.0042724609375, "loss": 0.0085, "step": 2285, "total_loss": 0.0042724609375 }, { "epoch": 0.93, "learning_rate": 0.00019572055804641317, "lm_loss": 0.01031494140625, "loss": 0.0082, "step": 2286, "total_loss": 0.01031494140625 }, { "epoch": 0.93, "learning_rate": 0.00019571684014862268, "lm_loss": 0.001312255859375, "loss": 0.0089, "step": 2287, "total_loss": 0.001312255859375 }, { "epoch": 0.94, "learning_rate": 0.00019571312067185738, "lm_loss": 0.0033416748046875, "loss": 0.0102, "step": 2288, "total_loss": 0.0033416748046875 }, { "epoch": 0.94, "learning_rate": 0.00019570939961617866, "lm_loss": 0.01141357421875, "loss": 0.0094, "step": 2289, "total_loss": 0.01141357421875 }, { "epoch": 0.94, "learning_rate": 0.00019570567698164792, "lm_loss": 0.0091552734375, "loss": 0.0079, "step": 2290, "total_loss": 0.0091552734375 }, { "epoch": 0.94, "learning_rate": 0.00019570195276832656, "lm_loss": 0.00653076171875, "loss": 0.0058, "step": 2291, "total_loss": 0.00653076171875 }, { "epoch": 0.94, "learning_rate": 0.00019569822697627597, "lm_loss": 0.008544921875, "loss": 0.0074, "step": 2292, "total_loss": 0.008544921875 }, { "epoch": 0.94, "learning_rate": 0.0001956944996055577, "lm_loss": 0.0179443359375, "loss": 0.0081, "step": 2293, "total_loss": 0.0179443359375 }, { "epoch": 0.94, "learning_rate": 0.00019569077065623316, "lm_loss": 0.00958251953125, "loss": 0.0084, "step": 2294, "total_loss": 0.00958251953125 }, { "epoch": 0.94, "learning_rate": 0.00019568704012836393, "lm_loss": 0.01470947265625, "loss": 0.0085, "step": 2295, "total_loss": 0.01470947265625 }, { "epoch": 0.94, "learning_rate": 0.00019568330802201148, "lm_loss": 0.01007080078125, "loss": 0.0073, "step": 2296, "total_loss": 0.01007080078125 }, { "epoch": 0.94, "learning_rate": 0.0001956795743372374, "lm_loss": 0.007354736328125, "loss": 0.0061, "step": 2297, "total_loss": 0.007354736328125 }, { "epoch": 0.94, "learning_rate": 0.00019567583907410333, "lm_loss": 0.0026397705078125, "loss": 0.0085, "step": 2298, "total_loss": 0.0026397705078125 }, { "epoch": 0.94, "learning_rate": 0.0001956721022326708, "lm_loss": 0.007537841796875, "loss": 0.0057, "step": 2299, "total_loss": 0.007537841796875 }, { "epoch": 0.94, "learning_rate": 0.00019566836381300156, "lm_loss": 0.00836181640625, "loss": 0.007, "step": 2300, "total_loss": 0.00836181640625 }, { "epoch": 0.94, "eval_lm_loss": 0.00943505298346281, "eval_loss": 0.009795456193387508, "eval_runtime": 43.8801, "eval_samples_per_second": 22.789, "eval_steps_per_second": 0.205, "eval_total_loss": 0.00943505298346281, "lm_loss": 0.0010986328125, "step": 2300, "total_loss": 0.0010986328125 }, { "epoch": 0.94, "learning_rate": 0.00019566462381515718, "lm_loss": 0.004180908203125, "loss": 0.0064, "step": 2301, "total_loss": 0.004180908203125 }, { "epoch": 0.94, "learning_rate": 0.00019566088223919943, "lm_loss": 0.006103515625, "loss": 0.008, "step": 2302, "total_loss": 0.006103515625 }, { "epoch": 0.94, "learning_rate": 0.00019565713908518997, "lm_loss": 0.01116943359375, "loss": 0.0093, "step": 2303, "total_loss": 0.01116943359375 }, { "epoch": 0.94, "learning_rate": 0.00019565339435319062, "lm_loss": 0.00396728515625, "loss": 0.0089, "step": 2304, "total_loss": 0.00396728515625 }, { "epoch": 0.94, "learning_rate": 0.0001956496480432631, "lm_loss": 0.005523681640625, "loss": 0.01, "step": 2305, "total_loss": 0.005523681640625 }, { "epoch": 0.94, "learning_rate": 0.00019564590015546922, "lm_loss": 0.005889892578125, "loss": 0.0108, "step": 2306, "total_loss": 0.005889892578125 }, { "epoch": 0.94, "learning_rate": 0.0001956421506898708, "lm_loss": 0.00469970703125, "loss": 0.0064, "step": 2307, "total_loss": 0.00469970703125 }, { "epoch": 0.94, "learning_rate": 0.00019563839964652972, "lm_loss": 0.0074462890625, "loss": 0.0085, "step": 2308, "total_loss": 0.0074462890625 }, { "epoch": 0.94, "learning_rate": 0.00019563464702550786, "lm_loss": 0.007720947265625, "loss": 0.0097, "step": 2309, "total_loss": 0.007720947265625 }, { "epoch": 0.94, "learning_rate": 0.00019563089282686707, "lm_loss": 0.005096435546875, "loss": 0.0081, "step": 2310, "total_loss": 0.005096435546875 }, { "epoch": 0.94, "learning_rate": 0.00019562713705066932, "lm_loss": 0.0052490234375, "loss": 0.0076, "step": 2311, "total_loss": 0.0052490234375 }, { "epoch": 0.95, "learning_rate": 0.00019562337969697657, "lm_loss": 0.0118408203125, "loss": 0.0083, "step": 2312, "total_loss": 0.0118408203125 }, { "epoch": 0.95, "learning_rate": 0.00019561962076585082, "lm_loss": 0.003509521484375, "loss": 0.0077, "step": 2313, "total_loss": 0.003509521484375 }, { "epoch": 0.95, "learning_rate": 0.00019561586025735404, "lm_loss": 0.0025177001953125, "loss": 0.0089, "step": 2314, "total_loss": 0.0025177001953125 }, { "epoch": 0.95, "learning_rate": 0.00019561209817154827, "lm_loss": 0.002044677734375, "loss": 0.0074, "step": 2315, "total_loss": 0.002044677734375 }, { "epoch": 0.95, "learning_rate": 0.00019560833450849558, "lm_loss": 0.00634765625, "loss": 0.0079, "step": 2316, "total_loss": 0.00634765625 }, { "epoch": 0.95, "learning_rate": 0.00019560456926825804, "lm_loss": 0.0068359375, "loss": 0.0081, "step": 2317, "total_loss": 0.0068359375 }, { "epoch": 0.95, "learning_rate": 0.00019560080245089781, "lm_loss": 0.00439453125, "loss": 0.0081, "step": 2318, "total_loss": 0.00439453125 }, { "epoch": 0.95, "learning_rate": 0.000195597034056477, "lm_loss": 0.0035247802734375, "loss": 0.0076, "step": 2319, "total_loss": 0.0035247802734375 }, { "epoch": 0.95, "learning_rate": 0.00019559326408505773, "lm_loss": 0.01287841796875, "loss": 0.0096, "step": 2320, "total_loss": 0.01287841796875 }, { "epoch": 0.95, "learning_rate": 0.0001955894925367023, "lm_loss": 0.0103759765625, "loss": 0.0086, "step": 2321, "total_loss": 0.0103759765625 }, { "epoch": 0.95, "learning_rate": 0.0001955857194114728, "lm_loss": 0.006866455078125, "loss": 0.0082, "step": 2322, "total_loss": 0.006866455078125 }, { "epoch": 0.95, "learning_rate": 0.00019558194470943154, "lm_loss": 0.0086669921875, "loss": 0.0086, "step": 2323, "total_loss": 0.0086669921875 }, { "epoch": 0.95, "learning_rate": 0.00019557816843064082, "lm_loss": 0.002471923828125, "loss": 0.0078, "step": 2324, "total_loss": 0.002471923828125 }, { "epoch": 0.95, "learning_rate": 0.00019557439057516285, "lm_loss": 0.011962890625, "loss": 0.0085, "step": 2325, "total_loss": 0.011962890625 }, { "epoch": 0.95, "learning_rate": 0.00019557061114306, "lm_loss": 0.0040283203125, "loss": 0.0084, "step": 2326, "total_loss": 0.0040283203125 }, { "epoch": 0.95, "learning_rate": 0.00019556683013439465, "lm_loss": 0.007232666015625, "loss": 0.0082, "step": 2327, "total_loss": 0.007232666015625 }, { "epoch": 0.95, "learning_rate": 0.00019556304754922912, "lm_loss": 0.008056640625, "loss": 0.0072, "step": 2328, "total_loss": 0.008056640625 }, { "epoch": 0.95, "learning_rate": 0.0001955592633876258, "lm_loss": 0.00592041015625, "loss": 0.0087, "step": 2329, "total_loss": 0.00592041015625 }, { "epoch": 0.95, "learning_rate": 0.00019555547764964714, "lm_loss": 0.00860595703125, "loss": 0.0087, "step": 2330, "total_loss": 0.00860595703125 }, { "epoch": 0.95, "learning_rate": 0.00019555169033535564, "lm_loss": 0.008056640625, "loss": 0.0083, "step": 2331, "total_loss": 0.008056640625 }, { "epoch": 0.95, "learning_rate": 0.00019554790144481364, "lm_loss": 0.005889892578125, "loss": 0.0068, "step": 2332, "total_loss": 0.005889892578125 }, { "epoch": 0.95, "learning_rate": 0.00019554411097808382, "lm_loss": 0.002105712890625, "loss": 0.0065, "step": 2333, "total_loss": 0.002105712890625 }, { "epoch": 0.95, "learning_rate": 0.00019554031893522856, "lm_loss": 0.0118408203125, "loss": 0.0071, "step": 2334, "total_loss": 0.0118408203125 }, { "epoch": 0.95, "learning_rate": 0.00019553652531631048, "lm_loss": 0.0081787109375, "loss": 0.0073, "step": 2335, "total_loss": 0.0081787109375 }, { "epoch": 0.96, "learning_rate": 0.00019553273012139216, "lm_loss": 0.00567626953125, "loss": 0.0076, "step": 2336, "total_loss": 0.00567626953125 }, { "epoch": 0.96, "learning_rate": 0.00019552893335053622, "lm_loss": 0.0047607421875, "loss": 0.0083, "step": 2337, "total_loss": 0.0047607421875 }, { "epoch": 0.96, "learning_rate": 0.00019552513500380525, "lm_loss": 0.00151824951171875, "loss": 0.0085, "step": 2338, "total_loss": 0.00151824951171875 }, { "epoch": 0.96, "learning_rate": 0.0001955213350812619, "lm_loss": 0.01043701171875, "loss": 0.0087, "step": 2339, "total_loss": 0.01043701171875 }, { "epoch": 0.96, "learning_rate": 0.00019551753358296897, "lm_loss": 0.0089111328125, "loss": 0.0086, "step": 2340, "total_loss": 0.0089111328125 }, { "epoch": 0.96, "learning_rate": 0.000195513730508989, "lm_loss": 0.01177978515625, "loss": 0.0088, "step": 2341, "total_loss": 0.01177978515625 }, { "epoch": 0.96, "learning_rate": 0.00019550992585938486, "lm_loss": 0.01104736328125, "loss": 0.0078, "step": 2342, "total_loss": 0.01104736328125 }, { "epoch": 0.96, "learning_rate": 0.00019550611963421925, "lm_loss": 0.007171630859375, "loss": 0.0065, "step": 2343, "total_loss": 0.007171630859375 }, { "epoch": 0.96, "learning_rate": 0.00019550231183355498, "lm_loss": 0.0115966796875, "loss": 0.0088, "step": 2344, "total_loss": 0.0115966796875 }, { "epoch": 0.96, "learning_rate": 0.00019549850245745488, "lm_loss": 0.01416015625, "loss": 0.008, "step": 2345, "total_loss": 0.01416015625 }, { "epoch": 0.96, "learning_rate": 0.00019549469150598173, "lm_loss": 0.0029144287109375, "loss": 0.0076, "step": 2346, "total_loss": 0.0029144287109375 }, { "epoch": 0.96, "learning_rate": 0.00019549087897919844, "lm_loss": 0.00885009765625, "loss": 0.0093, "step": 2347, "total_loss": 0.00885009765625 }, { "epoch": 0.96, "learning_rate": 0.00019548706487716792, "lm_loss": 0.009765625, "loss": 0.0098, "step": 2348, "total_loss": 0.009765625 }, { "epoch": 0.96, "learning_rate": 0.00019548324919995306, "lm_loss": 0.00628662109375, "loss": 0.007, "step": 2349, "total_loss": 0.00628662109375 }, { "epoch": 0.96, "learning_rate": 0.00019547943194761684, "lm_loss": 0.00811767578125, "loss": 0.0086, "step": 2350, "total_loss": 0.00811767578125 }, { "epoch": 0.96, "learning_rate": 0.00019547561312022218, "lm_loss": 0.013916015625, "loss": 0.0093, "step": 2351, "total_loss": 0.013916015625 }, { "epoch": 0.96, "learning_rate": 0.0001954717927178321, "lm_loss": 0.0034637451171875, "loss": 0.0092, "step": 2352, "total_loss": 0.0034637451171875 }, { "epoch": 0.96, "learning_rate": 0.0001954679707405096, "lm_loss": 0.019775390625, "loss": 0.0099, "step": 2353, "total_loss": 0.019775390625 }, { "epoch": 0.96, "learning_rate": 0.00019546414718831775, "lm_loss": 0.004974365234375, "loss": 0.0085, "step": 2354, "total_loss": 0.004974365234375 }, { "epoch": 0.96, "learning_rate": 0.00019546032206131965, "lm_loss": 0.00531005859375, "loss": 0.0086, "step": 2355, "total_loss": 0.00531005859375 }, { "epoch": 0.96, "learning_rate": 0.00019545649535957838, "lm_loss": 0.0137939453125, "loss": 0.0074, "step": 2356, "total_loss": 0.0137939453125 }, { "epoch": 0.96, "learning_rate": 0.00019545266708315704, "lm_loss": 0.00872802734375, "loss": 0.0076, "step": 2357, "total_loss": 0.00872802734375 }, { "epoch": 0.96, "learning_rate": 0.00019544883723211881, "lm_loss": 0.0057373046875, "loss": 0.0075, "step": 2358, "total_loss": 0.0057373046875 }, { "epoch": 0.96, "learning_rate": 0.00019544500580652687, "lm_loss": 0.01080322265625, "loss": 0.0079, "step": 2359, "total_loss": 0.01080322265625 }, { "epoch": 0.96, "learning_rate": 0.00019544117280644442, "lm_loss": 0.00714111328125, "loss": 0.0076, "step": 2360, "total_loss": 0.00714111328125 }, { "epoch": 0.97, "learning_rate": 0.0001954373382319347, "lm_loss": 0.0054931640625, "loss": 0.0073, "step": 2361, "total_loss": 0.0054931640625 }, { "epoch": 0.97, "learning_rate": 0.00019543350208306093, "lm_loss": 0.006866455078125, "loss": 0.0091, "step": 2362, "total_loss": 0.006866455078125 }, { "epoch": 0.97, "learning_rate": 0.00019542966435988644, "lm_loss": 0.00494384765625, "loss": 0.0065, "step": 2363, "total_loss": 0.00494384765625 }, { "epoch": 0.97, "learning_rate": 0.0001954258250624745, "lm_loss": 0.00848388671875, "loss": 0.0075, "step": 2364, "total_loss": 0.00848388671875 }, { "epoch": 0.97, "learning_rate": 0.00019542198419088844, "lm_loss": 0.01019287109375, "loss": 0.0076, "step": 2365, "total_loss": 0.01019287109375 }, { "epoch": 0.97, "learning_rate": 0.00019541814174519164, "lm_loss": 0.004150390625, "loss": 0.0065, "step": 2366, "total_loss": 0.004150390625 }, { "epoch": 0.97, "learning_rate": 0.00019541429772544752, "lm_loss": 0.0125732421875, "loss": 0.0078, "step": 2367, "total_loss": 0.0125732421875 }, { "epoch": 0.97, "learning_rate": 0.00019541045213171943, "lm_loss": 0.0135498046875, "loss": 0.0086, "step": 2368, "total_loss": 0.0135498046875 }, { "epoch": 0.97, "learning_rate": 0.00019540660496407086, "lm_loss": 0.005889892578125, "loss": 0.0071, "step": 2369, "total_loss": 0.005889892578125 }, { "epoch": 0.97, "learning_rate": 0.00019540275622256524, "lm_loss": 0.007354736328125, "loss": 0.0089, "step": 2370, "total_loss": 0.007354736328125 }, { "epoch": 0.97, "learning_rate": 0.00019539890590726606, "lm_loss": 0.00640869140625, "loss": 0.0059, "step": 2371, "total_loss": 0.00640869140625 }, { "epoch": 0.97, "learning_rate": 0.00019539505401823683, "lm_loss": 0.0030059814453125, "loss": 0.0082, "step": 2372, "total_loss": 0.0030059814453125 }, { "epoch": 0.97, "learning_rate": 0.00019539120055554111, "lm_loss": 0.005645751953125, "loss": 0.0074, "step": 2373, "total_loss": 0.005645751953125 }, { "epoch": 0.97, "learning_rate": 0.0001953873455192425, "lm_loss": 0.006591796875, "loss": 0.0067, "step": 2374, "total_loss": 0.006591796875 }, { "epoch": 0.97, "learning_rate": 0.00019538348890940453, "lm_loss": 0.00494384765625, "loss": 0.0074, "step": 2375, "total_loss": 0.00494384765625 }, { "epoch": 0.97, "learning_rate": 0.00019537963072609088, "lm_loss": 0.0034332275390625, "loss": 0.01, "step": 2376, "total_loss": 0.0034332275390625 }, { "epoch": 0.97, "learning_rate": 0.00019537577096936514, "lm_loss": 0.00179290771484375, "loss": 0.0065, "step": 2377, "total_loss": 0.00179290771484375 }, { "epoch": 0.97, "learning_rate": 0.000195371909639291, "lm_loss": 0.006134033203125, "loss": 0.0078, "step": 2378, "total_loss": 0.006134033203125 }, { "epoch": 0.97, "learning_rate": 0.0001953680467359322, "lm_loss": 0.0036163330078125, "loss": 0.0053, "step": 2379, "total_loss": 0.0036163330078125 }, { "epoch": 0.97, "learning_rate": 0.00019536418225935237, "lm_loss": 0.0093994140625, "loss": 0.0082, "step": 2380, "total_loss": 0.0093994140625 }, { "epoch": 0.97, "learning_rate": 0.00019536031620961538, "lm_loss": 0.006805419921875, "loss": 0.008, "step": 2381, "total_loss": 0.006805419921875 }, { "epoch": 0.97, "learning_rate": 0.00019535644858678487, "lm_loss": 0.00921630859375, "loss": 0.0069, "step": 2382, "total_loss": 0.00921630859375 }, { "epoch": 0.97, "learning_rate": 0.00019535257939092476, "lm_loss": 0.0125732421875, "loss": 0.0103, "step": 2383, "total_loss": 0.0125732421875 }, { "epoch": 0.97, "learning_rate": 0.00019534870862209885, "lm_loss": 0.0078125, "loss": 0.0074, "step": 2384, "total_loss": 0.0078125 }, { "epoch": 0.98, "learning_rate": 0.00019534483628037098, "lm_loss": 0.005950927734375, "loss": 0.0076, "step": 2385, "total_loss": 0.005950927734375 }, { "epoch": 0.98, "learning_rate": 0.000195340962365805, "lm_loss": 0.0068359375, "loss": 0.0081, "step": 2386, "total_loss": 0.0068359375 }, { "epoch": 0.98, "learning_rate": 0.00019533708687846483, "lm_loss": 0.0037384033203125, "loss": 0.0066, "step": 2387, "total_loss": 0.0037384033203125 }, { "epoch": 0.98, "learning_rate": 0.00019533320981841441, "lm_loss": 0.013916015625, "loss": 0.0086, "step": 2388, "total_loss": 0.013916015625 }, { "epoch": 0.98, "learning_rate": 0.0001953293311857177, "lm_loss": 0.004180908203125, "loss": 0.0072, "step": 2389, "total_loss": 0.004180908203125 }, { "epoch": 0.98, "learning_rate": 0.0001953254509804387, "lm_loss": 0.0033416748046875, "loss": 0.0097, "step": 2390, "total_loss": 0.0033416748046875 }, { "epoch": 0.98, "learning_rate": 0.0001953215692026414, "lm_loss": 0.00927734375, "loss": 0.0098, "step": 2391, "total_loss": 0.00927734375 }, { "epoch": 0.98, "learning_rate": 0.00019531768585238983, "lm_loss": 0.01025390625, "loss": 0.0073, "step": 2392, "total_loss": 0.01025390625 }, { "epoch": 0.98, "learning_rate": 0.00019531380092974808, "lm_loss": 0.00274658203125, "loss": 0.0083, "step": 2393, "total_loss": 0.00274658203125 }, { "epoch": 0.98, "learning_rate": 0.00019530991443478019, "lm_loss": 0.0098876953125, "loss": 0.0095, "step": 2394, "total_loss": 0.0098876953125 }, { "epoch": 0.98, "learning_rate": 0.00019530602636755028, "lm_loss": 0.00482177734375, "loss": 0.0083, "step": 2395, "total_loss": 0.00482177734375 }, { "epoch": 0.98, "learning_rate": 0.0001953021367281225, "lm_loss": 0.004180908203125, "loss": 0.0072, "step": 2396, "total_loss": 0.004180908203125 }, { "epoch": 0.98, "learning_rate": 0.00019529824551656107, "lm_loss": 0.0048828125, "loss": 0.0081, "step": 2397, "total_loss": 0.0048828125 }, { "epoch": 0.98, "learning_rate": 0.0001952943527329301, "lm_loss": 0.0081787109375, "loss": 0.0076, "step": 2398, "total_loss": 0.0081787109375 }, { "epoch": 0.98, "learning_rate": 0.00019529045837729384, "lm_loss": 0.0167236328125, "loss": 0.007, "step": 2399, "total_loss": 0.0167236328125 }, { "epoch": 0.98, "learning_rate": 0.00019528656244971652, "lm_loss": 0.008544921875, "loss": 0.0101, "step": 2400, "total_loss": 0.008544921875 }, { "epoch": 0.98, "eval_lm_loss": 0.009607951156795025, "eval_loss": 0.009962158277630806, "eval_runtime": 43.878, "eval_samples_per_second": 22.79, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009607951156795025, "lm_loss": 0.0012359619140625, "step": 2400, "total_loss": 0.0012359619140625 }, { "epoch": 0.98, "learning_rate": 0.0001952826649502624, "lm_loss": 0.0103759765625, "loss": 0.0084, "step": 2401, "total_loss": 0.0103759765625 }, { "epoch": 0.98, "learning_rate": 0.00019527876587899584, "lm_loss": 0.0034332275390625, "loss": 0.0073, "step": 2402, "total_loss": 0.0034332275390625 }, { "epoch": 0.98, "learning_rate": 0.00019527486523598112, "lm_loss": 0.005859375, "loss": 0.0073, "step": 2403, "total_loss": 0.005859375 }, { "epoch": 0.98, "learning_rate": 0.00019527096302128253, "lm_loss": 0.005462646484375, "loss": 0.0079, "step": 2404, "total_loss": 0.005462646484375 }, { "epoch": 0.98, "learning_rate": 0.00019526705923496453, "lm_loss": 0.0025177001953125, "loss": 0.0099, "step": 2405, "total_loss": 0.0025177001953125 }, { "epoch": 0.98, "learning_rate": 0.00019526315387709145, "lm_loss": 0.00250244140625, "loss": 0.0073, "step": 2406, "total_loss": 0.00250244140625 }, { "epoch": 0.98, "learning_rate": 0.00019525924694772776, "lm_loss": 0.0032196044921875, "loss": 0.007, "step": 2407, "total_loss": 0.0032196044921875 }, { "epoch": 0.98, "learning_rate": 0.00019525533844693787, "lm_loss": 0.01239013671875, "loss": 0.0089, "step": 2408, "total_loss": 0.01239013671875 }, { "epoch": 0.98, "learning_rate": 0.00019525142837478632, "lm_loss": 0.01336669921875, "loss": 0.0101, "step": 2409, "total_loss": 0.01336669921875 }, { "epoch": 0.99, "learning_rate": 0.00019524751673133755, "lm_loss": 0.010986328125, "loss": 0.0089, "step": 2410, "total_loss": 0.010986328125 }, { "epoch": 0.99, "learning_rate": 0.00019524360351665608, "lm_loss": 0.0062255859375, "loss": 0.0095, "step": 2411, "total_loss": 0.0062255859375 }, { "epoch": 0.99, "learning_rate": 0.00019523968873080654, "lm_loss": 0.0103759765625, "loss": 0.0077, "step": 2412, "total_loss": 0.0103759765625 }, { "epoch": 0.99, "learning_rate": 0.00019523577237385344, "lm_loss": 0.00579833984375, "loss": 0.0079, "step": 2413, "total_loss": 0.00579833984375 }, { "epoch": 0.99, "learning_rate": 0.0001952318544458614, "lm_loss": 0.0137939453125, "loss": 0.0091, "step": 2414, "total_loss": 0.0137939453125 }, { "epoch": 0.99, "learning_rate": 0.00019522793494689507, "lm_loss": 0.0152587890625, "loss": 0.0079, "step": 2415, "total_loss": 0.0152587890625 }, { "epoch": 0.99, "learning_rate": 0.0001952240138770191, "lm_loss": 0.0023193359375, "loss": 0.0084, "step": 2416, "total_loss": 0.0023193359375 }, { "epoch": 0.99, "learning_rate": 0.00019522009123629816, "lm_loss": 0.0120849609375, "loss": 0.01, "step": 2417, "total_loss": 0.0120849609375 }, { "epoch": 0.99, "learning_rate": 0.00019521616702479697, "lm_loss": 0.0078125, "loss": 0.0101, "step": 2418, "total_loss": 0.0078125 }, { "epoch": 0.99, "learning_rate": 0.00019521224124258027, "lm_loss": 0.00860595703125, "loss": 0.0072, "step": 2419, "total_loss": 0.00860595703125 }, { "epoch": 0.99, "learning_rate": 0.0001952083138897128, "lm_loss": 0.0125732421875, "loss": 0.0074, "step": 2420, "total_loss": 0.0125732421875 }, { "epoch": 0.99, "learning_rate": 0.00019520438496625935, "lm_loss": 0.002716064453125, "loss": 0.0084, "step": 2421, "total_loss": 0.002716064453125 }, { "epoch": 0.99, "learning_rate": 0.00019520045447228476, "lm_loss": 0.00151824951171875, "loss": 0.0095, "step": 2422, "total_loss": 0.00151824951171875 }, { "epoch": 0.99, "learning_rate": 0.00019519652240785384, "lm_loss": 0.005462646484375, "loss": 0.007, "step": 2423, "total_loss": 0.005462646484375 }, { "epoch": 0.99, "learning_rate": 0.00019519258877303148, "lm_loss": 0.01220703125, "loss": 0.0095, "step": 2424, "total_loss": 0.01220703125 }, { "epoch": 0.99, "learning_rate": 0.0001951886535678826, "lm_loss": 0.0072021484375, "loss": 0.0072, "step": 2425, "total_loss": 0.0072021484375 }, { "epoch": 0.99, "learning_rate": 0.000195184716792472, "lm_loss": 0.0067138671875, "loss": 0.0074, "step": 2426, "total_loss": 0.0067138671875 }, { "epoch": 0.99, "learning_rate": 0.0001951807784468647, "lm_loss": 0.0115966796875, "loss": 0.008, "step": 2427, "total_loss": 0.0115966796875 }, { "epoch": 0.99, "learning_rate": 0.00019517683853112572, "lm_loss": 0.007720947265625, "loss": 0.0085, "step": 2428, "total_loss": 0.007720947265625 }, { "epoch": 0.99, "learning_rate": 0.00019517289704531996, "lm_loss": 0.01556396484375, "loss": 0.0067, "step": 2429, "total_loss": 0.01556396484375 }, { "epoch": 0.99, "learning_rate": 0.00019516895398951248, "lm_loss": 0.01165771484375, "loss": 0.0065, "step": 2430, "total_loss": 0.01165771484375 }, { "epoch": 0.99, "learning_rate": 0.00019516500936376833, "lm_loss": 0.01202392578125, "loss": 0.0102, "step": 2431, "total_loss": 0.01202392578125 }, { "epoch": 0.99, "learning_rate": 0.0001951610631681526, "lm_loss": 0.009765625, "loss": 0.0095, "step": 2432, "total_loss": 0.009765625 }, { "epoch": 0.99, "learning_rate": 0.00019515711540273032, "lm_loss": 0.003997802734375, "loss": 0.0087, "step": 2433, "total_loss": 0.003997802734375 }, { "epoch": 1.0, "learning_rate": 0.00019515316606756667, "lm_loss": 0.00823974609375, "loss": 0.0095, "step": 2434, "total_loss": 0.00823974609375 }, { "epoch": 1.0, "learning_rate": 0.00019514921516272679, "lm_loss": 0.006256103515625, "loss": 0.0073, "step": 2435, "total_loss": 0.006256103515625 }, { "epoch": 1.0, "learning_rate": 0.00019514526268827584, "lm_loss": 0.005126953125, "loss": 0.0086, "step": 2436, "total_loss": 0.005126953125 }, { "epoch": 1.0, "learning_rate": 0.00019514130864427907, "lm_loss": 0.00677490234375, "loss": 0.0076, "step": 2437, "total_loss": 0.00677490234375 }, { "epoch": 1.0, "learning_rate": 0.00019513735303080165, "lm_loss": 0.01129150390625, "loss": 0.0073, "step": 2438, "total_loss": 0.01129150390625 }, { "epoch": 1.0, "learning_rate": 0.00019513339584790884, "lm_loss": 0.00592041015625, "loss": 0.0083, "step": 2439, "total_loss": 0.00592041015625 }, { "epoch": 1.0, "learning_rate": 0.00019512943709566594, "lm_loss": 0.009765625, "loss": 0.0092, "step": 2440, "total_loss": 0.009765625 }, { "epoch": 1.0, "learning_rate": 0.00019512547677413825, "lm_loss": 0.0048828125, "loss": 0.0078, "step": 2441, "total_loss": 0.0048828125 }, { "epoch": 1.0, "learning_rate": 0.0001951215148833911, "lm_loss": 0.00799560546875, "loss": 0.0071, "step": 2442, "total_loss": 0.00799560546875 }, { "epoch": 1.0, "learning_rate": 0.00019511755142348985, "lm_loss": 0.0048828125, "loss": 0.0076, "step": 2443, "total_loss": 0.0048828125 }, { "epoch": 1.0, "learning_rate": 0.00019511358639449987, "lm_loss": 0.01165771484375, "loss": 0.0088, "step": 2444, "total_loss": 0.01165771484375 }, { "epoch": 1.0, "learning_rate": 0.00019510961979648655, "lm_loss": 0.00787353515625, "loss": 0.0081, "step": 2445, "total_loss": 0.00787353515625 }, { "epoch": 1.0, "learning_rate": 0.00019510565162951537, "lm_loss": 0.01141357421875, "loss": 0.0102, "step": 2446, "total_loss": 0.01141357421875 }, { "epoch": 1.0, "learning_rate": 0.00019510168189365177, "lm_loss": 0.003997802734375, "loss": 0.009, "step": 2447, "total_loss": 0.003997802734375 }, { "epoch": 1.0, "learning_rate": 0.0001950977105889612, "lm_loss": 0.005340576171875, "loss": 0.0076, "step": 2448, "total_loss": 0.005340576171875 }, { "epoch": 1.0, "learning_rate": 0.00019509373771550928, "lm_loss": 0.0050048828125, "loss": 0.0077, "step": 2449, "total_loss": 0.0050048828125 }, { "epoch": 1.0, "learning_rate": 0.00019508976327336144, "lm_loss": 0.00885009765625, "loss": 0.0081, "step": 2450, "total_loss": 0.00885009765625 }, { "epoch": 1.0, "learning_rate": 0.00019508578726258326, "lm_loss": 0.004302978515625, "loss": 0.0082, "step": 2451, "total_loss": 0.004302978515625 }, { "epoch": 1.0, "learning_rate": 0.00019508180968324037, "lm_loss": 0.01031494140625, "loss": 0.0079, "step": 2452, "total_loss": 0.01031494140625 }, { "epoch": 1.0, "learning_rate": 0.00019507783053539837, "lm_loss": 0.01141357421875, "loss": 0.0079, "step": 2453, "total_loss": 0.01141357421875 }, { "epoch": 1.0, "learning_rate": 0.00019507384981912288, "lm_loss": 0.00787353515625, "loss": 0.0094, "step": 2454, "total_loss": 0.00787353515625 }, { "epoch": 1.0, "learning_rate": 0.00019506986753447958, "lm_loss": 0.0054931640625, "loss": 0.0075, "step": 2455, "total_loss": 0.0054931640625 }, { "epoch": 1.0, "learning_rate": 0.0001950658836815342, "lm_loss": 0.0047607421875, "loss": 0.0077, "step": 2456, "total_loss": 0.0047607421875 }, { "epoch": 1.0, "learning_rate": 0.00019506189826035234, "lm_loss": 0.00518798828125, "loss": 0.0067, "step": 2457, "total_loss": 0.00518798828125 }, { "epoch": 1.0, "learning_rate": 0.0001950579112709999, "lm_loss": 0.007720947265625, "loss": 0.0079, "step": 2458, "total_loss": 0.007720947265625 }, { "epoch": 1.01, "learning_rate": 0.00019505392271354258, "lm_loss": 0.00872802734375, "loss": 0.0081, "step": 2459, "total_loss": 0.00872802734375 }, { "epoch": 1.01, "learning_rate": 0.0001950499325880461, "lm_loss": 0.002777099609375, "loss": 0.0059, "step": 2460, "total_loss": 0.002777099609375 }, { "epoch": 1.01, "learning_rate": 0.00019504594089457644, "lm_loss": 0.0084228515625, "loss": 0.0074, "step": 2461, "total_loss": 0.0084228515625 }, { "epoch": 1.01, "learning_rate": 0.00019504194763319933, "lm_loss": 0.006591796875, "loss": 0.008, "step": 2462, "total_loss": 0.006591796875 }, { "epoch": 1.01, "learning_rate": 0.0001950379528039807, "lm_loss": 0.00982666015625, "loss": 0.0086, "step": 2463, "total_loss": 0.00982666015625 }, { "epoch": 1.01, "learning_rate": 0.00019503395640698643, "lm_loss": 0.00445556640625, "loss": 0.0077, "step": 2464, "total_loss": 0.00445556640625 }, { "epoch": 1.01, "learning_rate": 0.00019502995844228243, "lm_loss": 0.0078125, "loss": 0.0069, "step": 2465, "total_loss": 0.0078125 }, { "epoch": 1.01, "learning_rate": 0.00019502595890993468, "lm_loss": 0.00689697265625, "loss": 0.0079, "step": 2466, "total_loss": 0.00689697265625 }, { "epoch": 1.01, "learning_rate": 0.00019502195781000915, "lm_loss": 0.00811767578125, "loss": 0.0082, "step": 2467, "total_loss": 0.00811767578125 }, { "epoch": 1.01, "learning_rate": 0.0001950179551425718, "lm_loss": 0.013916015625, "loss": 0.0083, "step": 2468, "total_loss": 0.013916015625 }, { "epoch": 1.01, "learning_rate": 0.00019501395090768876, "lm_loss": 0.007080078125, "loss": 0.0106, "step": 2469, "total_loss": 0.007080078125 }, { "epoch": 1.01, "learning_rate": 0.00019500994510542597, "lm_loss": 0.005523681640625, "loss": 0.0074, "step": 2470, "total_loss": 0.005523681640625 }, { "epoch": 1.01, "learning_rate": 0.00019500593773584958, "lm_loss": 0.0031585693359375, "loss": 0.008, "step": 2471, "total_loss": 0.0031585693359375 }, { "epoch": 1.01, "learning_rate": 0.00019500192879902569, "lm_loss": 0.007720947265625, "loss": 0.009, "step": 2472, "total_loss": 0.007720947265625 }, { "epoch": 1.01, "learning_rate": 0.0001949979182950204, "lm_loss": 0.005859375, "loss": 0.0075, "step": 2473, "total_loss": 0.005859375 }, { "epoch": 1.01, "learning_rate": 0.0001949939062238999, "lm_loss": 0.009033203125, "loss": 0.0077, "step": 2474, "total_loss": 0.009033203125 }, { "epoch": 1.01, "learning_rate": 0.00019498989258573038, "lm_loss": 0.0101318359375, "loss": 0.0075, "step": 2475, "total_loss": 0.0101318359375 }, { "epoch": 1.01, "learning_rate": 0.00019498587738057803, "lm_loss": 0.004486083984375, "loss": 0.0109, "step": 2476, "total_loss": 0.004486083984375 }, { "epoch": 1.01, "learning_rate": 0.00019498186060850912, "lm_loss": 0.0096435546875, "loss": 0.0073, "step": 2477, "total_loss": 0.0096435546875 }, { "epoch": 1.01, "learning_rate": 0.00019497784226958986, "lm_loss": 0.00396728515625, "loss": 0.0085, "step": 2478, "total_loss": 0.00396728515625 }, { "epoch": 1.01, "learning_rate": 0.00019497382236388655, "lm_loss": 0.005340576171875, "loss": 0.0064, "step": 2479, "total_loss": 0.005340576171875 }, { "epoch": 1.01, "learning_rate": 0.0001949698008914655, "lm_loss": 0.0113525390625, "loss": 0.0072, "step": 2480, "total_loss": 0.0113525390625 }, { "epoch": 1.01, "learning_rate": 0.0001949657778523931, "lm_loss": 0.0057373046875, "loss": 0.0072, "step": 2481, "total_loss": 0.0057373046875 }, { "epoch": 1.01, "learning_rate": 0.0001949617532467357, "lm_loss": 0.0034027099609375, "loss": 0.0066, "step": 2482, "total_loss": 0.0034027099609375 }, { "epoch": 1.02, "learning_rate": 0.00019495772707455963, "lm_loss": 0.01141357421875, "loss": 0.0082, "step": 2483, "total_loss": 0.01141357421875 }, { "epoch": 1.02, "learning_rate": 0.00019495369933593138, "lm_loss": 0.0108642578125, "loss": 0.0071, "step": 2484, "total_loss": 0.0108642578125 }, { "epoch": 1.02, "learning_rate": 0.00019494967003091734, "lm_loss": 0.00994873046875, "loss": 0.0086, "step": 2485, "total_loss": 0.00994873046875 }, { "epoch": 1.02, "learning_rate": 0.00019494563915958398, "lm_loss": 0.01416015625, "loss": 0.0091, "step": 2486, "total_loss": 0.01416015625 }, { "epoch": 1.02, "learning_rate": 0.00019494160672199783, "lm_loss": 0.004730224609375, "loss": 0.0085, "step": 2487, "total_loss": 0.004730224609375 }, { "epoch": 1.02, "learning_rate": 0.00019493757271822545, "lm_loss": 0.0079345703125, "loss": 0.0083, "step": 2488, "total_loss": 0.0079345703125 }, { "epoch": 1.02, "learning_rate": 0.00019493353714833327, "lm_loss": 0.00897216796875, "loss": 0.006, "step": 2489, "total_loss": 0.00897216796875 }, { "epoch": 1.02, "learning_rate": 0.00019492950001238796, "lm_loss": 0.0072021484375, "loss": 0.0076, "step": 2490, "total_loss": 0.0072021484375 }, { "epoch": 1.02, "learning_rate": 0.00019492546131045604, "lm_loss": 0.00738525390625, "loss": 0.0076, "step": 2491, "total_loss": 0.00738525390625 }, { "epoch": 1.02, "learning_rate": 0.0001949214210426042, "lm_loss": 0.01220703125, "loss": 0.0074, "step": 2492, "total_loss": 0.01220703125 }, { "epoch": 1.02, "learning_rate": 0.00019491737920889905, "lm_loss": 0.0142822265625, "loss": 0.0079, "step": 2493, "total_loss": 0.0142822265625 }, { "epoch": 1.02, "learning_rate": 0.00019491333580940728, "lm_loss": 0.004425048828125, "loss": 0.0077, "step": 2494, "total_loss": 0.004425048828125 }, { "epoch": 1.02, "learning_rate": 0.00019490929084419562, "lm_loss": 0.00860595703125, "loss": 0.0087, "step": 2495, "total_loss": 0.00860595703125 }, { "epoch": 1.02, "learning_rate": 0.00019490524431333076, "lm_loss": 0.004638671875, "loss": 0.0076, "step": 2496, "total_loss": 0.004638671875 }, { "epoch": 1.02, "learning_rate": 0.00019490119621687942, "lm_loss": 0.0128173828125, "loss": 0.0071, "step": 2497, "total_loss": 0.0128173828125 }, { "epoch": 1.02, "learning_rate": 0.00019489714655490846, "lm_loss": 0.003814697265625, "loss": 0.006, "step": 2498, "total_loss": 0.003814697265625 }, { "epoch": 1.02, "learning_rate": 0.00019489309532748464, "lm_loss": 0.01397705078125, "loss": 0.007, "step": 2499, "total_loss": 0.01397705078125 }, { "epoch": 1.02, "learning_rate": 0.00019488904253467477, "lm_loss": 0.00799560546875, "loss": 0.0084, "step": 2500, "total_loss": 0.00799560546875 }, { "epoch": 1.02, "eval_lm_loss": 0.00966216903179884, "eval_loss": 0.01002834364771843, "eval_runtime": 43.908, "eval_samples_per_second": 22.775, "eval_steps_per_second": 0.205, "eval_total_loss": 0.00966216903179884, "lm_loss": 0.001007080078125, "step": 2500, "total_loss": 0.001007080078125 }, { "epoch": 1.02, "learning_rate": 0.00019488498817654576, "lm_loss": 0.00872802734375, "loss": 0.0072, "step": 2501, "total_loss": 0.00872802734375 }, { "epoch": 1.02, "learning_rate": 0.00019488093225316443, "lm_loss": 0.007415771484375, "loss": 0.0076, "step": 2502, "total_loss": 0.007415771484375 }, { "epoch": 1.02, "learning_rate": 0.00019487687476459775, "lm_loss": 0.00537109375, "loss": 0.0076, "step": 2503, "total_loss": 0.00537109375 }, { "epoch": 1.02, "learning_rate": 0.00019487281571091262, "lm_loss": 0.01251220703125, "loss": 0.0078, "step": 2504, "total_loss": 0.01251220703125 }, { "epoch": 1.02, "learning_rate": 0.000194868755092176, "lm_loss": 0.005767822265625, "loss": 0.0075, "step": 2505, "total_loss": 0.005767822265625 }, { "epoch": 1.02, "learning_rate": 0.00019486469290845487, "lm_loss": 0.01080322265625, "loss": 0.008, "step": 2506, "total_loss": 0.01080322265625 }, { "epoch": 1.02, "learning_rate": 0.00019486062915981625, "lm_loss": 0.00162506103515625, "loss": 0.0072, "step": 2507, "total_loss": 0.00162506103515625 }, { "epoch": 1.03, "learning_rate": 0.0001948565638463272, "lm_loss": 0.008056640625, "loss": 0.0091, "step": 2508, "total_loss": 0.008056640625 }, { "epoch": 1.03, "learning_rate": 0.00019485249696805473, "lm_loss": 0.0079345703125, "loss": 0.0085, "step": 2509, "total_loss": 0.0079345703125 }, { "epoch": 1.03, "learning_rate": 0.00019484842852506598, "lm_loss": 0.007537841796875, "loss": 0.0062, "step": 2510, "total_loss": 0.007537841796875 }, { "epoch": 1.03, "learning_rate": 0.000194844358517428, "lm_loss": 0.00518798828125, "loss": 0.0069, "step": 2511, "total_loss": 0.00518798828125 }, { "epoch": 1.03, "learning_rate": 0.000194840286945208, "lm_loss": 0.007537841796875, "loss": 0.0095, "step": 2512, "total_loss": 0.007537841796875 }, { "epoch": 1.03, "learning_rate": 0.0001948362138084731, "lm_loss": 0.010009765625, "loss": 0.0091, "step": 2513, "total_loss": 0.010009765625 }, { "epoch": 1.03, "learning_rate": 0.00019483213910729058, "lm_loss": 0.0062255859375, "loss": 0.0072, "step": 2514, "total_loss": 0.0062255859375 }, { "epoch": 1.03, "learning_rate": 0.00019482806284172753, "lm_loss": 0.00836181640625, "loss": 0.007, "step": 2515, "total_loss": 0.00836181640625 }, { "epoch": 1.03, "learning_rate": 0.00019482398501185122, "lm_loss": 0.00543212890625, "loss": 0.0082, "step": 2516, "total_loss": 0.00543212890625 }, { "epoch": 1.03, "learning_rate": 0.000194819905617729, "lm_loss": 0.00457763671875, "loss": 0.008, "step": 2517, "total_loss": 0.00457763671875 }, { "epoch": 1.03, "learning_rate": 0.0001948158246594281, "lm_loss": 0.006378173828125, "loss": 0.0068, "step": 2518, "total_loss": 0.006378173828125 }, { "epoch": 1.03, "learning_rate": 0.00019481174213701585, "lm_loss": 0.0107421875, "loss": 0.0086, "step": 2519, "total_loss": 0.0107421875 }, { "epoch": 1.03, "learning_rate": 0.0001948076580505596, "lm_loss": 0.0128173828125, "loss": 0.0082, "step": 2520, "total_loss": 0.0128173828125 }, { "epoch": 1.03, "learning_rate": 0.00019480357240012674, "lm_loss": 0.00518798828125, "loss": 0.0078, "step": 2521, "total_loss": 0.00518798828125 }, { "epoch": 1.03, "learning_rate": 0.00019479948518578465, "lm_loss": 0.004364013671875, "loss": 0.0074, "step": 2522, "total_loss": 0.004364013671875 }, { "epoch": 1.03, "learning_rate": 0.00019479539640760075, "lm_loss": 0.004638671875, "loss": 0.0075, "step": 2523, "total_loss": 0.004638671875 }, { "epoch": 1.03, "learning_rate": 0.00019479130606564246, "lm_loss": 0.013916015625, "loss": 0.0103, "step": 2524, "total_loss": 0.013916015625 }, { "epoch": 1.03, "learning_rate": 0.00019478721415997735, "lm_loss": 0.01104736328125, "loss": 0.0093, "step": 2525, "total_loss": 0.01104736328125 }, { "epoch": 1.03, "learning_rate": 0.0001947831206906728, "lm_loss": 0.0079345703125, "loss": 0.0086, "step": 2526, "total_loss": 0.0079345703125 }, { "epoch": 1.03, "learning_rate": 0.00019477902565779646, "lm_loss": 0.0038604736328125, "loss": 0.0063, "step": 2527, "total_loss": 0.0038604736328125 }, { "epoch": 1.03, "learning_rate": 0.00019477492906141581, "lm_loss": 0.00994873046875, "loss": 0.0085, "step": 2528, "total_loss": 0.00994873046875 }, { "epoch": 1.03, "learning_rate": 0.00019477083090159845, "lm_loss": 0.00885009765625, "loss": 0.0091, "step": 2529, "total_loss": 0.00885009765625 }, { "epoch": 1.03, "learning_rate": 0.00019476673117841195, "lm_loss": 0.005035400390625, "loss": 0.0103, "step": 2530, "total_loss": 0.005035400390625 }, { "epoch": 1.03, "learning_rate": 0.00019476262989192398, "lm_loss": 0.0166015625, "loss": 0.0073, "step": 2531, "total_loss": 0.0166015625 }, { "epoch": 1.04, "learning_rate": 0.00019475852704220215, "lm_loss": 0.006591796875, "loss": 0.0067, "step": 2532, "total_loss": 0.006591796875 }, { "epoch": 1.04, "learning_rate": 0.00019475442262931422, "lm_loss": 0.00176239013671875, "loss": 0.0068, "step": 2533, "total_loss": 0.00176239013671875 }, { "epoch": 1.04, "learning_rate": 0.00019475031665332784, "lm_loss": 0.00341796875, "loss": 0.0089, "step": 2534, "total_loss": 0.00341796875 }, { "epoch": 1.04, "learning_rate": 0.00019474620911431076, "lm_loss": 0.006317138671875, "loss": 0.0072, "step": 2535, "total_loss": 0.006317138671875 }, { "epoch": 1.04, "learning_rate": 0.00019474210001233072, "lm_loss": 0.00592041015625, "loss": 0.0087, "step": 2536, "total_loss": 0.00592041015625 }, { "epoch": 1.04, "learning_rate": 0.0001947379893474555, "lm_loss": 0.0067138671875, "loss": 0.0076, "step": 2537, "total_loss": 0.0067138671875 }, { "epoch": 1.04, "learning_rate": 0.00019473387711975296, "lm_loss": 0.01031494140625, "loss": 0.0103, "step": 2538, "total_loss": 0.01031494140625 }, { "epoch": 1.04, "learning_rate": 0.00019472976332929093, "lm_loss": 0.01544189453125, "loss": 0.0078, "step": 2539, "total_loss": 0.01544189453125 }, { "epoch": 1.04, "learning_rate": 0.00019472564797613721, "lm_loss": 0.0087890625, "loss": 0.0064, "step": 2540, "total_loss": 0.0087890625 }, { "epoch": 1.04, "learning_rate": 0.00019472153106035974, "lm_loss": 0.01031494140625, "loss": 0.0074, "step": 2541, "total_loss": 0.01031494140625 }, { "epoch": 1.04, "learning_rate": 0.00019471741258202642, "lm_loss": 0.006103515625, "loss": 0.0071, "step": 2542, "total_loss": 0.006103515625 }, { "epoch": 1.04, "learning_rate": 0.0001947132925412052, "lm_loss": 0.01025390625, "loss": 0.0079, "step": 2543, "total_loss": 0.01025390625 }, { "epoch": 1.04, "learning_rate": 0.00019470917093796403, "lm_loss": 0.0052490234375, "loss": 0.0068, "step": 2544, "total_loss": 0.0052490234375 }, { "epoch": 1.04, "learning_rate": 0.0001947050477723709, "lm_loss": 0.0042724609375, "loss": 0.0081, "step": 2545, "total_loss": 0.0042724609375 }, { "epoch": 1.04, "learning_rate": 0.00019470092304449386, "lm_loss": 0.00592041015625, "loss": 0.0077, "step": 2546, "total_loss": 0.00592041015625 }, { "epoch": 1.04, "learning_rate": 0.0001946967967544009, "lm_loss": 0.0084228515625, "loss": 0.0096, "step": 2547, "total_loss": 0.0084228515625 }, { "epoch": 1.04, "learning_rate": 0.00019469266890216014, "lm_loss": 0.00848388671875, "loss": 0.0064, "step": 2548, "total_loss": 0.00848388671875 }, { "epoch": 1.04, "learning_rate": 0.00019468853948783962, "lm_loss": 0.006103515625, "loss": 0.0094, "step": 2549, "total_loss": 0.006103515625 }, { "epoch": 1.04, "learning_rate": 0.00019468440851150753, "lm_loss": 0.0101318359375, "loss": 0.0067, "step": 2550, "total_loss": 0.0101318359375 }, { "epoch": 1.04, "learning_rate": 0.00019468027597323198, "lm_loss": 0.0068359375, "loss": 0.0081, "step": 2551, "total_loss": 0.0068359375 }, { "epoch": 1.04, "learning_rate": 0.0001946761418730811, "lm_loss": 0.0015869140625, "loss": 0.0094, "step": 2552, "total_loss": 0.0015869140625 }, { "epoch": 1.04, "learning_rate": 0.00019467200621112312, "lm_loss": 0.008544921875, "loss": 0.0072, "step": 2553, "total_loss": 0.008544921875 }, { "epoch": 1.04, "learning_rate": 0.0001946678689874263, "lm_loss": 0.00677490234375, "loss": 0.0066, "step": 2554, "total_loss": 0.00677490234375 }, { "epoch": 1.04, "learning_rate": 0.00019466373020205887, "lm_loss": 0.00860595703125, "loss": 0.0088, "step": 2555, "total_loss": 0.00860595703125 }, { "epoch": 1.04, "learning_rate": 0.00019465958985508906, "lm_loss": 0.00653076171875, "loss": 0.0089, "step": 2556, "total_loss": 0.00653076171875 }, { "epoch": 1.05, "learning_rate": 0.00019465544794658522, "lm_loss": 0.00787353515625, "loss": 0.0091, "step": 2557, "total_loss": 0.00787353515625 }, { "epoch": 1.05, "learning_rate": 0.00019465130447661566, "lm_loss": 0.0101318359375, "loss": 0.0095, "step": 2558, "total_loss": 0.0101318359375 }, { "epoch": 1.05, "learning_rate": 0.00019464715944524872, "lm_loss": 0.00555419921875, "loss": 0.0085, "step": 2559, "total_loss": 0.00555419921875 }, { "epoch": 1.05, "learning_rate": 0.0001946430128525528, "lm_loss": 0.0029754638671875, "loss": 0.0079, "step": 2560, "total_loss": 0.0029754638671875 }, { "epoch": 1.05, "learning_rate": 0.0001946388646985963, "lm_loss": 0.005157470703125, "loss": 0.0094, "step": 2561, "total_loss": 0.005157470703125 }, { "epoch": 1.05, "learning_rate": 0.00019463471498344762, "lm_loss": 0.006683349609375, "loss": 0.0071, "step": 2562, "total_loss": 0.006683349609375 }, { "epoch": 1.05, "learning_rate": 0.00019463056370717523, "lm_loss": 0.005828857421875, "loss": 0.0069, "step": 2563, "total_loss": 0.005828857421875 }, { "epoch": 1.05, "learning_rate": 0.00019462641086984763, "lm_loss": 0.0108642578125, "loss": 0.0074, "step": 2564, "total_loss": 0.0108642578125 }, { "epoch": 1.05, "learning_rate": 0.0001946222564715333, "lm_loss": 0.00482177734375, "loss": 0.008, "step": 2565, "total_loss": 0.00482177734375 }, { "epoch": 1.05, "learning_rate": 0.00019461810051230082, "lm_loss": 0.01068115234375, "loss": 0.0075, "step": 2566, "total_loss": 0.01068115234375 }, { "epoch": 1.05, "learning_rate": 0.00019461394299221868, "lm_loss": 0.005462646484375, "loss": 0.0078, "step": 2567, "total_loss": 0.005462646484375 }, { "epoch": 1.05, "learning_rate": 0.00019460978391135553, "lm_loss": 0.00628662109375, "loss": 0.0087, "step": 2568, "total_loss": 0.00628662109375 }, { "epoch": 1.05, "learning_rate": 0.00019460562326977993, "lm_loss": 0.0037994384765625, "loss": 0.0077, "step": 2569, "total_loss": 0.0037994384765625 }, { "epoch": 1.05, "learning_rate": 0.00019460146106756054, "lm_loss": 0.0130615234375, "loss": 0.0059, "step": 2570, "total_loss": 0.0130615234375 }, { "epoch": 1.05, "learning_rate": 0.000194597297304766, "lm_loss": 0.0115966796875, "loss": 0.0079, "step": 2571, "total_loss": 0.0115966796875 }, { "epoch": 1.05, "learning_rate": 0.00019459313198146502, "lm_loss": 0.00836181640625, "loss": 0.0093, "step": 2572, "total_loss": 0.00836181640625 }, { "epoch": 1.05, "learning_rate": 0.0001945889650977263, "lm_loss": 0.010986328125, "loss": 0.0077, "step": 2573, "total_loss": 0.010986328125 }, { "epoch": 1.05, "learning_rate": 0.0001945847966536186, "lm_loss": 0.007781982421875, "loss": 0.0097, "step": 2574, "total_loss": 0.007781982421875 }, { "epoch": 1.05, "learning_rate": 0.00019458062664921063, "lm_loss": 0.00665283203125, "loss": 0.007, "step": 2575, "total_loss": 0.00665283203125 }, { "epoch": 1.05, "learning_rate": 0.00019457645508457125, "lm_loss": 0.006805419921875, "loss": 0.0069, "step": 2576, "total_loss": 0.006805419921875 }, { "epoch": 1.05, "learning_rate": 0.00019457228195976923, "lm_loss": 0.0220947265625, "loss": 0.0093, "step": 2577, "total_loss": 0.0220947265625 }, { "epoch": 1.05, "learning_rate": 0.00019456810727487342, "lm_loss": 0.01190185546875, "loss": 0.0082, "step": 2578, "total_loss": 0.01190185546875 }, { "epoch": 1.05, "learning_rate": 0.0001945639310299527, "lm_loss": 0.006378173828125, "loss": 0.0073, "step": 2579, "total_loss": 0.006378173828125 }, { "epoch": 1.05, "learning_rate": 0.00019455975322507592, "lm_loss": 0.004241943359375, "loss": 0.0076, "step": 2580, "total_loss": 0.004241943359375 }, { "epoch": 1.06, "learning_rate": 0.00019455557386031204, "lm_loss": 0.005035400390625, "loss": 0.0097, "step": 2581, "total_loss": 0.005035400390625 }, { "epoch": 1.06, "learning_rate": 0.00019455139293573, "lm_loss": 0.00396728515625, "loss": 0.0063, "step": 2582, "total_loss": 0.00396728515625 }, { "epoch": 1.06, "learning_rate": 0.00019454721045139873, "lm_loss": 0.00958251953125, "loss": 0.0099, "step": 2583, "total_loss": 0.00958251953125 }, { "epoch": 1.06, "learning_rate": 0.0001945430264073873, "lm_loss": 0.012939453125, "loss": 0.0077, "step": 2584, "total_loss": 0.012939453125 }, { "epoch": 1.06, "learning_rate": 0.00019453884080376468, "lm_loss": 0.004608154296875, "loss": 0.0081, "step": 2585, "total_loss": 0.004608154296875 }, { "epoch": 1.06, "learning_rate": 0.00019453465364059996, "lm_loss": 0.0091552734375, "loss": 0.0096, "step": 2586, "total_loss": 0.0091552734375 }, { "epoch": 1.06, "learning_rate": 0.00019453046491796214, "lm_loss": 0.00726318359375, "loss": 0.0074, "step": 2587, "total_loss": 0.00726318359375 }, { "epoch": 1.06, "learning_rate": 0.00019452627463592036, "lm_loss": 0.007293701171875, "loss": 0.0077, "step": 2588, "total_loss": 0.007293701171875 }, { "epoch": 1.06, "learning_rate": 0.00019452208279454374, "lm_loss": 0.004791259765625, "loss": 0.0077, "step": 2589, "total_loss": 0.004791259765625 }, { "epoch": 1.06, "learning_rate": 0.00019451788939390144, "lm_loss": 0.005950927734375, "loss": 0.0083, "step": 2590, "total_loss": 0.005950927734375 }, { "epoch": 1.06, "learning_rate": 0.00019451369443406263, "lm_loss": 0.01025390625, "loss": 0.0075, "step": 2591, "total_loss": 0.01025390625 }, { "epoch": 1.06, "learning_rate": 0.0001945094979150965, "lm_loss": 0.01007080078125, "loss": 0.0087, "step": 2592, "total_loss": 0.01007080078125 }, { "epoch": 1.06, "learning_rate": 0.00019450529983707228, "lm_loss": 0.01031494140625, "loss": 0.0072, "step": 2593, "total_loss": 0.01031494140625 }, { "epoch": 1.06, "learning_rate": 0.00019450110020005924, "lm_loss": 0.004852294921875, "loss": 0.0116, "step": 2594, "total_loss": 0.004852294921875 }, { "epoch": 1.06, "learning_rate": 0.00019449689900412665, "lm_loss": 0.0030670166015625, "loss": 0.0073, "step": 2595, "total_loss": 0.0030670166015625 }, { "epoch": 1.06, "learning_rate": 0.00019449269624934382, "lm_loss": 0.005279541015625, "loss": 0.0066, "step": 2596, "total_loss": 0.005279541015625 }, { "epoch": 1.06, "learning_rate": 0.00019448849193578007, "lm_loss": 0.005584716796875, "loss": 0.0073, "step": 2597, "total_loss": 0.005584716796875 }, { "epoch": 1.06, "learning_rate": 0.00019448428606350476, "lm_loss": 0.00579833984375, "loss": 0.0086, "step": 2598, "total_loss": 0.00579833984375 }, { "epoch": 1.06, "learning_rate": 0.00019448007863258723, "lm_loss": 0.00634765625, "loss": 0.0066, "step": 2599, "total_loss": 0.00634765625 }, { "epoch": 1.06, "learning_rate": 0.00019447586964309695, "lm_loss": 0.00933837890625, "loss": 0.0093, "step": 2600, "total_loss": 0.00933837890625 }, { "epoch": 1.06, "eval_lm_loss": 0.009916252456605434, "eval_loss": 0.010305652394890785, "eval_runtime": 43.8981, "eval_samples_per_second": 22.78, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009916252456605434, "lm_loss": 0.0018463134765625, "step": 2600, "total_loss": 0.0018463134765625 }, { "epoch": 1.06, "learning_rate": 0.00019447165909510335, "lm_loss": 0.0120849609375, "loss": 0.0085, "step": 2601, "total_loss": 0.0120849609375 }, { "epoch": 1.06, "learning_rate": 0.00019446744698867587, "lm_loss": 0.0057373046875, "loss": 0.0065, "step": 2602, "total_loss": 0.0057373046875 }, { "epoch": 1.06, "learning_rate": 0.00019446323332388397, "lm_loss": 0.004913330078125, "loss": 0.0087, "step": 2603, "total_loss": 0.004913330078125 }, { "epoch": 1.06, "learning_rate": 0.0001944590181007972, "lm_loss": 0.00628662109375, "loss": 0.009, "step": 2604, "total_loss": 0.00628662109375 }, { "epoch": 1.07, "learning_rate": 0.00019445480131948504, "lm_loss": 0.01123046875, "loss": 0.0071, "step": 2605, "total_loss": 0.01123046875 }, { "epoch": 1.07, "learning_rate": 0.00019445058298001714, "lm_loss": 0.0089111328125, "loss": 0.008, "step": 2606, "total_loss": 0.0089111328125 }, { "epoch": 1.07, "learning_rate": 0.00019444636308246297, "lm_loss": 0.006744384765625, "loss": 0.0115, "step": 2607, "total_loss": 0.006744384765625 }, { "epoch": 1.07, "learning_rate": 0.00019444214162689228, "lm_loss": 0.006195068359375, "loss": 0.0087, "step": 2608, "total_loss": 0.006195068359375 }, { "epoch": 1.07, "learning_rate": 0.0001944379186133746, "lm_loss": 0.005828857421875, "loss": 0.0075, "step": 2609, "total_loss": 0.005828857421875 }, { "epoch": 1.07, "learning_rate": 0.0001944336940419796, "lm_loss": 0.0115966796875, "loss": 0.0101, "step": 2610, "total_loss": 0.0115966796875 }, { "epoch": 1.07, "learning_rate": 0.00019442946791277708, "lm_loss": 0.008056640625, "loss": 0.0071, "step": 2611, "total_loss": 0.008056640625 }, { "epoch": 1.07, "learning_rate": 0.00019442524022583661, "lm_loss": 0.00885009765625, "loss": 0.009, "step": 2612, "total_loss": 0.00885009765625 }, { "epoch": 1.07, "learning_rate": 0.000194421010981228, "lm_loss": 0.00811767578125, "loss": 0.007, "step": 2613, "total_loss": 0.00811767578125 }, { "epoch": 1.07, "learning_rate": 0.00019441678017902104, "lm_loss": 0.012939453125, "loss": 0.0104, "step": 2614, "total_loss": 0.012939453125 }, { "epoch": 1.07, "learning_rate": 0.0001944125478192855, "lm_loss": 0.00640869140625, "loss": 0.008, "step": 2615, "total_loss": 0.00640869140625 }, { "epoch": 1.07, "learning_rate": 0.00019440831390209117, "lm_loss": 0.0093994140625, "loss": 0.0107, "step": 2616, "total_loss": 0.0093994140625 }, { "epoch": 1.07, "learning_rate": 0.00019440407842750794, "lm_loss": 0.005706787109375, "loss": 0.0073, "step": 2617, "total_loss": 0.005706787109375 }, { "epoch": 1.07, "learning_rate": 0.0001943998413956057, "lm_loss": 0.0093994140625, "loss": 0.0087, "step": 2618, "total_loss": 0.0093994140625 }, { "epoch": 1.07, "learning_rate": 0.00019439560280645423, "lm_loss": 0.0076904296875, "loss": 0.0083, "step": 2619, "total_loss": 0.0076904296875 }, { "epoch": 1.07, "learning_rate": 0.00019439136266012353, "lm_loss": 0.0050048828125, "loss": 0.0074, "step": 2620, "total_loss": 0.0050048828125 }, { "epoch": 1.07, "learning_rate": 0.00019438712095668357, "lm_loss": 0.005096435546875, "loss": 0.0081, "step": 2621, "total_loss": 0.005096435546875 }, { "epoch": 1.07, "learning_rate": 0.0001943828776962043, "lm_loss": 0.0098876953125, "loss": 0.0085, "step": 2622, "total_loss": 0.0098876953125 }, { "epoch": 1.07, "learning_rate": 0.0001943786328787557, "lm_loss": 0.00640869140625, "loss": 0.0093, "step": 2623, "total_loss": 0.00640869140625 }, { "epoch": 1.07, "learning_rate": 0.00019437438650440783, "lm_loss": 0.01141357421875, "loss": 0.0086, "step": 2624, "total_loss": 0.01141357421875 }, { "epoch": 1.07, "learning_rate": 0.0001943701385732307, "lm_loss": 0.0159912109375, "loss": 0.0083, "step": 2625, "total_loss": 0.0159912109375 }, { "epoch": 1.07, "learning_rate": 0.00019436588908529437, "lm_loss": 0.01336669921875, "loss": 0.0065, "step": 2626, "total_loss": 0.01336669921875 }, { "epoch": 1.07, "learning_rate": 0.00019436163804066904, "lm_loss": 0.005035400390625, "loss": 0.0077, "step": 2627, "total_loss": 0.005035400390625 }, { "epoch": 1.07, "learning_rate": 0.00019435738543942472, "lm_loss": 0.0103759765625, "loss": 0.0078, "step": 2628, "total_loss": 0.0103759765625 }, { "epoch": 1.07, "learning_rate": 0.00019435313128163162, "lm_loss": 0.013671875, "loss": 0.0091, "step": 2629, "total_loss": 0.013671875 }, { "epoch": 1.08, "learning_rate": 0.0001943488755673599, "lm_loss": 0.006622314453125, "loss": 0.0072, "step": 2630, "total_loss": 0.006622314453125 }, { "epoch": 1.08, "learning_rate": 0.00019434461829667977, "lm_loss": 0.0048828125, "loss": 0.0067, "step": 2631, "total_loss": 0.0048828125 }, { "epoch": 1.08, "learning_rate": 0.00019434035946966146, "lm_loss": 0.0145263671875, "loss": 0.008, "step": 2632, "total_loss": 0.0145263671875 }, { "epoch": 1.08, "learning_rate": 0.00019433609908637526, "lm_loss": 0.0159912109375, "loss": 0.0087, "step": 2633, "total_loss": 0.0159912109375 }, { "epoch": 1.08, "learning_rate": 0.0001943318371468914, "lm_loss": 0.01068115234375, "loss": 0.0084, "step": 2634, "total_loss": 0.01068115234375 }, { "epoch": 1.08, "learning_rate": 0.00019432757365128023, "lm_loss": 0.003936767578125, "loss": 0.0065, "step": 2635, "total_loss": 0.003936767578125 }, { "epoch": 1.08, "learning_rate": 0.000194323308599612, "lm_loss": 0.0135498046875, "loss": 0.0066, "step": 2636, "total_loss": 0.0135498046875 }, { "epoch": 1.08, "learning_rate": 0.00019431904199195717, "lm_loss": 0.0040283203125, "loss": 0.0082, "step": 2637, "total_loss": 0.0040283203125 }, { "epoch": 1.08, "learning_rate": 0.00019431477382838606, "lm_loss": 0.010986328125, "loss": 0.0078, "step": 2638, "total_loss": 0.010986328125 }, { "epoch": 1.08, "learning_rate": 0.0001943105041089691, "lm_loss": 0.004302978515625, "loss": 0.0077, "step": 2639, "total_loss": 0.004302978515625 }, { "epoch": 1.08, "learning_rate": 0.00019430623283377672, "lm_loss": 0.0086669921875, "loss": 0.0062, "step": 2640, "total_loss": 0.0086669921875 }, { "epoch": 1.08, "learning_rate": 0.00019430196000287935, "lm_loss": 0.00555419921875, "loss": 0.0084, "step": 2641, "total_loss": 0.00555419921875 }, { "epoch": 1.08, "learning_rate": 0.00019429768561634754, "lm_loss": 0.005706787109375, "loss": 0.0079, "step": 2642, "total_loss": 0.005706787109375 }, { "epoch": 1.08, "learning_rate": 0.00019429340967425175, "lm_loss": 0.00714111328125, "loss": 0.008, "step": 2643, "total_loss": 0.00714111328125 }, { "epoch": 1.08, "learning_rate": 0.00019428913217666257, "lm_loss": 0.01171875, "loss": 0.0097, "step": 2644, "total_loss": 0.01171875 }, { "epoch": 1.08, "learning_rate": 0.00019428485312365047, "lm_loss": 0.0223388671875, "loss": 0.0091, "step": 2645, "total_loss": 0.0223388671875 }, { "epoch": 1.08, "learning_rate": 0.00019428057251528615, "lm_loss": 0.01409912109375, "loss": 0.0082, "step": 2646, "total_loss": 0.01409912109375 }, { "epoch": 1.08, "learning_rate": 0.00019427629035164015, "lm_loss": 0.01263427734375, "loss": 0.0092, "step": 2647, "total_loss": 0.01263427734375 }, { "epoch": 1.08, "learning_rate": 0.00019427200663278314, "lm_loss": 0.01495361328125, "loss": 0.0078, "step": 2648, "total_loss": 0.01495361328125 }, { "epoch": 1.08, "learning_rate": 0.00019426772135878575, "lm_loss": 0.00872802734375, "loss": 0.0072, "step": 2649, "total_loss": 0.00872802734375 }, { "epoch": 1.08, "learning_rate": 0.00019426343452971873, "lm_loss": 0.01031494140625, "loss": 0.0076, "step": 2650, "total_loss": 0.01031494140625 }, { "epoch": 1.08, "learning_rate": 0.00019425914614565275, "lm_loss": 0.0033721923828125, "loss": 0.0062, "step": 2651, "total_loss": 0.0033721923828125 }, { "epoch": 1.08, "learning_rate": 0.00019425485620665857, "lm_loss": 0.006378173828125, "loss": 0.0089, "step": 2652, "total_loss": 0.006378173828125 }, { "epoch": 1.08, "learning_rate": 0.00019425056471280696, "lm_loss": 0.0101318359375, "loss": 0.0068, "step": 2653, "total_loss": 0.0101318359375 }, { "epoch": 1.09, "learning_rate": 0.0001942462716641687, "lm_loss": 0.01226806640625, "loss": 0.0095, "step": 2654, "total_loss": 0.01226806640625 }, { "epoch": 1.09, "learning_rate": 0.0001942419770608146, "lm_loss": 0.005767822265625, "loss": 0.0062, "step": 2655, "total_loss": 0.005767822265625 }, { "epoch": 1.09, "learning_rate": 0.00019423768090281557, "lm_loss": 0.00860595703125, "loss": 0.0077, "step": 2656, "total_loss": 0.00860595703125 }, { "epoch": 1.09, "learning_rate": 0.0001942333831902424, "lm_loss": 0.0027923583984375, "loss": 0.0076, "step": 2657, "total_loss": 0.0027923583984375 }, { "epoch": 1.09, "learning_rate": 0.00019422908392316602, "lm_loss": 0.00555419921875, "loss": 0.0078, "step": 2658, "total_loss": 0.00555419921875 }, { "epoch": 1.09, "learning_rate": 0.0001942247831016574, "lm_loss": 0.0167236328125, "loss": 0.0091, "step": 2659, "total_loss": 0.0167236328125 }, { "epoch": 1.09, "learning_rate": 0.00019422048072578738, "lm_loss": 0.01123046875, "loss": 0.0086, "step": 2660, "total_loss": 0.01123046875 }, { "epoch": 1.09, "learning_rate": 0.00019421617679562703, "lm_loss": 0.005096435546875, "loss": 0.008, "step": 2661, "total_loss": 0.005096435546875 }, { "epoch": 1.09, "learning_rate": 0.0001942118713112473, "lm_loss": 0.004608154296875, "loss": 0.0093, "step": 2662, "total_loss": 0.004608154296875 }, { "epoch": 1.09, "learning_rate": 0.00019420756427271921, "lm_loss": 0.0054931640625, "loss": 0.0062, "step": 2663, "total_loss": 0.0054931640625 }, { "epoch": 1.09, "learning_rate": 0.00019420325568011385, "lm_loss": 0.004974365234375, "loss": 0.0059, "step": 2664, "total_loss": 0.004974365234375 }, { "epoch": 1.09, "learning_rate": 0.00019419894553350227, "lm_loss": 0.005706787109375, "loss": 0.0083, "step": 2665, "total_loss": 0.005706787109375 }, { "epoch": 1.09, "learning_rate": 0.00019419463383295557, "lm_loss": 0.00946044921875, "loss": 0.0083, "step": 2666, "total_loss": 0.00946044921875 }, { "epoch": 1.09, "learning_rate": 0.00019419032057854492, "lm_loss": 0.007293701171875, "loss": 0.0072, "step": 2667, "total_loss": 0.007293701171875 }, { "epoch": 1.09, "learning_rate": 0.00019418600577034137, "lm_loss": 0.003814697265625, "loss": 0.0082, "step": 2668, "total_loss": 0.003814697265625 }, { "epoch": 1.09, "learning_rate": 0.0001941816894084162, "lm_loss": 0.006988525390625, "loss": 0.0076, "step": 2669, "total_loss": 0.006988525390625 }, { "epoch": 1.09, "learning_rate": 0.00019417737149284058, "lm_loss": 0.0084228515625, "loss": 0.008, "step": 2670, "total_loss": 0.0084228515625 }, { "epoch": 1.09, "learning_rate": 0.00019417305202368575, "lm_loss": 0.00885009765625, "loss": 0.0075, "step": 2671, "total_loss": 0.00885009765625 }, { "epoch": 1.09, "learning_rate": 0.0001941687310010229, "lm_loss": 0.01361083984375, "loss": 0.0084, "step": 2672, "total_loss": 0.01361083984375 }, { "epoch": 1.09, "learning_rate": 0.00019416440842492342, "lm_loss": 0.00872802734375, "loss": 0.0098, "step": 2673, "total_loss": 0.00872802734375 }, { "epoch": 1.09, "learning_rate": 0.00019416008429545852, "lm_loss": 0.00970458984375, "loss": 0.0096, "step": 2674, "total_loss": 0.00970458984375 }, { "epoch": 1.09, "learning_rate": 0.0001941557586126996, "lm_loss": 0.00286865234375, "loss": 0.008, "step": 2675, "total_loss": 0.00286865234375 }, { "epoch": 1.09, "learning_rate": 0.00019415143137671798, "lm_loss": 0.00555419921875, "loss": 0.0077, "step": 2676, "total_loss": 0.00555419921875 }, { "epoch": 1.09, "learning_rate": 0.00019414710258758506, "lm_loss": 0.010986328125, "loss": 0.0093, "step": 2677, "total_loss": 0.010986328125 }, { "epoch": 1.09, "learning_rate": 0.00019414277224537224, "lm_loss": 0.004058837890625, "loss": 0.0088, "step": 2678, "total_loss": 0.004058837890625 }, { "epoch": 1.1, "learning_rate": 0.00019413844035015093, "lm_loss": 0.0096435546875, "loss": 0.0087, "step": 2679, "total_loss": 0.0096435546875 }, { "epoch": 1.1, "learning_rate": 0.00019413410690199265, "lm_loss": 0.0023956298828125, "loss": 0.0076, "step": 2680, "total_loss": 0.0023956298828125 }, { "epoch": 1.1, "learning_rate": 0.00019412977190096884, "lm_loss": 0.01226806640625, "loss": 0.0083, "step": 2681, "total_loss": 0.01226806640625 }, { "epoch": 1.1, "learning_rate": 0.00019412543534715103, "lm_loss": 0.01129150390625, "loss": 0.0101, "step": 2682, "total_loss": 0.01129150390625 }, { "epoch": 1.1, "learning_rate": 0.00019412109724061075, "lm_loss": 0.00921630859375, "loss": 0.0075, "step": 2683, "total_loss": 0.00921630859375 }, { "epoch": 1.1, "learning_rate": 0.00019411675758141956, "lm_loss": 0.005126953125, "loss": 0.0066, "step": 2684, "total_loss": 0.005126953125 }, { "epoch": 1.1, "learning_rate": 0.00019411241636964907, "lm_loss": 0.0079345703125, "loss": 0.0076, "step": 2685, "total_loss": 0.0079345703125 }, { "epoch": 1.1, "learning_rate": 0.00019410807360537087, "lm_loss": 0.004608154296875, "loss": 0.0059, "step": 2686, "total_loss": 0.004608154296875 }, { "epoch": 1.1, "learning_rate": 0.0001941037292886566, "lm_loss": 0.0157470703125, "loss": 0.0073, "step": 2687, "total_loss": 0.0157470703125 }, { "epoch": 1.1, "learning_rate": 0.00019409938341957795, "lm_loss": 0.0079345703125, "loss": 0.0088, "step": 2688, "total_loss": 0.0079345703125 }, { "epoch": 1.1, "learning_rate": 0.00019409503599820656, "lm_loss": 0.0089111328125, "loss": 0.0073, "step": 2689, "total_loss": 0.0089111328125 }, { "epoch": 1.1, "learning_rate": 0.00019409068702461418, "lm_loss": 0.00262451171875, "loss": 0.0065, "step": 2690, "total_loss": 0.00262451171875 }, { "epoch": 1.1, "learning_rate": 0.0001940863364988726, "lm_loss": 0.0035400390625, "loss": 0.0065, "step": 2691, "total_loss": 0.0035400390625 }, { "epoch": 1.1, "learning_rate": 0.0001940819844210535, "lm_loss": 0.00836181640625, "loss": 0.0079, "step": 2692, "total_loss": 0.00836181640625 }, { "epoch": 1.1, "learning_rate": 0.0001940776307912287, "lm_loss": 0.003997802734375, "loss": 0.0085, "step": 2693, "total_loss": 0.003997802734375 }, { "epoch": 1.1, "learning_rate": 0.00019407327560947007, "lm_loss": 0.011474609375, "loss": 0.0088, "step": 2694, "total_loss": 0.011474609375 }, { "epoch": 1.1, "learning_rate": 0.0001940689188758494, "lm_loss": 0.0037384033203125, "loss": 0.0071, "step": 2695, "total_loss": 0.0037384033203125 }, { "epoch": 1.1, "learning_rate": 0.00019406456059043858, "lm_loss": 0.006378173828125, "loss": 0.0059, "step": 2696, "total_loss": 0.006378173828125 }, { "epoch": 1.1, "learning_rate": 0.00019406020075330948, "lm_loss": 0.00689697265625, "loss": 0.0061, "step": 2697, "total_loss": 0.00689697265625 }, { "epoch": 1.1, "learning_rate": 0.00019405583936453407, "lm_loss": 0.006805419921875, "loss": 0.0067, "step": 2698, "total_loss": 0.006805419921875 }, { "epoch": 1.1, "learning_rate": 0.00019405147642418425, "lm_loss": 0.0107421875, "loss": 0.0074, "step": 2699, "total_loss": 0.0107421875 }, { "epoch": 1.1, "learning_rate": 0.000194047111932332, "lm_loss": 0.0123291015625, "loss": 0.0082, "step": 2700, "total_loss": 0.0123291015625 }, { "epoch": 1.1, "eval_lm_loss": 0.00977067556232214, "eval_loss": 0.010191231034696102, "eval_runtime": 44.263, "eval_samples_per_second": 22.592, "eval_steps_per_second": 0.203, "eval_total_loss": 0.00977067556232214, "lm_loss": 0.000732421875, "step": 2700, "total_loss": 0.000732421875 }, { "epoch": 1.1, "learning_rate": 0.00019404274588904935, "lm_loss": 0.010498046875, "loss": 0.0095, "step": 2701, "total_loss": 0.010498046875 }, { "epoch": 1.1, "learning_rate": 0.0001940383782944083, "lm_loss": 0.01141357421875, "loss": 0.0092, "step": 2702, "total_loss": 0.01141357421875 }, { "epoch": 1.11, "learning_rate": 0.0001940340091484809, "lm_loss": 0.01141357421875, "loss": 0.0077, "step": 2703, "total_loss": 0.01141357421875 }, { "epoch": 1.11, "learning_rate": 0.00019402963845133924, "lm_loss": 0.00762939453125, "loss": 0.0073, "step": 2704, "total_loss": 0.00762939453125 }, { "epoch": 1.11, "learning_rate": 0.00019402526620305535, "lm_loss": 0.00909423828125, "loss": 0.0098, "step": 2705, "total_loss": 0.00909423828125 }, { "epoch": 1.11, "learning_rate": 0.00019402089240370146, "lm_loss": 0.005706787109375, "loss": 0.0084, "step": 2706, "total_loss": 0.005706787109375 }, { "epoch": 1.11, "learning_rate": 0.00019401651705334971, "lm_loss": 0.00921630859375, "loss": 0.0085, "step": 2707, "total_loss": 0.00921630859375 }, { "epoch": 1.11, "learning_rate": 0.00019401214015207218, "lm_loss": 0.007598876953125, "loss": 0.0068, "step": 2708, "total_loss": 0.007598876953125 }, { "epoch": 1.11, "learning_rate": 0.00019400776169994116, "lm_loss": 0.004791259765625, "loss": 0.0086, "step": 2709, "total_loss": 0.004791259765625 }, { "epoch": 1.11, "learning_rate": 0.00019400338169702885, "lm_loss": 0.0057373046875, "loss": 0.0072, "step": 2710, "total_loss": 0.0057373046875 }, { "epoch": 1.11, "learning_rate": 0.00019399900014340754, "lm_loss": 0.00909423828125, "loss": 0.0088, "step": 2711, "total_loss": 0.00909423828125 }, { "epoch": 1.11, "learning_rate": 0.00019399461703914942, "lm_loss": 0.00872802734375, "loss": 0.0061, "step": 2712, "total_loss": 0.00872802734375 }, { "epoch": 1.11, "learning_rate": 0.00019399023238432687, "lm_loss": 0.00787353515625, "loss": 0.0087, "step": 2713, "total_loss": 0.00787353515625 }, { "epoch": 1.11, "learning_rate": 0.00019398584617901223, "lm_loss": 0.004974365234375, "loss": 0.0069, "step": 2714, "total_loss": 0.004974365234375 }, { "epoch": 1.11, "learning_rate": 0.0001939814584232778, "lm_loss": 0.01171875, "loss": 0.0078, "step": 2715, "total_loss": 0.01171875 }, { "epoch": 1.11, "learning_rate": 0.000193977069117196, "lm_loss": 0.01092529296875, "loss": 0.0084, "step": 2716, "total_loss": 0.01092529296875 }, { "epoch": 1.11, "learning_rate": 0.00019397267826083922, "lm_loss": 0.0069580078125, "loss": 0.0089, "step": 2717, "total_loss": 0.0069580078125 }, { "epoch": 1.11, "learning_rate": 0.00019396828585427993, "lm_loss": 0.00543212890625, "loss": 0.0071, "step": 2718, "total_loss": 0.00543212890625 }, { "epoch": 1.11, "learning_rate": 0.00019396389189759052, "lm_loss": 0.00958251953125, "loss": 0.0081, "step": 2719, "total_loss": 0.00958251953125 }, { "epoch": 1.11, "learning_rate": 0.00019395949639084355, "lm_loss": 0.0111083984375, "loss": 0.0076, "step": 2720, "total_loss": 0.0111083984375 }, { "epoch": 1.11, "learning_rate": 0.00019395509933411146, "lm_loss": 0.0157470703125, "loss": 0.0072, "step": 2721, "total_loss": 0.0157470703125 }, { "epoch": 1.11, "learning_rate": 0.00019395070072746683, "lm_loss": 0.004302978515625, "loss": 0.008, "step": 2722, "total_loss": 0.004302978515625 }, { "epoch": 1.11, "learning_rate": 0.0001939463005709822, "lm_loss": 0.00677490234375, "loss": 0.0082, "step": 2723, "total_loss": 0.00677490234375 }, { "epoch": 1.11, "learning_rate": 0.00019394189886473017, "lm_loss": 0.00836181640625, "loss": 0.0082, "step": 2724, "total_loss": 0.00836181640625 }, { "epoch": 1.11, "learning_rate": 0.00019393749560878338, "lm_loss": 0.0047607421875, "loss": 0.0072, "step": 2725, "total_loss": 0.0047607421875 }, { "epoch": 1.11, "learning_rate": 0.00019393309080321437, "lm_loss": 0.00567626953125, "loss": 0.0086, "step": 2726, "total_loss": 0.00567626953125 }, { "epoch": 1.11, "learning_rate": 0.00019392868444809591, "lm_loss": 0.0029296875, "loss": 0.0068, "step": 2727, "total_loss": 0.0029296875 }, { "epoch": 1.12, "learning_rate": 0.00019392427654350066, "lm_loss": 0.005126953125, "loss": 0.0072, "step": 2728, "total_loss": 0.005126953125 }, { "epoch": 1.12, "learning_rate": 0.0001939198670895013, "lm_loss": 0.003265380859375, "loss": 0.0071, "step": 2729, "total_loss": 0.003265380859375 }, { "epoch": 1.12, "learning_rate": 0.00019391545608617058, "lm_loss": 0.00506591796875, "loss": 0.0076, "step": 2730, "total_loss": 0.00506591796875 }, { "epoch": 1.12, "learning_rate": 0.00019391104353358126, "lm_loss": 0.00872802734375, "loss": 0.0089, "step": 2731, "total_loss": 0.00872802734375 }, { "epoch": 1.12, "learning_rate": 0.0001939066294318062, "lm_loss": 0.015380859375, "loss": 0.0095, "step": 2732, "total_loss": 0.015380859375 }, { "epoch": 1.12, "learning_rate": 0.00019390221378091813, "lm_loss": 0.0033111572265625, "loss": 0.007, "step": 2733, "total_loss": 0.0033111572265625 }, { "epoch": 1.12, "learning_rate": 0.00019389779658098992, "lm_loss": 0.003814697265625, "loss": 0.008, "step": 2734, "total_loss": 0.003814697265625 }, { "epoch": 1.12, "learning_rate": 0.00019389337783209446, "lm_loss": 0.0107421875, "loss": 0.0074, "step": 2735, "total_loss": 0.0107421875 }, { "epoch": 1.12, "learning_rate": 0.00019388895753430464, "lm_loss": 0.0087890625, "loss": 0.0081, "step": 2736, "total_loss": 0.0087890625 }, { "epoch": 1.12, "learning_rate": 0.0001938845356876933, "lm_loss": 0.006805419921875, "loss": 0.0088, "step": 2737, "total_loss": 0.006805419921875 }, { "epoch": 1.12, "learning_rate": 0.0001938801122923335, "lm_loss": 0.0072021484375, "loss": 0.0076, "step": 2738, "total_loss": 0.0072021484375 }, { "epoch": 1.12, "learning_rate": 0.00019387568734829813, "lm_loss": 0.0091552734375, "loss": 0.0067, "step": 2739, "total_loss": 0.0091552734375 }, { "epoch": 1.12, "learning_rate": 0.00019387126085566023, "lm_loss": 0.005340576171875, "loss": 0.0078, "step": 2740, "total_loss": 0.005340576171875 }, { "epoch": 1.12, "learning_rate": 0.0001938668328144928, "lm_loss": 0.01025390625, "loss": 0.0101, "step": 2741, "total_loss": 0.01025390625 }, { "epoch": 1.12, "learning_rate": 0.0001938624032248689, "lm_loss": 0.01287841796875, "loss": 0.0085, "step": 2742, "total_loss": 0.01287841796875 }, { "epoch": 1.12, "learning_rate": 0.00019385797208686158, "lm_loss": 0.00653076171875, "loss": 0.0084, "step": 2743, "total_loss": 0.00653076171875 }, { "epoch": 1.12, "learning_rate": 0.00019385353940054397, "lm_loss": 0.0093994140625, "loss": 0.0058, "step": 2744, "total_loss": 0.0093994140625 }, { "epoch": 1.12, "learning_rate": 0.00019384910516598915, "lm_loss": 0.0096435546875, "loss": 0.0076, "step": 2745, "total_loss": 0.0096435546875 }, { "epoch": 1.12, "learning_rate": 0.00019384466938327029, "lm_loss": 0.00982666015625, "loss": 0.0112, "step": 2746, "total_loss": 0.00982666015625 }, { "epoch": 1.12, "learning_rate": 0.00019384023205246055, "lm_loss": 0.0113525390625, "loss": 0.0073, "step": 2747, "total_loss": 0.0113525390625 }, { "epoch": 1.12, "learning_rate": 0.0001938357931736332, "lm_loss": 0.0050048828125, "loss": 0.007, "step": 2748, "total_loss": 0.0050048828125 }, { "epoch": 1.12, "learning_rate": 0.00019383135274686134, "lm_loss": 0.01324462890625, "loss": 0.0081, "step": 2749, "total_loss": 0.01324462890625 }, { "epoch": 1.12, "learning_rate": 0.00019382691077221833, "lm_loss": 0.00836181640625, "loss": 0.0088, "step": 2750, "total_loss": 0.00836181640625 }, { "epoch": 1.12, "learning_rate": 0.0001938224672497774, "lm_loss": 0.00848388671875, "loss": 0.0071, "step": 2751, "total_loss": 0.00848388671875 }, { "epoch": 1.13, "learning_rate": 0.00019381802217961186, "lm_loss": 0.01080322265625, "loss": 0.0087, "step": 2752, "total_loss": 0.01080322265625 }, { "epoch": 1.13, "learning_rate": 0.00019381357556179504, "lm_loss": 0.00433349609375, "loss": 0.0076, "step": 2753, "total_loss": 0.00433349609375 }, { "epoch": 1.13, "learning_rate": 0.00019380912739640026, "lm_loss": 0.005126953125, "loss": 0.0065, "step": 2754, "total_loss": 0.005126953125 }, { "epoch": 1.13, "learning_rate": 0.00019380467768350098, "lm_loss": 0.00982666015625, "loss": 0.0059, "step": 2755, "total_loss": 0.00982666015625 }, { "epoch": 1.13, "learning_rate": 0.00019380022642317052, "lm_loss": 0.01611328125, "loss": 0.0093, "step": 2756, "total_loss": 0.01611328125 }, { "epoch": 1.13, "learning_rate": 0.00019379577361548233, "lm_loss": 0.0164794921875, "loss": 0.0088, "step": 2757, "total_loss": 0.0164794921875 }, { "epoch": 1.13, "learning_rate": 0.00019379131926050986, "lm_loss": 0.01239013671875, "loss": 0.0074, "step": 2758, "total_loss": 0.01239013671875 }, { "epoch": 1.13, "learning_rate": 0.00019378686335832663, "lm_loss": 0.004180908203125, "loss": 0.006, "step": 2759, "total_loss": 0.004180908203125 }, { "epoch": 1.13, "learning_rate": 0.00019378240590900615, "lm_loss": 0.006072998046875, "loss": 0.0077, "step": 2760, "total_loss": 0.006072998046875 }, { "epoch": 1.13, "learning_rate": 0.00019377794691262187, "lm_loss": 0.00897216796875, "loss": 0.0074, "step": 2761, "total_loss": 0.00897216796875 }, { "epoch": 1.13, "learning_rate": 0.00019377348636924742, "lm_loss": 0.011474609375, "loss": 0.0073, "step": 2762, "total_loss": 0.011474609375 }, { "epoch": 1.13, "learning_rate": 0.00019376902427895636, "lm_loss": 0.00408935546875, "loss": 0.0073, "step": 2763, "total_loss": 0.00408935546875 }, { "epoch": 1.13, "learning_rate": 0.00019376456064182232, "lm_loss": 0.0062255859375, "loss": 0.008, "step": 2764, "total_loss": 0.0062255859375 }, { "epoch": 1.13, "learning_rate": 0.0001937600954579189, "lm_loss": 0.01348876953125, "loss": 0.0084, "step": 2765, "total_loss": 0.01348876953125 }, { "epoch": 1.13, "learning_rate": 0.00019375562872731976, "lm_loss": 0.0076904296875, "loss": 0.0073, "step": 2766, "total_loss": 0.0076904296875 }, { "epoch": 1.13, "learning_rate": 0.0001937511604500986, "lm_loss": 0.0147705078125, "loss": 0.0107, "step": 2767, "total_loss": 0.0147705078125 }, { "epoch": 1.13, "learning_rate": 0.00019374669062632914, "lm_loss": 0.01104736328125, "loss": 0.0088, "step": 2768, "total_loss": 0.01104736328125 }, { "epoch": 1.13, "learning_rate": 0.00019374221925608508, "lm_loss": 0.0038299560546875, "loss": 0.0082, "step": 2769, "total_loss": 0.0038299560546875 }, { "epoch": 1.13, "learning_rate": 0.0001937377463394402, "lm_loss": 0.003936767578125, "loss": 0.0061, "step": 2770, "total_loss": 0.003936767578125 }, { "epoch": 1.13, "learning_rate": 0.00019373327187646836, "lm_loss": 0.006683349609375, "loss": 0.0065, "step": 2771, "total_loss": 0.006683349609375 }, { "epoch": 1.13, "learning_rate": 0.00019372879586724323, "lm_loss": 0.00994873046875, "loss": 0.008, "step": 2772, "total_loss": 0.00994873046875 }, { "epoch": 1.13, "learning_rate": 0.00019372431831183874, "lm_loss": 0.0054931640625, "loss": 0.0067, "step": 2773, "total_loss": 0.0054931640625 }, { "epoch": 1.13, "learning_rate": 0.00019371983921032872, "lm_loss": 0.0054931640625, "loss": 0.0085, "step": 2774, "total_loss": 0.0054931640625 }, { "epoch": 1.13, "learning_rate": 0.0001937153585627871, "lm_loss": 0.009033203125, "loss": 0.0088, "step": 2775, "total_loss": 0.009033203125 }, { "epoch": 1.13, "learning_rate": 0.00019371087636928778, "lm_loss": 0.0069580078125, "loss": 0.0086, "step": 2776, "total_loss": 0.0069580078125 }, { "epoch": 1.14, "learning_rate": 0.00019370639262990464, "lm_loss": 0.0224609375, "loss": 0.0084, "step": 2777, "total_loss": 0.0224609375 }, { "epoch": 1.14, "learning_rate": 0.00019370190734471172, "lm_loss": 0.005950927734375, "loss": 0.0082, "step": 2778, "total_loss": 0.005950927734375 }, { "epoch": 1.14, "learning_rate": 0.00019369742051378297, "lm_loss": 0.009521484375, "loss": 0.0065, "step": 2779, "total_loss": 0.009521484375 }, { "epoch": 1.14, "learning_rate": 0.0001936929321371924, "lm_loss": 0.00640869140625, "loss": 0.0076, "step": 2780, "total_loss": 0.00640869140625 }, { "epoch": 1.14, "learning_rate": 0.0001936884422150141, "lm_loss": 0.0035247802734375, "loss": 0.0086, "step": 2781, "total_loss": 0.0035247802734375 }, { "epoch": 1.14, "learning_rate": 0.00019368395074732212, "lm_loss": 0.006134033203125, "loss": 0.0066, "step": 2782, "total_loss": 0.006134033203125 }, { "epoch": 1.14, "learning_rate": 0.0001936794577341905, "lm_loss": 0.006378173828125, "loss": 0.0123, "step": 2783, "total_loss": 0.006378173828125 }, { "epoch": 1.14, "learning_rate": 0.00019367496317569343, "lm_loss": 0.006134033203125, "loss": 0.0075, "step": 2784, "total_loss": 0.006134033203125 }, { "epoch": 1.14, "learning_rate": 0.000193670467071905, "lm_loss": 0.007293701171875, "loss": 0.0072, "step": 2785, "total_loss": 0.007293701171875 }, { "epoch": 1.14, "learning_rate": 0.00019366596942289941, "lm_loss": 0.006378173828125, "loss": 0.0088, "step": 2786, "total_loss": 0.006378173828125 }, { "epoch": 1.14, "learning_rate": 0.00019366147022875082, "lm_loss": 0.00592041015625, "loss": 0.0073, "step": 2787, "total_loss": 0.00592041015625 }, { "epoch": 1.14, "learning_rate": 0.00019365696948953352, "lm_loss": 0.01385498046875, "loss": 0.0082, "step": 2788, "total_loss": 0.01385498046875 }, { "epoch": 1.14, "learning_rate": 0.00019365246720532168, "lm_loss": 0.013671875, "loss": 0.0083, "step": 2789, "total_loss": 0.013671875 }, { "epoch": 1.14, "learning_rate": 0.0001936479633761896, "lm_loss": 0.00830078125, "loss": 0.008, "step": 2790, "total_loss": 0.00830078125 }, { "epoch": 1.14, "learning_rate": 0.00019364345800221158, "lm_loss": 0.002288818359375, "loss": 0.0089, "step": 2791, "total_loss": 0.002288818359375 }, { "epoch": 1.14, "learning_rate": 0.00019363895108346198, "lm_loss": 0.00909423828125, "loss": 0.0079, "step": 2792, "total_loss": 0.00909423828125 }, { "epoch": 1.14, "learning_rate": 0.00019363444262001506, "lm_loss": 0.007171630859375, "loss": 0.0069, "step": 2793, "total_loss": 0.007171630859375 }, { "epoch": 1.14, "learning_rate": 0.00019362993261194526, "lm_loss": 0.004364013671875, "loss": 0.0073, "step": 2794, "total_loss": 0.004364013671875 }, { "epoch": 1.14, "learning_rate": 0.00019362542105932695, "lm_loss": 0.006805419921875, "loss": 0.0075, "step": 2795, "total_loss": 0.006805419921875 }, { "epoch": 1.14, "learning_rate": 0.00019362090796223457, "lm_loss": 0.00933837890625, "loss": 0.0098, "step": 2796, "total_loss": 0.00933837890625 }, { "epoch": 1.14, "learning_rate": 0.00019361639332074256, "lm_loss": 0.00799560546875, "loss": 0.0081, "step": 2797, "total_loss": 0.00799560546875 }, { "epoch": 1.14, "learning_rate": 0.0001936118771349254, "lm_loss": 0.0146484375, "loss": 0.009, "step": 2798, "total_loss": 0.0146484375 }, { "epoch": 1.14, "learning_rate": 0.00019360735940485762, "lm_loss": 0.01165771484375, "loss": 0.0074, "step": 2799, "total_loss": 0.01165771484375 }, { "epoch": 1.14, "learning_rate": 0.00019360284013061367, "lm_loss": 0.006866455078125, "loss": 0.0075, "step": 2800, "total_loss": 0.006866455078125 }, { "epoch": 1.14, "eval_lm_loss": 0.009459610097110271, "eval_loss": 0.009879998862743378, "eval_runtime": 44.0219, "eval_samples_per_second": 22.716, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009459610097110271, "lm_loss": 0.00095367431640625, "step": 2800, "total_loss": 0.00095367431640625 }, { "epoch": 1.15, "learning_rate": 0.00019359831931226815, "lm_loss": 0.00787353515625, "loss": 0.0086, "step": 2801, "total_loss": 0.00787353515625 }, { "epoch": 1.15, "learning_rate": 0.00019359379694989563, "lm_loss": 0.007232666015625, "loss": 0.0095, "step": 2802, "total_loss": 0.007232666015625 }, { "epoch": 1.15, "learning_rate": 0.00019358927304357072, "lm_loss": 0.01177978515625, "loss": 0.01, "step": 2803, "total_loss": 0.01177978515625 }, { "epoch": 1.15, "learning_rate": 0.00019358474759336805, "lm_loss": 0.0062255859375, "loss": 0.0073, "step": 2804, "total_loss": 0.0062255859375 }, { "epoch": 1.15, "learning_rate": 0.00019358022059936226, "lm_loss": 0.00946044921875, "loss": 0.0072, "step": 2805, "total_loss": 0.00946044921875 }, { "epoch": 1.15, "learning_rate": 0.00019357569206162803, "lm_loss": 0.006622314453125, "loss": 0.0079, "step": 2806, "total_loss": 0.006622314453125 }, { "epoch": 1.15, "learning_rate": 0.00019357116198024011, "lm_loss": 0.0033111572265625, "loss": 0.0074, "step": 2807, "total_loss": 0.0033111572265625 }, { "epoch": 1.15, "learning_rate": 0.00019356663035527313, "lm_loss": 0.0133056640625, "loss": 0.0081, "step": 2808, "total_loss": 0.0133056640625 }, { "epoch": 1.15, "learning_rate": 0.00019356209718680192, "lm_loss": 0.006683349609375, "loss": 0.0068, "step": 2809, "total_loss": 0.006683349609375 }, { "epoch": 1.15, "learning_rate": 0.00019355756247490125, "lm_loss": 0.00958251953125, "loss": 0.0072, "step": 2810, "total_loss": 0.00958251953125 }, { "epoch": 1.15, "learning_rate": 0.00019355302621964593, "lm_loss": 0.005340576171875, "loss": 0.0066, "step": 2811, "total_loss": 0.005340576171875 }, { "epoch": 1.15, "learning_rate": 0.00019354848842111078, "lm_loss": 0.010986328125, "loss": 0.0088, "step": 2812, "total_loss": 0.010986328125 }, { "epoch": 1.15, "learning_rate": 0.00019354394907937064, "lm_loss": 0.00714111328125, "loss": 0.0087, "step": 2813, "total_loss": 0.00714111328125 }, { "epoch": 1.15, "learning_rate": 0.00019353940819450042, "lm_loss": 0.0034942626953125, "loss": 0.0081, "step": 2814, "total_loss": 0.0034942626953125 }, { "epoch": 1.15, "learning_rate": 0.00019353486576657503, "lm_loss": 0.005706787109375, "loss": 0.0077, "step": 2815, "total_loss": 0.005706787109375 }, { "epoch": 1.15, "learning_rate": 0.00019353032179566937, "lm_loss": 0.0164794921875, "loss": 0.0078, "step": 2816, "total_loss": 0.0164794921875 }, { "epoch": 1.15, "learning_rate": 0.00019352577628185843, "lm_loss": 0.0150146484375, "loss": 0.0089, "step": 2817, "total_loss": 0.0150146484375 }, { "epoch": 1.15, "learning_rate": 0.00019352122922521717, "lm_loss": 0.007049560546875, "loss": 0.0067, "step": 2818, "total_loss": 0.007049560546875 }, { "epoch": 1.15, "learning_rate": 0.00019351668062582062, "lm_loss": 0.012939453125, "loss": 0.0077, "step": 2819, "total_loss": 0.012939453125 }, { "epoch": 1.15, "learning_rate": 0.00019351213048374383, "lm_loss": 0.01544189453125, "loss": 0.0077, "step": 2820, "total_loss": 0.01544189453125 }, { "epoch": 1.15, "learning_rate": 0.00019350757879906184, "lm_loss": 0.0079345703125, "loss": 0.0079, "step": 2821, "total_loss": 0.0079345703125 }, { "epoch": 1.15, "learning_rate": 0.0001935030255718497, "lm_loss": 0.00970458984375, "loss": 0.0064, "step": 2822, "total_loss": 0.00970458984375 }, { "epoch": 1.15, "learning_rate": 0.0001934984708021826, "lm_loss": 0.0091552734375, "loss": 0.0087, "step": 2823, "total_loss": 0.0091552734375 }, { "epoch": 1.15, "learning_rate": 0.00019349391449013562, "lm_loss": 0.0072021484375, "loss": 0.0085, "step": 2824, "total_loss": 0.0072021484375 }, { "epoch": 1.15, "learning_rate": 0.00019348935663578392, "lm_loss": 0.0037384033203125, "loss": 0.0066, "step": 2825, "total_loss": 0.0037384033203125 }, { "epoch": 1.16, "learning_rate": 0.00019348479723920272, "lm_loss": 0.00634765625, "loss": 0.0062, "step": 2826, "total_loss": 0.00634765625 }, { "epoch": 1.16, "learning_rate": 0.0001934802363004672, "lm_loss": 0.00518798828125, "loss": 0.0074, "step": 2827, "total_loss": 0.00518798828125 }, { "epoch": 1.16, "learning_rate": 0.00019347567381965264, "lm_loss": 0.004791259765625, "loss": 0.007, "step": 2828, "total_loss": 0.004791259765625 }, { "epoch": 1.16, "learning_rate": 0.00019347110979683427, "lm_loss": 0.009033203125, "loss": 0.0079, "step": 2829, "total_loss": 0.009033203125 }, { "epoch": 1.16, "learning_rate": 0.0001934665442320874, "lm_loss": 0.006072998046875, "loss": 0.0072, "step": 2830, "total_loss": 0.006072998046875 }, { "epoch": 1.16, "learning_rate": 0.00019346197712548732, "lm_loss": 0.004730224609375, "loss": 0.0068, "step": 2831, "total_loss": 0.004730224609375 }, { "epoch": 1.16, "learning_rate": 0.00019345740847710938, "lm_loss": 0.00127410888671875, "loss": 0.0066, "step": 2832, "total_loss": 0.00127410888671875 }, { "epoch": 1.16, "learning_rate": 0.00019345283828702895, "lm_loss": 0.00286865234375, "loss": 0.0071, "step": 2833, "total_loss": 0.00286865234375 }, { "epoch": 1.16, "learning_rate": 0.00019344826655532144, "lm_loss": 0.004730224609375, "loss": 0.007, "step": 2834, "total_loss": 0.004730224609375 }, { "epoch": 1.16, "learning_rate": 0.00019344369328206228, "lm_loss": 0.0108642578125, "loss": 0.0078, "step": 2835, "total_loss": 0.0108642578125 }, { "epoch": 1.16, "learning_rate": 0.00019343911846732683, "lm_loss": 0.010986328125, "loss": 0.0072, "step": 2836, "total_loss": 0.010986328125 }, { "epoch": 1.16, "learning_rate": 0.00019343454211119063, "lm_loss": 0.00860595703125, "loss": 0.0074, "step": 2837, "total_loss": 0.00860595703125 }, { "epoch": 1.16, "learning_rate": 0.00019342996421372913, "lm_loss": 0.00848388671875, "loss": 0.0078, "step": 2838, "total_loss": 0.00848388671875 }, { "epoch": 1.16, "learning_rate": 0.0001934253847750179, "lm_loss": 0.014404296875, "loss": 0.0068, "step": 2839, "total_loss": 0.014404296875 }, { "epoch": 1.16, "learning_rate": 0.00019342080379513242, "lm_loss": 0.00830078125, "loss": 0.0082, "step": 2840, "total_loss": 0.00830078125 }, { "epoch": 1.16, "learning_rate": 0.00019341622127414833, "lm_loss": 0.01104736328125, "loss": 0.0074, "step": 2841, "total_loss": 0.01104736328125 }, { "epoch": 1.16, "learning_rate": 0.00019341163721214117, "lm_loss": 0.003631591796875, "loss": 0.0063, "step": 2842, "total_loss": 0.003631591796875 }, { "epoch": 1.16, "learning_rate": 0.00019340705160918658, "lm_loss": 0.00933837890625, "loss": 0.0089, "step": 2843, "total_loss": 0.00933837890625 }, { "epoch": 1.16, "learning_rate": 0.0001934024644653602, "lm_loss": 0.007476806640625, "loss": 0.0071, "step": 2844, "total_loss": 0.007476806640625 }, { "epoch": 1.16, "learning_rate": 0.00019339787578073772, "lm_loss": 0.0069580078125, "loss": 0.0074, "step": 2845, "total_loss": 0.0069580078125 }, { "epoch": 1.16, "learning_rate": 0.0001933932855553948, "lm_loss": 0.0054931640625, "loss": 0.0078, "step": 2846, "total_loss": 0.0054931640625 }, { "epoch": 1.16, "learning_rate": 0.0001933886937894072, "lm_loss": 0.0016326904296875, "loss": 0.0069, "step": 2847, "total_loss": 0.0016326904296875 }, { "epoch": 1.16, "learning_rate": 0.00019338410048285064, "lm_loss": 0.0194091796875, "loss": 0.0088, "step": 2848, "total_loss": 0.0194091796875 }, { "epoch": 1.16, "learning_rate": 0.0001933795056358009, "lm_loss": 0.0166015625, "loss": 0.0088, "step": 2849, "total_loss": 0.0166015625 }, { "epoch": 1.17, "learning_rate": 0.00019337490924833378, "lm_loss": 0.00885009765625, "loss": 0.0081, "step": 2850, "total_loss": 0.00885009765625 }, { "epoch": 1.17, "learning_rate": 0.0001933703113205251, "lm_loss": 0.0036468505859375, "loss": 0.0081, "step": 2851, "total_loss": 0.0036468505859375 }, { "epoch": 1.17, "learning_rate": 0.00019336571185245074, "lm_loss": 0.004364013671875, "loss": 0.009, "step": 2852, "total_loss": 0.004364013671875 }, { "epoch": 1.17, "learning_rate": 0.00019336111084418653, "lm_loss": 0.005279541015625, "loss": 0.0097, "step": 2853, "total_loss": 0.005279541015625 }, { "epoch": 1.17, "learning_rate": 0.00019335650829580838, "lm_loss": 0.01153564453125, "loss": 0.0072, "step": 2854, "total_loss": 0.01153564453125 }, { "epoch": 1.17, "learning_rate": 0.00019335190420739223, "lm_loss": 0.006500244140625, "loss": 0.0092, "step": 2855, "total_loss": 0.006500244140625 }, { "epoch": 1.17, "learning_rate": 0.00019334729857901404, "lm_loss": 0.00994873046875, "loss": 0.0092, "step": 2856, "total_loss": 0.00994873046875 }, { "epoch": 1.17, "learning_rate": 0.00019334269141074972, "lm_loss": 0.0048828125, "loss": 0.0073, "step": 2857, "total_loss": 0.0048828125 }, { "epoch": 1.17, "learning_rate": 0.00019333808270267537, "lm_loss": 0.005096435546875, "loss": 0.008, "step": 2858, "total_loss": 0.005096435546875 }, { "epoch": 1.17, "learning_rate": 0.00019333347245486694, "lm_loss": 0.00811767578125, "loss": 0.007, "step": 2859, "total_loss": 0.00811767578125 }, { "epoch": 1.17, "learning_rate": 0.0001933288606674005, "lm_loss": 0.005126953125, "loss": 0.0058, "step": 2860, "total_loss": 0.005126953125 }, { "epoch": 1.17, "learning_rate": 0.00019332424734035217, "lm_loss": 0.005340576171875, "loss": 0.0068, "step": 2861, "total_loss": 0.005340576171875 }, { "epoch": 1.17, "learning_rate": 0.00019331963247379798, "lm_loss": 0.005157470703125, "loss": 0.0083, "step": 2862, "total_loss": 0.005157470703125 }, { "epoch": 1.17, "learning_rate": 0.00019331501606781415, "lm_loss": 0.00335693359375, "loss": 0.0068, "step": 2863, "total_loss": 0.00335693359375 }, { "epoch": 1.17, "learning_rate": 0.00019331039812247672, "lm_loss": 0.00286865234375, "loss": 0.0061, "step": 2864, "total_loss": 0.00286865234375 }, { "epoch": 1.17, "learning_rate": 0.00019330577863786196, "lm_loss": 0.00738525390625, "loss": 0.0073, "step": 2865, "total_loss": 0.00738525390625 }, { "epoch": 1.17, "learning_rate": 0.00019330115761404603, "lm_loss": 0.010498046875, "loss": 0.0075, "step": 2866, "total_loss": 0.010498046875 }, { "epoch": 1.17, "learning_rate": 0.0001932965350511052, "lm_loss": 0.006317138671875, "loss": 0.0087, "step": 2867, "total_loss": 0.006317138671875 }, { "epoch": 1.17, "learning_rate": 0.00019329191094911568, "lm_loss": 0.0035400390625, "loss": 0.0086, "step": 2868, "total_loss": 0.0035400390625 }, { "epoch": 1.17, "learning_rate": 0.00019328728530815375, "lm_loss": 0.00421142578125, "loss": 0.0076, "step": 2869, "total_loss": 0.00421142578125 }, { "epoch": 1.17, "learning_rate": 0.00019328265812829578, "lm_loss": 0.01171875, "loss": 0.0111, "step": 2870, "total_loss": 0.01171875 }, { "epoch": 1.17, "learning_rate": 0.00019327802940961807, "lm_loss": 0.01177978515625, "loss": 0.0069, "step": 2871, "total_loss": 0.01177978515625 }, { "epoch": 1.17, "learning_rate": 0.0001932733991521969, "lm_loss": 0.00543212890625, "loss": 0.0087, "step": 2872, "total_loss": 0.00543212890625 }, { "epoch": 1.17, "learning_rate": 0.00019326876735610874, "lm_loss": 0.00653076171875, "loss": 0.0065, "step": 2873, "total_loss": 0.00653076171875 }, { "epoch": 1.17, "learning_rate": 0.00019326413402142998, "lm_loss": 0.0091552734375, "loss": 0.0076, "step": 2874, "total_loss": 0.0091552734375 }, { "epoch": 1.18, "learning_rate": 0.00019325949914823705, "lm_loss": 0.00677490234375, "loss": 0.0079, "step": 2875, "total_loss": 0.00677490234375 }, { "epoch": 1.18, "learning_rate": 0.00019325486273660643, "lm_loss": 0.00830078125, "loss": 0.008, "step": 2876, "total_loss": 0.00830078125 }, { "epoch": 1.18, "learning_rate": 0.00019325022478661454, "lm_loss": 0.005035400390625, "loss": 0.0086, "step": 2877, "total_loss": 0.005035400390625 }, { "epoch": 1.18, "learning_rate": 0.00019324558529833794, "lm_loss": 0.0108642578125, "loss": 0.0094, "step": 2878, "total_loss": 0.0108642578125 }, { "epoch": 1.18, "learning_rate": 0.00019324094427185316, "lm_loss": 0.009033203125, "loss": 0.0066, "step": 2879, "total_loss": 0.009033203125 }, { "epoch": 1.18, "learning_rate": 0.00019323630170723673, "lm_loss": 0.0086669921875, "loss": 0.0094, "step": 2880, "total_loss": 0.0086669921875 }, { "epoch": 1.18, "learning_rate": 0.00019323165760456526, "lm_loss": 0.0118408203125, "loss": 0.0094, "step": 2881, "total_loss": 0.0118408203125 }, { "epoch": 1.18, "learning_rate": 0.00019322701196391537, "lm_loss": 0.00543212890625, "loss": 0.0072, "step": 2882, "total_loss": 0.00543212890625 }, { "epoch": 1.18, "learning_rate": 0.0001932223647853637, "lm_loss": 0.010986328125, "loss": 0.0082, "step": 2883, "total_loss": 0.010986328125 }, { "epoch": 1.18, "learning_rate": 0.00019321771606898688, "lm_loss": 0.004119873046875, "loss": 0.0078, "step": 2884, "total_loss": 0.004119873046875 }, { "epoch": 1.18, "learning_rate": 0.0001932130658148616, "lm_loss": 0.01611328125, "loss": 0.0094, "step": 2885, "total_loss": 0.01611328125 }, { "epoch": 1.18, "learning_rate": 0.0001932084140230646, "lm_loss": 0.007568359375, "loss": 0.0065, "step": 2886, "total_loss": 0.007568359375 }, { "epoch": 1.18, "learning_rate": 0.00019320376069367258, "lm_loss": 0.002685546875, "loss": 0.0075, "step": 2887, "total_loss": 0.002685546875 }, { "epoch": 1.18, "learning_rate": 0.00019319910582676236, "lm_loss": 0.006195068359375, "loss": 0.0083, "step": 2888, "total_loss": 0.006195068359375 }, { "epoch": 1.18, "learning_rate": 0.00019319444942241066, "lm_loss": 0.007415771484375, "loss": 0.0061, "step": 2889, "total_loss": 0.007415771484375 }, { "epoch": 1.18, "learning_rate": 0.00019318979148069435, "lm_loss": 0.007476806640625, "loss": 0.0075, "step": 2890, "total_loss": 0.007476806640625 }, { "epoch": 1.18, "learning_rate": 0.00019318513200169022, "lm_loss": 0.005126953125, "loss": 0.0079, "step": 2891, "total_loss": 0.005126953125 }, { "epoch": 1.18, "learning_rate": 0.00019318047098547517, "lm_loss": 0.0101318359375, "loss": 0.0074, "step": 2892, "total_loss": 0.0101318359375 }, { "epoch": 1.18, "learning_rate": 0.00019317580843212606, "lm_loss": 0.01043701171875, "loss": 0.0077, "step": 2893, "total_loss": 0.01043701171875 }, { "epoch": 1.18, "learning_rate": 0.00019317114434171985, "lm_loss": 0.0101318359375, "loss": 0.0052, "step": 2894, "total_loss": 0.0101318359375 }, { "epoch": 1.18, "learning_rate": 0.00019316647871433343, "lm_loss": 0.006622314453125, "loss": 0.007, "step": 2895, "total_loss": 0.006622314453125 }, { "epoch": 1.18, "learning_rate": 0.0001931618115500438, "lm_loss": 0.006683349609375, "loss": 0.0079, "step": 2896, "total_loss": 0.006683349609375 }, { "epoch": 1.18, "learning_rate": 0.00019315714284892795, "lm_loss": 0.00750732421875, "loss": 0.0093, "step": 2897, "total_loss": 0.00750732421875 }, { "epoch": 1.18, "learning_rate": 0.00019315247261106287, "lm_loss": 0.007720947265625, "loss": 0.0067, "step": 2898, "total_loss": 0.007720947265625 }, { "epoch": 1.19, "learning_rate": 0.00019314780083652563, "lm_loss": 0.0098876953125, "loss": 0.0092, "step": 2899, "total_loss": 0.0098876953125 }, { "epoch": 1.19, "learning_rate": 0.0001931431275253933, "lm_loss": 0.00250244140625, "loss": 0.008, "step": 2900, "total_loss": 0.00250244140625 }, { "epoch": 1.19, "eval_lm_loss": 0.00999415386468172, "eval_loss": 0.010283890180289745, "eval_runtime": 44.1302, "eval_samples_per_second": 22.66, "eval_steps_per_second": 0.204, "eval_total_loss": 0.00999415386468172, "lm_loss": 0.00121307373046875, "step": 2900, "total_loss": 0.00121307373046875 }, { "epoch": 1.19, "learning_rate": 0.00019313845267774293, "lm_loss": 0.005401611328125, "loss": 0.0077, "step": 2901, "total_loss": 0.005401611328125 }, { "epoch": 1.19, "learning_rate": 0.00019313377629365168, "lm_loss": 0.005889892578125, "loss": 0.0061, "step": 2902, "total_loss": 0.005889892578125 }, { "epoch": 1.19, "learning_rate": 0.0001931290983731967, "lm_loss": 0.00640869140625, "loss": 0.0064, "step": 2903, "total_loss": 0.00640869140625 }, { "epoch": 1.19, "learning_rate": 0.00019312441891645508, "lm_loss": 0.008056640625, "loss": 0.0085, "step": 2904, "total_loss": 0.008056640625 }, { "epoch": 1.19, "learning_rate": 0.0001931197379235041, "lm_loss": 0.0086669921875, "loss": 0.0083, "step": 2905, "total_loss": 0.0086669921875 }, { "epoch": 1.19, "learning_rate": 0.00019311505539442095, "lm_loss": 0.01519775390625, "loss": 0.0066, "step": 2906, "total_loss": 0.01519775390625 }, { "epoch": 1.19, "learning_rate": 0.0001931103713292829, "lm_loss": 0.004791259765625, "loss": 0.0077, "step": 2907, "total_loss": 0.004791259765625 }, { "epoch": 1.19, "learning_rate": 0.00019310568572816715, "lm_loss": 0.004974365234375, "loss": 0.0072, "step": 2908, "total_loss": 0.004974365234375 }, { "epoch": 1.19, "learning_rate": 0.00019310099859115107, "lm_loss": 0.00738525390625, "loss": 0.0091, "step": 2909, "total_loss": 0.00738525390625 }, { "epoch": 1.19, "learning_rate": 0.00019309630991831193, "lm_loss": 0.0133056640625, "loss": 0.0075, "step": 2910, "total_loss": 0.0133056640625 }, { "epoch": 1.19, "learning_rate": 0.0001930916197097271, "lm_loss": 0.00439453125, "loss": 0.0066, "step": 2911, "total_loss": 0.00439453125 }, { "epoch": 1.19, "learning_rate": 0.00019308692796547394, "lm_loss": 0.00994873046875, "loss": 0.0074, "step": 2912, "total_loss": 0.00994873046875 }, { "epoch": 1.19, "learning_rate": 0.0001930822346856299, "lm_loss": 0.004364013671875, "loss": 0.0066, "step": 2913, "total_loss": 0.004364013671875 }, { "epoch": 1.19, "learning_rate": 0.00019307753987027232, "lm_loss": 0.0052490234375, "loss": 0.0094, "step": 2914, "total_loss": 0.0052490234375 }, { "epoch": 1.19, "learning_rate": 0.00019307284351947868, "lm_loss": 0.0135498046875, "loss": 0.0088, "step": 2915, "total_loss": 0.0135498046875 }, { "epoch": 1.19, "learning_rate": 0.00019306814563332646, "lm_loss": 0.0118408203125, "loss": 0.0076, "step": 2916, "total_loss": 0.0118408203125 }, { "epoch": 1.19, "learning_rate": 0.0001930634462118931, "lm_loss": 0.0081787109375, "loss": 0.0073, "step": 2917, "total_loss": 0.0081787109375 }, { "epoch": 1.19, "learning_rate": 0.00019305874525525624, "lm_loss": 0.00677490234375, "loss": 0.0081, "step": 2918, "total_loss": 0.00677490234375 }, { "epoch": 1.19, "learning_rate": 0.00019305404276349336, "lm_loss": 0.00494384765625, "loss": 0.0068, "step": 2919, "total_loss": 0.00494384765625 }, { "epoch": 1.19, "learning_rate": 0.000193049338736682, "lm_loss": 0.00836181640625, "loss": 0.0073, "step": 2920, "total_loss": 0.00836181640625 }, { "epoch": 1.19, "learning_rate": 0.00019304463317489982, "lm_loss": 0.007720947265625, "loss": 0.0067, "step": 2921, "total_loss": 0.007720947265625 }, { "epoch": 1.19, "learning_rate": 0.00019303992607822443, "lm_loss": 0.01123046875, "loss": 0.0086, "step": 2922, "total_loss": 0.01123046875 }, { "epoch": 1.2, "learning_rate": 0.00019303521744673345, "lm_loss": 0.004241943359375, "loss": 0.0077, "step": 2923, "total_loss": 0.004241943359375 }, { "epoch": 1.2, "learning_rate": 0.0001930305072805046, "lm_loss": 0.00701904296875, "loss": 0.0068, "step": 2924, "total_loss": 0.00701904296875 }, { "epoch": 1.2, "learning_rate": 0.00019302579557961552, "lm_loss": 0.007293701171875, "loss": 0.0077, "step": 2925, "total_loss": 0.007293701171875 }, { "epoch": 1.2, "learning_rate": 0.00019302108234414402, "lm_loss": 0.00531005859375, "loss": 0.0053, "step": 2926, "total_loss": 0.00531005859375 }, { "epoch": 1.2, "learning_rate": 0.00019301636757416776, "lm_loss": 0.00933837890625, "loss": 0.0079, "step": 2927, "total_loss": 0.00933837890625 }, { "epoch": 1.2, "learning_rate": 0.00019301165126976456, "lm_loss": 0.004638671875, "loss": 0.0085, "step": 2928, "total_loss": 0.004638671875 }, { "epoch": 1.2, "learning_rate": 0.00019300693343101227, "lm_loss": 0.0069580078125, "loss": 0.0079, "step": 2929, "total_loss": 0.0069580078125 }, { "epoch": 1.2, "learning_rate": 0.00019300221405798863, "lm_loss": 0.00811767578125, "loss": 0.0093, "step": 2930, "total_loss": 0.00811767578125 }, { "epoch": 1.2, "learning_rate": 0.00019299749315077153, "lm_loss": 0.006378173828125, "loss": 0.0082, "step": 2931, "total_loss": 0.006378173828125 }, { "epoch": 1.2, "learning_rate": 0.00019299277070943885, "lm_loss": 0.015869140625, "loss": 0.0071, "step": 2932, "total_loss": 0.015869140625 }, { "epoch": 1.2, "learning_rate": 0.0001929880467340685, "lm_loss": 0.01263427734375, "loss": 0.0094, "step": 2933, "total_loss": 0.01263427734375 }, { "epoch": 1.2, "learning_rate": 0.00019298332122473838, "lm_loss": 0.004302978515625, "loss": 0.0087, "step": 2934, "total_loss": 0.004302978515625 }, { "epoch": 1.2, "learning_rate": 0.00019297859418152647, "lm_loss": 0.01312255859375, "loss": 0.0081, "step": 2935, "total_loss": 0.01312255859375 }, { "epoch": 1.2, "learning_rate": 0.00019297386560451077, "lm_loss": 0.007537841796875, "loss": 0.0082, "step": 2936, "total_loss": 0.007537841796875 }, { "epoch": 1.2, "learning_rate": 0.00019296913549376923, "lm_loss": 0.01141357421875, "loss": 0.0076, "step": 2937, "total_loss": 0.01141357421875 }, { "epoch": 1.2, "learning_rate": 0.0001929644038493799, "lm_loss": 0.00701904296875, "loss": 0.0084, "step": 2938, "total_loss": 0.00701904296875 }, { "epoch": 1.2, "learning_rate": 0.00019295967067142083, "lm_loss": 0.009765625, "loss": 0.0073, "step": 2939, "total_loss": 0.009765625 }, { "epoch": 1.2, "learning_rate": 0.00019295493595997013, "lm_loss": 0.00323486328125, "loss": 0.0063, "step": 2940, "total_loss": 0.00323486328125 }, { "epoch": 1.2, "learning_rate": 0.0001929501997151059, "lm_loss": 0.005096435546875, "loss": 0.0091, "step": 2941, "total_loss": 0.005096435546875 }, { "epoch": 1.2, "learning_rate": 0.0001929454619369062, "lm_loss": 0.010009765625, "loss": 0.0071, "step": 2942, "total_loss": 0.010009765625 }, { "epoch": 1.2, "learning_rate": 0.00019294072262544929, "lm_loss": 0.009521484375, "loss": 0.0068, "step": 2943, "total_loss": 0.009521484375 }, { "epoch": 1.2, "learning_rate": 0.00019293598178081326, "lm_loss": 0.0111083984375, "loss": 0.0089, "step": 2944, "total_loss": 0.0111083984375 }, { "epoch": 1.2, "learning_rate": 0.0001929312394030764, "lm_loss": 0.0130615234375, "loss": 0.007, "step": 2945, "total_loss": 0.0130615234375 }, { "epoch": 1.2, "learning_rate": 0.00019292649549231688, "lm_loss": 0.004241943359375, "loss": 0.0059, "step": 2946, "total_loss": 0.004241943359375 }, { "epoch": 1.2, "learning_rate": 0.00019292175004861296, "lm_loss": 0.00189971923828125, "loss": 0.0073, "step": 2947, "total_loss": 0.00189971923828125 }, { "epoch": 1.21, "learning_rate": 0.00019291700307204297, "lm_loss": 0.007476806640625, "loss": 0.0087, "step": 2948, "total_loss": 0.007476806640625 }, { "epoch": 1.21, "learning_rate": 0.00019291225456268517, "lm_loss": 0.004150390625, "loss": 0.008, "step": 2949, "total_loss": 0.004150390625 }, { "epoch": 1.21, "learning_rate": 0.0001929075045206179, "lm_loss": 0.0081787109375, "loss": 0.0089, "step": 2950, "total_loss": 0.0081787109375 }, { "epoch": 1.21, "learning_rate": 0.00019290275294591954, "lm_loss": 0.003936767578125, "loss": 0.0075, "step": 2951, "total_loss": 0.003936767578125 }, { "epoch": 1.21, "learning_rate": 0.00019289799983866846, "lm_loss": 0.004669189453125, "loss": 0.0088, "step": 2952, "total_loss": 0.004669189453125 }, { "epoch": 1.21, "learning_rate": 0.0001928932451989431, "lm_loss": 0.007293701171875, "loss": 0.0087, "step": 2953, "total_loss": 0.007293701171875 }, { "epoch": 1.21, "learning_rate": 0.00019288848902682183, "lm_loss": 0.0081787109375, "loss": 0.0065, "step": 2954, "total_loss": 0.0081787109375 }, { "epoch": 1.21, "learning_rate": 0.00019288373132238316, "lm_loss": 0.01129150390625, "loss": 0.0084, "step": 2955, "total_loss": 0.01129150390625 }, { "epoch": 1.21, "learning_rate": 0.00019287897208570553, "lm_loss": 0.0079345703125, "loss": 0.0091, "step": 2956, "total_loss": 0.0079345703125 }, { "epoch": 1.21, "learning_rate": 0.0001928742113168675, "lm_loss": 0.004608154296875, "loss": 0.0078, "step": 2957, "total_loss": 0.004608154296875 }, { "epoch": 1.21, "learning_rate": 0.00019286944901594758, "lm_loss": 0.005401611328125, "loss": 0.0077, "step": 2958, "total_loss": 0.005401611328125 }, { "epoch": 1.21, "learning_rate": 0.00019286468518302433, "lm_loss": 0.00689697265625, "loss": 0.0088, "step": 2959, "total_loss": 0.00689697265625 }, { "epoch": 1.21, "learning_rate": 0.00019285991981817636, "lm_loss": 0.0120849609375, "loss": 0.0076, "step": 2960, "total_loss": 0.0120849609375 }, { "epoch": 1.21, "learning_rate": 0.00019285515292148225, "lm_loss": 0.006744384765625, "loss": 0.0083, "step": 2961, "total_loss": 0.006744384765625 }, { "epoch": 1.21, "learning_rate": 0.00019285038449302062, "lm_loss": 0.0024871826171875, "loss": 0.0072, "step": 2962, "total_loss": 0.0024871826171875 }, { "epoch": 1.21, "learning_rate": 0.00019284561453287018, "lm_loss": 0.007537841796875, "loss": 0.0074, "step": 2963, "total_loss": 0.007537841796875 }, { "epoch": 1.21, "learning_rate": 0.00019284084304110958, "lm_loss": 0.008056640625, "loss": 0.0096, "step": 2964, "total_loss": 0.008056640625 }, { "epoch": 1.21, "learning_rate": 0.00019283607001781755, "lm_loss": 0.0037994384765625, "loss": 0.0071, "step": 2965, "total_loss": 0.0037994384765625 }, { "epoch": 1.21, "learning_rate": 0.00019283129546307284, "lm_loss": 0.0047607421875, "loss": 0.0086, "step": 2966, "total_loss": 0.0047607421875 }, { "epoch": 1.21, "learning_rate": 0.0001928265193769542, "lm_loss": 0.00726318359375, "loss": 0.0093, "step": 2967, "total_loss": 0.00726318359375 }, { "epoch": 1.21, "learning_rate": 0.0001928217417595404, "lm_loss": 0.01068115234375, "loss": 0.0068, "step": 2968, "total_loss": 0.01068115234375 }, { "epoch": 1.21, "learning_rate": 0.00019281696261091026, "lm_loss": 0.02197265625, "loss": 0.0085, "step": 2969, "total_loss": 0.02197265625 }, { "epoch": 1.21, "learning_rate": 0.00019281218193114262, "lm_loss": 0.006103515625, "loss": 0.0066, "step": 2970, "total_loss": 0.006103515625 }, { "epoch": 1.21, "learning_rate": 0.0001928073997203164, "lm_loss": 0.00994873046875, "loss": 0.0086, "step": 2971, "total_loss": 0.00994873046875 }, { "epoch": 1.22, "learning_rate": 0.0001928026159785104, "lm_loss": 0.019775390625, "loss": 0.0096, "step": 2972, "total_loss": 0.019775390625 }, { "epoch": 1.22, "learning_rate": 0.00019279783070580355, "lm_loss": 0.004150390625, "loss": 0.0064, "step": 2973, "total_loss": 0.004150390625 }, { "epoch": 1.22, "learning_rate": 0.00019279304390227485, "lm_loss": 0.0040283203125, "loss": 0.0062, "step": 2974, "total_loss": 0.0040283203125 }, { "epoch": 1.22, "learning_rate": 0.0001927882555680032, "lm_loss": 0.0045166015625, "loss": 0.0058, "step": 2975, "total_loss": 0.0045166015625 }, { "epoch": 1.22, "learning_rate": 0.00019278346570306764, "lm_loss": 0.004150390625, "loss": 0.0066, "step": 2976, "total_loss": 0.004150390625 }, { "epoch": 1.22, "learning_rate": 0.0001927786743075471, "lm_loss": 0.00927734375, "loss": 0.0091, "step": 2977, "total_loss": 0.00927734375 }, { "epoch": 1.22, "learning_rate": 0.00019277388138152075, "lm_loss": 0.006988525390625, "loss": 0.0076, "step": 2978, "total_loss": 0.006988525390625 }, { "epoch": 1.22, "learning_rate": 0.00019276908692506756, "lm_loss": 0.00439453125, "loss": 0.0083, "step": 2979, "total_loss": 0.00439453125 }, { "epoch": 1.22, "learning_rate": 0.00019276429093826667, "lm_loss": 0.00439453125, "loss": 0.0086, "step": 2980, "total_loss": 0.00439453125 }, { "epoch": 1.22, "learning_rate": 0.00019275949342119716, "lm_loss": 0.006103515625, "loss": 0.0078, "step": 2981, "total_loss": 0.006103515625 }, { "epoch": 1.22, "learning_rate": 0.00019275469437393818, "lm_loss": 0.006011962890625, "loss": 0.0076, "step": 2982, "total_loss": 0.006011962890625 }, { "epoch": 1.22, "learning_rate": 0.0001927498937965689, "lm_loss": 0.01513671875, "loss": 0.0095, "step": 2983, "total_loss": 0.01513671875 }, { "epoch": 1.22, "learning_rate": 0.00019274509168916852, "lm_loss": 0.00701904296875, "loss": 0.0075, "step": 2984, "total_loss": 0.00701904296875 }, { "epoch": 1.22, "learning_rate": 0.00019274028805181623, "lm_loss": 0.00823974609375, "loss": 0.0075, "step": 2985, "total_loss": 0.00823974609375 }, { "epoch": 1.22, "learning_rate": 0.0001927354828845913, "lm_loss": 0.005035400390625, "loss": 0.0082, "step": 2986, "total_loss": 0.005035400390625 }, { "epoch": 1.22, "learning_rate": 0.00019273067618757304, "lm_loss": 0.0054931640625, "loss": 0.0077, "step": 2987, "total_loss": 0.0054931640625 }, { "epoch": 1.22, "learning_rate": 0.00019272586796084063, "lm_loss": 0.003326416015625, "loss": 0.006, "step": 2988, "total_loss": 0.003326416015625 }, { "epoch": 1.22, "learning_rate": 0.0001927210582044735, "lm_loss": 0.0216064453125, "loss": 0.009, "step": 2989, "total_loss": 0.0216064453125 }, { "epoch": 1.22, "learning_rate": 0.0001927162469185509, "lm_loss": 0.002960205078125, "loss": 0.0075, "step": 2990, "total_loss": 0.002960205078125 }, { "epoch": 1.22, "learning_rate": 0.00019271143410315227, "lm_loss": 0.00262451171875, "loss": 0.0078, "step": 2991, "total_loss": 0.00262451171875 }, { "epoch": 1.22, "learning_rate": 0.00019270661975835695, "lm_loss": 0.004852294921875, "loss": 0.007, "step": 2992, "total_loss": 0.004852294921875 }, { "epoch": 1.22, "learning_rate": 0.00019270180388424442, "lm_loss": 0.010009765625, "loss": 0.0096, "step": 2993, "total_loss": 0.010009765625 }, { "epoch": 1.22, "learning_rate": 0.00019269698648089408, "lm_loss": 0.01202392578125, "loss": 0.0076, "step": 2994, "total_loss": 0.01202392578125 }, { "epoch": 1.22, "learning_rate": 0.00019269216754838538, "lm_loss": 0.00116729736328125, "loss": 0.0083, "step": 2995, "total_loss": 0.00116729736328125 }, { "epoch": 1.22, "learning_rate": 0.00019268734708679787, "lm_loss": 0.00531005859375, "loss": 0.0074, "step": 2996, "total_loss": 0.00531005859375 }, { "epoch": 1.23, "learning_rate": 0.00019268252509621104, "lm_loss": 0.0048828125, "loss": 0.0088, "step": 2997, "total_loss": 0.0048828125 }, { "epoch": 1.23, "learning_rate": 0.00019267770157670446, "lm_loss": 0.005340576171875, "loss": 0.0065, "step": 2998, "total_loss": 0.005340576171875 }, { "epoch": 1.23, "learning_rate": 0.00019267287652835764, "lm_loss": 0.003753662109375, "loss": 0.0069, "step": 2999, "total_loss": 0.003753662109375 }, { "epoch": 1.23, "learning_rate": 0.00019266804995125023, "lm_loss": 0.013671875, "loss": 0.007, "step": 3000, "total_loss": 0.013671875 }, { "epoch": 1.23, "eval_lm_loss": 0.009405948221683502, "eval_loss": 0.009687041863799095, "eval_runtime": 43.9445, "eval_samples_per_second": 22.756, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009405948221683502, "lm_loss": 0.0019073486328125, "step": 3000, "total_loss": 0.0019073486328125 }, { "epoch": 1.23, "learning_rate": 0.00019266322184546185, "lm_loss": 0.00439453125, "loss": 0.0066, "step": 3001, "total_loss": 0.00439453125 }, { "epoch": 1.23, "learning_rate": 0.00019265839221107212, "lm_loss": 0.00897216796875, "loss": 0.0098, "step": 3002, "total_loss": 0.00897216796875 }, { "epoch": 1.23, "learning_rate": 0.0001926535610481607, "lm_loss": 0.00848388671875, "loss": 0.0069, "step": 3003, "total_loss": 0.00848388671875 }, { "epoch": 1.23, "learning_rate": 0.00019264872835680734, "lm_loss": 0.005462646484375, "loss": 0.0071, "step": 3004, "total_loss": 0.005462646484375 }, { "epoch": 1.23, "learning_rate": 0.00019264389413709172, "lm_loss": 0.0047607421875, "loss": 0.0079, "step": 3005, "total_loss": 0.0047607421875 }, { "epoch": 1.23, "learning_rate": 0.0001926390583890936, "lm_loss": 0.0036163330078125, "loss": 0.0108, "step": 3006, "total_loss": 0.0036163330078125 }, { "epoch": 1.23, "learning_rate": 0.00019263422111289274, "lm_loss": 0.0052490234375, "loss": 0.0081, "step": 3007, "total_loss": 0.0052490234375 }, { "epoch": 1.23, "learning_rate": 0.00019262938230856896, "lm_loss": 0.00494384765625, "loss": 0.0085, "step": 3008, "total_loss": 0.00494384765625 }, { "epoch": 1.23, "learning_rate": 0.00019262454197620205, "lm_loss": 0.005950927734375, "loss": 0.0085, "step": 3009, "total_loss": 0.005950927734375 }, { "epoch": 1.23, "learning_rate": 0.00019261970011587187, "lm_loss": 0.00677490234375, "loss": 0.0075, "step": 3010, "total_loss": 0.00677490234375 }, { "epoch": 1.23, "learning_rate": 0.00019261485672765832, "lm_loss": 0.004425048828125, "loss": 0.0101, "step": 3011, "total_loss": 0.004425048828125 }, { "epoch": 1.23, "learning_rate": 0.00019261001181164128, "lm_loss": 0.005157470703125, "loss": 0.0067, "step": 3012, "total_loss": 0.005157470703125 }, { "epoch": 1.23, "learning_rate": 0.00019260516536790068, "lm_loss": 0.0107421875, "loss": 0.0077, "step": 3013, "total_loss": 0.0107421875 }, { "epoch": 1.23, "learning_rate": 0.00019260031739651642, "lm_loss": 0.0084228515625, "loss": 0.0076, "step": 3014, "total_loss": 0.0084228515625 }, { "epoch": 1.23, "learning_rate": 0.00019259546789756853, "lm_loss": 0.0079345703125, "loss": 0.0075, "step": 3015, "total_loss": 0.0079345703125 }, { "epoch": 1.23, "learning_rate": 0.000192590616871137, "lm_loss": 0.00836181640625, "loss": 0.0079, "step": 3016, "total_loss": 0.00836181640625 }, { "epoch": 1.23, "learning_rate": 0.00019258576431730183, "lm_loss": 0.0126953125, "loss": 0.0084, "step": 3017, "total_loss": 0.0126953125 }, { "epoch": 1.23, "learning_rate": 0.0001925809102361431, "lm_loss": 0.00848388671875, "loss": 0.0073, "step": 3018, "total_loss": 0.00848388671875 }, { "epoch": 1.23, "learning_rate": 0.00019257605462774086, "lm_loss": 0.01190185546875, "loss": 0.0081, "step": 3019, "total_loss": 0.01190185546875 }, { "epoch": 1.23, "learning_rate": 0.0001925711974921752, "lm_loss": 0.0091552734375, "loss": 0.0065, "step": 3020, "total_loss": 0.0091552734375 }, { "epoch": 1.24, "learning_rate": 0.0001925663388295263, "lm_loss": 0.007049560546875, "loss": 0.0063, "step": 3021, "total_loss": 0.007049560546875 }, { "epoch": 1.24, "learning_rate": 0.00019256147863987426, "lm_loss": 0.0118408203125, "loss": 0.0075, "step": 3022, "total_loss": 0.0118408203125 }, { "epoch": 1.24, "learning_rate": 0.00019255661692329926, "lm_loss": 0.01300048828125, "loss": 0.0079, "step": 3023, "total_loss": 0.01300048828125 }, { "epoch": 1.24, "learning_rate": 0.00019255175367988153, "lm_loss": 0.00494384765625, "loss": 0.0071, "step": 3024, "total_loss": 0.00494384765625 }, { "epoch": 1.24, "learning_rate": 0.00019254688890970125, "lm_loss": 0.005584716796875, "loss": 0.0064, "step": 3025, "total_loss": 0.005584716796875 }, { "epoch": 1.24, "learning_rate": 0.0001925420226128387, "lm_loss": 0.008544921875, "loss": 0.0091, "step": 3026, "total_loss": 0.008544921875 }, { "epoch": 1.24, "learning_rate": 0.00019253715478937417, "lm_loss": 0.01171875, "loss": 0.0082, "step": 3027, "total_loss": 0.01171875 }, { "epoch": 1.24, "learning_rate": 0.00019253228543938792, "lm_loss": 0.005096435546875, "loss": 0.0072, "step": 3028, "total_loss": 0.005096435546875 }, { "epoch": 1.24, "learning_rate": 0.0001925274145629603, "lm_loss": 0.002593994140625, "loss": 0.0079, "step": 3029, "total_loss": 0.002593994140625 }, { "epoch": 1.24, "learning_rate": 0.00019252254216017167, "lm_loss": 0.01385498046875, "loss": 0.0098, "step": 3030, "total_loss": 0.01385498046875 }, { "epoch": 1.24, "learning_rate": 0.00019251766823110238, "lm_loss": 0.004608154296875, "loss": 0.007, "step": 3031, "total_loss": 0.004608154296875 }, { "epoch": 1.24, "learning_rate": 0.00019251279277583287, "lm_loss": 0.005035400390625, "loss": 0.0073, "step": 3032, "total_loss": 0.005035400390625 }, { "epoch": 1.24, "learning_rate": 0.00019250791579444353, "lm_loss": 0.00958251953125, "loss": 0.0087, "step": 3033, "total_loss": 0.00958251953125 }, { "epoch": 1.24, "learning_rate": 0.00019250303728701485, "lm_loss": 0.012939453125, "loss": 0.007, "step": 3034, "total_loss": 0.012939453125 }, { "epoch": 1.24, "learning_rate": 0.00019249815725362726, "lm_loss": 0.0079345703125, "loss": 0.0079, "step": 3035, "total_loss": 0.0079345703125 }, { "epoch": 1.24, "learning_rate": 0.00019249327569436132, "lm_loss": 0.0027923583984375, "loss": 0.0079, "step": 3036, "total_loss": 0.0027923583984375 }, { "epoch": 1.24, "learning_rate": 0.0001924883926092975, "lm_loss": 0.0052490234375, "loss": 0.0081, "step": 3037, "total_loss": 0.0052490234375 }, { "epoch": 1.24, "learning_rate": 0.00019248350799851637, "lm_loss": 0.01416015625, "loss": 0.0072, "step": 3038, "total_loss": 0.01416015625 }, { "epoch": 1.24, "learning_rate": 0.00019247862186209854, "lm_loss": 0.0078125, "loss": 0.0075, "step": 3039, "total_loss": 0.0078125 }, { "epoch": 1.24, "learning_rate": 0.00019247373420012458, "lm_loss": 0.00811767578125, "loss": 0.0075, "step": 3040, "total_loss": 0.00811767578125 }, { "epoch": 1.24, "learning_rate": 0.00019246884501267512, "lm_loss": 0.01043701171875, "loss": 0.0081, "step": 3041, "total_loss": 0.01043701171875 }, { "epoch": 1.24, "learning_rate": 0.0001924639542998308, "lm_loss": 0.011474609375, "loss": 0.0088, "step": 3042, "total_loss": 0.011474609375 }, { "epoch": 1.24, "learning_rate": 0.00019245906206167237, "lm_loss": 0.0042724609375, "loss": 0.007, "step": 3043, "total_loss": 0.0042724609375 }, { "epoch": 1.24, "learning_rate": 0.00019245416829828047, "lm_loss": 0.0108642578125, "loss": 0.0085, "step": 3044, "total_loss": 0.0108642578125 }, { "epoch": 1.24, "learning_rate": 0.00019244927300973586, "lm_loss": 0.0091552734375, "loss": 0.0082, "step": 3045, "total_loss": 0.0091552734375 }, { "epoch": 1.25, "learning_rate": 0.00019244437619611923, "lm_loss": 0.004638671875, "loss": 0.0063, "step": 3046, "total_loss": 0.004638671875 }, { "epoch": 1.25, "learning_rate": 0.00019243947785751143, "lm_loss": 0.01275634765625, "loss": 0.0086, "step": 3047, "total_loss": 0.01275634765625 }, { "epoch": 1.25, "learning_rate": 0.00019243457799399325, "lm_loss": 0.0076904296875, "loss": 0.01, "step": 3048, "total_loss": 0.0076904296875 }, { "epoch": 1.25, "learning_rate": 0.0001924296766056455, "lm_loss": 0.0108642578125, "loss": 0.0092, "step": 3049, "total_loss": 0.0108642578125 }, { "epoch": 1.25, "learning_rate": 0.00019242477369254906, "lm_loss": 0.0084228515625, "loss": 0.0093, "step": 3050, "total_loss": 0.0084228515625 }, { "epoch": 1.25, "learning_rate": 0.00019241986925478476, "lm_loss": 0.0103759765625, "loss": 0.0081, "step": 3051, "total_loss": 0.0103759765625 }, { "epoch": 1.25, "learning_rate": 0.0001924149632924336, "lm_loss": 0.00506591796875, "loss": 0.0054, "step": 3052, "total_loss": 0.00506591796875 }, { "epoch": 1.25, "learning_rate": 0.0001924100558055764, "lm_loss": 0.0096435546875, "loss": 0.0072, "step": 3053, "total_loss": 0.0096435546875 }, { "epoch": 1.25, "learning_rate": 0.00019240514679429418, "lm_loss": 0.0089111328125, "loss": 0.008, "step": 3054, "total_loss": 0.0089111328125 }, { "epoch": 1.25, "learning_rate": 0.00019240023625866792, "lm_loss": 0.0130615234375, "loss": 0.0094, "step": 3055, "total_loss": 0.0130615234375 }, { "epoch": 1.25, "learning_rate": 0.0001923953241987786, "lm_loss": 0.01373291015625, "loss": 0.0078, "step": 3056, "total_loss": 0.01373291015625 }, { "epoch": 1.25, "learning_rate": 0.0001923904106147073, "lm_loss": 0.00811767578125, "loss": 0.0072, "step": 3057, "total_loss": 0.00811767578125 }, { "epoch": 1.25, "learning_rate": 0.000192385495506535, "lm_loss": 0.005828857421875, "loss": 0.006, "step": 3058, "total_loss": 0.005828857421875 }, { "epoch": 1.25, "learning_rate": 0.00019238057887434283, "lm_loss": 0.0159912109375, "loss": 0.0091, "step": 3059, "total_loss": 0.0159912109375 }, { "epoch": 1.25, "learning_rate": 0.00019237566071821189, "lm_loss": 0.0111083984375, "loss": 0.0092, "step": 3060, "total_loss": 0.0111083984375 }, { "epoch": 1.25, "learning_rate": 0.0001923707410382233, "lm_loss": 0.004852294921875, "loss": 0.0066, "step": 3061, "total_loss": 0.004852294921875 }, { "epoch": 1.25, "learning_rate": 0.00019236581983445822, "lm_loss": 0.00787353515625, "loss": 0.0091, "step": 3062, "total_loss": 0.00787353515625 }, { "epoch": 1.25, "learning_rate": 0.00019236089710699785, "lm_loss": 0.0048828125, "loss": 0.0059, "step": 3063, "total_loss": 0.0048828125 }, { "epoch": 1.25, "learning_rate": 0.00019235597285592338, "lm_loss": 0.00811767578125, "loss": 0.0073, "step": 3064, "total_loss": 0.00811767578125 }, { "epoch": 1.25, "learning_rate": 0.00019235104708131603, "lm_loss": 0.002716064453125, "loss": 0.0088, "step": 3065, "total_loss": 0.002716064453125 }, { "epoch": 1.25, "learning_rate": 0.00019234611978325713, "lm_loss": 0.00787353515625, "loss": 0.0088, "step": 3066, "total_loss": 0.00787353515625 }, { "epoch": 1.25, "learning_rate": 0.00019234119096182785, "lm_loss": 0.0045166015625, "loss": 0.0065, "step": 3067, "total_loss": 0.0045166015625 }, { "epoch": 1.25, "learning_rate": 0.00019233626061710955, "lm_loss": 0.01129150390625, "loss": 0.0065, "step": 3068, "total_loss": 0.01129150390625 }, { "epoch": 1.25, "learning_rate": 0.0001923313287491836, "lm_loss": 0.0025177001953125, "loss": 0.0076, "step": 3069, "total_loss": 0.0025177001953125 }, { "epoch": 1.26, "learning_rate": 0.0001923263953581313, "lm_loss": 0.00885009765625, "loss": 0.0076, "step": 3070, "total_loss": 0.00885009765625 }, { "epoch": 1.26, "learning_rate": 0.00019232146044403407, "lm_loss": 0.0096435546875, "loss": 0.0074, "step": 3071, "total_loss": 0.0096435546875 }, { "epoch": 1.26, "learning_rate": 0.0001923165240069733, "lm_loss": 0.0042724609375, "loss": 0.008, "step": 3072, "total_loss": 0.0042724609375 }, { "epoch": 1.26, "learning_rate": 0.0001923115860470304, "lm_loss": 0.01220703125, "loss": 0.009, "step": 3073, "total_loss": 0.01220703125 }, { "epoch": 1.26, "learning_rate": 0.00019230664656428692, "lm_loss": 0.01251220703125, "loss": 0.0085, "step": 3074, "total_loss": 0.01251220703125 }, { "epoch": 1.26, "learning_rate": 0.00019230170555882423, "lm_loss": 0.009765625, "loss": 0.0109, "step": 3075, "total_loss": 0.009765625 }, { "epoch": 1.26, "learning_rate": 0.00019229676303072387, "lm_loss": 0.0036468505859375, "loss": 0.0071, "step": 3076, "total_loss": 0.0036468505859375 }, { "epoch": 1.26, "learning_rate": 0.00019229181898006742, "lm_loss": 0.0037841796875, "loss": 0.0083, "step": 3077, "total_loss": 0.0037841796875 }, { "epoch": 1.26, "learning_rate": 0.00019228687340693638, "lm_loss": 0.00640869140625, "loss": 0.007, "step": 3078, "total_loss": 0.00640869140625 }, { "epoch": 1.26, "learning_rate": 0.0001922819263114124, "lm_loss": 0.007568359375, "loss": 0.0071, "step": 3079, "total_loss": 0.007568359375 }, { "epoch": 1.26, "learning_rate": 0.00019227697769357702, "lm_loss": 0.0155029296875, "loss": 0.0094, "step": 3080, "total_loss": 0.0155029296875 }, { "epoch": 1.26, "learning_rate": 0.0001922720275535119, "lm_loss": 0.01202392578125, "loss": 0.01, "step": 3081, "total_loss": 0.01202392578125 }, { "epoch": 1.26, "learning_rate": 0.00019226707589129876, "lm_loss": 0.01055908203125, "loss": 0.0088, "step": 3082, "total_loss": 0.01055908203125 }, { "epoch": 1.26, "learning_rate": 0.00019226212270701917, "lm_loss": 0.00787353515625, "loss": 0.0076, "step": 3083, "total_loss": 0.00787353515625 }, { "epoch": 1.26, "learning_rate": 0.0001922571680007549, "lm_loss": 0.004608154296875, "loss": 0.0077, "step": 3084, "total_loss": 0.004608154296875 }, { "epoch": 1.26, "learning_rate": 0.0001922522117725877, "lm_loss": 0.005218505859375, "loss": 0.0069, "step": 3085, "total_loss": 0.005218505859375 }, { "epoch": 1.26, "learning_rate": 0.00019224725402259933, "lm_loss": 0.01336669921875, "loss": 0.0082, "step": 3086, "total_loss": 0.01336669921875 }, { "epoch": 1.26, "learning_rate": 0.00019224229475087152, "lm_loss": 0.004425048828125, "loss": 0.0081, "step": 3087, "total_loss": 0.004425048828125 }, { "epoch": 1.26, "learning_rate": 0.00019223733395748615, "lm_loss": 0.00579833984375, "loss": 0.0068, "step": 3088, "total_loss": 0.00579833984375 }, { "epoch": 1.26, "learning_rate": 0.000192232371642525, "lm_loss": 0.00909423828125, "loss": 0.0077, "step": 3089, "total_loss": 0.00909423828125 }, { "epoch": 1.26, "learning_rate": 0.00019222740780606995, "lm_loss": 0.007659912109375, "loss": 0.0087, "step": 3090, "total_loss": 0.007659912109375 }, { "epoch": 1.26, "learning_rate": 0.00019222244244820286, "lm_loss": 0.0093994140625, "loss": 0.0069, "step": 3091, "total_loss": 0.0093994140625 }, { "epoch": 1.26, "learning_rate": 0.00019221747556900568, "lm_loss": 0.01031494140625, "loss": 0.0093, "step": 3092, "total_loss": 0.01031494140625 }, { "epoch": 1.26, "learning_rate": 0.00019221250716856036, "lm_loss": 0.010498046875, "loss": 0.0088, "step": 3093, "total_loss": 0.010498046875 }, { "epoch": 1.26, "learning_rate": 0.0001922075372469488, "lm_loss": 0.0027008056640625, "loss": 0.0081, "step": 3094, "total_loss": 0.0027008056640625 }, { "epoch": 1.27, "learning_rate": 0.00019220256580425305, "lm_loss": 0.0081787109375, "loss": 0.0066, "step": 3095, "total_loss": 0.0081787109375 }, { "epoch": 1.27, "learning_rate": 0.00019219759284055504, "lm_loss": 0.0103759765625, "loss": 0.0076, "step": 3096, "total_loss": 0.0103759765625 }, { "epoch": 1.27, "learning_rate": 0.00019219261835593687, "lm_loss": 0.00909423828125, "loss": 0.0074, "step": 3097, "total_loss": 0.00909423828125 }, { "epoch": 1.27, "learning_rate": 0.00019218764235048058, "lm_loss": 0.0140380859375, "loss": 0.0094, "step": 3098, "total_loss": 0.0140380859375 }, { "epoch": 1.27, "learning_rate": 0.00019218266482426828, "lm_loss": 0.00653076171875, "loss": 0.0075, "step": 3099, "total_loss": 0.00653076171875 }, { "epoch": 1.27, "learning_rate": 0.00019217768577738204, "lm_loss": 0.00946044921875, "loss": 0.0069, "step": 3100, "total_loss": 0.00946044921875 }, { "epoch": 1.27, "eval_lm_loss": 0.009472317062318325, "eval_loss": 0.00977653544396162, "eval_runtime": 43.9397, "eval_samples_per_second": 22.758, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009472317062318325, "lm_loss": 0.000766754150390625, "step": 3100, "total_loss": 0.000766754150390625 }, { "epoch": 1.27, "learning_rate": 0.000192172705209904, "lm_loss": 0.0084228515625, "loss": 0.0087, "step": 3101, "total_loss": 0.0084228515625 }, { "epoch": 1.27, "learning_rate": 0.00019216772312191638, "lm_loss": 0.007415771484375, "loss": 0.008, "step": 3102, "total_loss": 0.007415771484375 }, { "epoch": 1.27, "learning_rate": 0.00019216273951350128, "lm_loss": 0.009033203125, "loss": 0.0084, "step": 3103, "total_loss": 0.009033203125 }, { "epoch": 1.27, "learning_rate": 0.000192157754384741, "lm_loss": 0.005889892578125, "loss": 0.0073, "step": 3104, "total_loss": 0.005889892578125 }, { "epoch": 1.27, "learning_rate": 0.00019215276773571768, "lm_loss": 0.0031280517578125, "loss": 0.0059, "step": 3105, "total_loss": 0.0031280517578125 }, { "epoch": 1.27, "learning_rate": 0.00019214777956651365, "lm_loss": 0.00994873046875, "loss": 0.0083, "step": 3106, "total_loss": 0.00994873046875 }, { "epoch": 1.27, "learning_rate": 0.00019214278987721118, "lm_loss": 0.00616455078125, "loss": 0.0066, "step": 3107, "total_loss": 0.00616455078125 }, { "epoch": 1.27, "learning_rate": 0.00019213779866789258, "lm_loss": 0.00830078125, "loss": 0.0081, "step": 3108, "total_loss": 0.00830078125 }, { "epoch": 1.27, "learning_rate": 0.00019213280593864015, "lm_loss": 0.0027923583984375, "loss": 0.0075, "step": 3109, "total_loss": 0.0027923583984375 }, { "epoch": 1.27, "learning_rate": 0.00019212781168953633, "lm_loss": 0.00836181640625, "loss": 0.0086, "step": 3110, "total_loss": 0.00836181640625 }, { "epoch": 1.27, "learning_rate": 0.00019212281592066347, "lm_loss": 0.005706787109375, "loss": 0.0089, "step": 3111, "total_loss": 0.005706787109375 }, { "epoch": 1.27, "learning_rate": 0.00019211781863210395, "lm_loss": 0.00494384765625, "loss": 0.0095, "step": 3112, "total_loss": 0.00494384765625 }, { "epoch": 1.27, "learning_rate": 0.00019211281982394021, "lm_loss": 0.0185546875, "loss": 0.0092, "step": 3113, "total_loss": 0.0185546875 }, { "epoch": 1.27, "learning_rate": 0.0001921078194962548, "lm_loss": 0.00274658203125, "loss": 0.0067, "step": 3114, "total_loss": 0.00274658203125 }, { "epoch": 1.27, "learning_rate": 0.0001921028176491301, "lm_loss": 0.0026092529296875, "loss": 0.0067, "step": 3115, "total_loss": 0.0026092529296875 }, { "epoch": 1.27, "learning_rate": 0.00019209781428264865, "lm_loss": 0.00494384765625, "loss": 0.0062, "step": 3116, "total_loss": 0.00494384765625 }, { "epoch": 1.27, "learning_rate": 0.00019209280939689303, "lm_loss": 0.01385498046875, "loss": 0.0075, "step": 3117, "total_loss": 0.01385498046875 }, { "epoch": 1.27, "learning_rate": 0.00019208780299194574, "lm_loss": 0.0101318359375, "loss": 0.0093, "step": 3118, "total_loss": 0.0101318359375 }, { "epoch": 1.28, "learning_rate": 0.00019208279506788945, "lm_loss": 0.0025787353515625, "loss": 0.0064, "step": 3119, "total_loss": 0.0025787353515625 }, { "epoch": 1.28, "learning_rate": 0.00019207778562480666, "lm_loss": 0.01507568359375, "loss": 0.008, "step": 3120, "total_loss": 0.01507568359375 }, { "epoch": 1.28, "learning_rate": 0.0001920727746627801, "lm_loss": 0.006317138671875, "loss": 0.0074, "step": 3121, "total_loss": 0.006317138671875 }, { "epoch": 1.28, "learning_rate": 0.00019206776218189242, "lm_loss": 0.01483154296875, "loss": 0.0068, "step": 3122, "total_loss": 0.01483154296875 }, { "epoch": 1.28, "learning_rate": 0.00019206274818222628, "lm_loss": 0.002288818359375, "loss": 0.0063, "step": 3123, "total_loss": 0.002288818359375 }, { "epoch": 1.28, "learning_rate": 0.00019205773266386437, "lm_loss": 0.007476806640625, "loss": 0.0062, "step": 3124, "total_loss": 0.007476806640625 }, { "epoch": 1.28, "learning_rate": 0.00019205271562688946, "lm_loss": 0.008056640625, "loss": 0.0089, "step": 3125, "total_loss": 0.008056640625 }, { "epoch": 1.28, "learning_rate": 0.00019204769707138435, "lm_loss": 0.01300048828125, "loss": 0.0075, "step": 3126, "total_loss": 0.01300048828125 }, { "epoch": 1.28, "learning_rate": 0.00019204267699743176, "lm_loss": 0.01312255859375, "loss": 0.0075, "step": 3127, "total_loss": 0.01312255859375 }, { "epoch": 1.28, "learning_rate": 0.00019203765540511456, "lm_loss": 0.00531005859375, "loss": 0.0091, "step": 3128, "total_loss": 0.00531005859375 }, { "epoch": 1.28, "learning_rate": 0.00019203263229451554, "lm_loss": 0.012939453125, "loss": 0.0081, "step": 3129, "total_loss": 0.012939453125 }, { "epoch": 1.28, "learning_rate": 0.0001920276076657176, "lm_loss": 0.00909423828125, "loss": 0.0087, "step": 3130, "total_loss": 0.00909423828125 }, { "epoch": 1.28, "learning_rate": 0.00019202258151880357, "lm_loss": 0.01007080078125, "loss": 0.009, "step": 3131, "total_loss": 0.01007080078125 }, { "epoch": 1.28, "learning_rate": 0.00019201755385385644, "lm_loss": 0.00933837890625, "loss": 0.0065, "step": 3132, "total_loss": 0.00933837890625 }, { "epoch": 1.28, "learning_rate": 0.00019201252467095908, "lm_loss": 0.007110595703125, "loss": 0.0073, "step": 3133, "total_loss": 0.007110595703125 }, { "epoch": 1.28, "learning_rate": 0.00019200749397019453, "lm_loss": 0.010498046875, "loss": 0.0088, "step": 3134, "total_loss": 0.010498046875 }, { "epoch": 1.28, "learning_rate": 0.0001920024617516457, "lm_loss": 0.01385498046875, "loss": 0.0085, "step": 3135, "total_loss": 0.01385498046875 }, { "epoch": 1.28, "learning_rate": 0.00019199742801539565, "lm_loss": 0.007232666015625, "loss": 0.0068, "step": 3136, "total_loss": 0.007232666015625 }, { "epoch": 1.28, "learning_rate": 0.0001919923927615274, "lm_loss": 0.002838134765625, "loss": 0.0076, "step": 3137, "total_loss": 0.002838134765625 }, { "epoch": 1.28, "learning_rate": 0.00019198735599012398, "lm_loss": 0.0036468505859375, "loss": 0.0064, "step": 3138, "total_loss": 0.0036468505859375 }, { "epoch": 1.28, "learning_rate": 0.00019198231770126855, "lm_loss": 0.00872802734375, "loss": 0.0074, "step": 3139, "total_loss": 0.00872802734375 }, { "epoch": 1.28, "learning_rate": 0.00019197727789504418, "lm_loss": 0.00469970703125, "loss": 0.0074, "step": 3140, "total_loss": 0.00469970703125 }, { "epoch": 1.28, "learning_rate": 0.000191972236571534, "lm_loss": 0.00628662109375, "loss": 0.0077, "step": 3141, "total_loss": 0.00628662109375 }, { "epoch": 1.28, "learning_rate": 0.0001919671937308212, "lm_loss": 0.00787353515625, "loss": 0.009, "step": 3142, "total_loss": 0.00787353515625 }, { "epoch": 1.28, "learning_rate": 0.00019196214937298897, "lm_loss": 0.0103759765625, "loss": 0.0073, "step": 3143, "total_loss": 0.0103759765625 }, { "epoch": 1.29, "learning_rate": 0.0001919571034981205, "lm_loss": 0.0140380859375, "loss": 0.0068, "step": 3144, "total_loss": 0.0140380859375 }, { "epoch": 1.29, "learning_rate": 0.00019195205610629909, "lm_loss": 0.0145263671875, "loss": 0.0086, "step": 3145, "total_loss": 0.0145263671875 }, { "epoch": 1.29, "learning_rate": 0.00019194700719760786, "lm_loss": 0.01129150390625, "loss": 0.0095, "step": 3146, "total_loss": 0.01129150390625 }, { "epoch": 1.29, "learning_rate": 0.00019194195677213024, "lm_loss": 0.0120849609375, "loss": 0.0088, "step": 3147, "total_loss": 0.0120849609375 }, { "epoch": 1.29, "learning_rate": 0.0001919369048299495, "lm_loss": 0.007720947265625, "loss": 0.0077, "step": 3148, "total_loss": 0.007720947265625 }, { "epoch": 1.29, "learning_rate": 0.00019193185137114896, "lm_loss": 0.00994873046875, "loss": 0.0071, "step": 3149, "total_loss": 0.00994873046875 }, { "epoch": 1.29, "learning_rate": 0.00019192679639581201, "lm_loss": 0.004730224609375, "loss": 0.0068, "step": 3150, "total_loss": 0.004730224609375 }, { "epoch": 1.29, "learning_rate": 0.000191921739904022, "lm_loss": 0.005340576171875, "loss": 0.0071, "step": 3151, "total_loss": 0.005340576171875 }, { "epoch": 1.29, "learning_rate": 0.00019191668189586238, "lm_loss": 0.0062255859375, "loss": 0.0088, "step": 3152, "total_loss": 0.0062255859375 }, { "epoch": 1.29, "learning_rate": 0.00019191162237141657, "lm_loss": 0.009765625, "loss": 0.0081, "step": 3153, "total_loss": 0.009765625 }, { "epoch": 1.29, "learning_rate": 0.00019190656133076804, "lm_loss": 0.0084228515625, "loss": 0.0073, "step": 3154, "total_loss": 0.0084228515625 }, { "epoch": 1.29, "learning_rate": 0.0001919014987740003, "lm_loss": 0.006622314453125, "loss": 0.0077, "step": 3155, "total_loss": 0.006622314453125 }, { "epoch": 1.29, "learning_rate": 0.00019189643470119685, "lm_loss": 0.00335693359375, "loss": 0.0068, "step": 3156, "total_loss": 0.00335693359375 }, { "epoch": 1.29, "learning_rate": 0.00019189136911244118, "lm_loss": 0.00823974609375, "loss": 0.0075, "step": 3157, "total_loss": 0.00823974609375 }, { "epoch": 1.29, "learning_rate": 0.0001918863020078169, "lm_loss": 0.0034027099609375, "loss": 0.0075, "step": 3158, "total_loss": 0.0034027099609375 }, { "epoch": 1.29, "learning_rate": 0.0001918812333874076, "lm_loss": 0.006591796875, "loss": 0.0085, "step": 3159, "total_loss": 0.006591796875 }, { "epoch": 1.29, "learning_rate": 0.0001918761632512969, "lm_loss": 0.006927490234375, "loss": 0.0094, "step": 3160, "total_loss": 0.006927490234375 }, { "epoch": 1.29, "learning_rate": 0.00019187109159956843, "lm_loss": 0.00836181640625, "loss": 0.0079, "step": 3161, "total_loss": 0.00836181640625 }, { "epoch": 1.29, "learning_rate": 0.00019186601843230582, "lm_loss": 0.004913330078125, "loss": 0.0071, "step": 3162, "total_loss": 0.004913330078125 }, { "epoch": 1.29, "learning_rate": 0.0001918609437495928, "lm_loss": 0.0089111328125, "loss": 0.0078, "step": 3163, "total_loss": 0.0089111328125 }, { "epoch": 1.29, "learning_rate": 0.00019185586755151305, "lm_loss": 0.003662109375, "loss": 0.0079, "step": 3164, "total_loss": 0.003662109375 }, { "epoch": 1.29, "learning_rate": 0.00019185078983815034, "lm_loss": 0.0034942626953125, "loss": 0.0079, "step": 3165, "total_loss": 0.0034942626953125 }, { "epoch": 1.29, "learning_rate": 0.00019184571060958846, "lm_loss": 0.003204345703125, "loss": 0.0067, "step": 3166, "total_loss": 0.003204345703125 }, { "epoch": 1.29, "learning_rate": 0.00019184062986591112, "lm_loss": 0.0037078857421875, "loss": 0.0091, "step": 3167, "total_loss": 0.0037078857421875 }, { "epoch": 1.3, "learning_rate": 0.00019183554760720216, "lm_loss": 0.005096435546875, "loss": 0.0089, "step": 3168, "total_loss": 0.005096435546875 }, { "epoch": 1.3, "learning_rate": 0.00019183046383354548, "lm_loss": 0.00799560546875, "loss": 0.0078, "step": 3169, "total_loss": 0.00799560546875 }, { "epoch": 1.3, "learning_rate": 0.00019182537854502485, "lm_loss": 0.00439453125, "loss": 0.0077, "step": 3170, "total_loss": 0.00439453125 }, { "epoch": 1.3, "learning_rate": 0.00019182029174172425, "lm_loss": 0.005859375, "loss": 0.0072, "step": 3171, "total_loss": 0.005859375 }, { "epoch": 1.3, "learning_rate": 0.0001918152034237275, "lm_loss": 0.007537841796875, "loss": 0.0078, "step": 3172, "total_loss": 0.007537841796875 }, { "epoch": 1.3, "learning_rate": 0.00019181011359111866, "lm_loss": 0.0029296875, "loss": 0.0067, "step": 3173, "total_loss": 0.0029296875 }, { "epoch": 1.3, "learning_rate": 0.00019180502224398156, "lm_loss": 0.00518798828125, "loss": 0.0068, "step": 3174, "total_loss": 0.00518798828125 }, { "epoch": 1.3, "learning_rate": 0.00019179992938240026, "lm_loss": 0.00970458984375, "loss": 0.0087, "step": 3175, "total_loss": 0.00970458984375 }, { "epoch": 1.3, "learning_rate": 0.00019179483500645877, "lm_loss": 0.004425048828125, "loss": 0.0076, "step": 3176, "total_loss": 0.004425048828125 }, { "epoch": 1.3, "learning_rate": 0.0001917897391162411, "lm_loss": 0.00225830078125, "loss": 0.0093, "step": 3177, "total_loss": 0.00225830078125 }, { "epoch": 1.3, "learning_rate": 0.00019178464171183132, "lm_loss": 0.00872802734375, "loss": 0.0075, "step": 3178, "total_loss": 0.00872802734375 }, { "epoch": 1.3, "learning_rate": 0.00019177954279331357, "lm_loss": 0.01287841796875, "loss": 0.008, "step": 3179, "total_loss": 0.01287841796875 }, { "epoch": 1.3, "learning_rate": 0.0001917744423607719, "lm_loss": 0.00933837890625, "loss": 0.0068, "step": 3180, "total_loss": 0.00933837890625 }, { "epoch": 1.3, "learning_rate": 0.0001917693404142905, "lm_loss": 0.00750732421875, "loss": 0.0083, "step": 3181, "total_loss": 0.00750732421875 }, { "epoch": 1.3, "learning_rate": 0.0001917642369539535, "lm_loss": 0.01080322265625, "loss": 0.0088, "step": 3182, "total_loss": 0.01080322265625 }, { "epoch": 1.3, "learning_rate": 0.0001917591319798451, "lm_loss": 0.0023040771484375, "loss": 0.0065, "step": 3183, "total_loss": 0.0023040771484375 }, { "epoch": 1.3, "learning_rate": 0.00019175402549204948, "lm_loss": 0.002349853515625, "loss": 0.0082, "step": 3184, "total_loss": 0.002349853515625 }, { "epoch": 1.3, "learning_rate": 0.00019174891749065097, "lm_loss": 0.0031280517578125, "loss": 0.0067, "step": 3185, "total_loss": 0.0031280517578125 }, { "epoch": 1.3, "learning_rate": 0.0001917438079757337, "lm_loss": 0.01263427734375, "loss": 0.0068, "step": 3186, "total_loss": 0.01263427734375 }, { "epoch": 1.3, "learning_rate": 0.00019173869694738205, "lm_loss": 0.002288818359375, "loss": 0.0067, "step": 3187, "total_loss": 0.002288818359375 }, { "epoch": 1.3, "learning_rate": 0.00019173358440568032, "lm_loss": 0.005767822265625, "loss": 0.0061, "step": 3188, "total_loss": 0.005767822265625 }, { "epoch": 1.3, "learning_rate": 0.00019172847035071284, "lm_loss": 0.0126953125, "loss": 0.0069, "step": 3189, "total_loss": 0.0126953125 }, { "epoch": 1.3, "learning_rate": 0.00019172335478256396, "lm_loss": 0.01171875, "loss": 0.0089, "step": 3190, "total_loss": 0.01171875 }, { "epoch": 1.3, "learning_rate": 0.00019171823770131808, "lm_loss": 0.00872802734375, "loss": 0.008, "step": 3191, "total_loss": 0.00872802734375 }, { "epoch": 1.3, "learning_rate": 0.00019171311910705962, "lm_loss": 0.005157470703125, "loss": 0.0088, "step": 3192, "total_loss": 0.005157470703125 }, { "epoch": 1.31, "learning_rate": 0.00019170799899987303, "lm_loss": 0.0034637451171875, "loss": 0.0078, "step": 3193, "total_loss": 0.0034637451171875 }, { "epoch": 1.31, "learning_rate": 0.00019170287737984274, "lm_loss": 0.00439453125, "loss": 0.0081, "step": 3194, "total_loss": 0.00439453125 }, { "epoch": 1.31, "learning_rate": 0.00019169775424705325, "lm_loss": 0.007568359375, "loss": 0.005, "step": 3195, "total_loss": 0.007568359375 }, { "epoch": 1.31, "learning_rate": 0.00019169262960158908, "lm_loss": 0.0196533203125, "loss": 0.0084, "step": 3196, "total_loss": 0.0196533203125 }, { "epoch": 1.31, "learning_rate": 0.00019168750344353475, "lm_loss": 0.004302978515625, "loss": 0.0065, "step": 3197, "total_loss": 0.004302978515625 }, { "epoch": 1.31, "learning_rate": 0.00019168237577297484, "lm_loss": 0.006591796875, "loss": 0.0081, "step": 3198, "total_loss": 0.006591796875 }, { "epoch": 1.31, "learning_rate": 0.00019167724658999396, "lm_loss": 0.00555419921875, "loss": 0.0067, "step": 3199, "total_loss": 0.00555419921875 }, { "epoch": 1.31, "learning_rate": 0.00019167211589467663, "lm_loss": 0.0029449462890625, "loss": 0.0072, "step": 3200, "total_loss": 0.0029449462890625 }, { "epoch": 1.31, "eval_lm_loss": 0.009713852778077126, "eval_loss": 0.010098733939230442, "eval_runtime": 44.0862, "eval_samples_per_second": 22.683, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009713852778077126, "lm_loss": 0.000698089599609375, "step": 3200, "total_loss": 0.000698089599609375 }, { "epoch": 1.31, "learning_rate": 0.00019166698368710763, "lm_loss": 0.011474609375, "loss": 0.0072, "step": 3201, "total_loss": 0.011474609375 }, { "epoch": 1.31, "learning_rate": 0.0001916618499673715, "lm_loss": 0.0098876953125, "loss": 0.0092, "step": 3202, "total_loss": 0.0098876953125 }, { "epoch": 1.31, "learning_rate": 0.00019165671473555298, "lm_loss": 0.01348876953125, "loss": 0.0083, "step": 3203, "total_loss": 0.01348876953125 }, { "epoch": 1.31, "learning_rate": 0.0001916515779917368, "lm_loss": 0.004150390625, "loss": 0.0078, "step": 3204, "total_loss": 0.004150390625 }, { "epoch": 1.31, "learning_rate": 0.00019164643973600763, "lm_loss": 0.00677490234375, "loss": 0.0077, "step": 3205, "total_loss": 0.00677490234375 }, { "epoch": 1.31, "learning_rate": 0.0001916412999684503, "lm_loss": 0.0096435546875, "loss": 0.0082, "step": 3206, "total_loss": 0.0096435546875 }, { "epoch": 1.31, "learning_rate": 0.00019163615868914957, "lm_loss": 0.004669189453125, "loss": 0.0067, "step": 3207, "total_loss": 0.004669189453125 }, { "epoch": 1.31, "learning_rate": 0.00019163101589819026, "lm_loss": 0.011962890625, "loss": 0.0079, "step": 3208, "total_loss": 0.011962890625 }, { "epoch": 1.31, "learning_rate": 0.0001916258715956572, "lm_loss": 0.00830078125, "loss": 0.0076, "step": 3209, "total_loss": 0.00830078125 }, { "epoch": 1.31, "learning_rate": 0.00019162072578163524, "lm_loss": 0.009765625, "loss": 0.0084, "step": 3210, "total_loss": 0.009765625 }, { "epoch": 1.31, "learning_rate": 0.0001916155784562093, "lm_loss": 0.011962890625, "loss": 0.0086, "step": 3211, "total_loss": 0.011962890625 }, { "epoch": 1.31, "learning_rate": 0.00019161042961946426, "lm_loss": 0.00592041015625, "loss": 0.0098, "step": 3212, "total_loss": 0.00592041015625 }, { "epoch": 1.31, "learning_rate": 0.00019160527927148505, "lm_loss": 0.0101318359375, "loss": 0.0083, "step": 3213, "total_loss": 0.0101318359375 }, { "epoch": 1.31, "learning_rate": 0.00019160012741235667, "lm_loss": 0.0166015625, "loss": 0.0096, "step": 3214, "total_loss": 0.0166015625 }, { "epoch": 1.31, "learning_rate": 0.0001915949740421641, "lm_loss": 0.005584716796875, "loss": 0.0069, "step": 3215, "total_loss": 0.005584716796875 }, { "epoch": 1.31, "learning_rate": 0.00019158981916099233, "lm_loss": 0.01318359375, "loss": 0.0095, "step": 3216, "total_loss": 0.01318359375 }, { "epoch": 1.32, "learning_rate": 0.0001915846627689264, "lm_loss": 0.00830078125, "loss": 0.008, "step": 3217, "total_loss": 0.00830078125 }, { "epoch": 1.32, "learning_rate": 0.00019157950486605135, "lm_loss": 0.00156402587890625, "loss": 0.0082, "step": 3218, "total_loss": 0.00156402587890625 }, { "epoch": 1.32, "learning_rate": 0.00019157434545245233, "lm_loss": 0.007537841796875, "loss": 0.0069, "step": 3219, "total_loss": 0.007537841796875 }, { "epoch": 1.32, "learning_rate": 0.00019156918452821443, "lm_loss": 0.00848388671875, "loss": 0.0093, "step": 3220, "total_loss": 0.00848388671875 }, { "epoch": 1.32, "learning_rate": 0.00019156402209342274, "lm_loss": 0.0093994140625, "loss": 0.0076, "step": 3221, "total_loss": 0.0093994140625 }, { "epoch": 1.32, "learning_rate": 0.00019155885814816249, "lm_loss": 0.006591796875, "loss": 0.0083, "step": 3222, "total_loss": 0.006591796875 }, { "epoch": 1.32, "learning_rate": 0.00019155369269251878, "lm_loss": 0.005706787109375, "loss": 0.0084, "step": 3223, "total_loss": 0.005706787109375 }, { "epoch": 1.32, "learning_rate": 0.0001915485257265769, "lm_loss": 0.0068359375, "loss": 0.0078, "step": 3224, "total_loss": 0.0068359375 }, { "epoch": 1.32, "learning_rate": 0.00019154335725042203, "lm_loss": 0.01104736328125, "loss": 0.008, "step": 3225, "total_loss": 0.01104736328125 }, { "epoch": 1.32, "learning_rate": 0.0001915381872641395, "lm_loss": 0.005859375, "loss": 0.0074, "step": 3226, "total_loss": 0.005859375 }, { "epoch": 1.32, "learning_rate": 0.0001915330157678145, "lm_loss": 0.00701904296875, "loss": 0.0072, "step": 3227, "total_loss": 0.00701904296875 }, { "epoch": 1.32, "learning_rate": 0.0001915278427615324, "lm_loss": 0.006378173828125, "loss": 0.0083, "step": 3228, "total_loss": 0.006378173828125 }, { "epoch": 1.32, "learning_rate": 0.00019152266824537853, "lm_loss": 0.008544921875, "loss": 0.0075, "step": 3229, "total_loss": 0.008544921875 }, { "epoch": 1.32, "learning_rate": 0.00019151749221943827, "lm_loss": 0.007568359375, "loss": 0.008, "step": 3230, "total_loss": 0.007568359375 }, { "epoch": 1.32, "learning_rate": 0.00019151231468379697, "lm_loss": 0.01043701171875, "loss": 0.0076, "step": 3231, "total_loss": 0.01043701171875 }, { "epoch": 1.32, "learning_rate": 0.00019150713563854005, "lm_loss": 0.007110595703125, "loss": 0.0058, "step": 3232, "total_loss": 0.007110595703125 }, { "epoch": 1.32, "learning_rate": 0.00019150195508375295, "lm_loss": 0.00616455078125, "loss": 0.0088, "step": 3233, "total_loss": 0.00616455078125 }, { "epoch": 1.32, "learning_rate": 0.00019149677301952113, "lm_loss": 0.00921630859375, "loss": 0.0083, "step": 3234, "total_loss": 0.00921630859375 }, { "epoch": 1.32, "learning_rate": 0.00019149158944593012, "lm_loss": 0.006500244140625, "loss": 0.0083, "step": 3235, "total_loss": 0.006500244140625 }, { "epoch": 1.32, "learning_rate": 0.00019148640436306532, "lm_loss": 0.009765625, "loss": 0.0078, "step": 3236, "total_loss": 0.009765625 }, { "epoch": 1.32, "learning_rate": 0.00019148121777101234, "lm_loss": 0.00421142578125, "loss": 0.0062, "step": 3237, "total_loss": 0.00421142578125 }, { "epoch": 1.32, "learning_rate": 0.00019147602966985674, "lm_loss": 0.018310546875, "loss": 0.0074, "step": 3238, "total_loss": 0.018310546875 }, { "epoch": 1.32, "learning_rate": 0.00019147084005968407, "lm_loss": 0.0062255859375, "loss": 0.0091, "step": 3239, "total_loss": 0.0062255859375 }, { "epoch": 1.32, "learning_rate": 0.00019146564894058, "lm_loss": 0.006683349609375, "loss": 0.0081, "step": 3240, "total_loss": 0.006683349609375 }, { "epoch": 1.33, "learning_rate": 0.0001914604563126301, "lm_loss": 0.01092529296875, "loss": 0.0085, "step": 3241, "total_loss": 0.01092529296875 }, { "epoch": 1.33, "learning_rate": 0.00019145526217592007, "lm_loss": 0.00830078125, "loss": 0.0067, "step": 3242, "total_loss": 0.00830078125 }, { "epoch": 1.33, "learning_rate": 0.00019145006653053558, "lm_loss": 0.004974365234375, "loss": 0.0086, "step": 3243, "total_loss": 0.004974365234375 }, { "epoch": 1.33, "learning_rate": 0.0001914448693765623, "lm_loss": 0.00872802734375, "loss": 0.0077, "step": 3244, "total_loss": 0.00872802734375 }, { "epoch": 1.33, "learning_rate": 0.00019143967071408606, "lm_loss": 0.00714111328125, "loss": 0.0083, "step": 3245, "total_loss": 0.00714111328125 }, { "epoch": 1.33, "learning_rate": 0.00019143447054319253, "lm_loss": 0.00970458984375, "loss": 0.0073, "step": 3246, "total_loss": 0.00970458984375 }, { "epoch": 1.33, "learning_rate": 0.00019142926886396753, "lm_loss": 0.008056640625, "loss": 0.0077, "step": 3247, "total_loss": 0.008056640625 }, { "epoch": 1.33, "learning_rate": 0.00019142406567649686, "lm_loss": 0.005828857421875, "loss": 0.008, "step": 3248, "total_loss": 0.005828857421875 }, { "epoch": 1.33, "learning_rate": 0.00019141886098086636, "lm_loss": 0.008544921875, "loss": 0.0077, "step": 3249, "total_loss": 0.008544921875 }, { "epoch": 1.33, "learning_rate": 0.00019141365477716188, "lm_loss": 0.004119873046875, "loss": 0.0063, "step": 3250, "total_loss": 0.004119873046875 }, { "epoch": 1.33, "learning_rate": 0.00019140844706546931, "lm_loss": 0.01263427734375, "loss": 0.0073, "step": 3251, "total_loss": 0.01263427734375 }, { "epoch": 1.33, "learning_rate": 0.00019140323784587454, "lm_loss": 0.00836181640625, "loss": 0.007, "step": 3252, "total_loss": 0.00836181640625 }, { "epoch": 1.33, "learning_rate": 0.00019139802711846357, "lm_loss": 0.0037689208984375, "loss": 0.0068, "step": 3253, "total_loss": 0.0037689208984375 }, { "epoch": 1.33, "learning_rate": 0.00019139281488332225, "lm_loss": 0.00933837890625, "loss": 0.0078, "step": 3254, "total_loss": 0.00933837890625 }, { "epoch": 1.33, "learning_rate": 0.00019138760114053666, "lm_loss": 0.00811767578125, "loss": 0.0071, "step": 3255, "total_loss": 0.00811767578125 }, { "epoch": 1.33, "learning_rate": 0.00019138238589019274, "lm_loss": 0.0023956298828125, "loss": 0.0104, "step": 3256, "total_loss": 0.0023956298828125 }, { "epoch": 1.33, "learning_rate": 0.00019137716913237657, "lm_loss": 0.0034637451171875, "loss": 0.0092, "step": 3257, "total_loss": 0.0034637451171875 }, { "epoch": 1.33, "learning_rate": 0.00019137195086717416, "lm_loss": 0.01177978515625, "loss": 0.0081, "step": 3258, "total_loss": 0.01177978515625 }, { "epoch": 1.33, "learning_rate": 0.00019136673109467167, "lm_loss": 0.00640869140625, "loss": 0.0072, "step": 3259, "total_loss": 0.00640869140625 }, { "epoch": 1.33, "learning_rate": 0.00019136150981495512, "lm_loss": 0.01031494140625, "loss": 0.0081, "step": 3260, "total_loss": 0.01031494140625 }, { "epoch": 1.33, "learning_rate": 0.00019135628702811065, "lm_loss": 0.0079345703125, "loss": 0.0068, "step": 3261, "total_loss": 0.0079345703125 }, { "epoch": 1.33, "learning_rate": 0.0001913510627342245, "lm_loss": 0.006500244140625, "loss": 0.0081, "step": 3262, "total_loss": 0.006500244140625 }, { "epoch": 1.33, "learning_rate": 0.00019134583693338275, "lm_loss": 0.002899169921875, "loss": 0.0078, "step": 3263, "total_loss": 0.002899169921875 }, { "epoch": 1.33, "learning_rate": 0.0001913406096256717, "lm_loss": 0.00860595703125, "loss": 0.0078, "step": 3264, "total_loss": 0.00860595703125 }, { "epoch": 1.33, "learning_rate": 0.00019133538081117748, "lm_loss": 0.00909423828125, "loss": 0.0073, "step": 3265, "total_loss": 0.00909423828125 }, { "epoch": 1.34, "learning_rate": 0.00019133015048998644, "lm_loss": 0.0091552734375, "loss": 0.0072, "step": 3266, "total_loss": 0.0091552734375 }, { "epoch": 1.34, "learning_rate": 0.00019132491866218478, "lm_loss": 0.01080322265625, "loss": 0.0097, "step": 3267, "total_loss": 0.01080322265625 }, { "epoch": 1.34, "learning_rate": 0.00019131968532785888, "lm_loss": 0.01092529296875, "loss": 0.007, "step": 3268, "total_loss": 0.01092529296875 }, { "epoch": 1.34, "learning_rate": 0.00019131445048709505, "lm_loss": 0.00909423828125, "loss": 0.0069, "step": 3269, "total_loss": 0.00909423828125 }, { "epoch": 1.34, "learning_rate": 0.00019130921413997961, "lm_loss": 0.01556396484375, "loss": 0.0082, "step": 3270, "total_loss": 0.01556396484375 }, { "epoch": 1.34, "learning_rate": 0.00019130397628659896, "lm_loss": 0.0074462890625, "loss": 0.009, "step": 3271, "total_loss": 0.0074462890625 }, { "epoch": 1.34, "learning_rate": 0.0001912987369270395, "lm_loss": 0.003143310546875, "loss": 0.0079, "step": 3272, "total_loss": 0.003143310546875 }, { "epoch": 1.34, "learning_rate": 0.0001912934960613877, "lm_loss": 0.005401611328125, "loss": 0.0072, "step": 3273, "total_loss": 0.005401611328125 }, { "epoch": 1.34, "learning_rate": 0.00019128825368973, "lm_loss": 0.003997802734375, "loss": 0.0081, "step": 3274, "total_loss": 0.003997802734375 }, { "epoch": 1.34, "learning_rate": 0.0001912830098121528, "lm_loss": 0.0087890625, "loss": 0.0064, "step": 3275, "total_loss": 0.0087890625 }, { "epoch": 1.34, "learning_rate": 0.0001912777644287427, "lm_loss": 0.00506591796875, "loss": 0.0078, "step": 3276, "total_loss": 0.00506591796875 }, { "epoch": 1.34, "learning_rate": 0.0001912725175395862, "lm_loss": 0.01348876953125, "loss": 0.0075, "step": 3277, "total_loss": 0.01348876953125 }, { "epoch": 1.34, "learning_rate": 0.00019126726914476985, "lm_loss": 0.00994873046875, "loss": 0.0065, "step": 3278, "total_loss": 0.00994873046875 }, { "epoch": 1.34, "learning_rate": 0.00019126201924438022, "lm_loss": 0.0068359375, "loss": 0.0076, "step": 3279, "total_loss": 0.0068359375 }, { "epoch": 1.34, "learning_rate": 0.00019125676783850396, "lm_loss": 0.01519775390625, "loss": 0.0074, "step": 3280, "total_loss": 0.01519775390625 }, { "epoch": 1.34, "learning_rate": 0.00019125151492722762, "lm_loss": 0.01220703125, "loss": 0.0084, "step": 3281, "total_loss": 0.01220703125 }, { "epoch": 1.34, "learning_rate": 0.0001912462605106379, "lm_loss": 0.009521484375, "loss": 0.0087, "step": 3282, "total_loss": 0.009521484375 }, { "epoch": 1.34, "learning_rate": 0.0001912410045888215, "lm_loss": 0.013427734375, "loss": 0.0092, "step": 3283, "total_loss": 0.013427734375 }, { "epoch": 1.34, "learning_rate": 0.0001912357471618651, "lm_loss": 0.0034332275390625, "loss": 0.006, "step": 3284, "total_loss": 0.0034332275390625 }, { "epoch": 1.34, "learning_rate": 0.0001912304882298554, "lm_loss": 0.00799560546875, "loss": 0.0071, "step": 3285, "total_loss": 0.00799560546875 }, { "epoch": 1.34, "learning_rate": 0.0001912252277928792, "lm_loss": 0.00946044921875, "loss": 0.0101, "step": 3286, "total_loss": 0.00946044921875 }, { "epoch": 1.34, "learning_rate": 0.00019121996585102328, "lm_loss": 0.005645751953125, "loss": 0.0075, "step": 3287, "total_loss": 0.005645751953125 }, { "epoch": 1.34, "learning_rate": 0.0001912147024043744, "lm_loss": 0.00701904296875, "loss": 0.0067, "step": 3288, "total_loss": 0.00701904296875 }, { "epoch": 1.34, "learning_rate": 0.0001912094374530194, "lm_loss": 0.0089111328125, "loss": 0.0092, "step": 3289, "total_loss": 0.0089111328125 }, { "epoch": 1.35, "learning_rate": 0.00019120417099704516, "lm_loss": 0.005401611328125, "loss": 0.0072, "step": 3290, "total_loss": 0.005401611328125 }, { "epoch": 1.35, "learning_rate": 0.0001911989030365385, "lm_loss": 0.0162353515625, "loss": 0.0088, "step": 3291, "total_loss": 0.0162353515625 }, { "epoch": 1.35, "learning_rate": 0.0001911936335715864, "lm_loss": 0.007293701171875, "loss": 0.0064, "step": 3292, "total_loss": 0.007293701171875 }, { "epoch": 1.35, "learning_rate": 0.00019118836260227572, "lm_loss": 0.0159912109375, "loss": 0.009, "step": 3293, "total_loss": 0.0159912109375 }, { "epoch": 1.35, "learning_rate": 0.00019118309012869348, "lm_loss": 0.0086669921875, "loss": 0.008, "step": 3294, "total_loss": 0.0086669921875 }, { "epoch": 1.35, "learning_rate": 0.00019117781615092655, "lm_loss": 0.0079345703125, "loss": 0.0058, "step": 3295, "total_loss": 0.0079345703125 }, { "epoch": 1.35, "learning_rate": 0.00019117254066906204, "lm_loss": 0.006103515625, "loss": 0.0058, "step": 3296, "total_loss": 0.006103515625 }, { "epoch": 1.35, "learning_rate": 0.00019116726368318692, "lm_loss": 0.002899169921875, "loss": 0.0062, "step": 3297, "total_loss": 0.002899169921875 }, { "epoch": 1.35, "learning_rate": 0.00019116198519338826, "lm_loss": 0.007476806640625, "loss": 0.008, "step": 3298, "total_loss": 0.007476806640625 }, { "epoch": 1.35, "learning_rate": 0.0001911567051997531, "lm_loss": 0.01092529296875, "loss": 0.0087, "step": 3299, "total_loss": 0.01092529296875 }, { "epoch": 1.35, "learning_rate": 0.0001911514237023686, "lm_loss": 0.005218505859375, "loss": 0.0083, "step": 3300, "total_loss": 0.005218505859375 }, { "epoch": 1.35, "eval_lm_loss": 0.0095286900177598, "eval_loss": 0.00984802283346653, "eval_runtime": 44.1967, "eval_samples_per_second": 22.626, "eval_steps_per_second": 0.204, "eval_total_loss": 0.0095286900177598, "lm_loss": 0.00159454345703125, "step": 3300, "total_loss": 0.00159454345703125 }, { "epoch": 1.35, "learning_rate": 0.00019114614070132185, "lm_loss": 0.005218505859375, "loss": 0.0066, "step": 3301, "total_loss": 0.005218505859375 }, { "epoch": 1.35, "learning_rate": 0.00019114085619669998, "lm_loss": 0.004974365234375, "loss": 0.0069, "step": 3302, "total_loss": 0.004974365234375 }, { "epoch": 1.35, "learning_rate": 0.00019113557018859024, "lm_loss": 0.009033203125, "loss": 0.0073, "step": 3303, "total_loss": 0.009033203125 }, { "epoch": 1.35, "learning_rate": 0.00019113028267707974, "lm_loss": 0.01092529296875, "loss": 0.0082, "step": 3304, "total_loss": 0.01092529296875 }, { "epoch": 1.35, "learning_rate": 0.0001911249936622557, "lm_loss": 0.0074462890625, "loss": 0.0066, "step": 3305, "total_loss": 0.0074462890625 }, { "epoch": 1.35, "learning_rate": 0.0001911197031442055, "lm_loss": 0.01275634765625, "loss": 0.0082, "step": 3306, "total_loss": 0.01275634765625 }, { "epoch": 1.35, "learning_rate": 0.00019111441112301626, "lm_loss": 0.01153564453125, "loss": 0.0087, "step": 3307, "total_loss": 0.01153564453125 }, { "epoch": 1.35, "learning_rate": 0.00019110911759877538, "lm_loss": 0.00909423828125, "loss": 0.0077, "step": 3308, "total_loss": 0.00909423828125 }, { "epoch": 1.35, "learning_rate": 0.00019110382257157013, "lm_loss": 0.006866455078125, "loss": 0.006, "step": 3309, "total_loss": 0.006866455078125 }, { "epoch": 1.35, "learning_rate": 0.00019109852604148788, "lm_loss": 0.003814697265625, "loss": 0.009, "step": 3310, "total_loss": 0.003814697265625 }, { "epoch": 1.35, "learning_rate": 0.000191093228008616, "lm_loss": 0.00567626953125, "loss": 0.0066, "step": 3311, "total_loss": 0.00567626953125 }, { "epoch": 1.35, "learning_rate": 0.00019108792847304188, "lm_loss": 0.0125732421875, "loss": 0.0071, "step": 3312, "total_loss": 0.0125732421875 }, { "epoch": 1.35, "learning_rate": 0.00019108262743485299, "lm_loss": 0.004150390625, "loss": 0.0095, "step": 3313, "total_loss": 0.004150390625 }, { "epoch": 1.35, "learning_rate": 0.0001910773248941367, "lm_loss": 0.01043701171875, "loss": 0.0074, "step": 3314, "total_loss": 0.01043701171875 }, { "epoch": 1.36, "learning_rate": 0.0001910720208509805, "lm_loss": 0.0032196044921875, "loss": 0.0084, "step": 3315, "total_loss": 0.0032196044921875 }, { "epoch": 1.36, "learning_rate": 0.00019106671530547196, "lm_loss": 0.010986328125, "loss": 0.007, "step": 3316, "total_loss": 0.010986328125 }, { "epoch": 1.36, "learning_rate": 0.00019106140825769853, "lm_loss": 0.006103515625, "loss": 0.0073, "step": 3317, "total_loss": 0.006103515625 }, { "epoch": 1.36, "learning_rate": 0.00019105609970774775, "lm_loss": 0.00396728515625, "loss": 0.0065, "step": 3318, "total_loss": 0.00396728515625 }, { "epoch": 1.36, "learning_rate": 0.00019105078965570726, "lm_loss": 0.006805419921875, "loss": 0.0079, "step": 3319, "total_loss": 0.006805419921875 }, { "epoch": 1.36, "learning_rate": 0.00019104547810166458, "lm_loss": 0.004180908203125, "loss": 0.0084, "step": 3320, "total_loss": 0.004180908203125 }, { "epoch": 1.36, "learning_rate": 0.0001910401650457074, "lm_loss": 0.006103515625, "loss": 0.0071, "step": 3321, "total_loss": 0.006103515625 }, { "epoch": 1.36, "learning_rate": 0.0001910348504879233, "lm_loss": 0.0030517578125, "loss": 0.008, "step": 3322, "total_loss": 0.0030517578125 }, { "epoch": 1.36, "learning_rate": 0.0001910295344284, "lm_loss": 0.007568359375, "loss": 0.0076, "step": 3323, "total_loss": 0.007568359375 }, { "epoch": 1.36, "learning_rate": 0.00019102421686722517, "lm_loss": 0.0198974609375, "loss": 0.0075, "step": 3324, "total_loss": 0.0198974609375 }, { "epoch": 1.36, "learning_rate": 0.00019101889780448654, "lm_loss": 0.003631591796875, "loss": 0.0079, "step": 3325, "total_loss": 0.003631591796875 }, { "epoch": 1.36, "learning_rate": 0.00019101357724027183, "lm_loss": 0.005859375, "loss": 0.0084, "step": 3326, "total_loss": 0.005859375 }, { "epoch": 1.36, "learning_rate": 0.00019100825517466886, "lm_loss": 0.007476806640625, "loss": 0.0096, "step": 3327, "total_loss": 0.007476806640625 }, { "epoch": 1.36, "learning_rate": 0.00019100293160776536, "lm_loss": 0.005615234375, "loss": 0.0078, "step": 3328, "total_loss": 0.005615234375 }, { "epoch": 1.36, "learning_rate": 0.00019099760653964922, "lm_loss": 0.006072998046875, "loss": 0.007, "step": 3329, "total_loss": 0.006072998046875 }, { "epoch": 1.36, "learning_rate": 0.00019099227997040826, "lm_loss": 0.00872802734375, "loss": 0.0085, "step": 3330, "total_loss": 0.00872802734375 }, { "epoch": 1.36, "learning_rate": 0.00019098695190013028, "lm_loss": 0.00970458984375, "loss": 0.007, "step": 3331, "total_loss": 0.00970458984375 }, { "epoch": 1.36, "learning_rate": 0.0001909816223289033, "lm_loss": 0.01129150390625, "loss": 0.0074, "step": 3332, "total_loss": 0.01129150390625 }, { "epoch": 1.36, "learning_rate": 0.00019097629125681514, "lm_loss": 0.00396728515625, "loss": 0.0077, "step": 3333, "total_loss": 0.00396728515625 }, { "epoch": 1.36, "learning_rate": 0.00019097095868395378, "lm_loss": 0.01019287109375, "loss": 0.0064, "step": 3334, "total_loss": 0.01019287109375 }, { "epoch": 1.36, "learning_rate": 0.00019096562461040718, "lm_loss": 0.01116943359375, "loss": 0.0067, "step": 3335, "total_loss": 0.01116943359375 }, { "epoch": 1.36, "learning_rate": 0.0001909602890362633, "lm_loss": 0.0093994140625, "loss": 0.0078, "step": 3336, "total_loss": 0.0093994140625 }, { "epoch": 1.36, "learning_rate": 0.00019095495196161025, "lm_loss": 0.0078125, "loss": 0.0071, "step": 3337, "total_loss": 0.0078125 }, { "epoch": 1.36, "learning_rate": 0.00019094961338653597, "lm_loss": 0.012451171875, "loss": 0.0082, "step": 3338, "total_loss": 0.012451171875 }, { "epoch": 1.37, "learning_rate": 0.00019094427331112855, "lm_loss": 0.00482177734375, "loss": 0.0069, "step": 3339, "total_loss": 0.00482177734375 }, { "epoch": 1.37, "learning_rate": 0.00019093893173547613, "lm_loss": 0.010986328125, "loss": 0.0082, "step": 3340, "total_loss": 0.010986328125 }, { "epoch": 1.37, "learning_rate": 0.0001909335886596668, "lm_loss": 0.00186920166015625, "loss": 0.0079, "step": 3341, "total_loss": 0.00186920166015625 }, { "epoch": 1.37, "learning_rate": 0.00019092824408378867, "lm_loss": 0.0040283203125, "loss": 0.0071, "step": 3342, "total_loss": 0.0040283203125 }, { "epoch": 1.37, "learning_rate": 0.00019092289800793, "lm_loss": 0.004119873046875, "loss": 0.0063, "step": 3343, "total_loss": 0.004119873046875 }, { "epoch": 1.37, "learning_rate": 0.00019091755043217887, "lm_loss": 0.007049560546875, "loss": 0.0063, "step": 3344, "total_loss": 0.007049560546875 }, { "epoch": 1.37, "learning_rate": 0.00019091220135662355, "lm_loss": 0.01007080078125, "loss": 0.0097, "step": 3345, "total_loss": 0.01007080078125 }, { "epoch": 1.37, "learning_rate": 0.00019090685078135223, "lm_loss": 0.007781982421875, "loss": 0.0096, "step": 3346, "total_loss": 0.007781982421875 }, { "epoch": 1.37, "learning_rate": 0.0001909014987064533, "lm_loss": 0.006561279296875, "loss": 0.0068, "step": 3347, "total_loss": 0.006561279296875 }, { "epoch": 1.37, "learning_rate": 0.00019089614513201488, "lm_loss": 0.005462646484375, "loss": 0.0072, "step": 3348, "total_loss": 0.005462646484375 }, { "epoch": 1.37, "learning_rate": 0.0001908907900581254, "lm_loss": 0.0096435546875, "loss": 0.0061, "step": 3349, "total_loss": 0.0096435546875 }, { "epoch": 1.37, "learning_rate": 0.00019088543348487317, "lm_loss": 0.01092529296875, "loss": 0.0082, "step": 3350, "total_loss": 0.01092529296875 }, { "epoch": 1.37, "learning_rate": 0.00019088007541234655, "lm_loss": 0.00592041015625, "loss": 0.0073, "step": 3351, "total_loss": 0.00592041015625 }, { "epoch": 1.37, "learning_rate": 0.00019087471584063392, "lm_loss": 0.005218505859375, "loss": 0.008, "step": 3352, "total_loss": 0.005218505859375 }, { "epoch": 1.37, "learning_rate": 0.0001908693547698237, "lm_loss": 0.0024566650390625, "loss": 0.008, "step": 3353, "total_loss": 0.0024566650390625 }, { "epoch": 1.37, "learning_rate": 0.00019086399220000436, "lm_loss": 0.005950927734375, "loss": 0.0075, "step": 3354, "total_loss": 0.005950927734375 }, { "epoch": 1.37, "learning_rate": 0.0001908586281312643, "lm_loss": 0.00592041015625, "loss": 0.0068, "step": 3355, "total_loss": 0.00592041015625 }, { "epoch": 1.37, "learning_rate": 0.00019085326256369206, "lm_loss": 0.00775146484375, "loss": 0.0065, "step": 3356, "total_loss": 0.00775146484375 }, { "epoch": 1.37, "learning_rate": 0.0001908478954973761, "lm_loss": 0.0045166015625, "loss": 0.0079, "step": 3357, "total_loss": 0.0045166015625 }, { "epoch": 1.37, "learning_rate": 0.00019084252693240506, "lm_loss": 0.004852294921875, "loss": 0.0083, "step": 3358, "total_loss": 0.004852294921875 }, { "epoch": 1.37, "learning_rate": 0.00019083715686886736, "lm_loss": 0.002838134765625, "loss": 0.0079, "step": 3359, "total_loss": 0.002838134765625 }, { "epoch": 1.37, "learning_rate": 0.0001908317853068517, "lm_loss": 0.0103759765625, "loss": 0.0074, "step": 3360, "total_loss": 0.0103759765625 }, { "epoch": 1.37, "learning_rate": 0.00019082641224644662, "lm_loss": 0.00921630859375, "loss": 0.0059, "step": 3361, "total_loss": 0.00921630859375 }, { "epoch": 1.37, "learning_rate": 0.0001908210376877408, "lm_loss": 0.00897216796875, "loss": 0.0064, "step": 3362, "total_loss": 0.00897216796875 }, { "epoch": 1.37, "learning_rate": 0.00019081566163082285, "lm_loss": 0.0115966796875, "loss": 0.0081, "step": 3363, "total_loss": 0.0115966796875 }, { "epoch": 1.38, "learning_rate": 0.00019081028407578153, "lm_loss": 0.00799560546875, "loss": 0.0071, "step": 3364, "total_loss": 0.00799560546875 }, { "epoch": 1.38, "learning_rate": 0.00019080490502270552, "lm_loss": 0.004364013671875, "loss": 0.0072, "step": 3365, "total_loss": 0.004364013671875 }, { "epoch": 1.38, "learning_rate": 0.0001907995244716835, "lm_loss": 0.01019287109375, "loss": 0.008, "step": 3366, "total_loss": 0.01019287109375 }, { "epoch": 1.38, "learning_rate": 0.00019079414242280428, "lm_loss": 0.01385498046875, "loss": 0.0112, "step": 3367, "total_loss": 0.01385498046875 }, { "epoch": 1.38, "learning_rate": 0.00019078875887615666, "lm_loss": 0.0034637451171875, "loss": 0.0078, "step": 3368, "total_loss": 0.0034637451171875 }, { "epoch": 1.38, "learning_rate": 0.00019078337383182943, "lm_loss": 0.0093994140625, "loss": 0.0077, "step": 3369, "total_loss": 0.0093994140625 }, { "epoch": 1.38, "learning_rate": 0.00019077798728991138, "lm_loss": 0.00518798828125, "loss": 0.0059, "step": 3370, "total_loss": 0.00518798828125 }, { "epoch": 1.38, "learning_rate": 0.00019077259925049146, "lm_loss": 0.00946044921875, "loss": 0.0066, "step": 3371, "total_loss": 0.00946044921875 }, { "epoch": 1.38, "learning_rate": 0.00019076720971365844, "lm_loss": 0.0166015625, "loss": 0.0062, "step": 3372, "total_loss": 0.0166015625 }, { "epoch": 1.38, "learning_rate": 0.00019076181867950132, "lm_loss": 0.006500244140625, "loss": 0.0086, "step": 3373, "total_loss": 0.006500244140625 }, { "epoch": 1.38, "learning_rate": 0.000190756426148109, "lm_loss": 0.002532958984375, "loss": 0.0073, "step": 3374, "total_loss": 0.002532958984375 }, { "epoch": 1.38, "learning_rate": 0.00019075103211957045, "lm_loss": 0.01141357421875, "loss": 0.0082, "step": 3375, "total_loss": 0.01141357421875 }, { "epoch": 1.38, "learning_rate": 0.0001907456365939746, "lm_loss": 0.01031494140625, "loss": 0.0081, "step": 3376, "total_loss": 0.01031494140625 }, { "epoch": 1.38, "learning_rate": 0.00019074023957141051, "lm_loss": 0.00531005859375, "loss": 0.0074, "step": 3377, "total_loss": 0.00531005859375 }, { "epoch": 1.38, "learning_rate": 0.0001907348410519672, "lm_loss": 0.009033203125, "loss": 0.0069, "step": 3378, "total_loss": 0.009033203125 }, { "epoch": 1.38, "learning_rate": 0.00019072944103573373, "lm_loss": 0.00823974609375, "loss": 0.0074, "step": 3379, "total_loss": 0.00823974609375 }, { "epoch": 1.38, "learning_rate": 0.00019072403952279915, "lm_loss": 0.007080078125, "loss": 0.0075, "step": 3380, "total_loss": 0.007080078125 }, { "epoch": 1.38, "learning_rate": 0.0001907186365132526, "lm_loss": 0.00714111328125, "loss": 0.0076, "step": 3381, "total_loss": 0.00714111328125 }, { "epoch": 1.38, "learning_rate": 0.0001907132320071832, "lm_loss": 0.0048828125, "loss": 0.0078, "step": 3382, "total_loss": 0.0048828125 }, { "epoch": 1.38, "learning_rate": 0.0001907078260046801, "lm_loss": 0.01397705078125, "loss": 0.0083, "step": 3383, "total_loss": 0.01397705078125 }, { "epoch": 1.38, "learning_rate": 0.00019070241850583245, "lm_loss": 0.002166748046875, "loss": 0.0079, "step": 3384, "total_loss": 0.002166748046875 }, { "epoch": 1.38, "learning_rate": 0.0001906970095107295, "lm_loss": 0.006927490234375, "loss": 0.0074, "step": 3385, "total_loss": 0.006927490234375 }, { "epoch": 1.38, "learning_rate": 0.00019069159901946047, "lm_loss": 0.01092529296875, "loss": 0.0073, "step": 3386, "total_loss": 0.01092529296875 }, { "epoch": 1.38, "learning_rate": 0.0001906861870321146, "lm_loss": 0.0115966796875, "loss": 0.0084, "step": 3387, "total_loss": 0.0115966796875 }, { "epoch": 1.39, "learning_rate": 0.00019068077354878117, "lm_loss": 0.00640869140625, "loss": 0.0061, "step": 3388, "total_loss": 0.00640869140625 }, { "epoch": 1.39, "learning_rate": 0.0001906753585695495, "lm_loss": 0.005523681640625, "loss": 0.0092, "step": 3389, "total_loss": 0.005523681640625 }, { "epoch": 1.39, "learning_rate": 0.00019066994209450888, "lm_loss": 0.00927734375, "loss": 0.0066, "step": 3390, "total_loss": 0.00927734375 }, { "epoch": 1.39, "learning_rate": 0.0001906645241237487, "lm_loss": 0.007110595703125, "loss": 0.0084, "step": 3391, "total_loss": 0.007110595703125 }, { "epoch": 1.39, "learning_rate": 0.0001906591046573583, "lm_loss": 0.005126953125, "loss": 0.0088, "step": 3392, "total_loss": 0.005126953125 }, { "epoch": 1.39, "learning_rate": 0.00019065368369542714, "lm_loss": 0.01495361328125, "loss": 0.0082, "step": 3393, "total_loss": 0.01495361328125 }, { "epoch": 1.39, "learning_rate": 0.00019064826123804456, "lm_loss": 0.004302978515625, "loss": 0.0085, "step": 3394, "total_loss": 0.004302978515625 }, { "epoch": 1.39, "learning_rate": 0.00019064283728530007, "lm_loss": 0.004608154296875, "loss": 0.0087, "step": 3395, "total_loss": 0.004608154296875 }, { "epoch": 1.39, "learning_rate": 0.00019063741183728317, "lm_loss": 0.005462646484375, "loss": 0.0074, "step": 3396, "total_loss": 0.005462646484375 }, { "epoch": 1.39, "learning_rate": 0.00019063198489408328, "lm_loss": 0.0032806396484375, "loss": 0.008, "step": 3397, "total_loss": 0.0032806396484375 }, { "epoch": 1.39, "learning_rate": 0.00019062655645579, "lm_loss": 0.006988525390625, "loss": 0.0069, "step": 3398, "total_loss": 0.006988525390625 }, { "epoch": 1.39, "learning_rate": 0.00019062112652249282, "lm_loss": 0.00958251953125, "loss": 0.006, "step": 3399, "total_loss": 0.00958251953125 }, { "epoch": 1.39, "learning_rate": 0.00019061569509428136, "lm_loss": 0.013427734375, "loss": 0.0068, "step": 3400, "total_loss": 0.013427734375 }, { "epoch": 1.39, "eval_lm_loss": 0.009199698455631733, "eval_loss": 0.009643125347793102, "eval_runtime": 43.9104, "eval_samples_per_second": 22.774, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009199698455631733, "lm_loss": 0.00109100341796875, "step": 3400, "total_loss": 0.00109100341796875 }, { "epoch": 1.39, "learning_rate": 0.0001906102621712452, "lm_loss": 0.01165771484375, "loss": 0.0086, "step": 3401, "total_loss": 0.01165771484375 }, { "epoch": 1.39, "learning_rate": 0.00019060482775347395, "lm_loss": 0.0111083984375, "loss": 0.007, "step": 3402, "total_loss": 0.0111083984375 }, { "epoch": 1.39, "learning_rate": 0.00019059939184105725, "lm_loss": 0.01287841796875, "loss": 0.008, "step": 3403, "total_loss": 0.01287841796875 }, { "epoch": 1.39, "learning_rate": 0.00019059395443408482, "lm_loss": 0.012451171875, "loss": 0.0071, "step": 3404, "total_loss": 0.012451171875 }, { "epoch": 1.39, "learning_rate": 0.00019058851553264633, "lm_loss": 0.006317138671875, "loss": 0.0077, "step": 3405, "total_loss": 0.006317138671875 }, { "epoch": 1.39, "learning_rate": 0.00019058307513683152, "lm_loss": 0.01416015625, "loss": 0.0099, "step": 3406, "total_loss": 0.01416015625 }, { "epoch": 1.39, "learning_rate": 0.00019057763324673008, "lm_loss": 0.004119873046875, "loss": 0.0071, "step": 3407, "total_loss": 0.004119873046875 }, { "epoch": 1.39, "learning_rate": 0.00019057218986243184, "lm_loss": 0.0086669921875, "loss": 0.0086, "step": 3408, "total_loss": 0.0086669921875 }, { "epoch": 1.39, "learning_rate": 0.00019056674498402657, "lm_loss": 0.0045166015625, "loss": 0.0074, "step": 3409, "total_loss": 0.0045166015625 }, { "epoch": 1.39, "learning_rate": 0.0001905612986116041, "lm_loss": 0.00726318359375, "loss": 0.0059, "step": 3410, "total_loss": 0.00726318359375 }, { "epoch": 1.39, "learning_rate": 0.0001905558507452543, "lm_loss": 0.0081787109375, "loss": 0.0072, "step": 3411, "total_loss": 0.0081787109375 }, { "epoch": 1.39, "learning_rate": 0.00019055040138506697, "lm_loss": 0.012451171875, "loss": 0.0082, "step": 3412, "total_loss": 0.012451171875 }, { "epoch": 1.4, "learning_rate": 0.00019054495053113203, "lm_loss": 0.0076904296875, "loss": 0.0084, "step": 3413, "total_loss": 0.0076904296875 }, { "epoch": 1.4, "learning_rate": 0.00019053949818353944, "lm_loss": 0.00537109375, "loss": 0.0063, "step": 3414, "total_loss": 0.00537109375 }, { "epoch": 1.4, "learning_rate": 0.00019053404434237915, "lm_loss": 0.01556396484375, "loss": 0.0085, "step": 3415, "total_loss": 0.01556396484375 }, { "epoch": 1.4, "learning_rate": 0.00019052858900774102, "lm_loss": 0.006744384765625, "loss": 0.0079, "step": 3416, "total_loss": 0.006744384765625 }, { "epoch": 1.4, "learning_rate": 0.00019052313217971518, "lm_loss": 0.01287841796875, "loss": 0.0069, "step": 3417, "total_loss": 0.01287841796875 }, { "epoch": 1.4, "learning_rate": 0.00019051767385839155, "lm_loss": 0.005859375, "loss": 0.0079, "step": 3418, "total_loss": 0.005859375 }, { "epoch": 1.4, "learning_rate": 0.0001905122140438602, "lm_loss": 0.01409912109375, "loss": 0.0089, "step": 3419, "total_loss": 0.01409912109375 }, { "epoch": 1.4, "learning_rate": 0.00019050675273621126, "lm_loss": 0.01031494140625, "loss": 0.006, "step": 3420, "total_loss": 0.01031494140625 }, { "epoch": 1.4, "learning_rate": 0.00019050128993553474, "lm_loss": 0.006500244140625, "loss": 0.0082, "step": 3421, "total_loss": 0.006500244140625 }, { "epoch": 1.4, "learning_rate": 0.00019049582564192077, "lm_loss": 0.00640869140625, "loss": 0.007, "step": 3422, "total_loss": 0.00640869140625 }, { "epoch": 1.4, "learning_rate": 0.0001904903598554595, "lm_loss": 0.012451171875, "loss": 0.0061, "step": 3423, "total_loss": 0.012451171875 }, { "epoch": 1.4, "learning_rate": 0.00019048489257624112, "lm_loss": 0.01361083984375, "loss": 0.0097, "step": 3424, "total_loss": 0.01361083984375 }, { "epoch": 1.4, "learning_rate": 0.00019047942380435578, "lm_loss": 0.011962890625, "loss": 0.008, "step": 3425, "total_loss": 0.011962890625 }, { "epoch": 1.4, "learning_rate": 0.00019047395353989372, "lm_loss": 0.013427734375, "loss": 0.0074, "step": 3426, "total_loss": 0.013427734375 }, { "epoch": 1.4, "learning_rate": 0.0001904684817829452, "lm_loss": 0.00274658203125, "loss": 0.0074, "step": 3427, "total_loss": 0.00274658203125 }, { "epoch": 1.4, "learning_rate": 0.0001904630085336004, "lm_loss": 0.01104736328125, "loss": 0.0096, "step": 3428, "total_loss": 0.01104736328125 }, { "epoch": 1.4, "learning_rate": 0.00019045753379194968, "lm_loss": 0.007659912109375, "loss": 0.0077, "step": 3429, "total_loss": 0.007659912109375 }, { "epoch": 1.4, "learning_rate": 0.00019045205755808335, "lm_loss": 0.0078125, "loss": 0.0079, "step": 3430, "total_loss": 0.0078125 }, { "epoch": 1.4, "learning_rate": 0.00019044657983209175, "lm_loss": 0.0084228515625, "loss": 0.0071, "step": 3431, "total_loss": 0.0084228515625 }, { "epoch": 1.4, "learning_rate": 0.00019044110061406522, "lm_loss": 0.008544921875, "loss": 0.0089, "step": 3432, "total_loss": 0.008544921875 }, { "epoch": 1.4, "learning_rate": 0.00019043561990409412, "lm_loss": 0.00482177734375, "loss": 0.008, "step": 3433, "total_loss": 0.00482177734375 }, { "epoch": 1.4, "learning_rate": 0.00019043013770226894, "lm_loss": 0.01043701171875, "loss": 0.0089, "step": 3434, "total_loss": 0.01043701171875 }, { "epoch": 1.4, "learning_rate": 0.00019042465400868005, "lm_loss": 0.014404296875, "loss": 0.0081, "step": 3435, "total_loss": 0.014404296875 }, { "epoch": 1.4, "learning_rate": 0.00019041916882341794, "lm_loss": 0.005462646484375, "loss": 0.0089, "step": 3436, "total_loss": 0.005462646484375 }, { "epoch": 1.41, "learning_rate": 0.0001904136821465731, "lm_loss": 0.0113525390625, "loss": 0.0082, "step": 3437, "total_loss": 0.0113525390625 }, { "epoch": 1.41, "learning_rate": 0.00019040819397823596, "lm_loss": 0.0074462890625, "loss": 0.0089, "step": 3438, "total_loss": 0.0074462890625 }, { "epoch": 1.41, "learning_rate": 0.0001904027043184972, "lm_loss": 0.00531005859375, "loss": 0.0071, "step": 3439, "total_loss": 0.00531005859375 }, { "epoch": 1.41, "learning_rate": 0.00019039721316744726, "lm_loss": 0.0028228759765625, "loss": 0.0068, "step": 3440, "total_loss": 0.0028228759765625 }, { "epoch": 1.41, "learning_rate": 0.0001903917205251768, "lm_loss": 0.007720947265625, "loss": 0.0088, "step": 3441, "total_loss": 0.007720947265625 }, { "epoch": 1.41, "learning_rate": 0.00019038622639177635, "lm_loss": 0.0062255859375, "loss": 0.0069, "step": 3442, "total_loss": 0.0062255859375 }, { "epoch": 1.41, "learning_rate": 0.00019038073076733665, "lm_loss": 0.005828857421875, "loss": 0.0081, "step": 3443, "total_loss": 0.005828857421875 }, { "epoch": 1.41, "learning_rate": 0.00019037523365194825, "lm_loss": 0.00714111328125, "loss": 0.0085, "step": 3444, "total_loss": 0.00714111328125 }, { "epoch": 1.41, "learning_rate": 0.00019036973504570187, "lm_loss": 0.0093994140625, "loss": 0.0067, "step": 3445, "total_loss": 0.0093994140625 }, { "epoch": 1.41, "learning_rate": 0.0001903642349486882, "lm_loss": 0.00531005859375, "loss": 0.0063, "step": 3446, "total_loss": 0.00531005859375 }, { "epoch": 1.41, "learning_rate": 0.00019035873336099805, "lm_loss": 0.01129150390625, "loss": 0.0084, "step": 3447, "total_loss": 0.01129150390625 }, { "epoch": 1.41, "learning_rate": 0.0001903532302827221, "lm_loss": 0.005462646484375, "loss": 0.0064, "step": 3448, "total_loss": 0.005462646484375 }, { "epoch": 1.41, "learning_rate": 0.00019034772571395117, "lm_loss": 0.00531005859375, "loss": 0.0077, "step": 3449, "total_loss": 0.00531005859375 }, { "epoch": 1.41, "learning_rate": 0.000190342219654776, "lm_loss": 0.00482177734375, "loss": 0.0062, "step": 3450, "total_loss": 0.00482177734375 }, { "epoch": 1.41, "learning_rate": 0.00019033671210528749, "lm_loss": 0.00640869140625, "loss": 0.0071, "step": 3451, "total_loss": 0.00640869140625 }, { "epoch": 1.41, "learning_rate": 0.00019033120306557647, "lm_loss": 0.00811767578125, "loss": 0.0075, "step": 3452, "total_loss": 0.00811767578125 }, { "epoch": 1.41, "learning_rate": 0.00019032569253573382, "lm_loss": 0.004058837890625, "loss": 0.0079, "step": 3453, "total_loss": 0.004058837890625 }, { "epoch": 1.41, "learning_rate": 0.00019032018051585043, "lm_loss": 0.01153564453125, "loss": 0.0078, "step": 3454, "total_loss": 0.01153564453125 }, { "epoch": 1.41, "learning_rate": 0.00019031466700601725, "lm_loss": 0.00933837890625, "loss": 0.0075, "step": 3455, "total_loss": 0.00933837890625 }, { "epoch": 1.41, "learning_rate": 0.00019030915200632522, "lm_loss": 0.0054931640625, "loss": 0.0068, "step": 3456, "total_loss": 0.0054931640625 }, { "epoch": 1.41, "learning_rate": 0.00019030363551686533, "lm_loss": 0.00506591796875, "loss": 0.0064, "step": 3457, "total_loss": 0.00506591796875 }, { "epoch": 1.41, "learning_rate": 0.00019029811753772856, "lm_loss": 0.01031494140625, "loss": 0.0079, "step": 3458, "total_loss": 0.01031494140625 }, { "epoch": 1.41, "learning_rate": 0.00019029259806900594, "lm_loss": 0.005584716796875, "loss": 0.0073, "step": 3459, "total_loss": 0.005584716796875 }, { "epoch": 1.41, "learning_rate": 0.00019028707711078854, "lm_loss": 0.007110595703125, "loss": 0.0083, "step": 3460, "total_loss": 0.007110595703125 }, { "epoch": 1.41, "learning_rate": 0.0001902815546631674, "lm_loss": 0.002899169921875, "loss": 0.0075, "step": 3461, "total_loss": 0.002899169921875 }, { "epoch": 1.42, "learning_rate": 0.00019027603072623367, "lm_loss": 0.004547119140625, "loss": 0.007, "step": 3462, "total_loss": 0.004547119140625 }, { "epoch": 1.42, "learning_rate": 0.00019027050530007845, "lm_loss": 0.0020294189453125, "loss": 0.0068, "step": 3463, "total_loss": 0.0020294189453125 }, { "epoch": 1.42, "learning_rate": 0.00019026497838479288, "lm_loss": 0.01312255859375, "loss": 0.0072, "step": 3464, "total_loss": 0.01312255859375 }, { "epoch": 1.42, "learning_rate": 0.00019025944998046814, "lm_loss": 0.0087890625, "loss": 0.0076, "step": 3465, "total_loss": 0.0087890625 }, { "epoch": 1.42, "learning_rate": 0.00019025392008719543, "lm_loss": 0.0023040771484375, "loss": 0.0058, "step": 3466, "total_loss": 0.0023040771484375 }, { "epoch": 1.42, "learning_rate": 0.00019024838870506598, "lm_loss": 0.0098876953125, "loss": 0.0077, "step": 3467, "total_loss": 0.0098876953125 }, { "epoch": 1.42, "learning_rate": 0.00019024285583417103, "lm_loss": 0.005889892578125, "loss": 0.0081, "step": 3468, "total_loss": 0.005889892578125 }, { "epoch": 1.42, "learning_rate": 0.00019023732147460185, "lm_loss": 0.00433349609375, "loss": 0.0068, "step": 3469, "total_loss": 0.00433349609375 }, { "epoch": 1.42, "learning_rate": 0.00019023178562644973, "lm_loss": 0.0111083984375, "loss": 0.0109, "step": 3470, "total_loss": 0.0111083984375 }, { "epoch": 1.42, "learning_rate": 0.00019022624828980602, "lm_loss": 0.006683349609375, "loss": 0.0077, "step": 3471, "total_loss": 0.006683349609375 }, { "epoch": 1.42, "learning_rate": 0.00019022070946476204, "lm_loss": 0.0026397705078125, "loss": 0.007, "step": 3472, "total_loss": 0.0026397705078125 }, { "epoch": 1.42, "learning_rate": 0.00019021516915140914, "lm_loss": 0.007659912109375, "loss": 0.0061, "step": 3473, "total_loss": 0.007659912109375 }, { "epoch": 1.42, "learning_rate": 0.00019020962734983874, "lm_loss": 0.00933837890625, "loss": 0.0069, "step": 3474, "total_loss": 0.00933837890625 }, { "epoch": 1.42, "learning_rate": 0.00019020408406014228, "lm_loss": 0.0072021484375, "loss": 0.0061, "step": 3475, "total_loss": 0.0072021484375 }, { "epoch": 1.42, "learning_rate": 0.0001901985392824112, "lm_loss": 0.00872802734375, "loss": 0.0071, "step": 3476, "total_loss": 0.00872802734375 }, { "epoch": 1.42, "learning_rate": 0.0001901929930167369, "lm_loss": 0.0107421875, "loss": 0.008, "step": 3477, "total_loss": 0.0107421875 }, { "epoch": 1.42, "learning_rate": 0.00019018744526321096, "lm_loss": 0.00897216796875, "loss": 0.0069, "step": 3478, "total_loss": 0.00897216796875 }, { "epoch": 1.42, "learning_rate": 0.00019018189602192486, "lm_loss": 0.005950927734375, "loss": 0.0082, "step": 3479, "total_loss": 0.005950927734375 }, { "epoch": 1.42, "learning_rate": 0.00019017634529297012, "lm_loss": 0.006378173828125, "loss": 0.0077, "step": 3480, "total_loss": 0.006378173828125 }, { "epoch": 1.42, "learning_rate": 0.00019017079307643835, "lm_loss": 0.0101318359375, "loss": 0.0068, "step": 3481, "total_loss": 0.0101318359375 }, { "epoch": 1.42, "learning_rate": 0.0001901652393724211, "lm_loss": 0.0079345703125, "loss": 0.0055, "step": 3482, "total_loss": 0.0079345703125 }, { "epoch": 1.42, "learning_rate": 0.00019015968418101004, "lm_loss": 0.005859375, "loss": 0.0079, "step": 3483, "total_loss": 0.005859375 }, { "epoch": 1.42, "learning_rate": 0.00019015412750229675, "lm_loss": 0.00872802734375, "loss": 0.0093, "step": 3484, "total_loss": 0.00872802734375 }, { "epoch": 1.42, "learning_rate": 0.00019014856933637295, "lm_loss": 0.006500244140625, "loss": 0.0088, "step": 3485, "total_loss": 0.006500244140625 }, { "epoch": 1.43, "learning_rate": 0.00019014300968333027, "lm_loss": 0.00341796875, "loss": 0.0081, "step": 3486, "total_loss": 0.00341796875 }, { "epoch": 1.43, "learning_rate": 0.0001901374485432605, "lm_loss": 0.00897216796875, "loss": 0.0082, "step": 3487, "total_loss": 0.00897216796875 }, { "epoch": 1.43, "learning_rate": 0.0001901318859162553, "lm_loss": 0.007720947265625, "loss": 0.0087, "step": 3488, "total_loss": 0.007720947265625 }, { "epoch": 1.43, "learning_rate": 0.00019012632180240646, "lm_loss": 0.00445556640625, "loss": 0.0078, "step": 3489, "total_loss": 0.00445556640625 }, { "epoch": 1.43, "learning_rate": 0.0001901207562018058, "lm_loss": 0.00823974609375, "loss": 0.0071, "step": 3490, "total_loss": 0.00823974609375 }, { "epoch": 1.43, "learning_rate": 0.0001901151891145451, "lm_loss": 0.0029449462890625, "loss": 0.0067, "step": 3491, "total_loss": 0.0029449462890625 }, { "epoch": 1.43, "learning_rate": 0.00019010962054071616, "lm_loss": 0.00848388671875, "loss": 0.0063, "step": 3492, "total_loss": 0.00848388671875 }, { "epoch": 1.43, "learning_rate": 0.00019010405048041094, "lm_loss": 0.00811767578125, "loss": 0.0065, "step": 3493, "total_loss": 0.00811767578125 }, { "epoch": 1.43, "learning_rate": 0.00019009847893372122, "lm_loss": 0.004486083984375, "loss": 0.0064, "step": 3494, "total_loss": 0.004486083984375 }, { "epoch": 1.43, "learning_rate": 0.000190092905900739, "lm_loss": 0.008544921875, "loss": 0.0089, "step": 3495, "total_loss": 0.008544921875 }, { "epoch": 1.43, "learning_rate": 0.00019008733138155615, "lm_loss": 0.00567626953125, "loss": 0.0064, "step": 3496, "total_loss": 0.00567626953125 }, { "epoch": 1.43, "learning_rate": 0.00019008175537626464, "lm_loss": 0.0024566650390625, "loss": 0.0079, "step": 3497, "total_loss": 0.0024566650390625 }, { "epoch": 1.43, "learning_rate": 0.00019007617788495645, "lm_loss": 0.003204345703125, "loss": 0.0063, "step": 3498, "total_loss": 0.003204345703125 }, { "epoch": 1.43, "learning_rate": 0.00019007059890772365, "lm_loss": 0.00439453125, "loss": 0.008, "step": 3499, "total_loss": 0.00439453125 }, { "epoch": 1.43, "learning_rate": 0.00019006501844465824, "lm_loss": 0.0125732421875, "loss": 0.0088, "step": 3500, "total_loss": 0.0125732421875 }, { "epoch": 1.43, "eval_lm_loss": 0.009533802978694439, "eval_loss": 0.009850949980318546, "eval_runtime": 44.076, "eval_samples_per_second": 22.688, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009533802978694439, "lm_loss": 0.00109100341796875, "step": 3500, "total_loss": 0.00109100341796875 }, { "epoch": 1.43, "learning_rate": 0.0001900594364958522, "lm_loss": 0.0089111328125, "loss": 0.0067, "step": 3501, "total_loss": 0.0089111328125 }, { "epoch": 1.43, "learning_rate": 0.0001900538530613977, "lm_loss": 0.007476806640625, "loss": 0.0069, "step": 3502, "total_loss": 0.007476806640625 }, { "epoch": 1.43, "learning_rate": 0.00019004826814138684, "lm_loss": 0.005615234375, "loss": 0.0071, "step": 3503, "total_loss": 0.005615234375 }, { "epoch": 1.43, "learning_rate": 0.00019004268173591172, "lm_loss": 0.0042724609375, "loss": 0.0067, "step": 3504, "total_loss": 0.0042724609375 }, { "epoch": 1.43, "learning_rate": 0.0001900370938450645, "lm_loss": 0.01422119140625, "loss": 0.0092, "step": 3505, "total_loss": 0.01422119140625 }, { "epoch": 1.43, "learning_rate": 0.00019003150446893736, "lm_loss": 0.006805419921875, "loss": 0.0081, "step": 3506, "total_loss": 0.006805419921875 }, { "epoch": 1.43, "learning_rate": 0.00019002591360762252, "lm_loss": 0.01043701171875, "loss": 0.0095, "step": 3507, "total_loss": 0.01043701171875 }, { "epoch": 1.43, "learning_rate": 0.00019002032126121219, "lm_loss": 0.0025177001953125, "loss": 0.0077, "step": 3508, "total_loss": 0.0025177001953125 }, { "epoch": 1.43, "learning_rate": 0.00019001472742979862, "lm_loss": 0.0084228515625, "loss": 0.0072, "step": 3509, "total_loss": 0.0084228515625 }, { "epoch": 1.43, "learning_rate": 0.00019000913211347412, "lm_loss": 0.0079345703125, "loss": 0.0083, "step": 3510, "total_loss": 0.0079345703125 }, { "epoch": 1.44, "learning_rate": 0.00019000353531233098, "lm_loss": 0.009765625, "loss": 0.0089, "step": 3511, "total_loss": 0.009765625 }, { "epoch": 1.44, "learning_rate": 0.0001899979370264615, "lm_loss": 0.0040283203125, "loss": 0.0077, "step": 3512, "total_loss": 0.0040283203125 }, { "epoch": 1.44, "learning_rate": 0.00018999233725595804, "lm_loss": 0.0079345703125, "loss": 0.0077, "step": 3513, "total_loss": 0.0079345703125 }, { "epoch": 1.44, "learning_rate": 0.000189986736000913, "lm_loss": 0.005035400390625, "loss": 0.0081, "step": 3514, "total_loss": 0.005035400390625 }, { "epoch": 1.44, "learning_rate": 0.00018998113326141878, "lm_loss": 0.00445556640625, "loss": 0.008, "step": 3515, "total_loss": 0.00445556640625 }, { "epoch": 1.44, "learning_rate": 0.00018997552903756776, "lm_loss": 0.0107421875, "loss": 0.0069, "step": 3516, "total_loss": 0.0107421875 }, { "epoch": 1.44, "learning_rate": 0.00018996992332945243, "lm_loss": 0.0205078125, "loss": 0.0099, "step": 3517, "total_loss": 0.0205078125 }, { "epoch": 1.44, "learning_rate": 0.00018996431613716526, "lm_loss": 0.01239013671875, "loss": 0.0075, "step": 3518, "total_loss": 0.01239013671875 }, { "epoch": 1.44, "learning_rate": 0.0001899587074607987, "lm_loss": 0.0106201171875, "loss": 0.0066, "step": 3519, "total_loss": 0.0106201171875 }, { "epoch": 1.44, "learning_rate": 0.00018995309730044535, "lm_loss": 0.007476806640625, "loss": 0.0075, "step": 3520, "total_loss": 0.007476806640625 }, { "epoch": 1.44, "learning_rate": 0.0001899474856561977, "lm_loss": 0.005767822265625, "loss": 0.007, "step": 3521, "total_loss": 0.005767822265625 }, { "epoch": 1.44, "learning_rate": 0.00018994187252814837, "lm_loss": 0.005645751953125, "loss": 0.0052, "step": 3522, "total_loss": 0.005645751953125 }, { "epoch": 1.44, "learning_rate": 0.0001899362579163899, "lm_loss": 0.0181884765625, "loss": 0.0077, "step": 3523, "total_loss": 0.0181884765625 }, { "epoch": 1.44, "learning_rate": 0.00018993064182101495, "lm_loss": 0.01611328125, "loss": 0.0077, "step": 3524, "total_loss": 0.01611328125 }, { "epoch": 1.44, "learning_rate": 0.00018992502424211612, "lm_loss": 0.004058837890625, "loss": 0.0056, "step": 3525, "total_loss": 0.004058837890625 }, { "epoch": 1.44, "learning_rate": 0.00018991940517978613, "lm_loss": 0.005157470703125, "loss": 0.0065, "step": 3526, "total_loss": 0.005157470703125 }, { "epoch": 1.44, "learning_rate": 0.00018991378463411768, "lm_loss": 0.003753662109375, "loss": 0.0067, "step": 3527, "total_loss": 0.003753662109375 }, { "epoch": 1.44, "learning_rate": 0.00018990816260520344, "lm_loss": 0.00616455078125, "loss": 0.0078, "step": 3528, "total_loss": 0.00616455078125 }, { "epoch": 1.44, "learning_rate": 0.00018990253909313616, "lm_loss": 0.00811767578125, "loss": 0.0075, "step": 3529, "total_loss": 0.00811767578125 }, { "epoch": 1.44, "learning_rate": 0.00018989691409800866, "lm_loss": 0.012451171875, "loss": 0.0095, "step": 3530, "total_loss": 0.012451171875 }, { "epoch": 1.44, "learning_rate": 0.00018989128761991367, "lm_loss": 0.00799560546875, "loss": 0.0082, "step": 3531, "total_loss": 0.00799560546875 }, { "epoch": 1.44, "learning_rate": 0.00018988565965894402, "lm_loss": 0.00897216796875, "loss": 0.0076, "step": 3532, "total_loss": 0.00897216796875 }, { "epoch": 1.44, "learning_rate": 0.0001898800302151926, "lm_loss": 0.0037384033203125, "loss": 0.0066, "step": 3533, "total_loss": 0.0037384033203125 }, { "epoch": 1.44, "learning_rate": 0.0001898743992887522, "lm_loss": 0.00323486328125, "loss": 0.0076, "step": 3534, "total_loss": 0.00323486328125 }, { "epoch": 1.45, "learning_rate": 0.00018986876687971576, "lm_loss": 0.0068359375, "loss": 0.0067, "step": 3535, "total_loss": 0.0068359375 }, { "epoch": 1.45, "learning_rate": 0.00018986313298817616, "lm_loss": 0.01080322265625, "loss": 0.0093, "step": 3536, "total_loss": 0.01080322265625 }, { "epoch": 1.45, "learning_rate": 0.00018985749761422636, "lm_loss": 0.005615234375, "loss": 0.0086, "step": 3537, "total_loss": 0.005615234375 }, { "epoch": 1.45, "learning_rate": 0.00018985186075795934, "lm_loss": 0.01611328125, "loss": 0.007, "step": 3538, "total_loss": 0.01611328125 }, { "epoch": 1.45, "learning_rate": 0.00018984622241946807, "lm_loss": 0.010498046875, "loss": 0.0066, "step": 3539, "total_loss": 0.010498046875 }, { "epoch": 1.45, "learning_rate": 0.00018984058259884553, "lm_loss": 0.00250244140625, "loss": 0.0078, "step": 3540, "total_loss": 0.00250244140625 }, { "epoch": 1.45, "learning_rate": 0.00018983494129618485, "lm_loss": 0.003936767578125, "loss": 0.0083, "step": 3541, "total_loss": 0.003936767578125 }, { "epoch": 1.45, "learning_rate": 0.00018982929851157896, "lm_loss": 0.00927734375, "loss": 0.0089, "step": 3542, "total_loss": 0.00927734375 }, { "epoch": 1.45, "learning_rate": 0.00018982365424512104, "lm_loss": 0.00604248046875, "loss": 0.0072, "step": 3543, "total_loss": 0.00604248046875 }, { "epoch": 1.45, "learning_rate": 0.00018981800849690415, "lm_loss": 0.005950927734375, "loss": 0.0077, "step": 3544, "total_loss": 0.005950927734375 }, { "epoch": 1.45, "learning_rate": 0.00018981236126702148, "lm_loss": 0.0034332275390625, "loss": 0.0065, "step": 3545, "total_loss": 0.0034332275390625 }, { "epoch": 1.45, "learning_rate": 0.00018980671255556614, "lm_loss": 0.004058837890625, "loss": 0.0085, "step": 3546, "total_loss": 0.004058837890625 }, { "epoch": 1.45, "learning_rate": 0.00018980106236263133, "lm_loss": 0.0093994140625, "loss": 0.0103, "step": 3547, "total_loss": 0.0093994140625 }, { "epoch": 1.45, "learning_rate": 0.00018979541068831024, "lm_loss": 0.007537841796875, "loss": 0.0081, "step": 3548, "total_loss": 0.007537841796875 }, { "epoch": 1.45, "learning_rate": 0.00018978975753269613, "lm_loss": 0.0096435546875, "loss": 0.0065, "step": 3549, "total_loss": 0.0096435546875 }, { "epoch": 1.45, "learning_rate": 0.00018978410289588224, "lm_loss": 0.0034637451171875, "loss": 0.0076, "step": 3550, "total_loss": 0.0034637451171875 }, { "epoch": 1.45, "learning_rate": 0.00018977844677796186, "lm_loss": 0.00701904296875, "loss": 0.0069, "step": 3551, "total_loss": 0.00701904296875 }, { "epoch": 1.45, "learning_rate": 0.00018977278917902828, "lm_loss": 0.00909423828125, "loss": 0.007, "step": 3552, "total_loss": 0.00909423828125 }, { "epoch": 1.45, "learning_rate": 0.00018976713009917484, "lm_loss": 0.00830078125, "loss": 0.0085, "step": 3553, "total_loss": 0.00830078125 }, { "epoch": 1.45, "learning_rate": 0.00018976146953849488, "lm_loss": 0.0137939453125, "loss": 0.0085, "step": 3554, "total_loss": 0.0137939453125 }, { "epoch": 1.45, "learning_rate": 0.0001897558074970818, "lm_loss": 0.016845703125, "loss": 0.0084, "step": 3555, "total_loss": 0.016845703125 }, { "epoch": 1.45, "learning_rate": 0.000189750143975029, "lm_loss": 0.00933837890625, "loss": 0.0073, "step": 3556, "total_loss": 0.00933837890625 }, { "epoch": 1.45, "learning_rate": 0.00018974447897242988, "lm_loss": 0.007659912109375, "loss": 0.0078, "step": 3557, "total_loss": 0.007659912109375 }, { "epoch": 1.45, "learning_rate": 0.00018973881248937792, "lm_loss": 0.0052490234375, "loss": 0.0064, "step": 3558, "total_loss": 0.0052490234375 }, { "epoch": 1.46, "learning_rate": 0.0001897331445259666, "lm_loss": 0.005462646484375, "loss": 0.008, "step": 3559, "total_loss": 0.005462646484375 }, { "epoch": 1.46, "learning_rate": 0.00018972747508228942, "lm_loss": 0.00958251953125, "loss": 0.0086, "step": 3560, "total_loss": 0.00958251953125 }, { "epoch": 1.46, "learning_rate": 0.00018972180415843982, "lm_loss": 0.007293701171875, "loss": 0.0073, "step": 3561, "total_loss": 0.007293701171875 }, { "epoch": 1.46, "learning_rate": 0.0001897161317545115, "lm_loss": 0.0081787109375, "loss": 0.0071, "step": 3562, "total_loss": 0.0081787109375 }, { "epoch": 1.46, "learning_rate": 0.00018971045787059793, "lm_loss": 0.0096435546875, "loss": 0.0073, "step": 3563, "total_loss": 0.0096435546875 }, { "epoch": 1.46, "learning_rate": 0.00018970478250679275, "lm_loss": 0.005615234375, "loss": 0.0075, "step": 3564, "total_loss": 0.005615234375 }, { "epoch": 1.46, "learning_rate": 0.00018969910566318953, "lm_loss": 0.0068359375, "loss": 0.0083, "step": 3565, "total_loss": 0.0068359375 }, { "epoch": 1.46, "learning_rate": 0.000189693427339882, "lm_loss": 0.00555419921875, "loss": 0.007, "step": 3566, "total_loss": 0.00555419921875 }, { "epoch": 1.46, "learning_rate": 0.00018968774753696374, "lm_loss": 0.003570556640625, "loss": 0.0069, "step": 3567, "total_loss": 0.003570556640625 }, { "epoch": 1.46, "learning_rate": 0.00018968206625452852, "lm_loss": 0.003997802734375, "loss": 0.0064, "step": 3568, "total_loss": 0.003997802734375 }, { "epoch": 1.46, "learning_rate": 0.00018967638349267, "lm_loss": 0.00567626953125, "loss": 0.0072, "step": 3569, "total_loss": 0.00567626953125 }, { "epoch": 1.46, "learning_rate": 0.00018967069925148199, "lm_loss": 0.006378173828125, "loss": 0.0071, "step": 3570, "total_loss": 0.006378173828125 }, { "epoch": 1.46, "learning_rate": 0.0001896650135310582, "lm_loss": 0.01348876953125, "loss": 0.0075, "step": 3571, "total_loss": 0.01348876953125 }, { "epoch": 1.46, "learning_rate": 0.0001896593263314925, "lm_loss": 0.0037689208984375, "loss": 0.0063, "step": 3572, "total_loss": 0.0037689208984375 }, { "epoch": 1.46, "learning_rate": 0.00018965363765287857, "lm_loss": 0.007080078125, "loss": 0.007, "step": 3573, "total_loss": 0.007080078125 }, { "epoch": 1.46, "learning_rate": 0.0001896479474953104, "lm_loss": 0.006195068359375, "loss": 0.0069, "step": 3574, "total_loss": 0.006195068359375 }, { "epoch": 1.46, "learning_rate": 0.00018964225585888178, "lm_loss": 0.00775146484375, "loss": 0.0075, "step": 3575, "total_loss": 0.00775146484375 }, { "epoch": 1.46, "learning_rate": 0.0001896365627436866, "lm_loss": 0.0130615234375, "loss": 0.0091, "step": 3576, "total_loss": 0.0130615234375 }, { "epoch": 1.46, "learning_rate": 0.00018963086814981884, "lm_loss": 0.00102996826171875, "loss": 0.0069, "step": 3577, "total_loss": 0.00102996826171875 }, { "epoch": 1.46, "learning_rate": 0.00018962517207737233, "lm_loss": 0.010498046875, "loss": 0.0064, "step": 3578, "total_loss": 0.010498046875 }, { "epoch": 1.46, "learning_rate": 0.00018961947452644115, "lm_loss": 0.01104736328125, "loss": 0.0078, "step": 3579, "total_loss": 0.01104736328125 }, { "epoch": 1.46, "learning_rate": 0.00018961377549711917, "lm_loss": 0.006927490234375, "loss": 0.0075, "step": 3580, "total_loss": 0.006927490234375 }, { "epoch": 1.46, "learning_rate": 0.00018960807498950052, "lm_loss": 0.0036468505859375, "loss": 0.0071, "step": 3581, "total_loss": 0.0036468505859375 }, { "epoch": 1.46, "learning_rate": 0.0001896023730036792, "lm_loss": 0.006622314453125, "loss": 0.0079, "step": 3582, "total_loss": 0.006622314453125 }, { "epoch": 1.46, "learning_rate": 0.0001895966695397492, "lm_loss": 0.0079345703125, "loss": 0.0083, "step": 3583, "total_loss": 0.0079345703125 }, { "epoch": 1.47, "learning_rate": 0.00018959096459780469, "lm_loss": 0.00421142578125, "loss": 0.0056, "step": 3584, "total_loss": 0.00421142578125 }, { "epoch": 1.47, "learning_rate": 0.00018958525817793976, "lm_loss": 0.0036163330078125, "loss": 0.0088, "step": 3585, "total_loss": 0.0036163330078125 }, { "epoch": 1.47, "learning_rate": 0.00018957955028024853, "lm_loss": 0.0079345703125, "loss": 0.0059, "step": 3586, "total_loss": 0.0079345703125 }, { "epoch": 1.47, "learning_rate": 0.00018957384090482514, "lm_loss": 0.00193023681640625, "loss": 0.0077, "step": 3587, "total_loss": 0.00193023681640625 }, { "epoch": 1.47, "learning_rate": 0.0001895681300517638, "lm_loss": 0.003509521484375, "loss": 0.0074, "step": 3588, "total_loss": 0.003509521484375 }, { "epoch": 1.47, "learning_rate": 0.00018956241772115874, "lm_loss": 0.00799560546875, "loss": 0.0085, "step": 3589, "total_loss": 0.00799560546875 }, { "epoch": 1.47, "learning_rate": 0.0001895567039131042, "lm_loss": 0.00830078125, "loss": 0.0078, "step": 3590, "total_loss": 0.00830078125 }, { "epoch": 1.47, "learning_rate": 0.00018955098862769436, "lm_loss": 0.0072021484375, "loss": 0.0084, "step": 3591, "total_loss": 0.0072021484375 }, { "epoch": 1.47, "learning_rate": 0.00018954527186502358, "lm_loss": 0.004180908203125, "loss": 0.0069, "step": 3592, "total_loss": 0.004180908203125 }, { "epoch": 1.47, "learning_rate": 0.00018953955362518613, "lm_loss": 0.00421142578125, "loss": 0.0073, "step": 3593, "total_loss": 0.00421142578125 }, { "epoch": 1.47, "learning_rate": 0.00018953383390827634, "lm_loss": 0.007476806640625, "loss": 0.0089, "step": 3594, "total_loss": 0.007476806640625 }, { "epoch": 1.47, "learning_rate": 0.00018952811271438856, "lm_loss": 0.006744384765625, "loss": 0.0065, "step": 3595, "total_loss": 0.006744384765625 }, { "epoch": 1.47, "learning_rate": 0.00018952239004361716, "lm_loss": 0.0025482177734375, "loss": 0.0059, "step": 3596, "total_loss": 0.0025482177734375 }, { "epoch": 1.47, "learning_rate": 0.0001895166658960566, "lm_loss": 0.00958251953125, "loss": 0.0068, "step": 3597, "total_loss": 0.00958251953125 }, { "epoch": 1.47, "learning_rate": 0.00018951094027180122, "lm_loss": 0.00933837890625, "loss": 0.0079, "step": 3598, "total_loss": 0.00933837890625 }, { "epoch": 1.47, "learning_rate": 0.00018950521317094557, "lm_loss": 0.004547119140625, "loss": 0.0061, "step": 3599, "total_loss": 0.004547119140625 }, { "epoch": 1.47, "learning_rate": 0.00018949948459358402, "lm_loss": 0.01422119140625, "loss": 0.0097, "step": 3600, "total_loss": 0.01422119140625 }, { "epoch": 1.47, "eval_lm_loss": 0.009612790308892727, "eval_loss": 0.010066261515021324, "eval_runtime": 43.9934, "eval_samples_per_second": 22.731, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009612790308892727, "lm_loss": 0.0012054443359375, "step": 3600, "total_loss": 0.0012054443359375 }, { "epoch": 1.47, "learning_rate": 0.00018949375453981116, "lm_loss": 0.003814697265625, "loss": 0.0072, "step": 3601, "total_loss": 0.003814697265625 }, { "epoch": 1.47, "learning_rate": 0.0001894880230097215, "lm_loss": 0.0050048828125, "loss": 0.0074, "step": 3602, "total_loss": 0.0050048828125 }, { "epoch": 1.47, "learning_rate": 0.00018948229000340954, "lm_loss": 0.006561279296875, "loss": 0.0048, "step": 3603, "total_loss": 0.006561279296875 }, { "epoch": 1.47, "learning_rate": 0.00018947655552096988, "lm_loss": 0.0032958984375, "loss": 0.0088, "step": 3604, "total_loss": 0.0032958984375 }, { "epoch": 1.47, "learning_rate": 0.0001894708195624971, "lm_loss": 0.0101318359375, "loss": 0.0088, "step": 3605, "total_loss": 0.0101318359375 }, { "epoch": 1.47, "learning_rate": 0.0001894650821280859, "lm_loss": 0.006072998046875, "loss": 0.0055, "step": 3606, "total_loss": 0.006072998046875 }, { "epoch": 1.47, "learning_rate": 0.00018945934321783084, "lm_loss": 0.01214599609375, "loss": 0.0084, "step": 3607, "total_loss": 0.01214599609375 }, { "epoch": 1.48, "learning_rate": 0.0001894536028318266, "lm_loss": 0.00665283203125, "loss": 0.0067, "step": 3608, "total_loss": 0.00665283203125 }, { "epoch": 1.48, "learning_rate": 0.0001894478609701679, "lm_loss": 0.00555419921875, "loss": 0.0073, "step": 3609, "total_loss": 0.00555419921875 }, { "epoch": 1.48, "learning_rate": 0.00018944211763294948, "lm_loss": 0.004852294921875, "loss": 0.0091, "step": 3610, "total_loss": 0.004852294921875 }, { "epoch": 1.48, "learning_rate": 0.00018943637282026603, "lm_loss": 0.01416015625, "loss": 0.0077, "step": 3611, "total_loss": 0.01416015625 }, { "epoch": 1.48, "learning_rate": 0.00018943062653221238, "lm_loss": 0.00579833984375, "loss": 0.0089, "step": 3612, "total_loss": 0.00579833984375 }, { "epoch": 1.48, "learning_rate": 0.00018942487876888326, "lm_loss": 0.01104736328125, "loss": 0.0075, "step": 3613, "total_loss": 0.01104736328125 }, { "epoch": 1.48, "learning_rate": 0.0001894191295303735, "lm_loss": 0.01104736328125, "loss": 0.0073, "step": 3614, "total_loss": 0.01104736328125 }, { "epoch": 1.48, "learning_rate": 0.000189413378816778, "lm_loss": 0.006317138671875, "loss": 0.0093, "step": 3615, "total_loss": 0.006317138671875 }, { "epoch": 1.48, "learning_rate": 0.00018940762662819158, "lm_loss": 0.008544921875, "loss": 0.0084, "step": 3616, "total_loss": 0.008544921875 }, { "epoch": 1.48, "learning_rate": 0.00018940187296470913, "lm_loss": 0.008056640625, "loss": 0.0089, "step": 3617, "total_loss": 0.008056640625 }, { "epoch": 1.48, "learning_rate": 0.00018939611782642555, "lm_loss": 0.0120849609375, "loss": 0.0095, "step": 3618, "total_loss": 0.0120849609375 }, { "epoch": 1.48, "learning_rate": 0.0001893903612134358, "lm_loss": 0.00738525390625, "loss": 0.0079, "step": 3619, "total_loss": 0.00738525390625 }, { "epoch": 1.48, "learning_rate": 0.00018938460312583483, "lm_loss": 0.004119873046875, "loss": 0.0071, "step": 3620, "total_loss": 0.004119873046875 }, { "epoch": 1.48, "learning_rate": 0.00018937884356371763, "lm_loss": 0.01043701171875, "loss": 0.0078, "step": 3621, "total_loss": 0.01043701171875 }, { "epoch": 1.48, "learning_rate": 0.00018937308252717924, "lm_loss": 0.0028076171875, "loss": 0.0076, "step": 3622, "total_loss": 0.0028076171875 }, { "epoch": 1.48, "learning_rate": 0.00018936732001631468, "lm_loss": 0.006011962890625, "loss": 0.0057, "step": 3623, "total_loss": 0.006011962890625 }, { "epoch": 1.48, "learning_rate": 0.00018936155603121896, "lm_loss": 0.005523681640625, "loss": 0.0077, "step": 3624, "total_loss": 0.005523681640625 }, { "epoch": 1.48, "learning_rate": 0.00018935579057198723, "lm_loss": 0.005218505859375, "loss": 0.0068, "step": 3625, "total_loss": 0.005218505859375 }, { "epoch": 1.48, "learning_rate": 0.0001893500236387146, "lm_loss": 0.002349853515625, "loss": 0.0065, "step": 3626, "total_loss": 0.002349853515625 }, { "epoch": 1.48, "learning_rate": 0.00018934425523149616, "lm_loss": 0.0089111328125, "loss": 0.0065, "step": 3627, "total_loss": 0.0089111328125 }, { "epoch": 1.48, "learning_rate": 0.00018933848535042708, "lm_loss": 0.009765625, "loss": 0.0069, "step": 3628, "total_loss": 0.009765625 }, { "epoch": 1.48, "learning_rate": 0.00018933271399560256, "lm_loss": 0.0098876953125, "loss": 0.0091, "step": 3629, "total_loss": 0.0098876953125 }, { "epoch": 1.48, "learning_rate": 0.0001893269411671178, "lm_loss": 0.0093994140625, "loss": 0.0075, "step": 3630, "total_loss": 0.0093994140625 }, { "epoch": 1.48, "learning_rate": 0.000189321166865068, "lm_loss": 0.0093994140625, "loss": 0.0073, "step": 3631, "total_loss": 0.0093994140625 }, { "epoch": 1.48, "learning_rate": 0.00018931539108954845, "lm_loss": 0.006561279296875, "loss": 0.0068, "step": 3632, "total_loss": 0.006561279296875 }, { "epoch": 1.49, "learning_rate": 0.00018930961384065444, "lm_loss": 0.007537841796875, "loss": 0.0088, "step": 3633, "total_loss": 0.007537841796875 }, { "epoch": 1.49, "learning_rate": 0.00018930383511848122, "lm_loss": 0.00188446044921875, "loss": 0.0081, "step": 3634, "total_loss": 0.00188446044921875 }, { "epoch": 1.49, "learning_rate": 0.00018929805492312417, "lm_loss": 0.01318359375, "loss": 0.0084, "step": 3635, "total_loss": 0.01318359375 }, { "epoch": 1.49, "learning_rate": 0.00018929227325467863, "lm_loss": 0.006072998046875, "loss": 0.0081, "step": 3636, "total_loss": 0.006072998046875 }, { "epoch": 1.49, "learning_rate": 0.00018928649011323994, "lm_loss": 0.00811767578125, "loss": 0.0075, "step": 3637, "total_loss": 0.00811767578125 }, { "epoch": 1.49, "learning_rate": 0.00018928070549890355, "lm_loss": 0.0016632080078125, "loss": 0.0069, "step": 3638, "total_loss": 0.0016632080078125 }, { "epoch": 1.49, "learning_rate": 0.00018927491941176484, "lm_loss": 0.006256103515625, "loss": 0.0093, "step": 3639, "total_loss": 0.006256103515625 }, { "epoch": 1.49, "learning_rate": 0.00018926913185191932, "lm_loss": 0.00714111328125, "loss": 0.0087, "step": 3640, "total_loss": 0.00714111328125 }, { "epoch": 1.49, "learning_rate": 0.00018926334281946241, "lm_loss": 0.0029144287109375, "loss": 0.0056, "step": 3641, "total_loss": 0.0029144287109375 }, { "epoch": 1.49, "learning_rate": 0.0001892575523144896, "lm_loss": 0.006683349609375, "loss": 0.0077, "step": 3642, "total_loss": 0.006683349609375 }, { "epoch": 1.49, "learning_rate": 0.00018925176033709646, "lm_loss": 0.0047607421875, "loss": 0.0068, "step": 3643, "total_loss": 0.0047607421875 }, { "epoch": 1.49, "learning_rate": 0.0001892459668873785, "lm_loss": 0.00799560546875, "loss": 0.0073, "step": 3644, "total_loss": 0.00799560546875 }, { "epoch": 1.49, "learning_rate": 0.0001892401719654313, "lm_loss": 0.00897216796875, "loss": 0.0063, "step": 3645, "total_loss": 0.00897216796875 }, { "epoch": 1.49, "learning_rate": 0.00018923437557135045, "lm_loss": 0.00640869140625, "loss": 0.0066, "step": 3646, "total_loss": 0.00640869140625 }, { "epoch": 1.49, "learning_rate": 0.0001892285777052316, "lm_loss": 0.005950927734375, "loss": 0.0078, "step": 3647, "total_loss": 0.005950927734375 }, { "epoch": 1.49, "learning_rate": 0.00018922277836717037, "lm_loss": 0.005401611328125, "loss": 0.0078, "step": 3648, "total_loss": 0.005401611328125 }, { "epoch": 1.49, "learning_rate": 0.00018921697755726243, "lm_loss": 0.0081787109375, "loss": 0.0076, "step": 3649, "total_loss": 0.0081787109375 }, { "epoch": 1.49, "learning_rate": 0.00018921117527560344, "lm_loss": 0.00616455078125, "loss": 0.008, "step": 3650, "total_loss": 0.00616455078125 }, { "epoch": 1.49, "learning_rate": 0.00018920537152228916, "lm_loss": 0.00830078125, "loss": 0.0078, "step": 3651, "total_loss": 0.00830078125 }, { "epoch": 1.49, "learning_rate": 0.00018919956629741532, "lm_loss": 0.0029449462890625, "loss": 0.008, "step": 3652, "total_loss": 0.0029449462890625 }, { "epoch": 1.49, "learning_rate": 0.0001891937596010777, "lm_loss": 0.004669189453125, "loss": 0.0055, "step": 3653, "total_loss": 0.004669189453125 }, { "epoch": 1.49, "learning_rate": 0.00018918795143337203, "lm_loss": 0.006591796875, "loss": 0.0077, "step": 3654, "total_loss": 0.006591796875 }, { "epoch": 1.49, "learning_rate": 0.00018918214179439418, "lm_loss": 0.007171630859375, "loss": 0.0066, "step": 3655, "total_loss": 0.007171630859375 }, { "epoch": 1.49, "learning_rate": 0.00018917633068423997, "lm_loss": 0.0052490234375, "loss": 0.0089, "step": 3656, "total_loss": 0.0052490234375 }, { "epoch": 1.5, "learning_rate": 0.00018917051810300525, "lm_loss": 0.021484375, "loss": 0.0074, "step": 3657, "total_loss": 0.021484375 }, { "epoch": 1.5, "learning_rate": 0.00018916470405078593, "lm_loss": 0.01031494140625, "loss": 0.0078, "step": 3658, "total_loss": 0.01031494140625 }, { "epoch": 1.5, "learning_rate": 0.0001891588885276779, "lm_loss": 0.00726318359375, "loss": 0.0079, "step": 3659, "total_loss": 0.00726318359375 }, { "epoch": 1.5, "learning_rate": 0.00018915307153377712, "lm_loss": 0.00653076171875, "loss": 0.0071, "step": 3660, "total_loss": 0.00653076171875 }, { "epoch": 1.5, "learning_rate": 0.00018914725306917954, "lm_loss": 0.01318359375, "loss": 0.0092, "step": 3661, "total_loss": 0.01318359375 }, { "epoch": 1.5, "learning_rate": 0.0001891414331339811, "lm_loss": 0.0023956298828125, "loss": 0.0069, "step": 3662, "total_loss": 0.0023956298828125 }, { "epoch": 1.5, "learning_rate": 0.00018913561172827787, "lm_loss": 0.020751953125, "loss": 0.0085, "step": 3663, "total_loss": 0.020751953125 }, { "epoch": 1.5, "learning_rate": 0.00018912978885216587, "lm_loss": 0.005523681640625, "loss": 0.0068, "step": 3664, "total_loss": 0.005523681640625 }, { "epoch": 1.5, "learning_rate": 0.00018912396450574111, "lm_loss": 0.00567626953125, "loss": 0.0058, "step": 3665, "total_loss": 0.00567626953125 }, { "epoch": 1.5, "learning_rate": 0.0001891181386890997, "lm_loss": 0.0111083984375, "loss": 0.0076, "step": 3666, "total_loss": 0.0111083984375 }, { "epoch": 1.5, "learning_rate": 0.0001891123114023378, "lm_loss": 0.0130615234375, "loss": 0.0097, "step": 3667, "total_loss": 0.0130615234375 }, { "epoch": 1.5, "learning_rate": 0.00018910648264555143, "lm_loss": 0.0035247802734375, "loss": 0.0072, "step": 3668, "total_loss": 0.0035247802734375 }, { "epoch": 1.5, "learning_rate": 0.0001891006524188368, "lm_loss": 0.007415771484375, "loss": 0.0073, "step": 3669, "total_loss": 0.007415771484375 }, { "epoch": 1.5, "learning_rate": 0.0001890948207222901, "lm_loss": 0.00799560546875, "loss": 0.0067, "step": 3670, "total_loss": 0.00799560546875 }, { "epoch": 1.5, "learning_rate": 0.0001890889875560075, "lm_loss": 0.004058837890625, "loss": 0.007, "step": 3671, "total_loss": 0.004058837890625 }, { "epoch": 1.5, "learning_rate": 0.00018908315292008525, "lm_loss": 0.01068115234375, "loss": 0.0075, "step": 3672, "total_loss": 0.01068115234375 }, { "epoch": 1.5, "learning_rate": 0.0001890773168146196, "lm_loss": 0.005950927734375, "loss": 0.0073, "step": 3673, "total_loss": 0.005950927734375 }, { "epoch": 1.5, "learning_rate": 0.0001890714792397068, "lm_loss": 0.006683349609375, "loss": 0.0056, "step": 3674, "total_loss": 0.006683349609375 }, { "epoch": 1.5, "learning_rate": 0.0001890656401954432, "lm_loss": 0.01611328125, "loss": 0.0089, "step": 3675, "total_loss": 0.01611328125 }, { "epoch": 1.5, "learning_rate": 0.00018905979968192506, "lm_loss": 0.00823974609375, "loss": 0.0067, "step": 3676, "total_loss": 0.00823974609375 }, { "epoch": 1.5, "learning_rate": 0.00018905395769924877, "lm_loss": 0.006744384765625, "loss": 0.0061, "step": 3677, "total_loss": 0.006744384765625 }, { "epoch": 1.5, "learning_rate": 0.00018904811424751063, "lm_loss": 0.00994873046875, "loss": 0.0076, "step": 3678, "total_loss": 0.00994873046875 }, { "epoch": 1.5, "learning_rate": 0.00018904226932680716, "lm_loss": 0.00531005859375, "loss": 0.0074, "step": 3679, "total_loss": 0.00531005859375 }, { "epoch": 1.5, "learning_rate": 0.00018903642293723467, "lm_loss": 0.01129150390625, "loss": 0.0072, "step": 3680, "total_loss": 0.01129150390625 }, { "epoch": 1.5, "learning_rate": 0.00018903057507888967, "lm_loss": 0.004302978515625, "loss": 0.0076, "step": 3681, "total_loss": 0.004302978515625 }, { "epoch": 1.51, "learning_rate": 0.00018902472575186858, "lm_loss": 0.004669189453125, "loss": 0.0067, "step": 3682, "total_loss": 0.004669189453125 }, { "epoch": 1.51, "learning_rate": 0.00018901887495626795, "lm_loss": 0.0196533203125, "loss": 0.0079, "step": 3683, "total_loss": 0.0196533203125 }, { "epoch": 1.51, "learning_rate": 0.0001890130226921842, "lm_loss": 0.0037841796875, "loss": 0.0071, "step": 3684, "total_loss": 0.0037841796875 }, { "epoch": 1.51, "learning_rate": 0.00018900716895971396, "lm_loss": 0.005035400390625, "loss": 0.0074, "step": 3685, "total_loss": 0.005035400390625 }, { "epoch": 1.51, "learning_rate": 0.0001890013137589538, "lm_loss": 0.006103515625, "loss": 0.0098, "step": 3686, "total_loss": 0.006103515625 }, { "epoch": 1.51, "learning_rate": 0.00018899545709000025, "lm_loss": 0.004302978515625, "loss": 0.0062, "step": 3687, "total_loss": 0.004302978515625 }, { "epoch": 1.51, "learning_rate": 0.00018898959895294993, "lm_loss": 0.0087890625, "loss": 0.0073, "step": 3688, "total_loss": 0.0087890625 }, { "epoch": 1.51, "learning_rate": 0.00018898373934789952, "lm_loss": 0.004058837890625, "loss": 0.0066, "step": 3689, "total_loss": 0.004058837890625 }, { "epoch": 1.51, "learning_rate": 0.00018897787827494567, "lm_loss": 0.006256103515625, "loss": 0.0071, "step": 3690, "total_loss": 0.006256103515625 }, { "epoch": 1.51, "learning_rate": 0.00018897201573418504, "lm_loss": 0.01043701171875, "loss": 0.0069, "step": 3691, "total_loss": 0.01043701171875 }, { "epoch": 1.51, "learning_rate": 0.00018896615172571437, "lm_loss": 0.0032501220703125, "loss": 0.0091, "step": 3692, "total_loss": 0.0032501220703125 }, { "epoch": 1.51, "learning_rate": 0.00018896028624963037, "lm_loss": 0.0030059814453125, "loss": 0.0073, "step": 3693, "total_loss": 0.0030059814453125 }, { "epoch": 1.51, "learning_rate": 0.0001889544193060298, "lm_loss": 0.00836181640625, "loss": 0.0079, "step": 3694, "total_loss": 0.00836181640625 }, { "epoch": 1.51, "learning_rate": 0.00018894855089500947, "lm_loss": 0.0086669921875, "loss": 0.0089, "step": 3695, "total_loss": 0.0086669921875 }, { "epoch": 1.51, "learning_rate": 0.00018894268101666615, "lm_loss": 0.0115966796875, "loss": 0.0091, "step": 3696, "total_loss": 0.0115966796875 }, { "epoch": 1.51, "learning_rate": 0.00018893680967109667, "lm_loss": 0.00537109375, "loss": 0.009, "step": 3697, "total_loss": 0.00537109375 }, { "epoch": 1.51, "learning_rate": 0.00018893093685839793, "lm_loss": 0.0034942626953125, "loss": 0.0077, "step": 3698, "total_loss": 0.0034942626953125 }, { "epoch": 1.51, "learning_rate": 0.0001889250625786668, "lm_loss": 0.005462646484375, "loss": 0.0089, "step": 3699, "total_loss": 0.005462646484375 }, { "epoch": 1.51, "learning_rate": 0.00018891918683200016, "lm_loss": 0.0068359375, "loss": 0.0103, "step": 3700, "total_loss": 0.0068359375 }, { "epoch": 1.51, "eval_lm_loss": 0.00946781411767006, "eval_loss": 0.009776020422577858, "eval_runtime": 44.1261, "eval_samples_per_second": 22.662, "eval_steps_per_second": 0.204, "eval_total_loss": 0.00946781411767006, "lm_loss": 0.0016632080078125, "step": 3700, "total_loss": 0.0016632080078125 }, { "epoch": 1.51, "learning_rate": 0.00018891330961849496, "lm_loss": 0.0064697265625, "loss": 0.0094, "step": 3701, "total_loss": 0.0064697265625 }, { "epoch": 1.51, "learning_rate": 0.0001889074309382481, "lm_loss": 0.006591796875, "loss": 0.0069, "step": 3702, "total_loss": 0.006591796875 }, { "epoch": 1.51, "learning_rate": 0.00018890155079135664, "lm_loss": 0.01123046875, "loss": 0.0094, "step": 3703, "total_loss": 0.01123046875 }, { "epoch": 1.51, "learning_rate": 0.00018889566917791752, "lm_loss": 0.006195068359375, "loss": 0.0084, "step": 3704, "total_loss": 0.006195068359375 }, { "epoch": 1.51, "learning_rate": 0.00018888978609802776, "lm_loss": 0.013916015625, "loss": 0.0084, "step": 3705, "total_loss": 0.013916015625 }, { "epoch": 1.52, "learning_rate": 0.00018888390155178447, "lm_loss": 0.005523681640625, "loss": 0.0065, "step": 3706, "total_loss": 0.005523681640625 }, { "epoch": 1.52, "learning_rate": 0.00018887801553928464, "lm_loss": 0.004608154296875, "loss": 0.0066, "step": 3707, "total_loss": 0.004608154296875 }, { "epoch": 1.52, "learning_rate": 0.00018887212806062548, "lm_loss": 0.006988525390625, "loss": 0.0078, "step": 3708, "total_loss": 0.006988525390625 }, { "epoch": 1.52, "learning_rate": 0.000188866239115904, "lm_loss": 0.00653076171875, "loss": 0.0081, "step": 3709, "total_loss": 0.00653076171875 }, { "epoch": 1.52, "learning_rate": 0.00018886034870521735, "lm_loss": 0.00156402587890625, "loss": 0.0096, "step": 3710, "total_loss": 0.00156402587890625 }, { "epoch": 1.52, "learning_rate": 0.0001888544568286628, "lm_loss": 0.00433349609375, "loss": 0.0065, "step": 3711, "total_loss": 0.00433349609375 }, { "epoch": 1.52, "learning_rate": 0.00018884856348633746, "lm_loss": 0.00762939453125, "loss": 0.0088, "step": 3712, "total_loss": 0.00762939453125 }, { "epoch": 1.52, "learning_rate": 0.00018884266867833858, "lm_loss": 0.006561279296875, "loss": 0.0072, "step": 3713, "total_loss": 0.006561279296875 }, { "epoch": 1.52, "learning_rate": 0.0001888367724047634, "lm_loss": 0.01220703125, "loss": 0.0082, "step": 3714, "total_loss": 0.01220703125 }, { "epoch": 1.52, "learning_rate": 0.00018883087466570916, "lm_loss": 0.00408935546875, "loss": 0.0057, "step": 3715, "total_loss": 0.00408935546875 }, { "epoch": 1.52, "learning_rate": 0.00018882497546127318, "lm_loss": 0.00799560546875, "loss": 0.0081, "step": 3716, "total_loss": 0.00799560546875 }, { "epoch": 1.52, "learning_rate": 0.00018881907479155276, "lm_loss": 0.00701904296875, "loss": 0.0069, "step": 3717, "total_loss": 0.00701904296875 }, { "epoch": 1.52, "learning_rate": 0.0001888131726566453, "lm_loss": 0.005035400390625, "loss": 0.0051, "step": 3718, "total_loss": 0.005035400390625 }, { "epoch": 1.52, "learning_rate": 0.00018880726905664804, "lm_loss": 0.01348876953125, "loss": 0.0077, "step": 3719, "total_loss": 0.01348876953125 }, { "epoch": 1.52, "learning_rate": 0.00018880136399165845, "lm_loss": 0.009033203125, "loss": 0.0061, "step": 3720, "total_loss": 0.009033203125 }, { "epoch": 1.52, "learning_rate": 0.00018879545746177396, "lm_loss": 0.0098876953125, "loss": 0.0075, "step": 3721, "total_loss": 0.0098876953125 }, { "epoch": 1.52, "learning_rate": 0.00018878954946709197, "lm_loss": 0.0126953125, "loss": 0.0081, "step": 3722, "total_loss": 0.0126953125 }, { "epoch": 1.52, "learning_rate": 0.00018878364000770993, "lm_loss": 0.006591796875, "loss": 0.0071, "step": 3723, "total_loss": 0.006591796875 }, { "epoch": 1.52, "learning_rate": 0.00018877772908372536, "lm_loss": 0.0050048828125, "loss": 0.0064, "step": 3724, "total_loss": 0.0050048828125 }, { "epoch": 1.52, "learning_rate": 0.00018877181669523573, "lm_loss": 0.00811767578125, "loss": 0.0075, "step": 3725, "total_loss": 0.00811767578125 }, { "epoch": 1.52, "learning_rate": 0.0001887659028423386, "lm_loss": 0.0079345703125, "loss": 0.008, "step": 3726, "total_loss": 0.0079345703125 }, { "epoch": 1.52, "learning_rate": 0.0001887599875251315, "lm_loss": 0.006195068359375, "loss": 0.006, "step": 3727, "total_loss": 0.006195068359375 }, { "epoch": 1.52, "learning_rate": 0.00018875407074371207, "lm_loss": 0.01507568359375, "loss": 0.0068, "step": 3728, "total_loss": 0.01507568359375 }, { "epoch": 1.52, "learning_rate": 0.00018874815249817783, "lm_loss": 0.00830078125, "loss": 0.0079, "step": 3729, "total_loss": 0.00830078125 }, { "epoch": 1.52, "learning_rate": 0.00018874223278862647, "lm_loss": 0.013916015625, "loss": 0.0077, "step": 3730, "total_loss": 0.013916015625 }, { "epoch": 1.53, "learning_rate": 0.00018873631161515564, "lm_loss": 0.004974365234375, "loss": 0.0077, "step": 3731, "total_loss": 0.004974365234375 }, { "epoch": 1.53, "learning_rate": 0.00018873038897786299, "lm_loss": 0.006500244140625, "loss": 0.0065, "step": 3732, "total_loss": 0.006500244140625 }, { "epoch": 1.53, "learning_rate": 0.00018872446487684625, "lm_loss": 0.006500244140625, "loss": 0.0064, "step": 3733, "total_loss": 0.006500244140625 }, { "epoch": 1.53, "learning_rate": 0.00018871853931220312, "lm_loss": 0.01007080078125, "loss": 0.0063, "step": 3734, "total_loss": 0.01007080078125 }, { "epoch": 1.53, "learning_rate": 0.00018871261228403134, "lm_loss": 0.005584716796875, "loss": 0.0076, "step": 3735, "total_loss": 0.005584716796875 }, { "epoch": 1.53, "learning_rate": 0.00018870668379242873, "lm_loss": 0.00732421875, "loss": 0.0057, "step": 3736, "total_loss": 0.00732421875 }, { "epoch": 1.53, "learning_rate": 0.00018870075383749306, "lm_loss": 0.00604248046875, "loss": 0.0076, "step": 3737, "total_loss": 0.00604248046875 }, { "epoch": 1.53, "learning_rate": 0.00018869482241932217, "lm_loss": 0.01031494140625, "loss": 0.0095, "step": 3738, "total_loss": 0.01031494140625 }, { "epoch": 1.53, "learning_rate": 0.00018868888953801387, "lm_loss": 0.00830078125, "loss": 0.0077, "step": 3739, "total_loss": 0.00830078125 }, { "epoch": 1.53, "learning_rate": 0.00018868295519366605, "lm_loss": 0.0029144287109375, "loss": 0.0084, "step": 3740, "total_loss": 0.0029144287109375 }, { "epoch": 1.53, "learning_rate": 0.00018867701938637664, "lm_loss": 0.00372314453125, "loss": 0.0054, "step": 3741, "total_loss": 0.00372314453125 }, { "epoch": 1.53, "learning_rate": 0.0001886710821162435, "lm_loss": 0.01031494140625, "loss": 0.0064, "step": 3742, "total_loss": 0.01031494140625 }, { "epoch": 1.53, "learning_rate": 0.0001886651433833646, "lm_loss": 0.0023193359375, "loss": 0.0071, "step": 3743, "total_loss": 0.0023193359375 }, { "epoch": 1.53, "learning_rate": 0.00018865920318783793, "lm_loss": 0.009521484375, "loss": 0.0067, "step": 3744, "total_loss": 0.009521484375 }, { "epoch": 1.53, "learning_rate": 0.00018865326152976141, "lm_loss": 0.0093994140625, "loss": 0.0055, "step": 3745, "total_loss": 0.0093994140625 }, { "epoch": 1.53, "learning_rate": 0.00018864731840923315, "lm_loss": 0.007537841796875, "loss": 0.0079, "step": 3746, "total_loss": 0.007537841796875 }, { "epoch": 1.53, "learning_rate": 0.00018864137382635114, "lm_loss": 0.0166015625, "loss": 0.0066, "step": 3747, "total_loss": 0.0166015625 }, { "epoch": 1.53, "learning_rate": 0.00018863542778121343, "lm_loss": 0.004638671875, "loss": 0.0077, "step": 3748, "total_loss": 0.004638671875 }, { "epoch": 1.53, "learning_rate": 0.00018862948027391813, "lm_loss": 0.007781982421875, "loss": 0.0064, "step": 3749, "total_loss": 0.007781982421875 }, { "epoch": 1.53, "learning_rate": 0.00018862353130456337, "lm_loss": 0.01220703125, "loss": 0.0078, "step": 3750, "total_loss": 0.01220703125 }, { "epoch": 1.53, "learning_rate": 0.00018861758087324723, "lm_loss": 0.008544921875, "loss": 0.0072, "step": 3751, "total_loss": 0.008544921875 }, { "epoch": 1.53, "learning_rate": 0.0001886116289800679, "lm_loss": 0.004547119140625, "loss": 0.0051, "step": 3752, "total_loss": 0.004547119140625 }, { "epoch": 1.53, "learning_rate": 0.00018860567562512358, "lm_loss": 0.00946044921875, "loss": 0.0065, "step": 3753, "total_loss": 0.00946044921875 }, { "epoch": 1.53, "learning_rate": 0.0001885997208085125, "lm_loss": 0.004791259765625, "loss": 0.0062, "step": 3754, "total_loss": 0.004791259765625 }, { "epoch": 1.54, "learning_rate": 0.00018859376453033282, "lm_loss": 0.0084228515625, "loss": 0.008, "step": 3755, "total_loss": 0.0084228515625 }, { "epoch": 1.54, "learning_rate": 0.00018858780679068283, "lm_loss": 0.00689697265625, "loss": 0.0079, "step": 3756, "total_loss": 0.00689697265625 }, { "epoch": 1.54, "learning_rate": 0.00018858184758966084, "lm_loss": 0.0035858154296875, "loss": 0.0064, "step": 3757, "total_loss": 0.0035858154296875 }, { "epoch": 1.54, "learning_rate": 0.00018857588692736512, "lm_loss": 0.01446533203125, "loss": 0.0069, "step": 3758, "total_loss": 0.01446533203125 }, { "epoch": 1.54, "learning_rate": 0.000188569924803894, "lm_loss": 0.005767822265625, "loss": 0.007, "step": 3759, "total_loss": 0.005767822265625 }, { "epoch": 1.54, "learning_rate": 0.00018856396121934586, "lm_loss": 0.0064697265625, "loss": 0.0077, "step": 3760, "total_loss": 0.0064697265625 }, { "epoch": 1.54, "learning_rate": 0.00018855799617381903, "lm_loss": 0.0076904296875, "loss": 0.0078, "step": 3761, "total_loss": 0.0076904296875 }, { "epoch": 1.54, "learning_rate": 0.00018855202966741198, "lm_loss": 0.0101318359375, "loss": 0.0074, "step": 3762, "total_loss": 0.0101318359375 }, { "epoch": 1.54, "learning_rate": 0.00018854606170022308, "lm_loss": 0.0025787353515625, "loss": 0.0063, "step": 3763, "total_loss": 0.0025787353515625 }, { "epoch": 1.54, "learning_rate": 0.00018854009227235082, "lm_loss": 0.004364013671875, "loss": 0.0069, "step": 3764, "total_loss": 0.004364013671875 }, { "epoch": 1.54, "learning_rate": 0.0001885341213838936, "lm_loss": 0.0013885498046875, "loss": 0.0093, "step": 3765, "total_loss": 0.0013885498046875 }, { "epoch": 1.54, "learning_rate": 0.00018852814903494997, "lm_loss": 0.0027313232421875, "loss": 0.0061, "step": 3766, "total_loss": 0.0027313232421875 }, { "epoch": 1.54, "learning_rate": 0.0001885221752256185, "lm_loss": 0.0023193359375, "loss": 0.0061, "step": 3767, "total_loss": 0.0023193359375 }, { "epoch": 1.54, "learning_rate": 0.00018851619995599762, "lm_loss": 0.016357421875, "loss": 0.0075, "step": 3768, "total_loss": 0.016357421875 }, { "epoch": 1.54, "learning_rate": 0.000188510223226186, "lm_loss": 0.00836181640625, "loss": 0.0067, "step": 3769, "total_loss": 0.00836181640625 }, { "epoch": 1.54, "learning_rate": 0.0001885042450362822, "lm_loss": 0.00836181640625, "loss": 0.0076, "step": 3770, "total_loss": 0.00836181640625 }, { "epoch": 1.54, "learning_rate": 0.00018849826538638485, "lm_loss": 0.005157470703125, "loss": 0.0072, "step": 3771, "total_loss": 0.005157470703125 }, { "epoch": 1.54, "learning_rate": 0.00018849228427659257, "lm_loss": 0.0045166015625, "loss": 0.0065, "step": 3772, "total_loss": 0.0045166015625 }, { "epoch": 1.54, "learning_rate": 0.00018848630170700402, "lm_loss": 0.0028228759765625, "loss": 0.0058, "step": 3773, "total_loss": 0.0028228759765625 }, { "epoch": 1.54, "learning_rate": 0.0001884803176777179, "lm_loss": 0.006134033203125, "loss": 0.0072, "step": 3774, "total_loss": 0.006134033203125 }, { "epoch": 1.54, "learning_rate": 0.00018847433218883294, "lm_loss": 0.0069580078125, "loss": 0.0078, "step": 3775, "total_loss": 0.0069580078125 }, { "epoch": 1.54, "learning_rate": 0.00018846834524044787, "lm_loss": 0.006622314453125, "loss": 0.0071, "step": 3776, "total_loss": 0.006622314453125 }, { "epoch": 1.54, "learning_rate": 0.00018846235683266148, "lm_loss": 0.004302978515625, "loss": 0.0054, "step": 3777, "total_loss": 0.004302978515625 }, { "epoch": 1.54, "learning_rate": 0.00018845636696557247, "lm_loss": 0.004913330078125, "loss": 0.0077, "step": 3778, "total_loss": 0.004913330078125 }, { "epoch": 1.54, "learning_rate": 0.00018845037563927975, "lm_loss": 0.009033203125, "loss": 0.0104, "step": 3779, "total_loss": 0.009033203125 }, { "epoch": 1.55, "learning_rate": 0.00018844438285388207, "lm_loss": 0.006866455078125, "loss": 0.0081, "step": 3780, "total_loss": 0.006866455078125 }, { "epoch": 1.55, "learning_rate": 0.00018843838860947836, "lm_loss": 0.00958251953125, "loss": 0.0075, "step": 3781, "total_loss": 0.00958251953125 }, { "epoch": 1.55, "learning_rate": 0.0001884323929061675, "lm_loss": 0.00408935546875, "loss": 0.0058, "step": 3782, "total_loss": 0.00408935546875 }, { "epoch": 1.55, "learning_rate": 0.00018842639574404832, "lm_loss": 0.005584716796875, "loss": 0.0072, "step": 3783, "total_loss": 0.005584716796875 }, { "epoch": 1.55, "learning_rate": 0.00018842039712321985, "lm_loss": 0.00445556640625, "loss": 0.0088, "step": 3784, "total_loss": 0.00445556640625 }, { "epoch": 1.55, "learning_rate": 0.00018841439704378097, "lm_loss": 0.00701904296875, "loss": 0.0074, "step": 3785, "total_loss": 0.00701904296875 }, { "epoch": 1.55, "learning_rate": 0.00018840839550583068, "lm_loss": 0.008544921875, "loss": 0.0073, "step": 3786, "total_loss": 0.008544921875 }, { "epoch": 1.55, "learning_rate": 0.00018840239250946802, "lm_loss": 0.0106201171875, "loss": 0.0067, "step": 3787, "total_loss": 0.0106201171875 }, { "epoch": 1.55, "learning_rate": 0.00018839638805479196, "lm_loss": 0.00726318359375, "loss": 0.009, "step": 3788, "total_loss": 0.00726318359375 }, { "epoch": 1.55, "learning_rate": 0.0001883903821419016, "lm_loss": 0.005279541015625, "loss": 0.0074, "step": 3789, "total_loss": 0.005279541015625 }, { "epoch": 1.55, "learning_rate": 0.00018838437477089598, "lm_loss": 0.007354736328125, "loss": 0.0069, "step": 3790, "total_loss": 0.007354736328125 }, { "epoch": 1.55, "learning_rate": 0.00018837836594187422, "lm_loss": 0.00921630859375, "loss": 0.0093, "step": 3791, "total_loss": 0.00921630859375 }, { "epoch": 1.55, "learning_rate": 0.00018837235565493543, "lm_loss": 0.0118408203125, "loss": 0.0066, "step": 3792, "total_loss": 0.0118408203125 }, { "epoch": 1.55, "learning_rate": 0.00018836634391017877, "lm_loss": 0.007598876953125, "loss": 0.0076, "step": 3793, "total_loss": 0.007598876953125 }, { "epoch": 1.55, "learning_rate": 0.0001883603307077034, "lm_loss": 0.00830078125, "loss": 0.0074, "step": 3794, "total_loss": 0.00830078125 }, { "epoch": 1.55, "learning_rate": 0.00018835431604760853, "lm_loss": 0.009765625, "loss": 0.0087, "step": 3795, "total_loss": 0.009765625 }, { "epoch": 1.55, "learning_rate": 0.00018834829992999338, "lm_loss": 0.0028533935546875, "loss": 0.0056, "step": 3796, "total_loss": 0.0028533935546875 }, { "epoch": 1.55, "learning_rate": 0.00018834228235495716, "lm_loss": 0.010009765625, "loss": 0.008, "step": 3797, "total_loss": 0.010009765625 }, { "epoch": 1.55, "learning_rate": 0.00018833626332259918, "lm_loss": 0.00994873046875, "loss": 0.0061, "step": 3798, "total_loss": 0.00994873046875 }, { "epoch": 1.55, "learning_rate": 0.0001883302428330187, "lm_loss": 0.006378173828125, "loss": 0.0069, "step": 3799, "total_loss": 0.006378173828125 }, { "epoch": 1.55, "learning_rate": 0.00018832422088631507, "lm_loss": 0.002593994140625, "loss": 0.0094, "step": 3800, "total_loss": 0.002593994140625 }, { "epoch": 1.55, "eval_lm_loss": 0.00954235065728426, "eval_loss": 0.009984311647713184, "eval_runtime": 43.8645, "eval_samples_per_second": 22.797, "eval_steps_per_second": 0.205, "eval_total_loss": 0.00954235065728426, "lm_loss": 0.0020599365234375, "step": 3800, "total_loss": 0.0020599365234375 }, { "epoch": 1.55, "learning_rate": 0.0001883181974825876, "lm_loss": 0.006011962890625, "loss": 0.0074, "step": 3801, "total_loss": 0.006011962890625 }, { "epoch": 1.55, "learning_rate": 0.00018831217262193568, "lm_loss": 0.005859375, "loss": 0.01, "step": 3802, "total_loss": 0.005859375 }, { "epoch": 1.55, "learning_rate": 0.00018830614630445866, "lm_loss": 0.008056640625, "loss": 0.0079, "step": 3803, "total_loss": 0.008056640625 }, { "epoch": 1.56, "learning_rate": 0.000188300118530256, "lm_loss": 0.005950927734375, "loss": 0.0075, "step": 3804, "total_loss": 0.005950927734375 }, { "epoch": 1.56, "learning_rate": 0.0001882940892994271, "lm_loss": 0.004730224609375, "loss": 0.0084, "step": 3805, "total_loss": 0.004730224609375 }, { "epoch": 1.56, "learning_rate": 0.0001882880586120714, "lm_loss": 0.00433349609375, "loss": 0.0072, "step": 3806, "total_loss": 0.00433349609375 }, { "epoch": 1.56, "learning_rate": 0.00018828202646828848, "lm_loss": 0.0091552734375, "loss": 0.0072, "step": 3807, "total_loss": 0.0091552734375 }, { "epoch": 1.56, "learning_rate": 0.00018827599286817774, "lm_loss": 0.004119873046875, "loss": 0.0066, "step": 3808, "total_loss": 0.004119873046875 }, { "epoch": 1.56, "learning_rate": 0.00018826995781183876, "lm_loss": 0.007080078125, "loss": 0.0069, "step": 3809, "total_loss": 0.007080078125 }, { "epoch": 1.56, "learning_rate": 0.00018826392129937109, "lm_loss": 0.0113525390625, "loss": 0.0077, "step": 3810, "total_loss": 0.0113525390625 }, { "epoch": 1.56, "learning_rate": 0.00018825788333087432, "lm_loss": 0.006866455078125, "loss": 0.0057, "step": 3811, "total_loss": 0.006866455078125 }, { "epoch": 1.56, "learning_rate": 0.00018825184390644804, "lm_loss": 0.006927490234375, "loss": 0.008, "step": 3812, "total_loss": 0.006927490234375 }, { "epoch": 1.56, "learning_rate": 0.00018824580302619185, "lm_loss": 0.0059814453125, "loss": 0.0082, "step": 3813, "total_loss": 0.0059814453125 }, { "epoch": 1.56, "learning_rate": 0.0001882397606902055, "lm_loss": 0.0037994384765625, "loss": 0.0091, "step": 3814, "total_loss": 0.0037994384765625 }, { "epoch": 1.56, "learning_rate": 0.00018823371689858856, "lm_loss": 0.005828857421875, "loss": 0.0072, "step": 3815, "total_loss": 0.005828857421875 }, { "epoch": 1.56, "learning_rate": 0.0001882276716514408, "lm_loss": 0.0021514892578125, "loss": 0.0071, "step": 3816, "total_loss": 0.0021514892578125 }, { "epoch": 1.56, "learning_rate": 0.0001882216249488619, "lm_loss": 0.01123046875, "loss": 0.0074, "step": 3817, "total_loss": 0.01123046875 }, { "epoch": 1.56, "learning_rate": 0.0001882155767909516, "lm_loss": 0.0203857421875, "loss": 0.0082, "step": 3818, "total_loss": 0.0203857421875 }, { "epoch": 1.56, "learning_rate": 0.00018820952717780976, "lm_loss": 0.00933837890625, "loss": 0.0097, "step": 3819, "total_loss": 0.00933837890625 }, { "epoch": 1.56, "learning_rate": 0.00018820347610953606, "lm_loss": 0.0034332275390625, "loss": 0.007, "step": 3820, "total_loss": 0.0034332275390625 }, { "epoch": 1.56, "learning_rate": 0.0001881974235862304, "lm_loss": 0.00537109375, "loss": 0.0082, "step": 3821, "total_loss": 0.00537109375 }, { "epoch": 1.56, "learning_rate": 0.00018819136960799258, "lm_loss": 0.0087890625, "loss": 0.0073, "step": 3822, "total_loss": 0.0087890625 }, { "epoch": 1.56, "learning_rate": 0.00018818531417492254, "lm_loss": 0.00384521484375, "loss": 0.0054, "step": 3823, "total_loss": 0.00384521484375 }, { "epoch": 1.56, "learning_rate": 0.00018817925728712006, "lm_loss": 0.0032958984375, "loss": 0.0097, "step": 3824, "total_loss": 0.0032958984375 }, { "epoch": 1.56, "learning_rate": 0.00018817319894468514, "lm_loss": 0.01025390625, "loss": 0.0084, "step": 3825, "total_loss": 0.01025390625 }, { "epoch": 1.56, "learning_rate": 0.0001881671391477177, "lm_loss": 0.0064697265625, "loss": 0.0071, "step": 3826, "total_loss": 0.0064697265625 }, { "epoch": 1.56, "learning_rate": 0.00018816107789631768, "lm_loss": 0.01806640625, "loss": 0.0087, "step": 3827, "total_loss": 0.01806640625 }, { "epoch": 1.57, "learning_rate": 0.00018815501519058508, "lm_loss": 0.0076904296875, "loss": 0.0079, "step": 3828, "total_loss": 0.0076904296875 }, { "epoch": 1.57, "learning_rate": 0.00018814895103061994, "lm_loss": 0.0078125, "loss": 0.0076, "step": 3829, "total_loss": 0.0078125 }, { "epoch": 1.57, "learning_rate": 0.00018814288541652227, "lm_loss": 0.006927490234375, "loss": 0.0075, "step": 3830, "total_loss": 0.006927490234375 }, { "epoch": 1.57, "learning_rate": 0.00018813681834839216, "lm_loss": 0.0128173828125, "loss": 0.0074, "step": 3831, "total_loss": 0.0128173828125 }, { "epoch": 1.57, "learning_rate": 0.00018813074982632966, "lm_loss": 0.008056640625, "loss": 0.006, "step": 3832, "total_loss": 0.008056640625 }, { "epoch": 1.57, "learning_rate": 0.0001881246798504349, "lm_loss": 0.005096435546875, "loss": 0.008, "step": 3833, "total_loss": 0.005096435546875 }, { "epoch": 1.57, "learning_rate": 0.00018811860842080796, "lm_loss": 0.004180908203125, "loss": 0.006, "step": 3834, "total_loss": 0.004180908203125 }, { "epoch": 1.57, "learning_rate": 0.00018811253553754908, "lm_loss": 0.005767822265625, "loss": 0.0089, "step": 3835, "total_loss": 0.005767822265625 }, { "epoch": 1.57, "learning_rate": 0.00018810646120075836, "lm_loss": 0.006805419921875, "loss": 0.0067, "step": 3836, "total_loss": 0.006805419921875 }, { "epoch": 1.57, "learning_rate": 0.00018810038541053608, "lm_loss": 0.0032501220703125, "loss": 0.0078, "step": 3837, "total_loss": 0.0032501220703125 }, { "epoch": 1.57, "learning_rate": 0.00018809430816698242, "lm_loss": 0.0106201171875, "loss": 0.0067, "step": 3838, "total_loss": 0.0106201171875 }, { "epoch": 1.57, "learning_rate": 0.00018808822947019764, "lm_loss": 0.006744384765625, "loss": 0.0093, "step": 3839, "total_loss": 0.006744384765625 }, { "epoch": 1.57, "learning_rate": 0.000188082149320282, "lm_loss": 0.00177001953125, "loss": 0.007, "step": 3840, "total_loss": 0.00177001953125 }, { "epoch": 1.57, "learning_rate": 0.00018807606771733583, "lm_loss": 0.00408935546875, "loss": 0.0092, "step": 3841, "total_loss": 0.00408935546875 }, { "epoch": 1.57, "learning_rate": 0.00018806998466145943, "lm_loss": 0.004150390625, "loss": 0.0053, "step": 3842, "total_loss": 0.004150390625 }, { "epoch": 1.57, "learning_rate": 0.0001880639001527532, "lm_loss": 0.006988525390625, "loss": 0.0105, "step": 3843, "total_loss": 0.006988525390625 }, { "epoch": 1.57, "learning_rate": 0.00018805781419131744, "lm_loss": 0.006805419921875, "loss": 0.0071, "step": 3844, "total_loss": 0.006805419921875 }, { "epoch": 1.57, "learning_rate": 0.00018805172677725262, "lm_loss": 0.005645751953125, "loss": 0.0081, "step": 3845, "total_loss": 0.005645751953125 }, { "epoch": 1.57, "learning_rate": 0.00018804563791065908, "lm_loss": 0.0059814453125, "loss": 0.009, "step": 3846, "total_loss": 0.0059814453125 }, { "epoch": 1.57, "learning_rate": 0.0001880395475916373, "lm_loss": 0.00244140625, "loss": 0.0082, "step": 3847, "total_loss": 0.00244140625 }, { "epoch": 1.57, "learning_rate": 0.00018803345582028779, "lm_loss": 0.00131988525390625, "loss": 0.0062, "step": 3848, "total_loss": 0.00131988525390625 }, { "epoch": 1.57, "learning_rate": 0.00018802736259671096, "lm_loss": 0.00531005859375, "loss": 0.0077, "step": 3849, "total_loss": 0.00531005859375 }, { "epoch": 1.57, "learning_rate": 0.0001880212679210074, "lm_loss": 0.008056640625, "loss": 0.0069, "step": 3850, "total_loss": 0.008056640625 }, { "epoch": 1.57, "learning_rate": 0.00018801517179327756, "lm_loss": 0.0031585693359375, "loss": 0.0069, "step": 3851, "total_loss": 0.0031585693359375 }, { "epoch": 1.57, "learning_rate": 0.0001880090742136221, "lm_loss": 0.01104736328125, "loss": 0.0063, "step": 3852, "total_loss": 0.01104736328125 }, { "epoch": 1.58, "learning_rate": 0.00018800297518214157, "lm_loss": 0.009033203125, "loss": 0.0083, "step": 3853, "total_loss": 0.009033203125 }, { "epoch": 1.58, "learning_rate": 0.00018799687469893658, "lm_loss": 0.00457763671875, "loss": 0.0068, "step": 3854, "total_loss": 0.00457763671875 }, { "epoch": 1.58, "learning_rate": 0.00018799077276410777, "lm_loss": 0.00726318359375, "loss": 0.0058, "step": 3855, "total_loss": 0.00726318359375 }, { "epoch": 1.58, "learning_rate": 0.0001879846693777558, "lm_loss": 0.006927490234375, "loss": 0.0087, "step": 3856, "total_loss": 0.006927490234375 }, { "epoch": 1.58, "learning_rate": 0.00018797856453998133, "lm_loss": 0.006011962890625, "loss": 0.0077, "step": 3857, "total_loss": 0.006011962890625 }, { "epoch": 1.58, "learning_rate": 0.00018797245825088508, "lm_loss": 0.0031280517578125, "loss": 0.0075, "step": 3858, "total_loss": 0.0031280517578125 }, { "epoch": 1.58, "learning_rate": 0.00018796635051056778, "lm_loss": 0.0037841796875, "loss": 0.0075, "step": 3859, "total_loss": 0.0037841796875 }, { "epoch": 1.58, "learning_rate": 0.00018796024131913024, "lm_loss": 0.00726318359375, "loss": 0.0067, "step": 3860, "total_loss": 0.00726318359375 }, { "epoch": 1.58, "learning_rate": 0.00018795413067667315, "lm_loss": 0.0126953125, "loss": 0.0081, "step": 3861, "total_loss": 0.0126953125 }, { "epoch": 1.58, "learning_rate": 0.00018794801858329735, "lm_loss": 0.003082275390625, "loss": 0.0078, "step": 3862, "total_loss": 0.003082275390625 }, { "epoch": 1.58, "learning_rate": 0.0001879419050391037, "lm_loss": 0.01251220703125, "loss": 0.0085, "step": 3863, "total_loss": 0.01251220703125 }, { "epoch": 1.58, "learning_rate": 0.00018793579004419299, "lm_loss": 0.0030975341796875, "loss": 0.0052, "step": 3864, "total_loss": 0.0030975341796875 }, { "epoch": 1.58, "learning_rate": 0.00018792967359866617, "lm_loss": 0.0074462890625, "loss": 0.0074, "step": 3865, "total_loss": 0.0074462890625 }, { "epoch": 1.58, "learning_rate": 0.00018792355570262405, "lm_loss": 0.0068359375, "loss": 0.0072, "step": 3866, "total_loss": 0.0068359375 }, { "epoch": 1.58, "learning_rate": 0.00018791743635616763, "lm_loss": 0.0037994384765625, "loss": 0.008, "step": 3867, "total_loss": 0.0037994384765625 }, { "epoch": 1.58, "learning_rate": 0.0001879113155593978, "lm_loss": 0.006988525390625, "loss": 0.0081, "step": 3868, "total_loss": 0.006988525390625 }, { "epoch": 1.58, "learning_rate": 0.0001879051933124156, "lm_loss": 0.012939453125, "loss": 0.009, "step": 3869, "total_loss": 0.012939453125 }, { "epoch": 1.58, "learning_rate": 0.00018789906961532191, "lm_loss": 0.006500244140625, "loss": 0.0063, "step": 3870, "total_loss": 0.006500244140625 }, { "epoch": 1.58, "learning_rate": 0.00018789294446821785, "lm_loss": 0.0162353515625, "loss": 0.0066, "step": 3871, "total_loss": 0.0162353515625 }, { "epoch": 1.58, "learning_rate": 0.00018788681787120445, "lm_loss": 0.004364013671875, "loss": 0.0077, "step": 3872, "total_loss": 0.004364013671875 }, { "epoch": 1.58, "learning_rate": 0.00018788068982438275, "lm_loss": 0.0130615234375, "loss": 0.0078, "step": 3873, "total_loss": 0.0130615234375 }, { "epoch": 1.58, "learning_rate": 0.00018787456032785384, "lm_loss": 0.0038909912109375, "loss": 0.0078, "step": 3874, "total_loss": 0.0038909912109375 }, { "epoch": 1.58, "learning_rate": 0.00018786842938171884, "lm_loss": 0.005615234375, "loss": 0.0072, "step": 3875, "total_loss": 0.005615234375 }, { "epoch": 1.58, "learning_rate": 0.00018786229698607892, "lm_loss": 0.0052490234375, "loss": 0.0055, "step": 3876, "total_loss": 0.0052490234375 }, { "epoch": 1.59, "learning_rate": 0.00018785616314103516, "lm_loss": 0.0098876953125, "loss": 0.0077, "step": 3877, "total_loss": 0.0098876953125 }, { "epoch": 1.59, "learning_rate": 0.00018785002784668883, "lm_loss": 0.00677490234375, "loss": 0.0078, "step": 3878, "total_loss": 0.00677490234375 }, { "epoch": 1.59, "learning_rate": 0.0001878438911031411, "lm_loss": 0.0093994140625, "loss": 0.0077, "step": 3879, "total_loss": 0.0093994140625 }, { "epoch": 1.59, "learning_rate": 0.0001878377529104932, "lm_loss": 0.003448486328125, "loss": 0.0075, "step": 3880, "total_loss": 0.003448486328125 }, { "epoch": 1.59, "learning_rate": 0.00018783161326884642, "lm_loss": 0.006011962890625, "loss": 0.0078, "step": 3881, "total_loss": 0.006011962890625 }, { "epoch": 1.59, "learning_rate": 0.000187825472178302, "lm_loss": 0.00982666015625, "loss": 0.0074, "step": 3882, "total_loss": 0.00982666015625 }, { "epoch": 1.59, "learning_rate": 0.00018781932963896127, "lm_loss": 0.004547119140625, "loss": 0.0068, "step": 3883, "total_loss": 0.004547119140625 }, { "epoch": 1.59, "learning_rate": 0.00018781318565092557, "lm_loss": 0.0057373046875, "loss": 0.0085, "step": 3884, "total_loss": 0.0057373046875 }, { "epoch": 1.59, "learning_rate": 0.0001878070402142962, "lm_loss": 0.004547119140625, "loss": 0.0077, "step": 3885, "total_loss": 0.004547119140625 }, { "epoch": 1.59, "learning_rate": 0.00018780089332917462, "lm_loss": 0.004669189453125, "loss": 0.0063, "step": 3886, "total_loss": 0.004669189453125 }, { "epoch": 1.59, "learning_rate": 0.00018779474499566214, "lm_loss": 0.00823974609375, "loss": 0.008, "step": 3887, "total_loss": 0.00823974609375 }, { "epoch": 1.59, "learning_rate": 0.00018778859521386025, "lm_loss": 0.00506591796875, "loss": 0.0063, "step": 3888, "total_loss": 0.00506591796875 }, { "epoch": 1.59, "learning_rate": 0.00018778244398387037, "lm_loss": 0.0038299560546875, "loss": 0.0058, "step": 3889, "total_loss": 0.0038299560546875 }, { "epoch": 1.59, "learning_rate": 0.000187776291305794, "lm_loss": 0.006591796875, "loss": 0.0112, "step": 3890, "total_loss": 0.006591796875 }, { "epoch": 1.59, "learning_rate": 0.0001877701371797326, "lm_loss": 0.00555419921875, "loss": 0.0079, "step": 3891, "total_loss": 0.00555419921875 }, { "epoch": 1.59, "learning_rate": 0.00018776398160578773, "lm_loss": 0.005035400390625, "loss": 0.0074, "step": 3892, "total_loss": 0.005035400390625 }, { "epoch": 1.59, "learning_rate": 0.0001877578245840609, "lm_loss": 0.0031280517578125, "loss": 0.0064, "step": 3893, "total_loss": 0.0031280517578125 }, { "epoch": 1.59, "learning_rate": 0.0001877516661146537, "lm_loss": 0.0142822265625, "loss": 0.0073, "step": 3894, "total_loss": 0.0142822265625 }, { "epoch": 1.59, "learning_rate": 0.0001877455061976677, "lm_loss": 0.01544189453125, "loss": 0.007, "step": 3895, "total_loss": 0.01544189453125 }, { "epoch": 1.59, "learning_rate": 0.00018773934483320452, "lm_loss": 0.00096893310546875, "loss": 0.0061, "step": 3896, "total_loss": 0.00096893310546875 }, { "epoch": 1.59, "learning_rate": 0.00018773318202136582, "lm_loss": 0.004119873046875, "loss": 0.0092, "step": 3897, "total_loss": 0.004119873046875 }, { "epoch": 1.59, "learning_rate": 0.00018772701776225326, "lm_loss": 0.007354736328125, "loss": 0.0085, "step": 3898, "total_loss": 0.007354736328125 }, { "epoch": 1.59, "learning_rate": 0.0001877208520559685, "lm_loss": 0.0091552734375, "loss": 0.0068, "step": 3899, "total_loss": 0.0091552734375 }, { "epoch": 1.59, "learning_rate": 0.0001877146849026133, "lm_loss": 0.00531005859375, "loss": 0.0067, "step": 3900, "total_loss": 0.00531005859375 }, { "epoch": 1.59, "eval_lm_loss": 0.009079032577574253, "eval_loss": 0.009434985928237438, "eval_runtime": 43.9242, "eval_samples_per_second": 22.766, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009079032577574253, "lm_loss": 0.00119781494140625, "step": 3900, "total_loss": 0.00119781494140625 }, { "epoch": 1.59, "learning_rate": 0.0001877085163022893, "lm_loss": 0.0087890625, "loss": 0.0084, "step": 3901, "total_loss": 0.0087890625 }, { "epoch": 1.6, "learning_rate": 0.0001877023462550984, "lm_loss": 0.006866455078125, "loss": 0.0059, "step": 3902, "total_loss": 0.006866455078125 }, { "epoch": 1.6, "learning_rate": 0.00018769617476114227, "lm_loss": 0.0087890625, "loss": 0.0089, "step": 3903, "total_loss": 0.0087890625 }, { "epoch": 1.6, "learning_rate": 0.00018769000182052278, "lm_loss": 0.003509521484375, "loss": 0.0068, "step": 3904, "total_loss": 0.003509521484375 }, { "epoch": 1.6, "learning_rate": 0.00018768382743334172, "lm_loss": 0.00848388671875, "loss": 0.0072, "step": 3905, "total_loss": 0.00848388671875 }, { "epoch": 1.6, "learning_rate": 0.00018767765159970095, "lm_loss": 0.01318359375, "loss": 0.0075, "step": 3906, "total_loss": 0.01318359375 }, { "epoch": 1.6, "learning_rate": 0.00018767147431970237, "lm_loss": 0.00909423828125, "loss": 0.0073, "step": 3907, "total_loss": 0.00909423828125 }, { "epoch": 1.6, "learning_rate": 0.00018766529559344788, "lm_loss": 0.0059814453125, "loss": 0.0072, "step": 3908, "total_loss": 0.0059814453125 }, { "epoch": 1.6, "learning_rate": 0.00018765911542103938, "lm_loss": 0.004486083984375, "loss": 0.0059, "step": 3909, "total_loss": 0.004486083984375 }, { "epoch": 1.6, "learning_rate": 0.00018765293380257884, "lm_loss": 0.01116943359375, "loss": 0.0073, "step": 3910, "total_loss": 0.01116943359375 }, { "epoch": 1.6, "learning_rate": 0.00018764675073816824, "lm_loss": 0.0147705078125, "loss": 0.007, "step": 3911, "total_loss": 0.0147705078125 }, { "epoch": 1.6, "learning_rate": 0.00018764056622790957, "lm_loss": 0.003875732421875, "loss": 0.0081, "step": 3912, "total_loss": 0.003875732421875 }, { "epoch": 1.6, "learning_rate": 0.00018763438027190484, "lm_loss": 0.00811767578125, "loss": 0.0082, "step": 3913, "total_loss": 0.00811767578125 }, { "epoch": 1.6, "learning_rate": 0.00018762819287025613, "lm_loss": 0.007080078125, "loss": 0.0088, "step": 3914, "total_loss": 0.007080078125 }, { "epoch": 1.6, "learning_rate": 0.00018762200402306546, "lm_loss": 0.0020751953125, "loss": 0.0063, "step": 3915, "total_loss": 0.0020751953125 }, { "epoch": 1.6, "learning_rate": 0.00018761581373043495, "lm_loss": 0.0032958984375, "loss": 0.0079, "step": 3916, "total_loss": 0.0032958984375 }, { "epoch": 1.6, "learning_rate": 0.00018760962199246674, "lm_loss": 0.0091552734375, "loss": 0.0082, "step": 3917, "total_loss": 0.0091552734375 }, { "epoch": 1.6, "learning_rate": 0.00018760342880926295, "lm_loss": 0.0030517578125, "loss": 0.0072, "step": 3918, "total_loss": 0.0030517578125 }, { "epoch": 1.6, "learning_rate": 0.0001875972341809257, "lm_loss": 0.0052490234375, "loss": 0.0094, "step": 3919, "total_loss": 0.0052490234375 }, { "epoch": 1.6, "learning_rate": 0.00018759103810755726, "lm_loss": 0.00970458984375, "loss": 0.0075, "step": 3920, "total_loss": 0.00970458984375 }, { "epoch": 1.6, "learning_rate": 0.0001875848405892598, "lm_loss": 0.0093994140625, "loss": 0.0074, "step": 3921, "total_loss": 0.0093994140625 }, { "epoch": 1.6, "learning_rate": 0.0001875786416261355, "lm_loss": 0.0048828125, "loss": 0.0051, "step": 3922, "total_loss": 0.0048828125 }, { "epoch": 1.6, "learning_rate": 0.00018757244121828673, "lm_loss": 0.01300048828125, "loss": 0.0083, "step": 3923, "total_loss": 0.01300048828125 }, { "epoch": 1.6, "learning_rate": 0.00018756623936581573, "lm_loss": 0.00537109375, "loss": 0.0085, "step": 3924, "total_loss": 0.00537109375 }, { "epoch": 1.6, "learning_rate": 0.00018756003606882477, "lm_loss": 0.01458740234375, "loss": 0.0097, "step": 3925, "total_loss": 0.01458740234375 }, { "epoch": 1.61, "learning_rate": 0.00018755383132741622, "lm_loss": 0.0048828125, "loss": 0.0061, "step": 3926, "total_loss": 0.0048828125 }, { "epoch": 1.61, "learning_rate": 0.0001875476251416924, "lm_loss": 0.00360107421875, "loss": 0.007, "step": 3927, "total_loss": 0.00360107421875 }, { "epoch": 1.61, "learning_rate": 0.00018754141751175577, "lm_loss": 0.006134033203125, "loss": 0.0072, "step": 3928, "total_loss": 0.006134033203125 }, { "epoch": 1.61, "learning_rate": 0.00018753520843770867, "lm_loss": 0.0150146484375, "loss": 0.0069, "step": 3929, "total_loss": 0.0150146484375 }, { "epoch": 1.61, "learning_rate": 0.0001875289979196535, "lm_loss": 0.01214599609375, "loss": 0.0075, "step": 3930, "total_loss": 0.01214599609375 }, { "epoch": 1.61, "learning_rate": 0.00018752278595769278, "lm_loss": 0.0084228515625, "loss": 0.009, "step": 3931, "total_loss": 0.0084228515625 }, { "epoch": 1.61, "learning_rate": 0.00018751657255192895, "lm_loss": 0.0093994140625, "loss": 0.0091, "step": 3932, "total_loss": 0.0093994140625 }, { "epoch": 1.61, "learning_rate": 0.0001875103577024645, "lm_loss": 0.0079345703125, "loss": 0.0077, "step": 3933, "total_loss": 0.0079345703125 }, { "epoch": 1.61, "learning_rate": 0.00018750414140940197, "lm_loss": 0.00347900390625, "loss": 0.006, "step": 3934, "total_loss": 0.00347900390625 }, { "epoch": 1.61, "learning_rate": 0.00018749792367284387, "lm_loss": 0.005706787109375, "loss": 0.0097, "step": 3935, "total_loss": 0.005706787109375 }, { "epoch": 1.61, "learning_rate": 0.0001874917044928928, "lm_loss": 0.01123046875, "loss": 0.0076, "step": 3936, "total_loss": 0.01123046875 }, { "epoch": 1.61, "learning_rate": 0.00018748548386965136, "lm_loss": 0.003509521484375, "loss": 0.0057, "step": 3937, "total_loss": 0.003509521484375 }, { "epoch": 1.61, "learning_rate": 0.00018747926180322217, "lm_loss": 0.0133056640625, "loss": 0.0089, "step": 3938, "total_loss": 0.0133056640625 }, { "epoch": 1.61, "learning_rate": 0.00018747303829370787, "lm_loss": 0.007537841796875, "loss": 0.0072, "step": 3939, "total_loss": 0.007537841796875 }, { "epoch": 1.61, "learning_rate": 0.00018746681334121107, "lm_loss": 0.004852294921875, "loss": 0.0102, "step": 3940, "total_loss": 0.004852294921875 }, { "epoch": 1.61, "learning_rate": 0.00018746058694583452, "lm_loss": 0.0079345703125, "loss": 0.0077, "step": 3941, "total_loss": 0.0079345703125 }, { "epoch": 1.61, "learning_rate": 0.00018745435910768094, "lm_loss": 0.00299072265625, "loss": 0.0095, "step": 3942, "total_loss": 0.00299072265625 }, { "epoch": 1.61, "learning_rate": 0.00018744812982685302, "lm_loss": 0.004547119140625, "loss": 0.008, "step": 3943, "total_loss": 0.004547119140625 }, { "epoch": 1.61, "learning_rate": 0.00018744189910345352, "lm_loss": 0.00665283203125, "loss": 0.0082, "step": 3944, "total_loss": 0.00665283203125 }, { "epoch": 1.61, "learning_rate": 0.00018743566693758527, "lm_loss": 0.003997802734375, "loss": 0.0076, "step": 3945, "total_loss": 0.003997802734375 }, { "epoch": 1.61, "learning_rate": 0.00018742943332935105, "lm_loss": 0.01007080078125, "loss": 0.0083, "step": 3946, "total_loss": 0.01007080078125 }, { "epoch": 1.61, "learning_rate": 0.00018742319827885371, "lm_loss": 0.005340576171875, "loss": 0.0068, "step": 3947, "total_loss": 0.005340576171875 }, { "epoch": 1.61, "learning_rate": 0.00018741696178619608, "lm_loss": 0.007476806640625, "loss": 0.0057, "step": 3948, "total_loss": 0.007476806640625 }, { "epoch": 1.61, "learning_rate": 0.00018741072385148103, "lm_loss": 0.0079345703125, "loss": 0.0071, "step": 3949, "total_loss": 0.0079345703125 }, { "epoch": 1.61, "learning_rate": 0.00018740448447481152, "lm_loss": 0.005279541015625, "loss": 0.007, "step": 3950, "total_loss": 0.005279541015625 }, { "epoch": 1.62, "learning_rate": 0.0001873982436562904, "lm_loss": 0.00154876708984375, "loss": 0.0079, "step": 3951, "total_loss": 0.00154876708984375 }, { "epoch": 1.62, "learning_rate": 0.00018739200139602068, "lm_loss": 0.005279541015625, "loss": 0.0085, "step": 3952, "total_loss": 0.005279541015625 }, { "epoch": 1.62, "learning_rate": 0.0001873857576941053, "lm_loss": 0.00640869140625, "loss": 0.0068, "step": 3953, "total_loss": 0.00640869140625 }, { "epoch": 1.62, "learning_rate": 0.0001873795125506473, "lm_loss": 0.0031890869140625, "loss": 0.0061, "step": 3954, "total_loss": 0.0031890869140625 }, { "epoch": 1.62, "learning_rate": 0.00018737326596574965, "lm_loss": 0.0023651123046875, "loss": 0.0068, "step": 3955, "total_loss": 0.0023651123046875 }, { "epoch": 1.62, "learning_rate": 0.0001873670179395154, "lm_loss": 0.00927734375, "loss": 0.0065, "step": 3956, "total_loss": 0.00927734375 }, { "epoch": 1.62, "learning_rate": 0.00018736076847204768, "lm_loss": 0.006317138671875, "loss": 0.0091, "step": 3957, "total_loss": 0.006317138671875 }, { "epoch": 1.62, "learning_rate": 0.00018735451756344955, "lm_loss": 0.00823974609375, "loss": 0.0093, "step": 3958, "total_loss": 0.00823974609375 }, { "epoch": 1.62, "learning_rate": 0.00018734826521382407, "lm_loss": 0.0091552734375, "loss": 0.0084, "step": 3959, "total_loss": 0.0091552734375 }, { "epoch": 1.62, "learning_rate": 0.00018734201142327445, "lm_loss": 0.0067138671875, "loss": 0.0066, "step": 3960, "total_loss": 0.0067138671875 }, { "epoch": 1.62, "learning_rate": 0.00018733575619190383, "lm_loss": 0.005218505859375, "loss": 0.0083, "step": 3961, "total_loss": 0.005218505859375 }, { "epoch": 1.62, "learning_rate": 0.00018732949951981543, "lm_loss": 0.0133056640625, "loss": 0.0084, "step": 3962, "total_loss": 0.0133056640625 }, { "epoch": 1.62, "learning_rate": 0.00018732324140711238, "lm_loss": 0.004730224609375, "loss": 0.0062, "step": 3963, "total_loss": 0.004730224609375 }, { "epoch": 1.62, "learning_rate": 0.000187316981853898, "lm_loss": 0.004638671875, "loss": 0.0067, "step": 3964, "total_loss": 0.004638671875 }, { "epoch": 1.62, "learning_rate": 0.00018731072086027555, "lm_loss": 0.00787353515625, "loss": 0.0071, "step": 3965, "total_loss": 0.00787353515625 }, { "epoch": 1.62, "learning_rate": 0.00018730445842634824, "lm_loss": 0.00433349609375, "loss": 0.008, "step": 3966, "total_loss": 0.00433349609375 }, { "epoch": 1.62, "learning_rate": 0.00018729819455221944, "lm_loss": 0.002685546875, "loss": 0.0063, "step": 3967, "total_loss": 0.002685546875 }, { "epoch": 1.62, "learning_rate": 0.00018729192923799245, "lm_loss": 0.0146484375, "loss": 0.0074, "step": 3968, "total_loss": 0.0146484375 }, { "epoch": 1.62, "learning_rate": 0.00018728566248377065, "lm_loss": 0.00787353515625, "loss": 0.0078, "step": 3969, "total_loss": 0.00787353515625 }, { "epoch": 1.62, "learning_rate": 0.0001872793942896574, "lm_loss": 0.00439453125, "loss": 0.0058, "step": 3970, "total_loss": 0.00439453125 }, { "epoch": 1.62, "learning_rate": 0.00018727312465575608, "lm_loss": 0.0101318359375, "loss": 0.0067, "step": 3971, "total_loss": 0.0101318359375 }, { "epoch": 1.62, "learning_rate": 0.00018726685358217018, "lm_loss": 0.006134033203125, "loss": 0.0069, "step": 3972, "total_loss": 0.006134033203125 }, { "epoch": 1.62, "learning_rate": 0.00018726058106900307, "lm_loss": 0.006439208984375, "loss": 0.0072, "step": 3973, "total_loss": 0.006439208984375 }, { "epoch": 1.62, "learning_rate": 0.00018725430711635828, "lm_loss": 0.008544921875, "loss": 0.0075, "step": 3974, "total_loss": 0.008544921875 }, { "epoch": 1.63, "learning_rate": 0.0001872480317243393, "lm_loss": 0.00830078125, "loss": 0.0078, "step": 3975, "total_loss": 0.00830078125 }, { "epoch": 1.63, "learning_rate": 0.00018724175489304962, "lm_loss": 0.00921630859375, "loss": 0.0064, "step": 3976, "total_loss": 0.00921630859375 }, { "epoch": 1.63, "learning_rate": 0.00018723547662259285, "lm_loss": 0.01007080078125, "loss": 0.0069, "step": 3977, "total_loss": 0.01007080078125 }, { "epoch": 1.63, "learning_rate": 0.00018722919691307248, "lm_loss": 0.006744384765625, "loss": 0.0068, "step": 3978, "total_loss": 0.006744384765625 }, { "epoch": 1.63, "learning_rate": 0.00018722291576459216, "lm_loss": 0.00555419921875, "loss": 0.0081, "step": 3979, "total_loss": 0.00555419921875 }, { "epoch": 1.63, "learning_rate": 0.00018721663317725545, "lm_loss": 0.00970458984375, "loss": 0.0082, "step": 3980, "total_loss": 0.00970458984375 }, { "epoch": 1.63, "learning_rate": 0.00018721034915116603, "lm_loss": 0.0057373046875, "loss": 0.0077, "step": 3981, "total_loss": 0.0057373046875 }, { "epoch": 1.63, "learning_rate": 0.00018720406368642758, "lm_loss": 0.00970458984375, "loss": 0.0071, "step": 3982, "total_loss": 0.00970458984375 }, { "epoch": 1.63, "learning_rate": 0.00018719777678314375, "lm_loss": 0.00946044921875, "loss": 0.0096, "step": 3983, "total_loss": 0.00946044921875 }, { "epoch": 1.63, "learning_rate": 0.00018719148844141827, "lm_loss": 0.0050048828125, "loss": 0.0072, "step": 3984, "total_loss": 0.0050048828125 }, { "epoch": 1.63, "learning_rate": 0.00018718519866135487, "lm_loss": 0.00848388671875, "loss": 0.0077, "step": 3985, "total_loss": 0.00848388671875 }, { "epoch": 1.63, "learning_rate": 0.0001871789074430573, "lm_loss": 0.0022125244140625, "loss": 0.0067, "step": 3986, "total_loss": 0.0022125244140625 }, { "epoch": 1.63, "learning_rate": 0.00018717261478662934, "lm_loss": 0.0174560546875, "loss": 0.0084, "step": 3987, "total_loss": 0.0174560546875 }, { "epoch": 1.63, "learning_rate": 0.0001871663206921748, "lm_loss": 0.0067138671875, "loss": 0.0078, "step": 3988, "total_loss": 0.0067138671875 }, { "epoch": 1.63, "learning_rate": 0.00018716002515979755, "lm_loss": 0.00445556640625, "loss": 0.0071, "step": 3989, "total_loss": 0.00445556640625 }, { "epoch": 1.63, "learning_rate": 0.00018715372818960135, "lm_loss": 0.01300048828125, "loss": 0.0076, "step": 3990, "total_loss": 0.01300048828125 }, { "epoch": 1.63, "learning_rate": 0.00018714742978169017, "lm_loss": 0.009033203125, "loss": 0.0083, "step": 3991, "total_loss": 0.009033203125 }, { "epoch": 1.63, "learning_rate": 0.00018714112993616783, "lm_loss": 0.0081787109375, "loss": 0.0069, "step": 3992, "total_loss": 0.0081787109375 }, { "epoch": 1.63, "learning_rate": 0.00018713482865313834, "lm_loss": 0.00860595703125, "loss": 0.0084, "step": 3993, "total_loss": 0.00860595703125 }, { "epoch": 1.63, "learning_rate": 0.0001871285259327056, "lm_loss": 0.00994873046875, "loss": 0.0072, "step": 3994, "total_loss": 0.00994873046875 }, { "epoch": 1.63, "learning_rate": 0.0001871222217749736, "lm_loss": 0.00372314453125, "loss": 0.006, "step": 3995, "total_loss": 0.00372314453125 }, { "epoch": 1.63, "learning_rate": 0.00018711591618004628, "lm_loss": 0.0125732421875, "loss": 0.007, "step": 3996, "total_loss": 0.0125732421875 }, { "epoch": 1.63, "learning_rate": 0.00018710960914802773, "lm_loss": 0.01104736328125, "loss": 0.008, "step": 3997, "total_loss": 0.01104736328125 }, { "epoch": 1.63, "learning_rate": 0.00018710330067902195, "lm_loss": 0.0024261474609375, "loss": 0.0084, "step": 3998, "total_loss": 0.0024261474609375 }, { "epoch": 1.63, "learning_rate": 0.00018709699077313304, "lm_loss": 0.0103759765625, "loss": 0.0074, "step": 3999, "total_loss": 0.0103759765625 }, { "epoch": 1.64, "learning_rate": 0.00018709067943046507, "lm_loss": 0.00628662109375, "loss": 0.0075, "step": 4000, "total_loss": 0.00628662109375 }, { "epoch": 1.64, "eval_lm_loss": 0.009657489135861397, "eval_loss": 0.010090351104736328, "eval_runtime": 44.1481, "eval_samples_per_second": 22.651, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009657489135861397, "lm_loss": 0.00113677978515625, "step": 4000, "total_loss": 0.00113677978515625 }, { "epoch": 1.64, "learning_rate": 0.00018708436665112213, "lm_loss": 0.00335693359375, "loss": 0.0064, "step": 4001, "total_loss": 0.00335693359375 }, { "epoch": 1.64, "learning_rate": 0.00018707805243520838, "lm_loss": 0.00994873046875, "loss": 0.0082, "step": 4002, "total_loss": 0.00994873046875 }, { "epoch": 1.64, "learning_rate": 0.00018707173678282802, "lm_loss": 0.01141357421875, "loss": 0.0073, "step": 4003, "total_loss": 0.01141357421875 }, { "epoch": 1.64, "learning_rate": 0.0001870654196940852, "lm_loss": 0.01080322265625, "loss": 0.007, "step": 4004, "total_loss": 0.01080322265625 }, { "epoch": 1.64, "learning_rate": 0.00018705910116908407, "lm_loss": 0.00445556640625, "loss": 0.0062, "step": 4005, "total_loss": 0.00445556640625 }, { "epoch": 1.64, "learning_rate": 0.00018705278120792896, "lm_loss": 0.00372314453125, "loss": 0.0069, "step": 4006, "total_loss": 0.00372314453125 }, { "epoch": 1.64, "learning_rate": 0.0001870464598107241, "lm_loss": 0.006500244140625, "loss": 0.0073, "step": 4007, "total_loss": 0.006500244140625 }, { "epoch": 1.64, "learning_rate": 0.0001870401369775737, "lm_loss": 0.00677490234375, "loss": 0.007, "step": 4008, "total_loss": 0.00677490234375 }, { "epoch": 1.64, "learning_rate": 0.00018703381270858218, "lm_loss": 0.0103759765625, "loss": 0.009, "step": 4009, "total_loss": 0.0103759765625 }, { "epoch": 1.64, "learning_rate": 0.00018702748700385376, "lm_loss": 0.006591796875, "loss": 0.0079, "step": 4010, "total_loss": 0.006591796875 }, { "epoch": 1.64, "learning_rate": 0.00018702115986349286, "lm_loss": 0.00836181640625, "loss": 0.0059, "step": 4011, "total_loss": 0.00836181640625 }, { "epoch": 1.64, "learning_rate": 0.00018701483128760383, "lm_loss": 0.00811767578125, "loss": 0.0083, "step": 4012, "total_loss": 0.00811767578125 }, { "epoch": 1.64, "learning_rate": 0.00018700850127629105, "lm_loss": 0.00482177734375, "loss": 0.0076, "step": 4013, "total_loss": 0.00482177734375 }, { "epoch": 1.64, "learning_rate": 0.00018700216982965899, "lm_loss": 0.004547119140625, "loss": 0.0071, "step": 4014, "total_loss": 0.004547119140625 }, { "epoch": 1.64, "learning_rate": 0.00018699583694781205, "lm_loss": 0.00872802734375, "loss": 0.0101, "step": 4015, "total_loss": 0.00872802734375 }, { "epoch": 1.64, "learning_rate": 0.0001869895026308547, "lm_loss": 0.0034027099609375, "loss": 0.0067, "step": 4016, "total_loss": 0.0034027099609375 }, { "epoch": 1.64, "learning_rate": 0.00018698316687889148, "lm_loss": 0.00787353515625, "loss": 0.0073, "step": 4017, "total_loss": 0.00787353515625 }, { "epoch": 1.64, "learning_rate": 0.00018697682969202685, "lm_loss": 0.00775146484375, "loss": 0.008, "step": 4018, "total_loss": 0.00775146484375 }, { "epoch": 1.64, "learning_rate": 0.0001869704910703654, "lm_loss": 0.00799560546875, "loss": 0.0069, "step": 4019, "total_loss": 0.00799560546875 }, { "epoch": 1.64, "learning_rate": 0.00018696415101401165, "lm_loss": 0.00787353515625, "loss": 0.0085, "step": 4020, "total_loss": 0.00787353515625 }, { "epoch": 1.64, "learning_rate": 0.00018695780952307023, "lm_loss": 0.00482177734375, "loss": 0.007, "step": 4021, "total_loss": 0.00482177734375 }, { "epoch": 1.64, "learning_rate": 0.00018695146659764567, "lm_loss": 0.002410888671875, "loss": 0.008, "step": 4022, "total_loss": 0.002410888671875 }, { "epoch": 1.64, "learning_rate": 0.00018694512223784273, "lm_loss": 0.0091552734375, "loss": 0.0076, "step": 4023, "total_loss": 0.0091552734375 }, { "epoch": 1.65, "learning_rate": 0.00018693877644376596, "lm_loss": 0.00390625, "loss": 0.0074, "step": 4024, "total_loss": 0.00390625 }, { "epoch": 1.65, "learning_rate": 0.00018693242921552012, "lm_loss": 0.005401611328125, "loss": 0.0082, "step": 4025, "total_loss": 0.005401611328125 }, { "epoch": 1.65, "learning_rate": 0.00018692608055320985, "lm_loss": 0.01007080078125, "loss": 0.009, "step": 4026, "total_loss": 0.01007080078125 }, { "epoch": 1.65, "learning_rate": 0.0001869197304569399, "lm_loss": 0.0048828125, "loss": 0.0074, "step": 4027, "total_loss": 0.0048828125 }, { "epoch": 1.65, "learning_rate": 0.00018691337892681506, "lm_loss": 0.01055908203125, "loss": 0.0087, "step": 4028, "total_loss": 0.01055908203125 }, { "epoch": 1.65, "learning_rate": 0.00018690702596294007, "lm_loss": 0.003326416015625, "loss": 0.0082, "step": 4029, "total_loss": 0.003326416015625 }, { "epoch": 1.65, "learning_rate": 0.00018690067156541975, "lm_loss": 0.00689697265625, "loss": 0.0075, "step": 4030, "total_loss": 0.00689697265625 }, { "epoch": 1.65, "learning_rate": 0.00018689431573435888, "lm_loss": 0.0078125, "loss": 0.0067, "step": 4031, "total_loss": 0.0078125 }, { "epoch": 1.65, "learning_rate": 0.00018688795846986234, "lm_loss": 0.0091552734375, "loss": 0.0088, "step": 4032, "total_loss": 0.0091552734375 }, { "epoch": 1.65, "learning_rate": 0.00018688159977203506, "lm_loss": 0.01080322265625, "loss": 0.009, "step": 4033, "total_loss": 0.01080322265625 }, { "epoch": 1.65, "learning_rate": 0.00018687523964098184, "lm_loss": 0.00701904296875, "loss": 0.0056, "step": 4034, "total_loss": 0.00701904296875 }, { "epoch": 1.65, "learning_rate": 0.0001868688780768076, "lm_loss": 0.01031494140625, "loss": 0.0079, "step": 4035, "total_loss": 0.01031494140625 }, { "epoch": 1.65, "learning_rate": 0.00018686251507961737, "lm_loss": 0.007293701171875, "loss": 0.0075, "step": 4036, "total_loss": 0.007293701171875 }, { "epoch": 1.65, "learning_rate": 0.00018685615064951608, "lm_loss": 0.01123046875, "loss": 0.0072, "step": 4037, "total_loss": 0.01123046875 }, { "epoch": 1.65, "learning_rate": 0.00018684978478660864, "lm_loss": 0.005157470703125, "loss": 0.0081, "step": 4038, "total_loss": 0.005157470703125 }, { "epoch": 1.65, "learning_rate": 0.00018684341749100017, "lm_loss": 0.005950927734375, "loss": 0.0088, "step": 4039, "total_loss": 0.005950927734375 }, { "epoch": 1.65, "learning_rate": 0.00018683704876279567, "lm_loss": 0.01251220703125, "loss": 0.0079, "step": 4040, "total_loss": 0.01251220703125 }, { "epoch": 1.65, "learning_rate": 0.00018683067860210018, "lm_loss": 0.005340576171875, "loss": 0.0066, "step": 4041, "total_loss": 0.005340576171875 }, { "epoch": 1.65, "learning_rate": 0.00018682430700901878, "lm_loss": 0.00262451171875, "loss": 0.0074, "step": 4042, "total_loss": 0.00262451171875 }, { "epoch": 1.65, "learning_rate": 0.00018681793398365662, "lm_loss": 0.006683349609375, "loss": 0.006, "step": 4043, "total_loss": 0.006683349609375 }, { "epoch": 1.65, "learning_rate": 0.00018681155952611882, "lm_loss": 0.01202392578125, "loss": 0.0076, "step": 4044, "total_loss": 0.01202392578125 }, { "epoch": 1.65, "learning_rate": 0.00018680518363651052, "lm_loss": 0.005584716796875, "loss": 0.0088, "step": 4045, "total_loss": 0.005584716796875 }, { "epoch": 1.65, "learning_rate": 0.0001867988063149369, "lm_loss": 0.004180908203125, "loss": 0.0055, "step": 4046, "total_loss": 0.004180908203125 }, { "epoch": 1.65, "learning_rate": 0.00018679242756150313, "lm_loss": 0.007476806640625, "loss": 0.0062, "step": 4047, "total_loss": 0.007476806640625 }, { "epoch": 1.65, "learning_rate": 0.0001867860473763145, "lm_loss": 0.01068115234375, "loss": 0.0092, "step": 4048, "total_loss": 0.01068115234375 }, { "epoch": 1.66, "learning_rate": 0.0001867796657594762, "lm_loss": 0.01123046875, "loss": 0.0078, "step": 4049, "total_loss": 0.01123046875 }, { "epoch": 1.66, "learning_rate": 0.00018677328271109357, "lm_loss": 0.00830078125, "loss": 0.0093, "step": 4050, "total_loss": 0.00830078125 }, { "epoch": 1.66, "learning_rate": 0.00018676689823127185, "lm_loss": 0.0098876953125, "loss": 0.0077, "step": 4051, "total_loss": 0.0098876953125 }, { "epoch": 1.66, "learning_rate": 0.0001867605123201164, "lm_loss": 0.0038909912109375, "loss": 0.0074, "step": 4052, "total_loss": 0.0038909912109375 }, { "epoch": 1.66, "learning_rate": 0.00018675412497773253, "lm_loss": 0.005157470703125, "loss": 0.0076, "step": 4053, "total_loss": 0.005157470703125 }, { "epoch": 1.66, "learning_rate": 0.00018674773620422564, "lm_loss": 0.0078125, "loss": 0.0061, "step": 4054, "total_loss": 0.0078125 }, { "epoch": 1.66, "learning_rate": 0.0001867413459997011, "lm_loss": 0.009521484375, "loss": 0.0092, "step": 4055, "total_loss": 0.009521484375 }, { "epoch": 1.66, "learning_rate": 0.0001867349543642643, "lm_loss": 0.0038909912109375, "loss": 0.0074, "step": 4056, "total_loss": 0.0038909912109375 }, { "epoch": 1.66, "learning_rate": 0.0001867285612980207, "lm_loss": 0.01324462890625, "loss": 0.0066, "step": 4057, "total_loss": 0.01324462890625 }, { "epoch": 1.66, "learning_rate": 0.00018672216680107578, "lm_loss": 0.00726318359375, "loss": 0.0077, "step": 4058, "total_loss": 0.00726318359375 }, { "epoch": 1.66, "learning_rate": 0.000186715770873535, "lm_loss": 0.0111083984375, "loss": 0.0093, "step": 4059, "total_loss": 0.0111083984375 }, { "epoch": 1.66, "learning_rate": 0.00018670937351550392, "lm_loss": 0.00830078125, "loss": 0.0075, "step": 4060, "total_loss": 0.00830078125 }, { "epoch": 1.66, "learning_rate": 0.000186702974727088, "lm_loss": 0.004669189453125, "loss": 0.0075, "step": 4061, "total_loss": 0.004669189453125 }, { "epoch": 1.66, "learning_rate": 0.00018669657450839287, "lm_loss": 0.00494384765625, "loss": 0.0069, "step": 4062, "total_loss": 0.00494384765625 }, { "epoch": 1.66, "learning_rate": 0.00018669017285952405, "lm_loss": 0.0084228515625, "loss": 0.0065, "step": 4063, "total_loss": 0.0084228515625 }, { "epoch": 1.66, "learning_rate": 0.00018668376978058716, "lm_loss": 0.0018768310546875, "loss": 0.0056, "step": 4064, "total_loss": 0.0018768310546875 }, { "epoch": 1.66, "learning_rate": 0.00018667736527168784, "lm_loss": 0.006103515625, "loss": 0.006, "step": 4065, "total_loss": 0.006103515625 }, { "epoch": 1.66, "learning_rate": 0.00018667095933293175, "lm_loss": 0.005645751953125, "loss": 0.0073, "step": 4066, "total_loss": 0.005645751953125 }, { "epoch": 1.66, "learning_rate": 0.0001866645519644245, "lm_loss": 0.0050048828125, "loss": 0.0062, "step": 4067, "total_loss": 0.0050048828125 }, { "epoch": 1.66, "learning_rate": 0.00018665814316627192, "lm_loss": 0.00567626953125, "loss": 0.0078, "step": 4068, "total_loss": 0.00567626953125 }, { "epoch": 1.66, "learning_rate": 0.00018665173293857956, "lm_loss": 0.0033416748046875, "loss": 0.0075, "step": 4069, "total_loss": 0.0033416748046875 }, { "epoch": 1.66, "learning_rate": 0.00018664532128145328, "lm_loss": 0.004364013671875, "loss": 0.0082, "step": 4070, "total_loss": 0.004364013671875 }, { "epoch": 1.66, "learning_rate": 0.00018663890819499884, "lm_loss": 0.01177978515625, "loss": 0.0085, "step": 4071, "total_loss": 0.01177978515625 }, { "epoch": 1.66, "learning_rate": 0.000186632493679322, "lm_loss": 0.0098876953125, "loss": 0.0076, "step": 4072, "total_loss": 0.0098876953125 }, { "epoch": 1.67, "learning_rate": 0.0001866260777345286, "lm_loss": 0.00860595703125, "loss": 0.0077, "step": 4073, "total_loss": 0.00860595703125 }, { "epoch": 1.67, "learning_rate": 0.00018661966036072445, "lm_loss": 0.012939453125, "loss": 0.0066, "step": 4074, "total_loss": 0.012939453125 }, { "epoch": 1.67, "learning_rate": 0.00018661324155801542, "lm_loss": 0.0091552734375, "loss": 0.0094, "step": 4075, "total_loss": 0.0091552734375 }, { "epoch": 1.67, "learning_rate": 0.00018660682132650745, "lm_loss": 0.0086669921875, "loss": 0.0062, "step": 4076, "total_loss": 0.0086669921875 }, { "epoch": 1.67, "learning_rate": 0.00018660039966630638, "lm_loss": 0.007171630859375, "loss": 0.0067, "step": 4077, "total_loss": 0.007171630859375 }, { "epoch": 1.67, "learning_rate": 0.0001865939765775182, "lm_loss": 0.005859375, "loss": 0.0062, "step": 4078, "total_loss": 0.005859375 }, { "epoch": 1.67, "learning_rate": 0.0001865875520602488, "lm_loss": 0.006561279296875, "loss": 0.0065, "step": 4079, "total_loss": 0.006561279296875 }, { "epoch": 1.67, "learning_rate": 0.0001865811261146042, "lm_loss": 0.006317138671875, "loss": 0.0067, "step": 4080, "total_loss": 0.006317138671875 }, { "epoch": 1.67, "learning_rate": 0.00018657469874069045, "lm_loss": 0.0087890625, "loss": 0.0077, "step": 4081, "total_loss": 0.0087890625 }, { "epoch": 1.67, "learning_rate": 0.0001865682699386135, "lm_loss": 0.00921630859375, "loss": 0.0083, "step": 4082, "total_loss": 0.00921630859375 }, { "epoch": 1.67, "learning_rate": 0.00018656183970847944, "lm_loss": 0.00396728515625, "loss": 0.0077, "step": 4083, "total_loss": 0.00396728515625 }, { "epoch": 1.67, "learning_rate": 0.00018655540805039434, "lm_loss": 0.005462646484375, "loss": 0.0059, "step": 4084, "total_loss": 0.005462646484375 }, { "epoch": 1.67, "learning_rate": 0.0001865489749644643, "lm_loss": 0.005706787109375, "loss": 0.0062, "step": 4085, "total_loss": 0.005706787109375 }, { "epoch": 1.67, "learning_rate": 0.00018654254045079544, "lm_loss": 0.0025177001953125, "loss": 0.0066, "step": 4086, "total_loss": 0.0025177001953125 }, { "epoch": 1.67, "learning_rate": 0.0001865361045094939, "lm_loss": 0.007568359375, "loss": 0.0074, "step": 4087, "total_loss": 0.007568359375 }, { "epoch": 1.67, "learning_rate": 0.00018652966714066585, "lm_loss": 0.008056640625, "loss": 0.0074, "step": 4088, "total_loss": 0.008056640625 }, { "epoch": 1.67, "learning_rate": 0.0001865232283444175, "lm_loss": 0.003997802734375, "loss": 0.0063, "step": 4089, "total_loss": 0.003997802734375 }, { "epoch": 1.67, "learning_rate": 0.00018651678812085505, "lm_loss": 0.01153564453125, "loss": 0.0074, "step": 4090, "total_loss": 0.01153564453125 }, { "epoch": 1.67, "learning_rate": 0.00018651034647008475, "lm_loss": 0.0157470703125, "loss": 0.0073, "step": 4091, "total_loss": 0.0157470703125 }, { "epoch": 1.67, "learning_rate": 0.00018650390339221284, "lm_loss": 0.013671875, "loss": 0.0088, "step": 4092, "total_loss": 0.013671875 }, { "epoch": 1.67, "learning_rate": 0.00018649745888734564, "lm_loss": 0.006256103515625, "loss": 0.0078, "step": 4093, "total_loss": 0.006256103515625 }, { "epoch": 1.67, "learning_rate": 0.00018649101295558943, "lm_loss": 0.0030364990234375, "loss": 0.0071, "step": 4094, "total_loss": 0.0030364990234375 }, { "epoch": 1.67, "learning_rate": 0.00018648456559705058, "lm_loss": 0.004425048828125, "loss": 0.0072, "step": 4095, "total_loss": 0.004425048828125 }, { "epoch": 1.67, "learning_rate": 0.00018647811681183542, "lm_loss": 0.00897216796875, "loss": 0.0094, "step": 4096, "total_loss": 0.00897216796875 }, { "epoch": 1.67, "learning_rate": 0.00018647166660005034, "lm_loss": 0.0089111328125, "loss": 0.0074, "step": 4097, "total_loss": 0.0089111328125 }, { "epoch": 1.68, "learning_rate": 0.00018646521496180172, "lm_loss": 0.00787353515625, "loss": 0.0065, "step": 4098, "total_loss": 0.00787353515625 }, { "epoch": 1.68, "learning_rate": 0.00018645876189719602, "lm_loss": 0.0030975341796875, "loss": 0.0077, "step": 4099, "total_loss": 0.0030975341796875 }, { "epoch": 1.68, "learning_rate": 0.0001864523074063397, "lm_loss": 0.0107421875, "loss": 0.0079, "step": 4100, "total_loss": 0.0107421875 }, { "epoch": 1.68, "eval_lm_loss": 0.009542959742248058, "eval_loss": 0.00987449660897255, "eval_runtime": 43.9699, "eval_samples_per_second": 22.743, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009542959742248058, "lm_loss": 0.00131988525390625, "step": 4100, "total_loss": 0.00131988525390625 }, { "epoch": 1.68, "learning_rate": 0.00018644585148933922, "lm_loss": 0.005523681640625, "loss": 0.0067, "step": 4101, "total_loss": 0.005523681640625 }, { "epoch": 1.68, "learning_rate": 0.00018643939414630104, "lm_loss": 0.00970458984375, "loss": 0.0075, "step": 4102, "total_loss": 0.00970458984375 }, { "epoch": 1.68, "learning_rate": 0.00018643293537733176, "lm_loss": 0.004058837890625, "loss": 0.0065, "step": 4103, "total_loss": 0.004058837890625 }, { "epoch": 1.68, "learning_rate": 0.00018642647518253788, "lm_loss": 0.00518798828125, "loss": 0.0077, "step": 4104, "total_loss": 0.00518798828125 }, { "epoch": 1.68, "learning_rate": 0.00018642001356202595, "lm_loss": 0.01092529296875, "loss": 0.0083, "step": 4105, "total_loss": 0.01092529296875 }, { "epoch": 1.68, "learning_rate": 0.0001864135505159026, "lm_loss": 0.0091552734375, "loss": 0.0077, "step": 4106, "total_loss": 0.0091552734375 }, { "epoch": 1.68, "learning_rate": 0.00018640708604427444, "lm_loss": 0.0018310546875, "loss": 0.0061, "step": 4107, "total_loss": 0.0018310546875 }, { "epoch": 1.68, "learning_rate": 0.00018640062014724812, "lm_loss": 0.008056640625, "loss": 0.0071, "step": 4108, "total_loss": 0.008056640625 }, { "epoch": 1.68, "learning_rate": 0.00018639415282493025, "lm_loss": 0.006439208984375, "loss": 0.0071, "step": 4109, "total_loss": 0.006439208984375 }, { "epoch": 1.68, "learning_rate": 0.00018638768407742756, "lm_loss": 0.007232666015625, "loss": 0.0082, "step": 4110, "total_loss": 0.007232666015625 }, { "epoch": 1.68, "learning_rate": 0.00018638121390484677, "lm_loss": 0.0033416748046875, "loss": 0.0071, "step": 4111, "total_loss": 0.0033416748046875 }, { "epoch": 1.68, "learning_rate": 0.00018637474230729463, "lm_loss": 0.00445556640625, "loss": 0.0077, "step": 4112, "total_loss": 0.00445556640625 }, { "epoch": 1.68, "learning_rate": 0.0001863682692848778, "lm_loss": 0.002197265625, "loss": 0.0074, "step": 4113, "total_loss": 0.002197265625 }, { "epoch": 1.68, "learning_rate": 0.00018636179483770316, "lm_loss": 0.01251220703125, "loss": 0.0088, "step": 4114, "total_loss": 0.01251220703125 }, { "epoch": 1.68, "learning_rate": 0.00018635531896587747, "lm_loss": 0.00738525390625, "loss": 0.0074, "step": 4115, "total_loss": 0.00738525390625 }, { "epoch": 1.68, "learning_rate": 0.00018634884166950758, "lm_loss": 0.006134033203125, "loss": 0.0067, "step": 4116, "total_loss": 0.006134033203125 }, { "epoch": 1.68, "learning_rate": 0.00018634236294870032, "lm_loss": 0.0023193359375, "loss": 0.0058, "step": 4117, "total_loss": 0.0023193359375 }, { "epoch": 1.68, "learning_rate": 0.0001863358828035626, "lm_loss": 0.0101318359375, "loss": 0.0081, "step": 4118, "total_loss": 0.0101318359375 }, { "epoch": 1.68, "learning_rate": 0.00018632940123420127, "lm_loss": 0.003448486328125, "loss": 0.0074, "step": 4119, "total_loss": 0.003448486328125 }, { "epoch": 1.68, "learning_rate": 0.0001863229182407233, "lm_loss": 0.006988525390625, "loss": 0.0075, "step": 4120, "total_loss": 0.006988525390625 }, { "epoch": 1.68, "learning_rate": 0.0001863164338232356, "lm_loss": 0.0057373046875, "loss": 0.0067, "step": 4121, "total_loss": 0.0057373046875 }, { "epoch": 1.69, "learning_rate": 0.00018630994798184513, "lm_loss": 0.0089111328125, "loss": 0.0067, "step": 4122, "total_loss": 0.0089111328125 }, { "epoch": 1.69, "learning_rate": 0.00018630346071665894, "lm_loss": 0.01104736328125, "loss": 0.008, "step": 4123, "total_loss": 0.01104736328125 }, { "epoch": 1.69, "learning_rate": 0.00018629697202778397, "lm_loss": 0.01214599609375, "loss": 0.0078, "step": 4124, "total_loss": 0.01214599609375 }, { "epoch": 1.69, "learning_rate": 0.00018629048191532734, "lm_loss": 0.004730224609375, "loss": 0.0058, "step": 4125, "total_loss": 0.004730224609375 }, { "epoch": 1.69, "learning_rate": 0.00018628399037939604, "lm_loss": 0.0074462890625, "loss": 0.007, "step": 4126, "total_loss": 0.0074462890625 }, { "epoch": 1.69, "learning_rate": 0.0001862774974200972, "lm_loss": 0.00506591796875, "loss": 0.0075, "step": 4127, "total_loss": 0.00506591796875 }, { "epoch": 1.69, "learning_rate": 0.00018627100303753792, "lm_loss": 0.0037841796875, "loss": 0.0064, "step": 4128, "total_loss": 0.0037841796875 }, { "epoch": 1.69, "learning_rate": 0.00018626450723182534, "lm_loss": 0.00634765625, "loss": 0.0074, "step": 4129, "total_loss": 0.00634765625 }, { "epoch": 1.69, "learning_rate": 0.0001862580100030666, "lm_loss": 0.0037078857421875, "loss": 0.0073, "step": 4130, "total_loss": 0.0037078857421875 }, { "epoch": 1.69, "learning_rate": 0.0001862515113513689, "lm_loss": 0.0101318359375, "loss": 0.0087, "step": 4131, "total_loss": 0.0101318359375 }, { "epoch": 1.69, "learning_rate": 0.0001862450112768394, "lm_loss": 0.017578125, "loss": 0.0084, "step": 4132, "total_loss": 0.017578125 }, { "epoch": 1.69, "learning_rate": 0.00018623850977958538, "lm_loss": 0.00921630859375, "loss": 0.0053, "step": 4133, "total_loss": 0.00921630859375 }, { "epoch": 1.69, "learning_rate": 0.00018623200685971404, "lm_loss": 0.0150146484375, "loss": 0.0081, "step": 4134, "total_loss": 0.0150146484375 }, { "epoch": 1.69, "learning_rate": 0.00018622550251733272, "lm_loss": 0.0130615234375, "loss": 0.007, "step": 4135, "total_loss": 0.0130615234375 }, { "epoch": 1.69, "learning_rate": 0.00018621899675254867, "lm_loss": 0.00848388671875, "loss": 0.006, "step": 4136, "total_loss": 0.00848388671875 }, { "epoch": 1.69, "learning_rate": 0.00018621248956546924, "lm_loss": 0.0108642578125, "loss": 0.0074, "step": 4137, "total_loss": 0.0108642578125 }, { "epoch": 1.69, "learning_rate": 0.00018620598095620176, "lm_loss": 0.006988525390625, "loss": 0.009, "step": 4138, "total_loss": 0.006988525390625 }, { "epoch": 1.69, "learning_rate": 0.00018619947092485357, "lm_loss": 0.00579833984375, "loss": 0.0078, "step": 4139, "total_loss": 0.00579833984375 }, { "epoch": 1.69, "learning_rate": 0.0001861929594715321, "lm_loss": 0.007568359375, "loss": 0.0064, "step": 4140, "total_loss": 0.007568359375 }, { "epoch": 1.69, "learning_rate": 0.00018618644659634475, "lm_loss": 0.00872802734375, "loss": 0.0078, "step": 4141, "total_loss": 0.00872802734375 }, { "epoch": 1.69, "learning_rate": 0.000186179932299399, "lm_loss": 0.00506591796875, "loss": 0.0077, "step": 4142, "total_loss": 0.00506591796875 }, { "epoch": 1.69, "learning_rate": 0.0001861734165808022, "lm_loss": 0.0162353515625, "loss": 0.0081, "step": 4143, "total_loss": 0.0162353515625 }, { "epoch": 1.69, "learning_rate": 0.00018616689944066196, "lm_loss": 0.006927490234375, "loss": 0.0087, "step": 4144, "total_loss": 0.006927490234375 }, { "epoch": 1.69, "learning_rate": 0.00018616038087908572, "lm_loss": 0.00982666015625, "loss": 0.0074, "step": 4145, "total_loss": 0.00982666015625 }, { "epoch": 1.7, "learning_rate": 0.00018615386089618102, "lm_loss": 0.006317138671875, "loss": 0.0066, "step": 4146, "total_loss": 0.006317138671875 }, { "epoch": 1.7, "learning_rate": 0.00018614733949205543, "lm_loss": 0.0035858154296875, "loss": 0.0074, "step": 4147, "total_loss": 0.0035858154296875 }, { "epoch": 1.7, "learning_rate": 0.0001861408166668165, "lm_loss": 0.006683349609375, "loss": 0.0084, "step": 4148, "total_loss": 0.006683349609375 }, { "epoch": 1.7, "learning_rate": 0.0001861342924205719, "lm_loss": 0.0135498046875, "loss": 0.0077, "step": 4149, "total_loss": 0.0135498046875 }, { "epoch": 1.7, "learning_rate": 0.0001861277667534292, "lm_loss": 0.0146484375, "loss": 0.0077, "step": 4150, "total_loss": 0.0146484375 }, { "epoch": 1.7, "learning_rate": 0.00018612123966549606, "lm_loss": 0.00665283203125, "loss": 0.0077, "step": 4151, "total_loss": 0.00665283203125 }, { "epoch": 1.7, "learning_rate": 0.00018611471115688013, "lm_loss": 0.0067138671875, "loss": 0.0078, "step": 4152, "total_loss": 0.0067138671875 }, { "epoch": 1.7, "learning_rate": 0.00018610818122768915, "lm_loss": 0.0177001953125, "loss": 0.0081, "step": 4153, "total_loss": 0.0177001953125 }, { "epoch": 1.7, "learning_rate": 0.00018610164987803085, "lm_loss": 0.00714111328125, "loss": 0.0071, "step": 4154, "total_loss": 0.00714111328125 }, { "epoch": 1.7, "learning_rate": 0.00018609511710801288, "lm_loss": 0.01470947265625, "loss": 0.0081, "step": 4155, "total_loss": 0.01470947265625 }, { "epoch": 1.7, "learning_rate": 0.00018608858291774313, "lm_loss": 0.01385498046875, "loss": 0.0098, "step": 4156, "total_loss": 0.01385498046875 }, { "epoch": 1.7, "learning_rate": 0.0001860820473073293, "lm_loss": 0.0081787109375, "loss": 0.0072, "step": 4157, "total_loss": 0.0081787109375 }, { "epoch": 1.7, "learning_rate": 0.00018607551027687925, "lm_loss": 0.01446533203125, "loss": 0.0087, "step": 4158, "total_loss": 0.01446533203125 }, { "epoch": 1.7, "learning_rate": 0.0001860689718265008, "lm_loss": 0.003387451171875, "loss": 0.0068, "step": 4159, "total_loss": 0.003387451171875 }, { "epoch": 1.7, "learning_rate": 0.0001860624319563018, "lm_loss": 0.004364013671875, "loss": 0.0066, "step": 4160, "total_loss": 0.004364013671875 }, { "epoch": 1.7, "learning_rate": 0.00018605589066639013, "lm_loss": 0.00384521484375, "loss": 0.0078, "step": 4161, "total_loss": 0.00384521484375 }, { "epoch": 1.7, "learning_rate": 0.00018604934795687372, "lm_loss": 0.00787353515625, "loss": 0.0077, "step": 4162, "total_loss": 0.00787353515625 }, { "epoch": 1.7, "learning_rate": 0.0001860428038278605, "lm_loss": 0.01153564453125, "loss": 0.008, "step": 4163, "total_loss": 0.01153564453125 }, { "epoch": 1.7, "learning_rate": 0.00018603625827945844, "lm_loss": 0.0089111328125, "loss": 0.0072, "step": 4164, "total_loss": 0.0089111328125 }, { "epoch": 1.7, "learning_rate": 0.00018602971131177548, "lm_loss": 0.01092529296875, "loss": 0.0094, "step": 4165, "total_loss": 0.01092529296875 }, { "epoch": 1.7, "learning_rate": 0.00018602316292491963, "lm_loss": 0.0147705078125, "loss": 0.0084, "step": 4166, "total_loss": 0.0147705078125 }, { "epoch": 1.7, "learning_rate": 0.0001860166131189989, "lm_loss": 0.003570556640625, "loss": 0.0072, "step": 4167, "total_loss": 0.003570556640625 }, { "epoch": 1.7, "learning_rate": 0.00018601006189412136, "lm_loss": 0.00787353515625, "loss": 0.0071, "step": 4168, "total_loss": 0.00787353515625 }, { "epoch": 1.7, "learning_rate": 0.0001860035092503951, "lm_loss": 0.01080322265625, "loss": 0.0073, "step": 4169, "total_loss": 0.01080322265625 }, { "epoch": 1.7, "learning_rate": 0.0001859969551879282, "lm_loss": 0.0091552734375, "loss": 0.0083, "step": 4170, "total_loss": 0.0091552734375 }, { "epoch": 1.71, "learning_rate": 0.00018599039970682872, "lm_loss": 0.00830078125, "loss": 0.0078, "step": 4171, "total_loss": 0.00830078125 }, { "epoch": 1.71, "learning_rate": 0.00018598384280720488, "lm_loss": 0.00775146484375, "loss": 0.007, "step": 4172, "total_loss": 0.00775146484375 }, { "epoch": 1.71, "learning_rate": 0.00018597728448916482, "lm_loss": 0.007568359375, "loss": 0.0071, "step": 4173, "total_loss": 0.007568359375 }, { "epoch": 1.71, "learning_rate": 0.00018597072475281672, "lm_loss": 0.006744384765625, "loss": 0.0071, "step": 4174, "total_loss": 0.006744384765625 }, { "epoch": 1.71, "learning_rate": 0.0001859641635982688, "lm_loss": 0.00537109375, "loss": 0.0063, "step": 4175, "total_loss": 0.00537109375 }, { "epoch": 1.71, "learning_rate": 0.0001859576010256293, "lm_loss": 0.00946044921875, "loss": 0.0095, "step": 4176, "total_loss": 0.00946044921875 }, { "epoch": 1.71, "learning_rate": 0.00018595103703500645, "lm_loss": 0.005035400390625, "loss": 0.007, "step": 4177, "total_loss": 0.005035400390625 }, { "epoch": 1.71, "learning_rate": 0.00018594447162650855, "lm_loss": 0.003204345703125, "loss": 0.0095, "step": 4178, "total_loss": 0.003204345703125 }, { "epoch": 1.71, "learning_rate": 0.0001859379048002439, "lm_loss": 0.0054931640625, "loss": 0.0073, "step": 4179, "total_loss": 0.0054931640625 }, { "epoch": 1.71, "learning_rate": 0.00018593133655632086, "lm_loss": 0.00604248046875, "loss": 0.0066, "step": 4180, "total_loss": 0.00604248046875 }, { "epoch": 1.71, "learning_rate": 0.00018592476689484775, "lm_loss": 0.004241943359375, "loss": 0.0072, "step": 4181, "total_loss": 0.004241943359375 }, { "epoch": 1.71, "learning_rate": 0.00018591819581593292, "lm_loss": 0.0054931640625, "loss": 0.0062, "step": 4182, "total_loss": 0.0054931640625 }, { "epoch": 1.71, "learning_rate": 0.00018591162331968482, "lm_loss": 0.006988525390625, "loss": 0.0085, "step": 4183, "total_loss": 0.006988525390625 }, { "epoch": 1.71, "learning_rate": 0.00018590504940621189, "lm_loss": 0.00531005859375, "loss": 0.0067, "step": 4184, "total_loss": 0.00531005859375 }, { "epoch": 1.71, "learning_rate": 0.00018589847407562247, "lm_loss": 0.007232666015625, "loss": 0.0067, "step": 4185, "total_loss": 0.007232666015625 }, { "epoch": 1.71, "learning_rate": 0.00018589189732802513, "lm_loss": 0.00836181640625, "loss": 0.0074, "step": 4186, "total_loss": 0.00836181640625 }, { "epoch": 1.71, "learning_rate": 0.00018588531916352832, "lm_loss": 0.0123291015625, "loss": 0.0081, "step": 4187, "total_loss": 0.0123291015625 }, { "epoch": 1.71, "learning_rate": 0.00018587873958224055, "lm_loss": 0.0130615234375, "loss": 0.0072, "step": 4188, "total_loss": 0.0130615234375 }, { "epoch": 1.71, "learning_rate": 0.00018587215858427037, "lm_loss": 0.00653076171875, "loss": 0.0077, "step": 4189, "total_loss": 0.00653076171875 }, { "epoch": 1.71, "learning_rate": 0.0001858655761697264, "lm_loss": 0.00640869140625, "loss": 0.0075, "step": 4190, "total_loss": 0.00640869140625 }, { "epoch": 1.71, "learning_rate": 0.00018585899233871712, "lm_loss": 0.0167236328125, "loss": 0.0081, "step": 4191, "total_loss": 0.0167236328125 }, { "epoch": 1.71, "learning_rate": 0.00018585240709135116, "lm_loss": 0.01080322265625, "loss": 0.0073, "step": 4192, "total_loss": 0.01080322265625 }, { "epoch": 1.71, "learning_rate": 0.00018584582042773722, "lm_loss": 0.00518798828125, "loss": 0.0092, "step": 4193, "total_loss": 0.00518798828125 }, { "epoch": 1.71, "learning_rate": 0.0001858392323479839, "lm_loss": 0.0030670166015625, "loss": 0.0081, "step": 4194, "total_loss": 0.0030670166015625 }, { "epoch": 1.72, "learning_rate": 0.00018583264285219992, "lm_loss": 0.01416015625, "loss": 0.0074, "step": 4195, "total_loss": 0.01416015625 }, { "epoch": 1.72, "learning_rate": 0.00018582605194049394, "lm_loss": 0.0137939453125, "loss": 0.0076, "step": 4196, "total_loss": 0.0137939453125 }, { "epoch": 1.72, "learning_rate": 0.0001858194596129747, "lm_loss": 0.006927490234375, "loss": 0.0071, "step": 4197, "total_loss": 0.006927490234375 }, { "epoch": 1.72, "learning_rate": 0.00018581286586975094, "lm_loss": 0.00885009765625, "loss": 0.0087, "step": 4198, "total_loss": 0.00885009765625 }, { "epoch": 1.72, "learning_rate": 0.00018580627071093146, "lm_loss": 0.01220703125, "loss": 0.0076, "step": 4199, "total_loss": 0.01220703125 }, { "epoch": 1.72, "learning_rate": 0.00018579967413662508, "lm_loss": 0.007659912109375, "loss": 0.0071, "step": 4200, "total_loss": 0.007659912109375 }, { "epoch": 1.72, "eval_lm_loss": 0.009608560241758823, "eval_loss": 0.009925203397870064, "eval_runtime": 44.1406, "eval_samples_per_second": 22.655, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009608560241758823, "lm_loss": 0.0020294189453125, "step": 4200, "total_loss": 0.0020294189453125 }, { "epoch": 1.72, "learning_rate": 0.0001857930761469405, "lm_loss": 0.0111083984375, "loss": 0.0096, "step": 4201, "total_loss": 0.0111083984375 }, { "epoch": 1.72, "learning_rate": 0.0001857864767419867, "lm_loss": 0.00408935546875, "loss": 0.0063, "step": 4202, "total_loss": 0.00408935546875 }, { "epoch": 1.72, "learning_rate": 0.00018577987592187248, "lm_loss": 0.006439208984375, "loss": 0.0085, "step": 4203, "total_loss": 0.006439208984375 }, { "epoch": 1.72, "learning_rate": 0.00018577327368670673, "lm_loss": 0.00677490234375, "loss": 0.0063, "step": 4204, "total_loss": 0.00677490234375 }, { "epoch": 1.72, "learning_rate": 0.00018576667003659836, "lm_loss": 0.006500244140625, "loss": 0.0088, "step": 4205, "total_loss": 0.006500244140625 }, { "epoch": 1.72, "learning_rate": 0.00018576006497165632, "lm_loss": 0.004730224609375, "loss": 0.0082, "step": 4206, "total_loss": 0.004730224609375 }, { "epoch": 1.72, "learning_rate": 0.00018575345849198958, "lm_loss": 0.006927490234375, "loss": 0.0057, "step": 4207, "total_loss": 0.006927490234375 }, { "epoch": 1.72, "learning_rate": 0.00018574685059770708, "lm_loss": 0.00579833984375, "loss": 0.0066, "step": 4208, "total_loss": 0.00579833984375 }, { "epoch": 1.72, "learning_rate": 0.00018574024128891788, "lm_loss": 0.0047607421875, "loss": 0.006, "step": 4209, "total_loss": 0.0047607421875 }, { "epoch": 1.72, "learning_rate": 0.00018573363056573097, "lm_loss": 0.006683349609375, "loss": 0.0067, "step": 4210, "total_loss": 0.006683349609375 }, { "epoch": 1.72, "learning_rate": 0.00018572701842825543, "lm_loss": 0.0016937255859375, "loss": 0.0068, "step": 4211, "total_loss": 0.0016937255859375 }, { "epoch": 1.72, "learning_rate": 0.00018572040487660027, "lm_loss": 0.0047607421875, "loss": 0.0069, "step": 4212, "total_loss": 0.0047607421875 }, { "epoch": 1.72, "learning_rate": 0.0001857137899108747, "lm_loss": 0.005462646484375, "loss": 0.0048, "step": 4213, "total_loss": 0.005462646484375 }, { "epoch": 1.72, "learning_rate": 0.00018570717353118775, "lm_loss": 0.0159912109375, "loss": 0.0078, "step": 4214, "total_loss": 0.0159912109375 }, { "epoch": 1.72, "learning_rate": 0.0001857005557376486, "lm_loss": 0.0048828125, "loss": 0.0064, "step": 4215, "total_loss": 0.0048828125 }, { "epoch": 1.72, "learning_rate": 0.00018569393653036645, "lm_loss": 0.0133056640625, "loss": 0.0067, "step": 4216, "total_loss": 0.0133056640625 }, { "epoch": 1.72, "learning_rate": 0.0001856873159094504, "lm_loss": 0.0045166015625, "loss": 0.0053, "step": 4217, "total_loss": 0.0045166015625 }, { "epoch": 1.72, "learning_rate": 0.00018568069387500977, "lm_loss": 0.006317138671875, "loss": 0.0075, "step": 4218, "total_loss": 0.006317138671875 }, { "epoch": 1.72, "learning_rate": 0.00018567407042715376, "lm_loss": 0.0093994140625, "loss": 0.0097, "step": 4219, "total_loss": 0.0093994140625 }, { "epoch": 1.73, "learning_rate": 0.0001856674455659916, "lm_loss": 0.00701904296875, "loss": 0.0077, "step": 4220, "total_loss": 0.00701904296875 }, { "epoch": 1.73, "learning_rate": 0.00018566081929163262, "lm_loss": 0.006378173828125, "loss": 0.0091, "step": 4221, "total_loss": 0.006378173828125 }, { "epoch": 1.73, "learning_rate": 0.00018565419160418613, "lm_loss": 0.004486083984375, "loss": 0.0067, "step": 4222, "total_loss": 0.004486083984375 }, { "epoch": 1.73, "learning_rate": 0.0001856475625037614, "lm_loss": 0.006500244140625, "loss": 0.0057, "step": 4223, "total_loss": 0.006500244140625 }, { "epoch": 1.73, "learning_rate": 0.00018564093199046786, "lm_loss": 0.01226806640625, "loss": 0.0061, "step": 4224, "total_loss": 0.01226806640625 }, { "epoch": 1.73, "learning_rate": 0.00018563430006441484, "lm_loss": 0.00518798828125, "loss": 0.009, "step": 4225, "total_loss": 0.00518798828125 }, { "epoch": 1.73, "learning_rate": 0.00018562766672571178, "lm_loss": 0.005889892578125, "loss": 0.0096, "step": 4226, "total_loss": 0.005889892578125 }, { "epoch": 1.73, "learning_rate": 0.00018562103197446807, "lm_loss": 0.00421142578125, "loss": 0.0082, "step": 4227, "total_loss": 0.00421142578125 }, { "epoch": 1.73, "learning_rate": 0.0001856143958107932, "lm_loss": 0.0036468505859375, "loss": 0.0054, "step": 4228, "total_loss": 0.0036468505859375 }, { "epoch": 1.73, "learning_rate": 0.0001856077582347966, "lm_loss": 0.0059814453125, "loss": 0.0084, "step": 4229, "total_loss": 0.0059814453125 }, { "epoch": 1.73, "learning_rate": 0.00018560111924658778, "lm_loss": 0.007781982421875, "loss": 0.0066, "step": 4230, "total_loss": 0.007781982421875 }, { "epoch": 1.73, "learning_rate": 0.00018559447884627627, "lm_loss": 0.00872802734375, "loss": 0.0088, "step": 4231, "total_loss": 0.00872802734375 }, { "epoch": 1.73, "learning_rate": 0.0001855878370339716, "lm_loss": 0.002166748046875, "loss": 0.0048, "step": 4232, "total_loss": 0.002166748046875 }, { "epoch": 1.73, "learning_rate": 0.00018558119380978336, "lm_loss": 0.0033416748046875, "loss": 0.0066, "step": 4233, "total_loss": 0.0033416748046875 }, { "epoch": 1.73, "learning_rate": 0.00018557454917382112, "lm_loss": 0.005645751953125, "loss": 0.0063, "step": 4234, "total_loss": 0.005645751953125 }, { "epoch": 1.73, "learning_rate": 0.00018556790312619444, "lm_loss": 0.007598876953125, "loss": 0.0097, "step": 4235, "total_loss": 0.007598876953125 }, { "epoch": 1.73, "learning_rate": 0.00018556125566701308, "lm_loss": 0.007720947265625, "loss": 0.0071, "step": 4236, "total_loss": 0.007720947265625 }, { "epoch": 1.73, "learning_rate": 0.00018555460679638657, "lm_loss": 0.00738525390625, "loss": 0.0085, "step": 4237, "total_loss": 0.00738525390625 }, { "epoch": 1.73, "learning_rate": 0.00018554795651442468, "lm_loss": 0.00811767578125, "loss": 0.007, "step": 4238, "total_loss": 0.00811767578125 }, { "epoch": 1.73, "learning_rate": 0.00018554130482123705, "lm_loss": 0.004425048828125, "loss": 0.0073, "step": 4239, "total_loss": 0.004425048828125 }, { "epoch": 1.73, "learning_rate": 0.00018553465171693346, "lm_loss": 0.006561279296875, "loss": 0.0077, "step": 4240, "total_loss": 0.006561279296875 }, { "epoch": 1.73, "learning_rate": 0.00018552799720162363, "lm_loss": 0.0123291015625, "loss": 0.0063, "step": 4241, "total_loss": 0.0123291015625 }, { "epoch": 1.73, "learning_rate": 0.00018552134127541736, "lm_loss": 0.0057373046875, "loss": 0.0067, "step": 4242, "total_loss": 0.0057373046875 }, { "epoch": 1.73, "learning_rate": 0.00018551468393842444, "lm_loss": 0.01251220703125, "loss": 0.0083, "step": 4243, "total_loss": 0.01251220703125 }, { "epoch": 1.74, "learning_rate": 0.00018550802519075467, "lm_loss": 0.0103759765625, "loss": 0.0073, "step": 4244, "total_loss": 0.0103759765625 }, { "epoch": 1.74, "learning_rate": 0.0001855013650325179, "lm_loss": 0.0074462890625, "loss": 0.0066, "step": 4245, "total_loss": 0.0074462890625 }, { "epoch": 1.74, "learning_rate": 0.00018549470346382405, "lm_loss": 0.00390625, "loss": 0.0065, "step": 4246, "total_loss": 0.00390625 }, { "epoch": 1.74, "learning_rate": 0.00018548804048478294, "lm_loss": 0.005126953125, "loss": 0.0073, "step": 4247, "total_loss": 0.005126953125 }, { "epoch": 1.74, "learning_rate": 0.00018548137609550452, "lm_loss": 0.0035400390625, "loss": 0.0081, "step": 4248, "total_loss": 0.0035400390625 }, { "epoch": 1.74, "learning_rate": 0.00018547471029609875, "lm_loss": 0.013671875, "loss": 0.0077, "step": 4249, "total_loss": 0.013671875 }, { "epoch": 1.74, "learning_rate": 0.00018546804308667552, "lm_loss": 0.00640869140625, "loss": 0.0074, "step": 4250, "total_loss": 0.00640869140625 }, { "epoch": 1.74, "learning_rate": 0.00018546137446734487, "lm_loss": 0.0054931640625, "loss": 0.0069, "step": 4251, "total_loss": 0.0054931640625 }, { "epoch": 1.74, "learning_rate": 0.00018545470443821681, "lm_loss": 0.00787353515625, "loss": 0.0064, "step": 4252, "total_loss": 0.00787353515625 }, { "epoch": 1.74, "learning_rate": 0.00018544803299940137, "lm_loss": 0.00396728515625, "loss": 0.0081, "step": 4253, "total_loss": 0.00396728515625 }, { "epoch": 1.74, "learning_rate": 0.00018544136015100857, "lm_loss": 0.0042724609375, "loss": 0.009, "step": 4254, "total_loss": 0.0042724609375 }, { "epoch": 1.74, "learning_rate": 0.0001854346858931485, "lm_loss": 0.015625, "loss": 0.0078, "step": 4255, "total_loss": 0.015625 }, { "epoch": 1.74, "learning_rate": 0.00018542801022593128, "lm_loss": 0.006591796875, "loss": 0.0059, "step": 4256, "total_loss": 0.006591796875 }, { "epoch": 1.74, "learning_rate": 0.00018542133314946704, "lm_loss": 0.0164794921875, "loss": 0.0103, "step": 4257, "total_loss": 0.0164794921875 }, { "epoch": 1.74, "learning_rate": 0.0001854146546638659, "lm_loss": 0.00152587890625, "loss": 0.0065, "step": 4258, "total_loss": 0.00152587890625 }, { "epoch": 1.74, "learning_rate": 0.00018540797476923802, "lm_loss": 0.004425048828125, "loss": 0.0076, "step": 4259, "total_loss": 0.004425048828125 }, { "epoch": 1.74, "learning_rate": 0.00018540129346569364, "lm_loss": 0.0034332275390625, "loss": 0.0065, "step": 4260, "total_loss": 0.0034332275390625 }, { "epoch": 1.74, "learning_rate": 0.0001853946107533429, "lm_loss": 0.007354736328125, "loss": 0.0074, "step": 4261, "total_loss": 0.007354736328125 }, { "epoch": 1.74, "learning_rate": 0.00018538792663229617, "lm_loss": 0.006988525390625, "loss": 0.0077, "step": 4262, "total_loss": 0.006988525390625 }, { "epoch": 1.74, "learning_rate": 0.00018538124110266358, "lm_loss": 0.00555419921875, "loss": 0.006, "step": 4263, "total_loss": 0.00555419921875 }, { "epoch": 1.74, "learning_rate": 0.0001853745541645555, "lm_loss": 0.01373291015625, "loss": 0.009, "step": 4264, "total_loss": 0.01373291015625 }, { "epoch": 1.74, "learning_rate": 0.0001853678658180822, "lm_loss": 0.014892578125, "loss": 0.0092, "step": 4265, "total_loss": 0.014892578125 }, { "epoch": 1.74, "learning_rate": 0.00018536117606335402, "lm_loss": 0.0079345703125, "loss": 0.0063, "step": 4266, "total_loss": 0.0079345703125 }, { "epoch": 1.74, "learning_rate": 0.0001853544849004813, "lm_loss": 0.01300048828125, "loss": 0.008, "step": 4267, "total_loss": 0.01300048828125 }, { "epoch": 1.74, "learning_rate": 0.00018534779232957447, "lm_loss": 0.0133056640625, "loss": 0.0071, "step": 4268, "total_loss": 0.0133056640625 }, { "epoch": 1.75, "learning_rate": 0.0001853410983507439, "lm_loss": 0.002044677734375, "loss": 0.0083, "step": 4269, "total_loss": 0.002044677734375 }, { "epoch": 1.75, "learning_rate": 0.00018533440296410002, "lm_loss": 0.01446533203125, "loss": 0.0088, "step": 4270, "total_loss": 0.01446533203125 }, { "epoch": 1.75, "learning_rate": 0.00018532770616975326, "lm_loss": 0.007049560546875, "loss": 0.0077, "step": 4271, "total_loss": 0.007049560546875 }, { "epoch": 1.75, "learning_rate": 0.00018532100796781414, "lm_loss": 0.00421142578125, "loss": 0.0076, "step": 4272, "total_loss": 0.00421142578125 }, { "epoch": 1.75, "learning_rate": 0.00018531430835839308, "lm_loss": 0.012939453125, "loss": 0.0085, "step": 4273, "total_loss": 0.012939453125 }, { "epoch": 1.75, "learning_rate": 0.00018530760734160066, "lm_loss": 0.004425048828125, "loss": 0.0068, "step": 4274, "total_loss": 0.004425048828125 }, { "epoch": 1.75, "learning_rate": 0.00018530090491754742, "lm_loss": 0.005645751953125, "loss": 0.0064, "step": 4275, "total_loss": 0.005645751953125 }, { "epoch": 1.75, "learning_rate": 0.0001852942010863439, "lm_loss": 0.018798828125, "loss": 0.0062, "step": 4276, "total_loss": 0.018798828125 }, { "epoch": 1.75, "learning_rate": 0.0001852874958481007, "lm_loss": 0.0078125, "loss": 0.0079, "step": 4277, "total_loss": 0.0078125 }, { "epoch": 1.75, "learning_rate": 0.00018528078920292847, "lm_loss": 0.0164794921875, "loss": 0.0081, "step": 4278, "total_loss": 0.0164794921875 }, { "epoch": 1.75, "learning_rate": 0.00018527408115093775, "lm_loss": 0.005340576171875, "loss": 0.0082, "step": 4279, "total_loss": 0.005340576171875 }, { "epoch": 1.75, "learning_rate": 0.0001852673716922393, "lm_loss": 0.0078125, "loss": 0.0078, "step": 4280, "total_loss": 0.0078125 }, { "epoch": 1.75, "learning_rate": 0.00018526066082694373, "lm_loss": 0.00543212890625, "loss": 0.0069, "step": 4281, "total_loss": 0.00543212890625 }, { "epoch": 1.75, "learning_rate": 0.0001852539485551618, "lm_loss": 0.006072998046875, "loss": 0.0081, "step": 4282, "total_loss": 0.006072998046875 }, { "epoch": 1.75, "learning_rate": 0.00018524723487700414, "lm_loss": 0.0106201171875, "loss": 0.0086, "step": 4283, "total_loss": 0.0106201171875 }, { "epoch": 1.75, "learning_rate": 0.00018524051979258163, "lm_loss": 0.01171875, "loss": 0.0069, "step": 4284, "total_loss": 0.01171875 }, { "epoch": 1.75, "learning_rate": 0.000185233803302005, "lm_loss": 0.005218505859375, "loss": 0.0077, "step": 4285, "total_loss": 0.005218505859375 }, { "epoch": 1.75, "learning_rate": 0.000185227085405385, "lm_loss": 0.0091552734375, "loss": 0.0072, "step": 4286, "total_loss": 0.0091552734375 }, { "epoch": 1.75, "learning_rate": 0.00018522036610283248, "lm_loss": 0.004150390625, "loss": 0.0067, "step": 4287, "total_loss": 0.004150390625 }, { "epoch": 1.75, "learning_rate": 0.00018521364539445827, "lm_loss": 0.004608154296875, "loss": 0.0066, "step": 4288, "total_loss": 0.004608154296875 }, { "epoch": 1.75, "learning_rate": 0.00018520692328037326, "lm_loss": 0.0047607421875, "loss": 0.007, "step": 4289, "total_loss": 0.0047607421875 }, { "epoch": 1.75, "learning_rate": 0.00018520019976068835, "lm_loss": 0.00482177734375, "loss": 0.0085, "step": 4290, "total_loss": 0.00482177734375 }, { "epoch": 1.75, "learning_rate": 0.00018519347483551441, "lm_loss": 0.0029296875, "loss": 0.0068, "step": 4291, "total_loss": 0.0029296875 }, { "epoch": 1.75, "learning_rate": 0.00018518674850496244, "lm_loss": 0.0081787109375, "loss": 0.0064, "step": 4292, "total_loss": 0.0081787109375 }, { "epoch": 1.76, "learning_rate": 0.00018518002076914334, "lm_loss": 0.005126953125, "loss": 0.0071, "step": 4293, "total_loss": 0.005126953125 }, { "epoch": 1.76, "learning_rate": 0.0001851732916281681, "lm_loss": 0.0030059814453125, "loss": 0.0043, "step": 4294, "total_loss": 0.0030059814453125 }, { "epoch": 1.76, "learning_rate": 0.00018516656108214777, "lm_loss": 0.00360107421875, "loss": 0.0066, "step": 4295, "total_loss": 0.00360107421875 }, { "epoch": 1.76, "learning_rate": 0.00018515982913119334, "lm_loss": 0.00927734375, "loss": 0.0079, "step": 4296, "total_loss": 0.00927734375 }, { "epoch": 1.76, "learning_rate": 0.00018515309577541586, "lm_loss": 0.00445556640625, "loss": 0.0066, "step": 4297, "total_loss": 0.00445556640625 }, { "epoch": 1.76, "learning_rate": 0.00018514636101492641, "lm_loss": 0.0062255859375, "loss": 0.0086, "step": 4298, "total_loss": 0.0062255859375 }, { "epoch": 1.76, "learning_rate": 0.00018513962484983614, "lm_loss": 0.0118408203125, "loss": 0.0083, "step": 4299, "total_loss": 0.0118408203125 }, { "epoch": 1.76, "learning_rate": 0.00018513288728025607, "lm_loss": 0.0108642578125, "loss": 0.0095, "step": 4300, "total_loss": 0.0108642578125 }, { "epoch": 1.76, "eval_lm_loss": 0.009254022501409054, "eval_loss": 0.009628057479858398, "eval_runtime": 44.0235, "eval_samples_per_second": 22.715, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009254022501409054, "lm_loss": 0.000827789306640625, "step": 4300, "total_loss": 0.000827789306640625 }, { "epoch": 1.76, "learning_rate": 0.00018512614830629745, "lm_loss": 0.007415771484375, "loss": 0.0065, "step": 4301, "total_loss": 0.007415771484375 }, { "epoch": 1.76, "learning_rate": 0.00018511940792807137, "lm_loss": 0.00836181640625, "loss": 0.0067, "step": 4302, "total_loss": 0.00836181640625 }, { "epoch": 1.76, "learning_rate": 0.00018511266614568904, "lm_loss": 0.007110595703125, "loss": 0.0078, "step": 4303, "total_loss": 0.007110595703125 }, { "epoch": 1.76, "learning_rate": 0.00018510592295926172, "lm_loss": 0.0042724609375, "loss": 0.0071, "step": 4304, "total_loss": 0.0042724609375 }, { "epoch": 1.76, "learning_rate": 0.00018509917836890063, "lm_loss": 0.00787353515625, "loss": 0.0069, "step": 4305, "total_loss": 0.00787353515625 }, { "epoch": 1.76, "learning_rate": 0.00018509243237471696, "lm_loss": 0.00408935546875, "loss": 0.0069, "step": 4306, "total_loss": 0.00408935546875 }, { "epoch": 1.76, "learning_rate": 0.0001850856849768221, "lm_loss": 0.008544921875, "loss": 0.0071, "step": 4307, "total_loss": 0.008544921875 }, { "epoch": 1.76, "learning_rate": 0.0001850789361753273, "lm_loss": 0.005401611328125, "loss": 0.0072, "step": 4308, "total_loss": 0.005401611328125 }, { "epoch": 1.76, "learning_rate": 0.00018507218597034388, "lm_loss": 0.0078125, "loss": 0.0054, "step": 4309, "total_loss": 0.0078125 }, { "epoch": 1.76, "learning_rate": 0.0001850654343619832, "lm_loss": 0.005767822265625, "loss": 0.0087, "step": 4310, "total_loss": 0.005767822265625 }, { "epoch": 1.76, "learning_rate": 0.00018505868135035666, "lm_loss": 0.01348876953125, "loss": 0.0066, "step": 4311, "total_loss": 0.01348876953125 }, { "epoch": 1.76, "learning_rate": 0.00018505192693557567, "lm_loss": 0.00640869140625, "loss": 0.0073, "step": 4312, "total_loss": 0.00640869140625 }, { "epoch": 1.76, "learning_rate": 0.00018504517111775162, "lm_loss": 0.004730224609375, "loss": 0.0068, "step": 4313, "total_loss": 0.004730224609375 }, { "epoch": 1.76, "learning_rate": 0.0001850384138969959, "lm_loss": 0.0038299560546875, "loss": 0.0065, "step": 4314, "total_loss": 0.0038299560546875 }, { "epoch": 1.76, "learning_rate": 0.00018503165527342016, "lm_loss": 0.01263427734375, "loss": 0.0084, "step": 4315, "total_loss": 0.01263427734375 }, { "epoch": 1.76, "learning_rate": 0.0001850248952471357, "lm_loss": 0.0036163330078125, "loss": 0.0084, "step": 4316, "total_loss": 0.0036163330078125 }, { "epoch": 1.76, "learning_rate": 0.00018501813381825412, "lm_loss": 0.00531005859375, "loss": 0.0077, "step": 4317, "total_loss": 0.00531005859375 }, { "epoch": 1.77, "learning_rate": 0.000185011370986887, "lm_loss": 0.005462646484375, "loss": 0.0074, "step": 4318, "total_loss": 0.005462646484375 }, { "epoch": 1.77, "learning_rate": 0.00018500460675314578, "lm_loss": 0.0091552734375, "loss": 0.0079, "step": 4319, "total_loss": 0.0091552734375 }, { "epoch": 1.77, "learning_rate": 0.00018499784111714217, "lm_loss": 0.007537841796875, "loss": 0.0074, "step": 4320, "total_loss": 0.007537841796875 }, { "epoch": 1.77, "learning_rate": 0.0001849910740789877, "lm_loss": 0.0047607421875, "loss": 0.0088, "step": 4321, "total_loss": 0.0047607421875 }, { "epoch": 1.77, "learning_rate": 0.00018498430563879405, "lm_loss": 0.011962890625, "loss": 0.0078, "step": 4322, "total_loss": 0.011962890625 }, { "epoch": 1.77, "learning_rate": 0.00018497753579667286, "lm_loss": 0.00213623046875, "loss": 0.0072, "step": 4323, "total_loss": 0.00213623046875 }, { "epoch": 1.77, "learning_rate": 0.00018497076455273578, "lm_loss": 0.0093994140625, "loss": 0.007, "step": 4324, "total_loss": 0.0093994140625 }, { "epoch": 1.77, "learning_rate": 0.00018496399190709453, "lm_loss": 0.007659912109375, "loss": 0.0072, "step": 4325, "total_loss": 0.007659912109375 }, { "epoch": 1.77, "learning_rate": 0.00018495721785986082, "lm_loss": 0.0035400390625, "loss": 0.0064, "step": 4326, "total_loss": 0.0035400390625 }, { "epoch": 1.77, "learning_rate": 0.00018495044241114642, "lm_loss": 0.005035400390625, "loss": 0.0069, "step": 4327, "total_loss": 0.005035400390625 }, { "epoch": 1.77, "learning_rate": 0.0001849436655610631, "lm_loss": 0.0137939453125, "loss": 0.0083, "step": 4328, "total_loss": 0.0137939453125 }, { "epoch": 1.77, "learning_rate": 0.00018493688730972262, "lm_loss": 0.004119873046875, "loss": 0.0067, "step": 4329, "total_loss": 0.004119873046875 }, { "epoch": 1.77, "learning_rate": 0.00018493010765723686, "lm_loss": 0.005645751953125, "loss": 0.0087, "step": 4330, "total_loss": 0.005645751953125 }, { "epoch": 1.77, "learning_rate": 0.00018492332660371757, "lm_loss": 0.0062255859375, "loss": 0.007, "step": 4331, "total_loss": 0.0062255859375 }, { "epoch": 1.77, "learning_rate": 0.0001849165441492767, "lm_loss": 0.008544921875, "loss": 0.0068, "step": 4332, "total_loss": 0.008544921875 }, { "epoch": 1.77, "learning_rate": 0.0001849097602940261, "lm_loss": 0.005340576171875, "loss": 0.0068, "step": 4333, "total_loss": 0.005340576171875 }, { "epoch": 1.77, "learning_rate": 0.00018490297503807765, "lm_loss": 0.007293701171875, "loss": 0.0053, "step": 4334, "total_loss": 0.007293701171875 }, { "epoch": 1.77, "learning_rate": 0.00018489618838154332, "lm_loss": 0.0108642578125, "loss": 0.0069, "step": 4335, "total_loss": 0.0108642578125 }, { "epoch": 1.77, "learning_rate": 0.00018488940032453506, "lm_loss": 0.010986328125, "loss": 0.0089, "step": 4336, "total_loss": 0.010986328125 }, { "epoch": 1.77, "learning_rate": 0.0001848826108671648, "lm_loss": 0.003387451171875, "loss": 0.0068, "step": 4337, "total_loss": 0.003387451171875 }, { "epoch": 1.77, "learning_rate": 0.00018487582000954462, "lm_loss": 0.01300048828125, "loss": 0.0083, "step": 4338, "total_loss": 0.01300048828125 }, { "epoch": 1.77, "learning_rate": 0.0001848690277517865, "lm_loss": 0.007171630859375, "loss": 0.0078, "step": 4339, "total_loss": 0.007171630859375 }, { "epoch": 1.77, "learning_rate": 0.0001848622340940025, "lm_loss": 0.005706787109375, "loss": 0.0064, "step": 4340, "total_loss": 0.005706787109375 }, { "epoch": 1.77, "learning_rate": 0.00018485543903630464, "lm_loss": 0.01165771484375, "loss": 0.0085, "step": 4341, "total_loss": 0.01165771484375 }, { "epoch": 1.78, "learning_rate": 0.0001848486425788051, "lm_loss": 0.00872802734375, "loss": 0.0089, "step": 4342, "total_loss": 0.00872802734375 }, { "epoch": 1.78, "learning_rate": 0.00018484184472161594, "lm_loss": 0.0103759765625, "loss": 0.0075, "step": 4343, "total_loss": 0.0103759765625 }, { "epoch": 1.78, "learning_rate": 0.0001848350454648493, "lm_loss": 0.0021209716796875, "loss": 0.0054, "step": 4344, "total_loss": 0.0021209716796875 }, { "epoch": 1.78, "learning_rate": 0.0001848282448086174, "lm_loss": 0.00628662109375, "loss": 0.0074, "step": 4345, "total_loss": 0.00628662109375 }, { "epoch": 1.78, "learning_rate": 0.00018482144275303231, "lm_loss": 0.0087890625, "loss": 0.0056, "step": 4346, "total_loss": 0.0087890625 }, { "epoch": 1.78, "learning_rate": 0.00018481463929820633, "lm_loss": 0.006011962890625, "loss": 0.0073, "step": 4347, "total_loss": 0.006011962890625 }, { "epoch": 1.78, "learning_rate": 0.0001848078344442517, "lm_loss": 0.005615234375, "loss": 0.0064, "step": 4348, "total_loss": 0.005615234375 }, { "epoch": 1.78, "learning_rate": 0.0001848010281912806, "lm_loss": 0.00177001953125, "loss": 0.006, "step": 4349, "total_loss": 0.00177001953125 }, { "epoch": 1.78, "learning_rate": 0.0001847942205394054, "lm_loss": 0.004425048828125, "loss": 0.0074, "step": 4350, "total_loss": 0.004425048828125 }, { "epoch": 1.78, "learning_rate": 0.00018478741148873832, "lm_loss": 0.00836181640625, "loss": 0.0081, "step": 4351, "total_loss": 0.00836181640625 }, { "epoch": 1.78, "learning_rate": 0.0001847806010393917, "lm_loss": 0.00457763671875, "loss": 0.0074, "step": 4352, "total_loss": 0.00457763671875 }, { "epoch": 1.78, "learning_rate": 0.00018477378919147798, "lm_loss": 0.009033203125, "loss": 0.0085, "step": 4353, "total_loss": 0.009033203125 }, { "epoch": 1.78, "learning_rate": 0.00018476697594510942, "lm_loss": 0.007415771484375, "loss": 0.0071, "step": 4354, "total_loss": 0.007415771484375 }, { "epoch": 1.78, "learning_rate": 0.00018476016130039843, "lm_loss": 0.004180908203125, "loss": 0.0051, "step": 4355, "total_loss": 0.004180908203125 }, { "epoch": 1.78, "learning_rate": 0.00018475334525745745, "lm_loss": 0.013671875, "loss": 0.0087, "step": 4356, "total_loss": 0.013671875 }, { "epoch": 1.78, "learning_rate": 0.00018474652781639894, "lm_loss": 0.0096435546875, "loss": 0.0073, "step": 4357, "total_loss": 0.0096435546875 }, { "epoch": 1.78, "learning_rate": 0.00018473970897733532, "lm_loss": 0.01214599609375, "loss": 0.0085, "step": 4358, "total_loss": 0.01214599609375 }, { "epoch": 1.78, "learning_rate": 0.00018473288874037912, "lm_loss": 0.0130615234375, "loss": 0.0073, "step": 4359, "total_loss": 0.0130615234375 }, { "epoch": 1.78, "learning_rate": 0.0001847260671056428, "lm_loss": 0.004486083984375, "loss": 0.0084, "step": 4360, "total_loss": 0.004486083984375 }, { "epoch": 1.78, "learning_rate": 0.00018471924407323893, "lm_loss": 0.005584716796875, "loss": 0.0094, "step": 4361, "total_loss": 0.005584716796875 }, { "epoch": 1.78, "learning_rate": 0.00018471241964328002, "lm_loss": 0.007354736328125, "loss": 0.0072, "step": 4362, "total_loss": 0.007354736328125 }, { "epoch": 1.78, "learning_rate": 0.00018470559381587873, "lm_loss": 0.01318359375, "loss": 0.0084, "step": 4363, "total_loss": 0.01318359375 }, { "epoch": 1.78, "learning_rate": 0.0001846987665911476, "lm_loss": 0.007659912109375, "loss": 0.007, "step": 4364, "total_loss": 0.007659912109375 }, { "epoch": 1.78, "learning_rate": 0.0001846919379691992, "lm_loss": 0.00933837890625, "loss": 0.0077, "step": 4365, "total_loss": 0.00933837890625 }, { "epoch": 1.78, "learning_rate": 0.0001846851079501463, "lm_loss": 0.0042724609375, "loss": 0.007, "step": 4366, "total_loss": 0.0042724609375 }, { "epoch": 1.79, "learning_rate": 0.00018467827653410147, "lm_loss": 0.002838134765625, "loss": 0.0073, "step": 4367, "total_loss": 0.002838134765625 }, { "epoch": 1.79, "learning_rate": 0.00018467144372117747, "lm_loss": 0.00885009765625, "loss": 0.0072, "step": 4368, "total_loss": 0.00885009765625 }, { "epoch": 1.79, "learning_rate": 0.000184664609511487, "lm_loss": 0.00848388671875, "loss": 0.0066, "step": 4369, "total_loss": 0.00848388671875 }, { "epoch": 1.79, "learning_rate": 0.00018465777390514276, "lm_loss": 0.00390625, "loss": 0.0081, "step": 4370, "total_loss": 0.00390625 }, { "epoch": 1.79, "learning_rate": 0.00018465093690225756, "lm_loss": 0.00457763671875, "loss": 0.0089, "step": 4371, "total_loss": 0.00457763671875 }, { "epoch": 1.79, "learning_rate": 0.00018464409850294414, "lm_loss": 0.0081787109375, "loss": 0.007, "step": 4372, "total_loss": 0.0081787109375 }, { "epoch": 1.79, "learning_rate": 0.0001846372587073154, "lm_loss": 0.00701904296875, "loss": 0.0065, "step": 4373, "total_loss": 0.00701904296875 }, { "epoch": 1.79, "learning_rate": 0.00018463041751548404, "lm_loss": 0.00439453125, "loss": 0.0073, "step": 4374, "total_loss": 0.00439453125 }, { "epoch": 1.79, "learning_rate": 0.00018462357492756302, "lm_loss": 0.005584716796875, "loss": 0.0064, "step": 4375, "total_loss": 0.005584716796875 }, { "epoch": 1.79, "learning_rate": 0.00018461673094366513, "lm_loss": 0.0174560546875, "loss": 0.0075, "step": 4376, "total_loss": 0.0174560546875 }, { "epoch": 1.79, "learning_rate": 0.00018460988556390334, "lm_loss": 0.0191650390625, "loss": 0.0097, "step": 4377, "total_loss": 0.0191650390625 }, { "epoch": 1.79, "learning_rate": 0.00018460303878839058, "lm_loss": 0.003692626953125, "loss": 0.0086, "step": 4378, "total_loss": 0.003692626953125 }, { "epoch": 1.79, "learning_rate": 0.00018459619061723977, "lm_loss": 0.0038604736328125, "loss": 0.0069, "step": 4379, "total_loss": 0.0038604736328125 }, { "epoch": 1.79, "learning_rate": 0.00018458934105056383, "lm_loss": 0.00689697265625, "loss": 0.007, "step": 4380, "total_loss": 0.00689697265625 }, { "epoch": 1.79, "learning_rate": 0.00018458249008847583, "lm_loss": 0.01165771484375, "loss": 0.0089, "step": 4381, "total_loss": 0.01165771484375 }, { "epoch": 1.79, "learning_rate": 0.00018457563773108878, "lm_loss": 0.00421142578125, "loss": 0.0068, "step": 4382, "total_loss": 0.00421142578125 }, { "epoch": 1.79, "learning_rate": 0.00018456878397851565, "lm_loss": 0.01092529296875, "loss": 0.0073, "step": 4383, "total_loss": 0.01092529296875 }, { "epoch": 1.79, "learning_rate": 0.00018456192883086958, "lm_loss": 0.01263427734375, "loss": 0.0098, "step": 4384, "total_loss": 0.01263427734375 }, { "epoch": 1.79, "learning_rate": 0.00018455507228826358, "lm_loss": 0.00299072265625, "loss": 0.0058, "step": 4385, "total_loss": 0.00299072265625 }, { "epoch": 1.79, "learning_rate": 0.00018454821435081082, "lm_loss": 0.00439453125, "loss": 0.0079, "step": 4386, "total_loss": 0.00439453125 }, { "epoch": 1.79, "learning_rate": 0.00018454135501862445, "lm_loss": 0.01708984375, "loss": 0.0079, "step": 4387, "total_loss": 0.01708984375 }, { "epoch": 1.79, "learning_rate": 0.00018453449429181752, "lm_loss": 0.00628662109375, "loss": 0.0074, "step": 4388, "total_loss": 0.00628662109375 }, { "epoch": 1.79, "learning_rate": 0.0001845276321705033, "lm_loss": 0.013916015625, "loss": 0.0082, "step": 4389, "total_loss": 0.013916015625 }, { "epoch": 1.79, "learning_rate": 0.00018452076865479495, "lm_loss": 0.00390625, "loss": 0.0068, "step": 4390, "total_loss": 0.00390625 }, { "epoch": 1.8, "learning_rate": 0.00018451390374480572, "lm_loss": 0.0052490234375, "loss": 0.0075, "step": 4391, "total_loss": 0.0052490234375 }, { "epoch": 1.8, "learning_rate": 0.00018450703744064882, "lm_loss": 0.004791259765625, "loss": 0.0071, "step": 4392, "total_loss": 0.004791259765625 }, { "epoch": 1.8, "learning_rate": 0.00018450016974243757, "lm_loss": 0.010498046875, "loss": 0.0069, "step": 4393, "total_loss": 0.010498046875 }, { "epoch": 1.8, "learning_rate": 0.0001844933006502852, "lm_loss": 0.005279541015625, "loss": 0.0079, "step": 4394, "total_loss": 0.005279541015625 }, { "epoch": 1.8, "learning_rate": 0.00018448643016430504, "lm_loss": 0.005340576171875, "loss": 0.0069, "step": 4395, "total_loss": 0.005340576171875 }, { "epoch": 1.8, "learning_rate": 0.00018447955828461047, "lm_loss": 0.0068359375, "loss": 0.0066, "step": 4396, "total_loss": 0.0068359375 }, { "epoch": 1.8, "learning_rate": 0.0001844726850113148, "lm_loss": 0.0194091796875, "loss": 0.0076, "step": 4397, "total_loss": 0.0194091796875 }, { "epoch": 1.8, "learning_rate": 0.00018446581034453144, "lm_loss": 0.004913330078125, "loss": 0.0084, "step": 4398, "total_loss": 0.004913330078125 }, { "epoch": 1.8, "learning_rate": 0.0001844589342843738, "lm_loss": 0.00640869140625, "loss": 0.0073, "step": 4399, "total_loss": 0.00640869140625 }, { "epoch": 1.8, "learning_rate": 0.0001844520568309553, "lm_loss": 0.008544921875, "loss": 0.008, "step": 4400, "total_loss": 0.008544921875 }, { "epoch": 1.8, "eval_lm_loss": 0.00976422056555748, "eval_loss": 0.010152596980333328, "eval_runtime": 43.9624, "eval_samples_per_second": 22.747, "eval_steps_per_second": 0.205, "eval_total_loss": 0.00976422056555748, "lm_loss": 0.000942230224609375, "step": 4400, "total_loss": 0.000942230224609375 }, { "epoch": 1.8, "learning_rate": 0.0001844451779843894, "lm_loss": 0.006683349609375, "loss": 0.0067, "step": 4401, "total_loss": 0.006683349609375 }, { "epoch": 1.8, "learning_rate": 0.00018443829774478956, "lm_loss": 0.008056640625, "loss": 0.006, "step": 4402, "total_loss": 0.008056640625 }, { "epoch": 1.8, "learning_rate": 0.00018443141611226927, "lm_loss": 0.01416015625, "loss": 0.0076, "step": 4403, "total_loss": 0.01416015625 }, { "epoch": 1.8, "learning_rate": 0.0001844245330869421, "lm_loss": 0.00628662109375, "loss": 0.0065, "step": 4404, "total_loss": 0.00628662109375 }, { "epoch": 1.8, "learning_rate": 0.00018441764866892156, "lm_loss": 0.00701904296875, "loss": 0.0094, "step": 4405, "total_loss": 0.00701904296875 }, { "epoch": 1.8, "learning_rate": 0.0001844107628583212, "lm_loss": 0.005096435546875, "loss": 0.0062, "step": 4406, "total_loss": 0.005096435546875 }, { "epoch": 1.8, "learning_rate": 0.00018440387565525464, "lm_loss": 0.005645751953125, "loss": 0.0069, "step": 4407, "total_loss": 0.005645751953125 }, { "epoch": 1.8, "learning_rate": 0.0001843969870598355, "lm_loss": 0.00823974609375, "loss": 0.0075, "step": 4408, "total_loss": 0.00823974609375 }, { "epoch": 1.8, "learning_rate": 0.00018439009707217738, "lm_loss": 0.004241943359375, "loss": 0.0081, "step": 4409, "total_loss": 0.004241943359375 }, { "epoch": 1.8, "learning_rate": 0.00018438320569239398, "lm_loss": 0.01177978515625, "loss": 0.0089, "step": 4410, "total_loss": 0.01177978515625 }, { "epoch": 1.8, "learning_rate": 0.00018437631292059895, "lm_loss": 0.01055908203125, "loss": 0.008, "step": 4411, "total_loss": 0.01055908203125 }, { "epoch": 1.8, "learning_rate": 0.00018436941875690603, "lm_loss": 0.00093841552734375, "loss": 0.0092, "step": 4412, "total_loss": 0.00093841552734375 }, { "epoch": 1.8, "learning_rate": 0.0001843625232014289, "lm_loss": 0.0074462890625, "loss": 0.0093, "step": 4413, "total_loss": 0.0074462890625 }, { "epoch": 1.8, "learning_rate": 0.00018435562625428138, "lm_loss": 0.004913330078125, "loss": 0.0059, "step": 4414, "total_loss": 0.004913330078125 }, { "epoch": 1.8, "learning_rate": 0.00018434872791557716, "lm_loss": 0.00872802734375, "loss": 0.0083, "step": 4415, "total_loss": 0.00872802734375 }, { "epoch": 1.81, "learning_rate": 0.00018434182818543012, "lm_loss": 0.00775146484375, "loss": 0.0069, "step": 4416, "total_loss": 0.00775146484375 }, { "epoch": 1.81, "learning_rate": 0.00018433492706395398, "lm_loss": 0.00872802734375, "loss": 0.0068, "step": 4417, "total_loss": 0.00872802734375 }, { "epoch": 1.81, "learning_rate": 0.00018432802455126268, "lm_loss": 0.009765625, "loss": 0.0076, "step": 4418, "total_loss": 0.009765625 }, { "epoch": 1.81, "learning_rate": 0.00018432112064747005, "lm_loss": 0.005340576171875, "loss": 0.0091, "step": 4419, "total_loss": 0.005340576171875 }, { "epoch": 1.81, "learning_rate": 0.00018431421535268998, "lm_loss": 0.01348876953125, "loss": 0.0062, "step": 4420, "total_loss": 0.01348876953125 }, { "epoch": 1.81, "learning_rate": 0.00018430730866703638, "lm_loss": 0.0101318359375, "loss": 0.0095, "step": 4421, "total_loss": 0.0101318359375 }, { "epoch": 1.81, "learning_rate": 0.00018430040059062318, "lm_loss": 0.01214599609375, "loss": 0.0076, "step": 4422, "total_loss": 0.01214599609375 }, { "epoch": 1.81, "learning_rate": 0.00018429349112356432, "lm_loss": 0.001953125, "loss": 0.0069, "step": 4423, "total_loss": 0.001953125 }, { "epoch": 1.81, "learning_rate": 0.00018428658026597387, "lm_loss": 0.0076904296875, "loss": 0.0085, "step": 4424, "total_loss": 0.0076904296875 }, { "epoch": 1.81, "learning_rate": 0.0001842796680179657, "lm_loss": 0.007415771484375, "loss": 0.0092, "step": 4425, "total_loss": 0.007415771484375 }, { "epoch": 1.81, "learning_rate": 0.0001842727543796539, "lm_loss": 0.004180908203125, "loss": 0.0076, "step": 4426, "total_loss": 0.004180908203125 }, { "epoch": 1.81, "learning_rate": 0.00018426583935115255, "lm_loss": 0.0089111328125, "loss": 0.0059, "step": 4427, "total_loss": 0.0089111328125 }, { "epoch": 1.81, "learning_rate": 0.00018425892293257568, "lm_loss": 0.003204345703125, "loss": 0.0061, "step": 4428, "total_loss": 0.003204345703125 }, { "epoch": 1.81, "learning_rate": 0.00018425200512403744, "lm_loss": 0.011474609375, "loss": 0.0083, "step": 4429, "total_loss": 0.011474609375 }, { "epoch": 1.81, "learning_rate": 0.00018424508592565188, "lm_loss": 0.0089111328125, "loss": 0.0069, "step": 4430, "total_loss": 0.0089111328125 }, { "epoch": 1.81, "learning_rate": 0.00018423816533753317, "lm_loss": 0.004486083984375, "loss": 0.0076, "step": 4431, "total_loss": 0.004486083984375 }, { "epoch": 1.81, "learning_rate": 0.00018423124335979548, "lm_loss": 0.00775146484375, "loss": 0.008, "step": 4432, "total_loss": 0.00775146484375 }, { "epoch": 1.81, "learning_rate": 0.000184224319992553, "lm_loss": 0.009033203125, "loss": 0.007, "step": 4433, "total_loss": 0.009033203125 }, { "epoch": 1.81, "learning_rate": 0.00018421739523591992, "lm_loss": 0.002044677734375, "loss": 0.0077, "step": 4434, "total_loss": 0.002044677734375 }, { "epoch": 1.81, "learning_rate": 0.00018421046909001046, "lm_loss": 0.0021514892578125, "loss": 0.0073, "step": 4435, "total_loss": 0.0021514892578125 }, { "epoch": 1.81, "learning_rate": 0.00018420354155493894, "lm_loss": 0.00445556640625, "loss": 0.0063, "step": 4436, "total_loss": 0.00445556640625 }, { "epoch": 1.81, "learning_rate": 0.0001841966126308196, "lm_loss": 0.0101318359375, "loss": 0.0086, "step": 4437, "total_loss": 0.0101318359375 }, { "epoch": 1.81, "learning_rate": 0.0001841896823177667, "lm_loss": 0.006927490234375, "loss": 0.0074, "step": 4438, "total_loss": 0.006927490234375 }, { "epoch": 1.81, "learning_rate": 0.00018418275061589466, "lm_loss": 0.0096435546875, "loss": 0.0066, "step": 4439, "total_loss": 0.0096435546875 }, { "epoch": 1.82, "learning_rate": 0.00018417581752531775, "lm_loss": 0.00457763671875, "loss": 0.0071, "step": 4440, "total_loss": 0.00457763671875 }, { "epoch": 1.82, "learning_rate": 0.00018416888304615035, "lm_loss": 0.00579833984375, "loss": 0.0084, "step": 4441, "total_loss": 0.00579833984375 }, { "epoch": 1.82, "learning_rate": 0.00018416194717850687, "lm_loss": 0.007720947265625, "loss": 0.0088, "step": 4442, "total_loss": 0.007720947265625 }, { "epoch": 1.82, "learning_rate": 0.00018415500992250173, "lm_loss": 0.0185546875, "loss": 0.0076, "step": 4443, "total_loss": 0.0185546875 }, { "epoch": 1.82, "learning_rate": 0.00018414807127824936, "lm_loss": 0.003936767578125, "loss": 0.0066, "step": 4444, "total_loss": 0.003936767578125 }, { "epoch": 1.82, "learning_rate": 0.00018414113124586422, "lm_loss": 0.00592041015625, "loss": 0.0068, "step": 4445, "total_loss": 0.00592041015625 }, { "epoch": 1.82, "learning_rate": 0.0001841341898254608, "lm_loss": 0.014404296875, "loss": 0.0073, "step": 4446, "total_loss": 0.014404296875 }, { "epoch": 1.82, "learning_rate": 0.0001841272470171536, "lm_loss": 0.00408935546875, "loss": 0.0048, "step": 4447, "total_loss": 0.00408935546875 }, { "epoch": 1.82, "learning_rate": 0.00018412030282105718, "lm_loss": 0.006439208984375, "loss": 0.0059, "step": 4448, "total_loss": 0.006439208984375 }, { "epoch": 1.82, "learning_rate": 0.00018411335723728608, "lm_loss": 0.003692626953125, "loss": 0.0065, "step": 4449, "total_loss": 0.003692626953125 }, { "epoch": 1.82, "learning_rate": 0.00018410641026595482, "lm_loss": 0.0074462890625, "loss": 0.0084, "step": 4450, "total_loss": 0.0074462890625 }, { "epoch": 1.82, "learning_rate": 0.00018409946190717807, "lm_loss": 0.00616455078125, "loss": 0.007, "step": 4451, "total_loss": 0.00616455078125 }, { "epoch": 1.82, "learning_rate": 0.00018409251216107046, "lm_loss": 0.00787353515625, "loss": 0.0066, "step": 4452, "total_loss": 0.00787353515625 }, { "epoch": 1.82, "learning_rate": 0.00018408556102774657, "lm_loss": 0.00518798828125, "loss": 0.0066, "step": 4453, "total_loss": 0.00518798828125 }, { "epoch": 1.82, "learning_rate": 0.00018407860850732112, "lm_loss": 0.006317138671875, "loss": 0.009, "step": 4454, "total_loss": 0.006317138671875 }, { "epoch": 1.82, "learning_rate": 0.00018407165459990876, "lm_loss": 0.00799560546875, "loss": 0.007, "step": 4455, "total_loss": 0.00799560546875 }, { "epoch": 1.82, "learning_rate": 0.00018406469930562426, "lm_loss": 0.006805419921875, "loss": 0.008, "step": 4456, "total_loss": 0.006805419921875 }, { "epoch": 1.82, "learning_rate": 0.00018405774262458227, "lm_loss": 0.00439453125, "loss": 0.0063, "step": 4457, "total_loss": 0.00439453125 }, { "epoch": 1.82, "learning_rate": 0.0001840507845568977, "lm_loss": 0.00628662109375, "loss": 0.007, "step": 4458, "total_loss": 0.00628662109375 }, { "epoch": 1.82, "learning_rate": 0.00018404382510268515, "lm_loss": 0.01116943359375, "loss": 0.0076, "step": 4459, "total_loss": 0.01116943359375 }, { "epoch": 1.82, "learning_rate": 0.00018403686426205954, "lm_loss": 0.00543212890625, "loss": 0.0078, "step": 4460, "total_loss": 0.00543212890625 }, { "epoch": 1.82, "learning_rate": 0.00018402990203513566, "lm_loss": 0.004913330078125, "loss": 0.0067, "step": 4461, "total_loss": 0.004913330078125 }, { "epoch": 1.82, "learning_rate": 0.00018402293842202837, "lm_loss": 0.002899169921875, "loss": 0.0058, "step": 4462, "total_loss": 0.002899169921875 }, { "epoch": 1.82, "learning_rate": 0.00018401597342285254, "lm_loss": 0.003936767578125, "loss": 0.0071, "step": 4463, "total_loss": 0.003936767578125 }, { "epoch": 1.83, "learning_rate": 0.0001840090070377231, "lm_loss": 0.01263427734375, "loss": 0.0068, "step": 4464, "total_loss": 0.01263427734375 }, { "epoch": 1.83, "learning_rate": 0.00018400203926675492, "lm_loss": 0.007232666015625, "loss": 0.008, "step": 4465, "total_loss": 0.007232666015625 }, { "epoch": 1.83, "learning_rate": 0.00018399507011006297, "lm_loss": 0.01483154296875, "loss": 0.0079, "step": 4466, "total_loss": 0.01483154296875 }, { "epoch": 1.83, "learning_rate": 0.0001839880995677622, "lm_loss": 0.00555419921875, "loss": 0.0069, "step": 4467, "total_loss": 0.00555419921875 }, { "epoch": 1.83, "learning_rate": 0.00018398112763996763, "lm_loss": 0.0067138671875, "loss": 0.0067, "step": 4468, "total_loss": 0.0067138671875 }, { "epoch": 1.83, "learning_rate": 0.00018397415432679423, "lm_loss": 0.006805419921875, "loss": 0.0077, "step": 4469, "total_loss": 0.006805419921875 }, { "epoch": 1.83, "learning_rate": 0.00018396717962835705, "lm_loss": 0.006134033203125, "loss": 0.0084, "step": 4470, "total_loss": 0.006134033203125 }, { "epoch": 1.83, "learning_rate": 0.00018396020354477117, "lm_loss": 0.004119873046875, "loss": 0.0073, "step": 4471, "total_loss": 0.004119873046875 }, { "epoch": 1.83, "learning_rate": 0.0001839532260761516, "lm_loss": 0.0067138671875, "loss": 0.0059, "step": 4472, "total_loss": 0.0067138671875 }, { "epoch": 1.83, "learning_rate": 0.00018394624722261355, "lm_loss": 0.00677490234375, "loss": 0.0057, "step": 4473, "total_loss": 0.00677490234375 }, { "epoch": 1.83, "learning_rate": 0.00018393926698427205, "lm_loss": 0.00665283203125, "loss": 0.0081, "step": 4474, "total_loss": 0.00665283203125 }, { "epoch": 1.83, "learning_rate": 0.00018393228536124233, "lm_loss": 0.006591796875, "loss": 0.0061, "step": 4475, "total_loss": 0.006591796875 }, { "epoch": 1.83, "learning_rate": 0.00018392530235363945, "lm_loss": 0.0064697265625, "loss": 0.0077, "step": 4476, "total_loss": 0.0064697265625 }, { "epoch": 1.83, "learning_rate": 0.00018391831796157873, "lm_loss": 0.006072998046875, "loss": 0.0081, "step": 4477, "total_loss": 0.006072998046875 }, { "epoch": 1.83, "learning_rate": 0.0001839113321851753, "lm_loss": 0.00299072265625, "loss": 0.0078, "step": 4478, "total_loss": 0.00299072265625 }, { "epoch": 1.83, "learning_rate": 0.00018390434502454442, "lm_loss": 0.0028228759765625, "loss": 0.0083, "step": 4479, "total_loss": 0.0028228759765625 }, { "epoch": 1.83, "learning_rate": 0.00018389735647980136, "lm_loss": 0.00872802734375, "loss": 0.0064, "step": 4480, "total_loss": 0.00872802734375 }, { "epoch": 1.83, "learning_rate": 0.0001838903665510614, "lm_loss": 0.004486083984375, "loss": 0.0067, "step": 4481, "total_loss": 0.004486083984375 }, { "epoch": 1.83, "learning_rate": 0.00018388337523843985, "lm_loss": 0.00787353515625, "loss": 0.0076, "step": 4482, "total_loss": 0.00787353515625 }, { "epoch": 1.83, "learning_rate": 0.00018387638254205207, "lm_loss": 0.0125732421875, "loss": 0.0064, "step": 4483, "total_loss": 0.0125732421875 }, { "epoch": 1.83, "learning_rate": 0.00018386938846201337, "lm_loss": 0.00897216796875, "loss": 0.0072, "step": 4484, "total_loss": 0.00897216796875 }, { "epoch": 1.83, "learning_rate": 0.00018386239299843913, "lm_loss": 0.00518798828125, "loss": 0.0052, "step": 4485, "total_loss": 0.00518798828125 }, { "epoch": 1.83, "learning_rate": 0.00018385539615144476, "lm_loss": 0.00701904296875, "loss": 0.0063, "step": 4486, "total_loss": 0.00701904296875 }, { "epoch": 1.83, "learning_rate": 0.0001838483979211457, "lm_loss": 0.00482177734375, "loss": 0.0077, "step": 4487, "total_loss": 0.00482177734375 }, { "epoch": 1.83, "learning_rate": 0.00018384139830765735, "lm_loss": 0.126953125, "loss": 0.0167, "step": 4488, "total_loss": 0.126953125 }, { "epoch": 1.84, "learning_rate": 0.00018383439731109525, "lm_loss": 0.01153564453125, "loss": 0.0091, "step": 4489, "total_loss": 0.01153564453125 }, { "epoch": 1.84, "learning_rate": 0.00018382739493157484, "lm_loss": 0.01153564453125, "loss": 0.0066, "step": 4490, "total_loss": 0.01153564453125 }, { "epoch": 1.84, "learning_rate": 0.00018382039116921162, "lm_loss": 0.00567626953125, "loss": 0.0068, "step": 4491, "total_loss": 0.00567626953125 }, { "epoch": 1.84, "learning_rate": 0.00018381338602412115, "lm_loss": 0.008056640625, "loss": 0.0074, "step": 4492, "total_loss": 0.008056640625 }, { "epoch": 1.84, "learning_rate": 0.00018380637949641903, "lm_loss": 0.00787353515625, "loss": 0.0079, "step": 4493, "total_loss": 0.00787353515625 }, { "epoch": 1.84, "learning_rate": 0.00018379937158622073, "lm_loss": 0.00823974609375, "loss": 0.0073, "step": 4494, "total_loss": 0.00823974609375 }, { "epoch": 1.84, "learning_rate": 0.00018379236229364195, "lm_loss": 0.0185546875, "loss": 0.0087, "step": 4495, "total_loss": 0.0185546875 }, { "epoch": 1.84, "learning_rate": 0.00018378535161879832, "lm_loss": 0.00390625, "loss": 0.0063, "step": 4496, "total_loss": 0.00390625 }, { "epoch": 1.84, "learning_rate": 0.0001837783395618054, "lm_loss": 0.0047607421875, "loss": 0.0062, "step": 4497, "total_loss": 0.0047607421875 }, { "epoch": 1.84, "learning_rate": 0.00018377132612277896, "lm_loss": 0.004486083984375, "loss": 0.0058, "step": 4498, "total_loss": 0.004486083984375 }, { "epoch": 1.84, "learning_rate": 0.00018376431130183468, "lm_loss": 0.0031585693359375, "loss": 0.0067, "step": 4499, "total_loss": 0.0031585693359375 }, { "epoch": 1.84, "learning_rate": 0.00018375729509908823, "lm_loss": 0.00701904296875, "loss": 0.0067, "step": 4500, "total_loss": 0.00701904296875 }, { "epoch": 1.84, "eval_lm_loss": 0.009700492955744267, "eval_loss": 0.010171051137149334, "eval_runtime": 44.0137, "eval_samples_per_second": 22.72, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009700492955744267, "lm_loss": 0.00225830078125, "step": 4500, "total_loss": 0.00225830078125 }, { "epoch": 1.84, "learning_rate": 0.0001837502775146554, "lm_loss": 0.01019287109375, "loss": 0.0087, "step": 4501, "total_loss": 0.01019287109375 }, { "epoch": 1.84, "learning_rate": 0.0001837432585486519, "lm_loss": 0.00933837890625, "loss": 0.0073, "step": 4502, "total_loss": 0.00933837890625 }, { "epoch": 1.84, "learning_rate": 0.00018373623820119357, "lm_loss": 0.00848388671875, "loss": 0.0063, "step": 4503, "total_loss": 0.00848388671875 }, { "epoch": 1.84, "learning_rate": 0.00018372921647239618, "lm_loss": 0.00982666015625, "loss": 0.0098, "step": 4504, "total_loss": 0.00982666015625 }, { "epoch": 1.84, "learning_rate": 0.0001837221933623756, "lm_loss": 0.007415771484375, "loss": 0.0088, "step": 4505, "total_loss": 0.007415771484375 }, { "epoch": 1.84, "learning_rate": 0.0001837151688712477, "lm_loss": 0.006439208984375, "loss": 0.0082, "step": 4506, "total_loss": 0.006439208984375 }, { "epoch": 1.84, "learning_rate": 0.0001837081429991283, "lm_loss": 0.006622314453125, "loss": 0.0072, "step": 4507, "total_loss": 0.006622314453125 }, { "epoch": 1.84, "learning_rate": 0.0001837011157461333, "lm_loss": 0.00921630859375, "loss": 0.0081, "step": 4508, "total_loss": 0.00921630859375 }, { "epoch": 1.84, "learning_rate": 0.0001836940871123787, "lm_loss": 0.01104736328125, "loss": 0.007, "step": 4509, "total_loss": 0.01104736328125 }, { "epoch": 1.84, "learning_rate": 0.00018368705709798037, "lm_loss": 0.00653076171875, "loss": 0.0071, "step": 4510, "total_loss": 0.00653076171875 }, { "epoch": 1.84, "learning_rate": 0.00018368002570305433, "lm_loss": 0.007171630859375, "loss": 0.0067, "step": 4511, "total_loss": 0.007171630859375 }, { "epoch": 1.84, "learning_rate": 0.00018367299292771651, "lm_loss": 0.01129150390625, "loss": 0.0072, "step": 4512, "total_loss": 0.01129150390625 }, { "epoch": 1.85, "learning_rate": 0.000183665958772083, "lm_loss": 0.006378173828125, "loss": 0.0059, "step": 4513, "total_loss": 0.006378173828125 }, { "epoch": 1.85, "learning_rate": 0.0001836589232362698, "lm_loss": 0.0076904296875, "loss": 0.0063, "step": 4514, "total_loss": 0.0076904296875 }, { "epoch": 1.85, "learning_rate": 0.00018365188632039295, "lm_loss": 0.0118408203125, "loss": 0.0067, "step": 4515, "total_loss": 0.0118408203125 }, { "epoch": 1.85, "learning_rate": 0.0001836448480245686, "lm_loss": 0.002227783203125, "loss": 0.007, "step": 4516, "total_loss": 0.002227783203125 }, { "epoch": 1.85, "learning_rate": 0.00018363780834891277, "lm_loss": 0.00823974609375, "loss": 0.0087, "step": 4517, "total_loss": 0.00823974609375 }, { "epoch": 1.85, "learning_rate": 0.00018363076729354164, "lm_loss": 0.00445556640625, "loss": 0.0083, "step": 4518, "total_loss": 0.00445556640625 }, { "epoch": 1.85, "learning_rate": 0.00018362372485857133, "lm_loss": 0.003173828125, "loss": 0.0064, "step": 4519, "total_loss": 0.003173828125 }, { "epoch": 1.85, "learning_rate": 0.00018361668104411807, "lm_loss": 0.00909423828125, "loss": 0.0083, "step": 4520, "total_loss": 0.00909423828125 }, { "epoch": 1.85, "learning_rate": 0.00018360963585029803, "lm_loss": 0.0032196044921875, "loss": 0.0068, "step": 4521, "total_loss": 0.0032196044921875 }, { "epoch": 1.85, "learning_rate": 0.00018360258927722738, "lm_loss": 0.00732421875, "loss": 0.0071, "step": 4522, "total_loss": 0.00732421875 }, { "epoch": 1.85, "learning_rate": 0.00018359554132502243, "lm_loss": 0.006317138671875, "loss": 0.0078, "step": 4523, "total_loss": 0.006317138671875 }, { "epoch": 1.85, "learning_rate": 0.00018358849199379942, "lm_loss": 0.0142822265625, "loss": 0.0078, "step": 4524, "total_loss": 0.0142822265625 }, { "epoch": 1.85, "learning_rate": 0.00018358144128367463, "lm_loss": 0.005584716796875, "loss": 0.0063, "step": 4525, "total_loss": 0.005584716796875 }, { "epoch": 1.85, "learning_rate": 0.00018357438919476438, "lm_loss": 0.0064697265625, "loss": 0.007, "step": 4526, "total_loss": 0.0064697265625 }, { "epoch": 1.85, "learning_rate": 0.00018356733572718502, "lm_loss": 0.009521484375, "loss": 0.0063, "step": 4527, "total_loss": 0.009521484375 }, { "epoch": 1.85, "learning_rate": 0.00018356028088105286, "lm_loss": 0.0125732421875, "loss": 0.0093, "step": 4528, "total_loss": 0.0125732421875 }, { "epoch": 1.85, "learning_rate": 0.00018355322465648434, "lm_loss": 0.01239013671875, "loss": 0.0089, "step": 4529, "total_loss": 0.01239013671875 }, { "epoch": 1.85, "learning_rate": 0.00018354616705359582, "lm_loss": 0.006805419921875, "loss": 0.0065, "step": 4530, "total_loss": 0.006805419921875 }, { "epoch": 1.85, "learning_rate": 0.00018353910807250372, "lm_loss": 0.01025390625, "loss": 0.0055, "step": 4531, "total_loss": 0.01025390625 }, { "epoch": 1.85, "learning_rate": 0.0001835320477133245, "lm_loss": 0.01422119140625, "loss": 0.0072, "step": 4532, "total_loss": 0.01422119140625 }, { "epoch": 1.85, "learning_rate": 0.00018352498597617463, "lm_loss": 0.0050048828125, "loss": 0.0066, "step": 4533, "total_loss": 0.0050048828125 }, { "epoch": 1.85, "learning_rate": 0.00018351792286117063, "lm_loss": 0.005218505859375, "loss": 0.0054, "step": 4534, "total_loss": 0.005218505859375 }, { "epoch": 1.85, "learning_rate": 0.00018351085836842897, "lm_loss": 0.0106201171875, "loss": 0.0077, "step": 4535, "total_loss": 0.0106201171875 }, { "epoch": 1.85, "learning_rate": 0.0001835037924980662, "lm_loss": 0.0024566650390625, "loss": 0.0073, "step": 4536, "total_loss": 0.0024566650390625 }, { "epoch": 1.85, "learning_rate": 0.0001834967252501989, "lm_loss": 0.002777099609375, "loss": 0.007, "step": 4537, "total_loss": 0.002777099609375 }, { "epoch": 1.86, "learning_rate": 0.00018348965662494363, "lm_loss": 0.005828857421875, "loss": 0.0065, "step": 4538, "total_loss": 0.005828857421875 }, { "epoch": 1.86, "learning_rate": 0.00018348258662241701, "lm_loss": 0.003997802734375, "loss": 0.0083, "step": 4539, "total_loss": 0.003997802734375 }, { "epoch": 1.86, "learning_rate": 0.00018347551524273566, "lm_loss": 0.0010833740234375, "loss": 0.009, "step": 4540, "total_loss": 0.0010833740234375 }, { "epoch": 1.86, "learning_rate": 0.00018346844248601625, "lm_loss": 0.007598876953125, "loss": 0.0061, "step": 4541, "total_loss": 0.007598876953125 }, { "epoch": 1.86, "learning_rate": 0.00018346136835237543, "lm_loss": 0.005767822265625, "loss": 0.0072, "step": 4542, "total_loss": 0.005767822265625 }, { "epoch": 1.86, "learning_rate": 0.00018345429284192993, "lm_loss": 0.004486083984375, "loss": 0.0059, "step": 4543, "total_loss": 0.004486083984375 }, { "epoch": 1.86, "learning_rate": 0.00018344721595479644, "lm_loss": 0.021484375, "loss": 0.0078, "step": 4544, "total_loss": 0.021484375 }, { "epoch": 1.86, "learning_rate": 0.0001834401376910917, "lm_loss": 0.0069580078125, "loss": 0.0081, "step": 4545, "total_loss": 0.0069580078125 }, { "epoch": 1.86, "learning_rate": 0.0001834330580509325, "lm_loss": 0.005126953125, "loss": 0.0059, "step": 4546, "total_loss": 0.005126953125 }, { "epoch": 1.86, "learning_rate": 0.00018342597703443562, "lm_loss": 0.004791259765625, "loss": 0.007, "step": 4547, "total_loss": 0.004791259765625 }, { "epoch": 1.86, "learning_rate": 0.00018341889464171783, "lm_loss": 0.00872802734375, "loss": 0.0071, "step": 4548, "total_loss": 0.00872802734375 }, { "epoch": 1.86, "learning_rate": 0.00018341181087289603, "lm_loss": 0.00732421875, "loss": 0.0071, "step": 4549, "total_loss": 0.00732421875 }, { "epoch": 1.86, "learning_rate": 0.00018340472572808705, "lm_loss": 0.006988525390625, "loss": 0.0073, "step": 4550, "total_loss": 0.006988525390625 }, { "epoch": 1.86, "learning_rate": 0.00018339763920740773, "lm_loss": 0.006683349609375, "loss": 0.0066, "step": 4551, "total_loss": 0.006683349609375 }, { "epoch": 1.86, "learning_rate": 0.00018339055131097505, "lm_loss": 0.0084228515625, "loss": 0.0077, "step": 4552, "total_loss": 0.0084228515625 }, { "epoch": 1.86, "learning_rate": 0.00018338346203890585, "lm_loss": 0.01214599609375, "loss": 0.008, "step": 4553, "total_loss": 0.01214599609375 }, { "epoch": 1.86, "learning_rate": 0.00018337637139131713, "lm_loss": 0.006500244140625, "loss": 0.0068, "step": 4554, "total_loss": 0.006500244140625 }, { "epoch": 1.86, "learning_rate": 0.00018336927936832585, "lm_loss": 0.006378173828125, "loss": 0.0077, "step": 4555, "total_loss": 0.006378173828125 }, { "epoch": 1.86, "learning_rate": 0.000183362185970049, "lm_loss": 0.01043701171875, "loss": 0.008, "step": 4556, "total_loss": 0.01043701171875 }, { "epoch": 1.86, "learning_rate": 0.00018335509119660358, "lm_loss": 0.008056640625, "loss": 0.0072, "step": 4557, "total_loss": 0.008056640625 }, { "epoch": 1.86, "learning_rate": 0.00018334799504810665, "lm_loss": 0.00323486328125, "loss": 0.0078, "step": 4558, "total_loss": 0.00323486328125 }, { "epoch": 1.86, "learning_rate": 0.00018334089752467526, "lm_loss": 0.00933837890625, "loss": 0.0073, "step": 4559, "total_loss": 0.00933837890625 }, { "epoch": 1.86, "learning_rate": 0.0001833337986264265, "lm_loss": 0.00244140625, "loss": 0.0053, "step": 4560, "total_loss": 0.00244140625 }, { "epoch": 1.86, "learning_rate": 0.00018332669835347746, "lm_loss": 0.00433349609375, "loss": 0.0071, "step": 4561, "total_loss": 0.00433349609375 }, { "epoch": 1.87, "learning_rate": 0.0001833195967059453, "lm_loss": 0.0162353515625, "loss": 0.0065, "step": 4562, "total_loss": 0.0162353515625 }, { "epoch": 1.87, "learning_rate": 0.0001833124936839471, "lm_loss": 0.01068115234375, "loss": 0.0065, "step": 4563, "total_loss": 0.01068115234375 }, { "epoch": 1.87, "learning_rate": 0.00018330538928760013, "lm_loss": 0.0089111328125, "loss": 0.0061, "step": 4564, "total_loss": 0.0089111328125 }, { "epoch": 1.87, "learning_rate": 0.00018329828351702152, "lm_loss": 0.0078125, "loss": 0.0074, "step": 4565, "total_loss": 0.0078125 }, { "epoch": 1.87, "learning_rate": 0.00018329117637232848, "lm_loss": 0.0050048828125, "loss": 0.0079, "step": 4566, "total_loss": 0.0050048828125 }, { "epoch": 1.87, "learning_rate": 0.00018328406785363832, "lm_loss": 0.007568359375, "loss": 0.0081, "step": 4567, "total_loss": 0.007568359375 }, { "epoch": 1.87, "learning_rate": 0.00018327695796106826, "lm_loss": 0.0087890625, "loss": 0.0058, "step": 4568, "total_loss": 0.0087890625 }, { "epoch": 1.87, "learning_rate": 0.00018326984669473557, "lm_loss": 0.004638671875, "loss": 0.0066, "step": 4569, "total_loss": 0.004638671875 }, { "epoch": 1.87, "learning_rate": 0.0001832627340547576, "lm_loss": 0.0040283203125, "loss": 0.0069, "step": 4570, "total_loss": 0.0040283203125 }, { "epoch": 1.87, "learning_rate": 0.00018325562004125164, "lm_loss": 0.0096435546875, "loss": 0.0056, "step": 4571, "total_loss": 0.0096435546875 }, { "epoch": 1.87, "learning_rate": 0.00018324850465433506, "lm_loss": 0.004913330078125, "loss": 0.0066, "step": 4572, "total_loss": 0.004913330078125 }, { "epoch": 1.87, "learning_rate": 0.00018324138789412527, "lm_loss": 0.00823974609375, "loss": 0.007, "step": 4573, "total_loss": 0.00823974609375 }, { "epoch": 1.87, "learning_rate": 0.00018323426976073965, "lm_loss": 0.01348876953125, "loss": 0.0082, "step": 4574, "total_loss": 0.01348876953125 }, { "epoch": 1.87, "learning_rate": 0.00018322715025429563, "lm_loss": 0.0054931640625, "loss": 0.0064, "step": 4575, "total_loss": 0.0054931640625 }, { "epoch": 1.87, "learning_rate": 0.0001832200293749106, "lm_loss": 0.005096435546875, "loss": 0.0074, "step": 4576, "total_loss": 0.005096435546875 }, { "epoch": 1.87, "learning_rate": 0.0001832129071227021, "lm_loss": 0.00811767578125, "loss": 0.0069, "step": 4577, "total_loss": 0.00811767578125 }, { "epoch": 1.87, "learning_rate": 0.0001832057834977876, "lm_loss": 0.00439453125, "loss": 0.006, "step": 4578, "total_loss": 0.00439453125 }, { "epoch": 1.87, "learning_rate": 0.00018319865850028458, "lm_loss": 0.011474609375, "loss": 0.0074, "step": 4579, "total_loss": 0.011474609375 }, { "epoch": 1.87, "learning_rate": 0.00018319153213031063, "lm_loss": 0.0028839111328125, "loss": 0.0078, "step": 4580, "total_loss": 0.0028839111328125 }, { "epoch": 1.87, "learning_rate": 0.00018318440438798325, "lm_loss": 0.0059814453125, "loss": 0.0069, "step": 4581, "total_loss": 0.0059814453125 }, { "epoch": 1.87, "learning_rate": 0.00018317727527342008, "lm_loss": 0.01080322265625, "loss": 0.0084, "step": 4582, "total_loss": 0.01080322265625 }, { "epoch": 1.87, "learning_rate": 0.0001831701447867387, "lm_loss": 0.006072998046875, "loss": 0.0073, "step": 4583, "total_loss": 0.006072998046875 }, { "epoch": 1.87, "learning_rate": 0.00018316301292805668, "lm_loss": 0.004913330078125, "loss": 0.0069, "step": 4584, "total_loss": 0.004913330078125 }, { "epoch": 1.87, "learning_rate": 0.0001831558796974918, "lm_loss": 0.0068359375, "loss": 0.0073, "step": 4585, "total_loss": 0.0068359375 }, { "epoch": 1.87, "learning_rate": 0.00018314874509516162, "lm_loss": 0.00537109375, "loss": 0.0065, "step": 4586, "total_loss": 0.00537109375 }, { "epoch": 1.88, "learning_rate": 0.00018314160912118386, "lm_loss": 0.00469970703125, "loss": 0.0077, "step": 4587, "total_loss": 0.00469970703125 }, { "epoch": 1.88, "learning_rate": 0.00018313447177567628, "lm_loss": 0.005584716796875, "loss": 0.0072, "step": 4588, "total_loss": 0.005584716796875 }, { "epoch": 1.88, "learning_rate": 0.00018312733305875654, "lm_loss": 0.0089111328125, "loss": 0.0091, "step": 4589, "total_loss": 0.0089111328125 }, { "epoch": 1.88, "learning_rate": 0.00018312019297054248, "lm_loss": 0.006317138671875, "loss": 0.0072, "step": 4590, "total_loss": 0.006317138671875 }, { "epoch": 1.88, "learning_rate": 0.00018311305151115184, "lm_loss": 0.004791259765625, "loss": 0.006, "step": 4591, "total_loss": 0.004791259765625 }, { "epoch": 1.88, "learning_rate": 0.00018310590868070246, "lm_loss": 0.00469970703125, "loss": 0.0071, "step": 4592, "total_loss": 0.00469970703125 }, { "epoch": 1.88, "learning_rate": 0.00018309876447931213, "lm_loss": 0.007781982421875, "loss": 0.0059, "step": 4593, "total_loss": 0.007781982421875 }, { "epoch": 1.88, "learning_rate": 0.00018309161890709877, "lm_loss": 0.01190185546875, "loss": 0.008, "step": 4594, "total_loss": 0.01190185546875 }, { "epoch": 1.88, "learning_rate": 0.00018308447196418014, "lm_loss": 0.00299072265625, "loss": 0.0057, "step": 4595, "total_loss": 0.00299072265625 }, { "epoch": 1.88, "learning_rate": 0.00018307732365067426, "lm_loss": 0.005340576171875, "loss": 0.0077, "step": 4596, "total_loss": 0.005340576171875 }, { "epoch": 1.88, "learning_rate": 0.00018307017396669898, "lm_loss": 0.00445556640625, "loss": 0.0073, "step": 4597, "total_loss": 0.00445556640625 }, { "epoch": 1.88, "learning_rate": 0.0001830630229123723, "lm_loss": 0.005889892578125, "loss": 0.0068, "step": 4598, "total_loss": 0.005889892578125 }, { "epoch": 1.88, "learning_rate": 0.00018305587048781212, "lm_loss": 0.00811767578125, "loss": 0.0061, "step": 4599, "total_loss": 0.00811767578125 }, { "epoch": 1.88, "learning_rate": 0.00018304871669313645, "lm_loss": 0.00885009765625, "loss": 0.0084, "step": 4600, "total_loss": 0.00885009765625 }, { "epoch": 1.88, "eval_lm_loss": 0.009746807627379894, "eval_loss": 0.01018200907856226, "eval_runtime": 44.1121, "eval_samples_per_second": 22.67, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009746807627379894, "lm_loss": 0.00109100341796875, "step": 4600, "total_loss": 0.00109100341796875 }, { "epoch": 1.88, "learning_rate": 0.00018304156152846332, "lm_loss": 0.00604248046875, "loss": 0.0052, "step": 4601, "total_loss": 0.00604248046875 }, { "epoch": 1.88, "learning_rate": 0.00018303440499391075, "lm_loss": 0.004974365234375, "loss": 0.0058, "step": 4602, "total_loss": 0.004974365234375 }, { "epoch": 1.88, "learning_rate": 0.0001830272470895968, "lm_loss": 0.006622314453125, "loss": 0.0087, "step": 4603, "total_loss": 0.006622314453125 }, { "epoch": 1.88, "learning_rate": 0.00018302008781563957, "lm_loss": 0.01300048828125, "loss": 0.0081, "step": 4604, "total_loss": 0.01300048828125 }, { "epoch": 1.88, "learning_rate": 0.0001830129271721571, "lm_loss": 0.0036163330078125, "loss": 0.0082, "step": 4605, "total_loss": 0.0036163330078125 }, { "epoch": 1.88, "learning_rate": 0.0001830057651592676, "lm_loss": 0.012451171875, "loss": 0.0073, "step": 4606, "total_loss": 0.012451171875 }, { "epoch": 1.88, "learning_rate": 0.00018299860177708915, "lm_loss": 0.007415771484375, "loss": 0.0065, "step": 4607, "total_loss": 0.007415771484375 }, { "epoch": 1.88, "learning_rate": 0.00018299143702573992, "lm_loss": 0.00421142578125, "loss": 0.0077, "step": 4608, "total_loss": 0.00421142578125 }, { "epoch": 1.88, "learning_rate": 0.00018298427090533814, "lm_loss": 0.01202392578125, "loss": 0.0068, "step": 4609, "total_loss": 0.01202392578125 }, { "epoch": 1.88, "learning_rate": 0.00018297710341600202, "lm_loss": 0.0106201171875, "loss": 0.007, "step": 4610, "total_loss": 0.0106201171875 }, { "epoch": 1.89, "learning_rate": 0.00018296993455784976, "lm_loss": 0.016845703125, "loss": 0.0068, "step": 4611, "total_loss": 0.016845703125 }, { "epoch": 1.89, "learning_rate": 0.00018296276433099967, "lm_loss": 0.006988525390625, "loss": 0.0062, "step": 4612, "total_loss": 0.006988525390625 }, { "epoch": 1.89, "learning_rate": 0.00018295559273557, "lm_loss": 0.0103759765625, "loss": 0.009, "step": 4613, "total_loss": 0.0103759765625 }, { "epoch": 1.89, "learning_rate": 0.00018294841977167906, "lm_loss": 0.0036163330078125, "loss": 0.0071, "step": 4614, "total_loss": 0.0036163330078125 }, { "epoch": 1.89, "learning_rate": 0.00018294124543944516, "lm_loss": 0.0084228515625, "loss": 0.0064, "step": 4615, "total_loss": 0.0084228515625 }, { "epoch": 1.89, "learning_rate": 0.0001829340697389867, "lm_loss": 0.00885009765625, "loss": 0.0076, "step": 4616, "total_loss": 0.00885009765625 }, { "epoch": 1.89, "learning_rate": 0.000182926892670422, "lm_loss": 0.00958251953125, "loss": 0.0076, "step": 4617, "total_loss": 0.00958251953125 }, { "epoch": 1.89, "learning_rate": 0.00018291971423386945, "lm_loss": 0.00811767578125, "loss": 0.0072, "step": 4618, "total_loss": 0.00811767578125 }, { "epoch": 1.89, "learning_rate": 0.00018291253442944753, "lm_loss": 0.006378173828125, "loss": 0.0076, "step": 4619, "total_loss": 0.006378173828125 }, { "epoch": 1.89, "learning_rate": 0.00018290535325727464, "lm_loss": 0.00762939453125, "loss": 0.0068, "step": 4620, "total_loss": 0.00762939453125 }, { "epoch": 1.89, "learning_rate": 0.00018289817071746924, "lm_loss": 0.003326416015625, "loss": 0.0069, "step": 4621, "total_loss": 0.003326416015625 }, { "epoch": 1.89, "learning_rate": 0.00018289098681014982, "lm_loss": 0.009521484375, "loss": 0.0047, "step": 4622, "total_loss": 0.009521484375 }, { "epoch": 1.89, "learning_rate": 0.00018288380153543493, "lm_loss": 0.01397705078125, "loss": 0.0067, "step": 4623, "total_loss": 0.01397705078125 }, { "epoch": 1.89, "learning_rate": 0.00018287661489344302, "lm_loss": 0.005218505859375, "loss": 0.0061, "step": 4624, "total_loss": 0.005218505859375 }, { "epoch": 1.89, "learning_rate": 0.00018286942688429267, "lm_loss": 0.007080078125, "loss": 0.0078, "step": 4625, "total_loss": 0.007080078125 }, { "epoch": 1.89, "learning_rate": 0.0001828622375081025, "lm_loss": 0.00811767578125, "loss": 0.0091, "step": 4626, "total_loss": 0.00811767578125 }, { "epoch": 1.89, "learning_rate": 0.00018285504676499107, "lm_loss": 0.0022125244140625, "loss": 0.0049, "step": 4627, "total_loss": 0.0022125244140625 }, { "epoch": 1.89, "learning_rate": 0.00018284785465507702, "lm_loss": 0.0098876953125, "loss": 0.0063, "step": 4628, "total_loss": 0.0098876953125 }, { "epoch": 1.89, "learning_rate": 0.00018284066117847897, "lm_loss": 0.006256103515625, "loss": 0.005, "step": 4629, "total_loss": 0.006256103515625 }, { "epoch": 1.89, "learning_rate": 0.0001828334663353156, "lm_loss": 0.004608154296875, "loss": 0.0062, "step": 4630, "total_loss": 0.004608154296875 }, { "epoch": 1.89, "learning_rate": 0.0001828262701257056, "lm_loss": 0.006591796875, "loss": 0.0071, "step": 4631, "total_loss": 0.006591796875 }, { "epoch": 1.89, "learning_rate": 0.00018281907254976768, "lm_loss": 0.00897216796875, "loss": 0.0093, "step": 4632, "total_loss": 0.00897216796875 }, { "epoch": 1.89, "learning_rate": 0.00018281187360762058, "lm_loss": 0.0106201171875, "loss": 0.0085, "step": 4633, "total_loss": 0.0106201171875 }, { "epoch": 1.89, "learning_rate": 0.00018280467329938302, "lm_loss": 0.0150146484375, "loss": 0.0091, "step": 4634, "total_loss": 0.0150146484375 }, { "epoch": 1.89, "learning_rate": 0.00018279747162517382, "lm_loss": 0.00592041015625, "loss": 0.0069, "step": 4635, "total_loss": 0.00592041015625 }, { "epoch": 1.9, "learning_rate": 0.00018279026858511176, "lm_loss": 0.0074462890625, "loss": 0.0068, "step": 4636, "total_loss": 0.0074462890625 }, { "epoch": 1.9, "learning_rate": 0.00018278306417931567, "lm_loss": 0.0113525390625, "loss": 0.0075, "step": 4637, "total_loss": 0.0113525390625 }, { "epoch": 1.9, "learning_rate": 0.00018277585840790442, "lm_loss": 0.01312255859375, "loss": 0.0068, "step": 4638, "total_loss": 0.01312255859375 }, { "epoch": 1.9, "learning_rate": 0.00018276865127099683, "lm_loss": 0.008056640625, "loss": 0.0075, "step": 4639, "total_loss": 0.008056640625 }, { "epoch": 1.9, "learning_rate": 0.0001827614427687118, "lm_loss": 0.005218505859375, "loss": 0.0061, "step": 4640, "total_loss": 0.005218505859375 }, { "epoch": 1.9, "learning_rate": 0.0001827542329011683, "lm_loss": 0.00689697265625, "loss": 0.0098, "step": 4641, "total_loss": 0.00689697265625 }, { "epoch": 1.9, "learning_rate": 0.00018274702166848518, "lm_loss": 0.01019287109375, "loss": 0.006, "step": 4642, "total_loss": 0.01019287109375 }, { "epoch": 1.9, "learning_rate": 0.00018273980907078147, "lm_loss": 0.00830078125, "loss": 0.0075, "step": 4643, "total_loss": 0.00830078125 }, { "epoch": 1.9, "learning_rate": 0.00018273259510817616, "lm_loss": 0.0059814453125, "loss": 0.0079, "step": 4644, "total_loss": 0.0059814453125 }, { "epoch": 1.9, "learning_rate": 0.00018272537978078815, "lm_loss": 0.004638671875, "loss": 0.0055, "step": 4645, "total_loss": 0.004638671875 }, { "epoch": 1.9, "learning_rate": 0.0001827181630887366, "lm_loss": 0.00836181640625, "loss": 0.0066, "step": 4646, "total_loss": 0.00836181640625 }, { "epoch": 1.9, "learning_rate": 0.00018271094503214042, "lm_loss": 0.00909423828125, "loss": 0.0066, "step": 4647, "total_loss": 0.00909423828125 }, { "epoch": 1.9, "learning_rate": 0.00018270372561111884, "lm_loss": 0.0103759765625, "loss": 0.0073, "step": 4648, "total_loss": 0.0103759765625 }, { "epoch": 1.9, "learning_rate": 0.00018269650482579083, "lm_loss": 0.007232666015625, "loss": 0.0057, "step": 4649, "total_loss": 0.007232666015625 }, { "epoch": 1.9, "learning_rate": 0.00018268928267627555, "lm_loss": 0.011474609375, "loss": 0.0096, "step": 4650, "total_loss": 0.011474609375 }, { "epoch": 1.9, "learning_rate": 0.00018268205916269212, "lm_loss": 0.01251220703125, "loss": 0.0073, "step": 4651, "total_loss": 0.01251220703125 }, { "epoch": 1.9, "learning_rate": 0.00018267483428515972, "lm_loss": 0.0042724609375, "loss": 0.0081, "step": 4652, "total_loss": 0.0042724609375 }, { "epoch": 1.9, "learning_rate": 0.00018266760804379754, "lm_loss": 0.0064697265625, "loss": 0.006, "step": 4653, "total_loss": 0.0064697265625 }, { "epoch": 1.9, "learning_rate": 0.00018266038043872478, "lm_loss": 0.0030975341796875, "loss": 0.0071, "step": 4654, "total_loss": 0.0030975341796875 }, { "epoch": 1.9, "learning_rate": 0.00018265315147006066, "lm_loss": 0.005767822265625, "loss": 0.0078, "step": 4655, "total_loss": 0.005767822265625 }, { "epoch": 1.9, "learning_rate": 0.00018264592113792442, "lm_loss": 0.00457763671875, "loss": 0.0064, "step": 4656, "total_loss": 0.00457763671875 }, { "epoch": 1.9, "learning_rate": 0.00018263868944243537, "lm_loss": 0.0022430419921875, "loss": 0.0077, "step": 4657, "total_loss": 0.0022430419921875 }, { "epoch": 1.9, "learning_rate": 0.0001826314563837128, "lm_loss": 0.00970458984375, "loss": 0.0079, "step": 4658, "total_loss": 0.00970458984375 }, { "epoch": 1.9, "learning_rate": 0.00018262422196187598, "lm_loss": 0.01031494140625, "loss": 0.0062, "step": 4659, "total_loss": 0.01031494140625 }, { "epoch": 1.91, "learning_rate": 0.00018261698617704432, "lm_loss": 0.0035400390625, "loss": 0.0062, "step": 4660, "total_loss": 0.0035400390625 }, { "epoch": 1.91, "learning_rate": 0.00018260974902933712, "lm_loss": 0.00384521484375, "loss": 0.0065, "step": 4661, "total_loss": 0.00384521484375 }, { "epoch": 1.91, "learning_rate": 0.00018260251051887384, "lm_loss": 0.0027618408203125, "loss": 0.0063, "step": 4662, "total_loss": 0.0027618408203125 }, { "epoch": 1.91, "learning_rate": 0.0001825952706457738, "lm_loss": 0.01190185546875, "loss": 0.007, "step": 4663, "total_loss": 0.01190185546875 }, { "epoch": 1.91, "learning_rate": 0.0001825880294101565, "lm_loss": 0.0084228515625, "loss": 0.0087, "step": 4664, "total_loss": 0.0084228515625 }, { "epoch": 1.91, "learning_rate": 0.00018258078681214135, "lm_loss": 0.00823974609375, "loss": 0.008, "step": 4665, "total_loss": 0.00823974609375 }, { "epoch": 1.91, "learning_rate": 0.00018257354285184787, "lm_loss": 0.005828857421875, "loss": 0.0062, "step": 4666, "total_loss": 0.005828857421875 }, { "epoch": 1.91, "learning_rate": 0.00018256629752939552, "lm_loss": 0.010498046875, "loss": 0.0069, "step": 4667, "total_loss": 0.010498046875 }, { "epoch": 1.91, "learning_rate": 0.00018255905084490383, "lm_loss": 0.00994873046875, "loss": 0.0071, "step": 4668, "total_loss": 0.00994873046875 }, { "epoch": 1.91, "learning_rate": 0.00018255180279849235, "lm_loss": 0.00689697265625, "loss": 0.0071, "step": 4669, "total_loss": 0.00689697265625 }, { "epoch": 1.91, "learning_rate": 0.00018254455339028068, "lm_loss": 0.0096435546875, "loss": 0.007, "step": 4670, "total_loss": 0.0096435546875 }, { "epoch": 1.91, "learning_rate": 0.00018253730262038836, "lm_loss": 0.01068115234375, "loss": 0.0077, "step": 4671, "total_loss": 0.01068115234375 }, { "epoch": 1.91, "learning_rate": 0.000182530050488935, "lm_loss": 0.0048828125, "loss": 0.0071, "step": 4672, "total_loss": 0.0048828125 }, { "epoch": 1.91, "learning_rate": 0.00018252279699604025, "lm_loss": 0.00946044921875, "loss": 0.0073, "step": 4673, "total_loss": 0.00946044921875 }, { "epoch": 1.91, "learning_rate": 0.00018251554214182377, "lm_loss": 0.0034942626953125, "loss": 0.0066, "step": 4674, "total_loss": 0.0034942626953125 }, { "epoch": 1.91, "learning_rate": 0.0001825082859264052, "lm_loss": 0.00665283203125, "loss": 0.0076, "step": 4675, "total_loss": 0.00665283203125 }, { "epoch": 1.91, "learning_rate": 0.00018250102834990433, "lm_loss": 0.004974365234375, "loss": 0.0078, "step": 4676, "total_loss": 0.004974365234375 }, { "epoch": 1.91, "learning_rate": 0.0001824937694124408, "lm_loss": 0.0072021484375, "loss": 0.006, "step": 4677, "total_loss": 0.0072021484375 }, { "epoch": 1.91, "learning_rate": 0.00018248650911413438, "lm_loss": 0.0045166015625, "loss": 0.0061, "step": 4678, "total_loss": 0.0045166015625 }, { "epoch": 1.91, "learning_rate": 0.00018247924745510484, "lm_loss": 0.00154876708984375, "loss": 0.0073, "step": 4679, "total_loss": 0.00154876708984375 }, { "epoch": 1.91, "learning_rate": 0.000182471984435472, "lm_loss": 0.005859375, "loss": 0.0072, "step": 4680, "total_loss": 0.005859375 }, { "epoch": 1.91, "learning_rate": 0.00018246472005535558, "lm_loss": 0.01055908203125, "loss": 0.0089, "step": 4681, "total_loss": 0.01055908203125 }, { "epoch": 1.91, "learning_rate": 0.00018245745431487556, "lm_loss": 0.007537841796875, "loss": 0.0065, "step": 4682, "total_loss": 0.007537841796875 }, { "epoch": 1.91, "learning_rate": 0.00018245018721415167, "lm_loss": 0.005096435546875, "loss": 0.0071, "step": 4683, "total_loss": 0.005096435546875 }, { "epoch": 1.91, "learning_rate": 0.00018244291875330385, "lm_loss": 0.0081787109375, "loss": 0.0071, "step": 4684, "total_loss": 0.0081787109375 }, { "epoch": 1.92, "learning_rate": 0.00018243564893245196, "lm_loss": 0.006744384765625, "loss": 0.007, "step": 4685, "total_loss": 0.006744384765625 }, { "epoch": 1.92, "learning_rate": 0.000182428377751716, "lm_loss": 0.0135498046875, "loss": 0.008, "step": 4686, "total_loss": 0.0135498046875 }, { "epoch": 1.92, "learning_rate": 0.00018242110521121585, "lm_loss": 0.003265380859375, "loss": 0.0073, "step": 4687, "total_loss": 0.003265380859375 }, { "epoch": 1.92, "learning_rate": 0.0001824138313110715, "lm_loss": 0.005615234375, "loss": 0.0086, "step": 4688, "total_loss": 0.005615234375 }, { "epoch": 1.92, "learning_rate": 0.00018240655605140295, "lm_loss": 0.00482177734375, "loss": 0.0069, "step": 4689, "total_loss": 0.00482177734375 }, { "epoch": 1.92, "learning_rate": 0.00018239927943233025, "lm_loss": 0.00946044921875, "loss": 0.0071, "step": 4690, "total_loss": 0.00946044921875 }, { "epoch": 1.92, "learning_rate": 0.00018239200145397333, "lm_loss": 0.00982666015625, "loss": 0.007, "step": 4691, "total_loss": 0.00982666015625 }, { "epoch": 1.92, "learning_rate": 0.00018238472211645233, "lm_loss": 0.0042724609375, "loss": 0.0069, "step": 4692, "total_loss": 0.0042724609375 }, { "epoch": 1.92, "learning_rate": 0.00018237744141988738, "lm_loss": 0.0059814453125, "loss": 0.006, "step": 4693, "total_loss": 0.0059814453125 }, { "epoch": 1.92, "learning_rate": 0.0001823701593643985, "lm_loss": 0.00958251953125, "loss": 0.0079, "step": 4694, "total_loss": 0.00958251953125 }, { "epoch": 1.92, "learning_rate": 0.0001823628759501058, "lm_loss": 0.00787353515625, "loss": 0.0055, "step": 4695, "total_loss": 0.00787353515625 }, { "epoch": 1.92, "learning_rate": 0.00018235559117712953, "lm_loss": 0.003448486328125, "loss": 0.0059, "step": 4696, "total_loss": 0.003448486328125 }, { "epoch": 1.92, "learning_rate": 0.00018234830504558976, "lm_loss": 0.00604248046875, "loss": 0.0079, "step": 4697, "total_loss": 0.00604248046875 }, { "epoch": 1.92, "learning_rate": 0.00018234101755560674, "lm_loss": 0.0034332275390625, "loss": 0.0074, "step": 4698, "total_loss": 0.0034332275390625 }, { "epoch": 1.92, "learning_rate": 0.00018233372870730068, "lm_loss": 0.00933837890625, "loss": 0.0065, "step": 4699, "total_loss": 0.00933837890625 }, { "epoch": 1.92, "learning_rate": 0.00018232643850079182, "lm_loss": 0.00372314453125, "loss": 0.0071, "step": 4700, "total_loss": 0.00372314453125 }, { "epoch": 1.92, "eval_lm_loss": 0.009319985285401344, "eval_loss": 0.009750260971486568, "eval_runtime": 44.1299, "eval_samples_per_second": 22.66, "eval_steps_per_second": 0.204, "eval_total_loss": 0.009319985285401344, "lm_loss": 0.00115203857421875, "step": 4700, "total_loss": 0.00115203857421875 }, { "epoch": 1.92, "learning_rate": 0.0001823191469362004, "lm_loss": 0.0069580078125, "loss": 0.0072, "step": 4701, "total_loss": 0.0069580078125 }, { "epoch": 1.92, "learning_rate": 0.00018231185401364674, "lm_loss": 0.00439453125, "loss": 0.0056, "step": 4702, "total_loss": 0.00439453125 }, { "epoch": 1.92, "learning_rate": 0.00018230455973325112, "lm_loss": 0.006378173828125, "loss": 0.0056, "step": 4703, "total_loss": 0.006378173828125 }, { "epoch": 1.92, "learning_rate": 0.00018229726409513388, "lm_loss": 0.016357421875, "loss": 0.0085, "step": 4704, "total_loss": 0.016357421875 }, { "epoch": 1.92, "learning_rate": 0.00018228996709941534, "lm_loss": 0.01416015625, "loss": 0.0089, "step": 4705, "total_loss": 0.01416015625 }, { "epoch": 1.92, "learning_rate": 0.0001822826687462159, "lm_loss": 0.003997802734375, "loss": 0.0065, "step": 4706, "total_loss": 0.003997802734375 }, { "epoch": 1.92, "learning_rate": 0.00018227536903565596, "lm_loss": 0.005035400390625, "loss": 0.0076, "step": 4707, "total_loss": 0.005035400390625 }, { "epoch": 1.92, "learning_rate": 0.00018226806796785595, "lm_loss": 0.00579833984375, "loss": 0.0057, "step": 4708, "total_loss": 0.00579833984375 }, { "epoch": 1.93, "learning_rate": 0.0001822607655429363, "lm_loss": 0.007354736328125, "loss": 0.0073, "step": 4709, "total_loss": 0.007354736328125 }, { "epoch": 1.93, "learning_rate": 0.00018225346176101742, "lm_loss": 0.006683349609375, "loss": 0.0075, "step": 4710, "total_loss": 0.006683349609375 }, { "epoch": 1.93, "learning_rate": 0.0001822461566222199, "lm_loss": 0.006072998046875, "loss": 0.0067, "step": 4711, "total_loss": 0.006072998046875 }, { "epoch": 1.93, "learning_rate": 0.00018223885012666415, "lm_loss": 0.0072021484375, "loss": 0.008, "step": 4712, "total_loss": 0.0072021484375 }, { "epoch": 1.93, "learning_rate": 0.00018223154227447075, "lm_loss": 0.00677490234375, "loss": 0.0058, "step": 4713, "total_loss": 0.00677490234375 }, { "epoch": 1.93, "learning_rate": 0.00018222423306576025, "lm_loss": 0.005401611328125, "loss": 0.0054, "step": 4714, "total_loss": 0.005401611328125 }, { "epoch": 1.93, "learning_rate": 0.0001822169225006532, "lm_loss": 0.01275634765625, "loss": 0.009, "step": 4715, "total_loss": 0.01275634765625 }, { "epoch": 1.93, "learning_rate": 0.00018220961057927024, "lm_loss": 0.00482177734375, "loss": 0.006, "step": 4716, "total_loss": 0.00482177734375 }, { "epoch": 1.93, "learning_rate": 0.00018220229730173194, "lm_loss": 0.00347900390625, "loss": 0.008, "step": 4717, "total_loss": 0.00347900390625 }, { "epoch": 1.93, "learning_rate": 0.000182194982668159, "lm_loss": 0.004425048828125, "loss": 0.0059, "step": 4718, "total_loss": 0.004425048828125 }, { "epoch": 1.93, "learning_rate": 0.000182187666678672, "lm_loss": 0.0030975341796875, "loss": 0.0071, "step": 4719, "total_loss": 0.0030975341796875 }, { "epoch": 1.93, "learning_rate": 0.00018218034933339174, "lm_loss": 0.0038604736328125, "loss": 0.0079, "step": 4720, "total_loss": 0.0038604736328125 }, { "epoch": 1.93, "learning_rate": 0.00018217303063243882, "lm_loss": 0.01287841796875, "loss": 0.0077, "step": 4721, "total_loss": 0.01287841796875 }, { "epoch": 1.93, "learning_rate": 0.00018216571057593406, "lm_loss": 0.0081787109375, "loss": 0.0066, "step": 4722, "total_loss": 0.0081787109375 }, { "epoch": 1.93, "learning_rate": 0.00018215838916399815, "lm_loss": 0.00970458984375, "loss": 0.0069, "step": 4723, "total_loss": 0.00970458984375 }, { "epoch": 1.93, "learning_rate": 0.0001821510663967519, "lm_loss": 0.0052490234375, "loss": 0.0069, "step": 4724, "total_loss": 0.0052490234375 }, { "epoch": 1.93, "learning_rate": 0.00018214374227431605, "lm_loss": 0.01092529296875, "loss": 0.0071, "step": 4725, "total_loss": 0.01092529296875 }, { "epoch": 1.93, "learning_rate": 0.00018213641679681153, "lm_loss": 0.0167236328125, "loss": 0.0091, "step": 4726, "total_loss": 0.0167236328125 }, { "epoch": 1.93, "learning_rate": 0.0001821290899643591, "lm_loss": 0.004364013671875, "loss": 0.0058, "step": 4727, "total_loss": 0.004364013671875 }, { "epoch": 1.93, "learning_rate": 0.00018212176177707964, "lm_loss": 0.0037994384765625, "loss": 0.0066, "step": 4728, "total_loss": 0.0037994384765625 }, { "epoch": 1.93, "learning_rate": 0.00018211443223509406, "lm_loss": 0.01531982421875, "loss": 0.0095, "step": 4729, "total_loss": 0.01531982421875 }, { "epoch": 1.93, "learning_rate": 0.00018210710133852325, "lm_loss": 0.0166015625, "loss": 0.0079, "step": 4730, "total_loss": 0.0166015625 }, { "epoch": 1.93, "learning_rate": 0.00018209976908748813, "lm_loss": 0.005157470703125, "loss": 0.0071, "step": 4731, "total_loss": 0.005157470703125 }, { "epoch": 1.93, "learning_rate": 0.00018209243548210973, "lm_loss": 0.006744384765625, "loss": 0.0068, "step": 4732, "total_loss": 0.006744384765625 }, { "epoch": 1.93, "learning_rate": 0.0001820851005225089, "lm_loss": 0.01092529296875, "loss": 0.0085, "step": 4733, "total_loss": 0.01092529296875 }, { "epoch": 1.94, "learning_rate": 0.00018207776420880675, "lm_loss": 0.006622314453125, "loss": 0.0078, "step": 4734, "total_loss": 0.006622314453125 }, { "epoch": 1.94, "learning_rate": 0.00018207042654112428, "lm_loss": 0.005828857421875, "loss": 0.0058, "step": 4735, "total_loss": 0.005828857421875 }, { "epoch": 1.94, "learning_rate": 0.00018206308751958248, "lm_loss": 0.0034027099609375, "loss": 0.0071, "step": 4736, "total_loss": 0.0034027099609375 }, { "epoch": 1.94, "learning_rate": 0.00018205574714430246, "lm_loss": 0.00811767578125, "loss": 0.0057, "step": 4737, "total_loss": 0.00811767578125 }, { "epoch": 1.94, "learning_rate": 0.00018204840541540532, "lm_loss": 0.006683349609375, "loss": 0.0095, "step": 4738, "total_loss": 0.006683349609375 }, { "epoch": 1.94, "learning_rate": 0.00018204106233301218, "lm_loss": 0.0098876953125, "loss": 0.0082, "step": 4739, "total_loss": 0.0098876953125 }, { "epoch": 1.94, "learning_rate": 0.00018203371789724413, "lm_loss": 0.00982666015625, "loss": 0.0076, "step": 4740, "total_loss": 0.00982666015625 }, { "epoch": 1.94, "learning_rate": 0.0001820263721082223, "lm_loss": 0.002655029296875, "loss": 0.008, "step": 4741, "total_loss": 0.002655029296875 }, { "epoch": 1.94, "learning_rate": 0.00018201902496606798, "lm_loss": 0.00830078125, "loss": 0.0056, "step": 4742, "total_loss": 0.00830078125 }, { "epoch": 1.94, "learning_rate": 0.00018201167647090226, "lm_loss": 0.01031494140625, "loss": 0.007, "step": 4743, "total_loss": 0.01031494140625 }, { "epoch": 1.94, "learning_rate": 0.00018200432662284643, "lm_loss": 0.00439453125, "loss": 0.0061, "step": 4744, "total_loss": 0.00439453125 }, { "epoch": 1.94, "learning_rate": 0.0001819969754220217, "lm_loss": 0.0028228759765625, "loss": 0.0065, "step": 4745, "total_loss": 0.0028228759765625 }, { "epoch": 1.94, "learning_rate": 0.00018198962286854938, "lm_loss": 0.005218505859375, "loss": 0.0077, "step": 4746, "total_loss": 0.005218505859375 }, { "epoch": 1.94, "learning_rate": 0.00018198226896255068, "lm_loss": 0.00250244140625, "loss": 0.0071, "step": 4747, "total_loss": 0.00250244140625 }, { "epoch": 1.94, "learning_rate": 0.00018197491370414701, "lm_loss": 0.004486083984375, "loss": 0.0072, "step": 4748, "total_loss": 0.004486083984375 }, { "epoch": 1.94, "learning_rate": 0.00018196755709345966, "lm_loss": 0.00457763671875, "loss": 0.0064, "step": 4749, "total_loss": 0.00457763671875 }, { "epoch": 1.94, "learning_rate": 0.00018196019913060995, "lm_loss": 0.004364013671875, "loss": 0.0074, "step": 4750, "total_loss": 0.004364013671875 }, { "epoch": 1.94, "learning_rate": 0.0001819528398157193, "lm_loss": 0.006988525390625, "loss": 0.0083, "step": 4751, "total_loss": 0.006988525390625 }, { "epoch": 1.94, "learning_rate": 0.00018194547914890911, "lm_loss": 0.0098876953125, "loss": 0.0067, "step": 4752, "total_loss": 0.0098876953125 }, { "epoch": 1.94, "learning_rate": 0.00018193811713030083, "lm_loss": 0.004425048828125, "loss": 0.0093, "step": 4753, "total_loss": 0.004425048828125 }, { "epoch": 1.94, "learning_rate": 0.00018193075376001582, "lm_loss": 0.0091552734375, "loss": 0.0079, "step": 4754, "total_loss": 0.0091552734375 }, { "epoch": 1.94, "learning_rate": 0.00018192338903817566, "lm_loss": 0.00830078125, "loss": 0.0072, "step": 4755, "total_loss": 0.00830078125 }, { "epoch": 1.94, "learning_rate": 0.00018191602296490172, "lm_loss": 0.006988525390625, "loss": 0.0069, "step": 4756, "total_loss": 0.006988525390625 }, { "epoch": 1.94, "learning_rate": 0.00018190865554031563, "lm_loss": 0.0084228515625, "loss": 0.0082, "step": 4757, "total_loss": 0.0084228515625 }, { "epoch": 1.95, "learning_rate": 0.00018190128676453886, "lm_loss": 0.005035400390625, "loss": 0.0066, "step": 4758, "total_loss": 0.005035400390625 }, { "epoch": 1.95, "learning_rate": 0.00018189391663769297, "lm_loss": 0.0025787353515625, "loss": 0.0065, "step": 4759, "total_loss": 0.0025787353515625 }, { "epoch": 1.95, "learning_rate": 0.00018188654515989956, "lm_loss": 0.006134033203125, "loss": 0.0066, "step": 4760, "total_loss": 0.006134033203125 }, { "epoch": 1.95, "learning_rate": 0.0001818791723312802, "lm_loss": 0.00604248046875, "loss": 0.0069, "step": 4761, "total_loss": 0.00604248046875 }, { "epoch": 1.95, "learning_rate": 0.00018187179815195656, "lm_loss": 0.0113525390625, "loss": 0.0068, "step": 4762, "total_loss": 0.0113525390625 }, { "epoch": 1.95, "learning_rate": 0.0001818644226220503, "lm_loss": 0.0032196044921875, "loss": 0.0067, "step": 4763, "total_loss": 0.0032196044921875 }, { "epoch": 1.95, "learning_rate": 0.00018185704574168301, "lm_loss": 0.005706787109375, "loss": 0.008, "step": 4764, "total_loss": 0.005706787109375 }, { "epoch": 1.95, "learning_rate": 0.00018184966751097644, "lm_loss": 0.009033203125, "loss": 0.0074, "step": 4765, "total_loss": 0.009033203125 }, { "epoch": 1.95, "learning_rate": 0.00018184228793005224, "lm_loss": 0.00640869140625, "loss": 0.0066, "step": 4766, "total_loss": 0.00640869140625 }, { "epoch": 1.95, "learning_rate": 0.00018183490699903224, "lm_loss": 0.00872802734375, "loss": 0.0078, "step": 4767, "total_loss": 0.00872802734375 }, { "epoch": 1.95, "learning_rate": 0.00018182752471803814, "lm_loss": 0.00848388671875, "loss": 0.0076, "step": 4768, "total_loss": 0.00848388671875 }, { "epoch": 1.95, "learning_rate": 0.00018182014108719175, "lm_loss": 0.004791259765625, "loss": 0.0071, "step": 4769, "total_loss": 0.004791259765625 }, { "epoch": 1.95, "learning_rate": 0.00018181275610661485, "lm_loss": 0.004058837890625, "loss": 0.0081, "step": 4770, "total_loss": 0.004058837890625 }, { "epoch": 1.95, "learning_rate": 0.00018180536977642923, "lm_loss": 0.006683349609375, "loss": 0.0054, "step": 4771, "total_loss": 0.006683349609375 }, { "epoch": 1.95, "learning_rate": 0.0001817979820967568, "lm_loss": 0.00738525390625, "loss": 0.007, "step": 4772, "total_loss": 0.00738525390625 }, { "epoch": 1.95, "learning_rate": 0.00018179059306771943, "lm_loss": 0.003173828125, "loss": 0.0059, "step": 4773, "total_loss": 0.003173828125 }, { "epoch": 1.95, "learning_rate": 0.00018178320268943897, "lm_loss": 0.00457763671875, "loss": 0.0072, "step": 4774, "total_loss": 0.00457763671875 }, { "epoch": 1.95, "learning_rate": 0.00018177581096203736, "lm_loss": 0.01092529296875, "loss": 0.0073, "step": 4775, "total_loss": 0.01092529296875 }, { "epoch": 1.95, "learning_rate": 0.00018176841788563653, "lm_loss": 0.00909423828125, "loss": 0.006, "step": 4776, "total_loss": 0.00909423828125 }, { "epoch": 1.95, "learning_rate": 0.0001817610234603584, "lm_loss": 0.012939453125, "loss": 0.0072, "step": 4777, "total_loss": 0.012939453125 }, { "epoch": 1.95, "learning_rate": 0.00018175362768632503, "lm_loss": 0.008056640625, "loss": 0.0072, "step": 4778, "total_loss": 0.008056640625 }, { "epoch": 1.95, "learning_rate": 0.00018174623056365835, "lm_loss": 0.0057373046875, "loss": 0.0069, "step": 4779, "total_loss": 0.0057373046875 }, { "epoch": 1.95, "learning_rate": 0.00018173883209248045, "lm_loss": 0.004241943359375, "loss": 0.0077, "step": 4780, "total_loss": 0.004241943359375 }, { "epoch": 1.95, "learning_rate": 0.00018173143227291332, "lm_loss": 0.006317138671875, "loss": 0.0072, "step": 4781, "total_loss": 0.006317138671875 }, { "epoch": 1.96, "learning_rate": 0.00018172403110507904, "lm_loss": 0.0035247802734375, "loss": 0.0072, "step": 4782, "total_loss": 0.0035247802734375 }, { "epoch": 1.96, "learning_rate": 0.00018171662858909975, "lm_loss": 0.00567626953125, "loss": 0.0064, "step": 4783, "total_loss": 0.00567626953125 }, { "epoch": 1.96, "learning_rate": 0.0001817092247250975, "lm_loss": 0.00592041015625, "loss": 0.009, "step": 4784, "total_loss": 0.00592041015625 }, { "epoch": 1.96, "learning_rate": 0.0001817018195131945, "lm_loss": 0.00421142578125, "loss": 0.0054, "step": 4785, "total_loss": 0.00421142578125 }, { "epoch": 1.96, "learning_rate": 0.00018169441295351281, "lm_loss": 0.00775146484375, "loss": 0.0083, "step": 4786, "total_loss": 0.00775146484375 }, { "epoch": 1.96, "learning_rate": 0.0001816870050461747, "lm_loss": 0.010498046875, "loss": 0.0065, "step": 4787, "total_loss": 0.010498046875 }, { "epoch": 1.96, "learning_rate": 0.00018167959579130233, "lm_loss": 0.0096435546875, "loss": 0.0084, "step": 4788, "total_loss": 0.0096435546875 }, { "epoch": 1.96, "learning_rate": 0.00018167218518901792, "lm_loss": 0.00762939453125, "loss": 0.0067, "step": 4789, "total_loss": 0.00762939453125 }, { "epoch": 1.96, "learning_rate": 0.00018166477323944375, "lm_loss": 0.002410888671875, "loss": 0.0063, "step": 4790, "total_loss": 0.002410888671875 }, { "epoch": 1.96, "learning_rate": 0.00018165735994270205, "lm_loss": 0.01007080078125, "loss": 0.0076, "step": 4791, "total_loss": 0.01007080078125 }, { "epoch": 1.96, "learning_rate": 0.00018164994529891514, "lm_loss": 0.00543212890625, "loss": 0.0077, "step": 4792, "total_loss": 0.00543212890625 }, { "epoch": 1.96, "learning_rate": 0.00018164252930820535, "lm_loss": 0.00494384765625, "loss": 0.0066, "step": 4793, "total_loss": 0.00494384765625 }, { "epoch": 1.96, "learning_rate": 0.00018163511197069498, "lm_loss": 0.00408935546875, "loss": 0.0075, "step": 4794, "total_loss": 0.00408935546875 }, { "epoch": 1.96, "learning_rate": 0.00018162769328650642, "lm_loss": 0.0050048828125, "loss": 0.007, "step": 4795, "total_loss": 0.0050048828125 }, { "epoch": 1.96, "learning_rate": 0.000181620273255762, "lm_loss": 0.00799560546875, "loss": 0.0064, "step": 4796, "total_loss": 0.00799560546875 }, { "epoch": 1.96, "learning_rate": 0.0001816128518785842, "lm_loss": 0.01116943359375, "loss": 0.0088, "step": 4797, "total_loss": 0.01116943359375 }, { "epoch": 1.96, "learning_rate": 0.00018160542915509535, "lm_loss": 0.004974365234375, "loss": 0.0057, "step": 4798, "total_loss": 0.004974365234375 }, { "epoch": 1.96, "learning_rate": 0.00018159800508541797, "lm_loss": 0.00823974609375, "loss": 0.0065, "step": 4799, "total_loss": 0.00823974609375 }, { "epoch": 1.96, "learning_rate": 0.00018159057966967449, "lm_loss": 0.0027008056640625, "loss": 0.0088, "step": 4800, "total_loss": 0.0027008056640625 }, { "epoch": 1.96, "eval_lm_loss": 0.009422020055353642, "eval_loss": 0.009859314188361168, "eval_runtime": 43.9688, "eval_samples_per_second": 22.743, "eval_steps_per_second": 0.205, "eval_total_loss": 0.009422020055353642, "lm_loss": 0.00168609619140625, "step": 4800, "total_loss": 0.00168609619140625 }, { "epoch": 1.96, "learning_rate": 0.00018158315290798746, "lm_loss": 0.006866455078125, "loss": 0.0064, "step": 4801, "total_loss": 0.006866455078125 }, { "epoch": 1.96, "learning_rate": 0.00018157572480047936, "lm_loss": 0.006134033203125, "loss": 0.0091, "step": 4802, "total_loss": 0.006134033203125 }, { "epoch": 1.96, "learning_rate": 0.0001815682953472727, "lm_loss": 0.01275634765625, "loss": 0.0063, "step": 4803, "total_loss": 0.01275634765625 }, { "epoch": 1.96, "learning_rate": 0.00018156086454849005, "lm_loss": 0.0042724609375, "loss": 0.0076, "step": 4804, "total_loss": 0.0042724609375 }, { "epoch": 1.96, "learning_rate": 0.00018155343240425402, "lm_loss": 0.00555419921875, "loss": 0.0067, "step": 4805, "total_loss": 0.00555419921875 }, { "epoch": 1.96, "learning_rate": 0.00018154599891468718, "lm_loss": 0.00457763671875, "loss": 0.0073, "step": 4806, "total_loss": 0.00457763671875 }, { "epoch": 1.97, "learning_rate": 0.0001815385640799122, "lm_loss": 0.00921630859375, "loss": 0.0082, "step": 4807, "total_loss": 0.00921630859375 }, { "epoch": 1.97, "learning_rate": 0.00018153112790005165, "lm_loss": 0.006805419921875, "loss": 0.0078, "step": 4808, "total_loss": 0.006805419921875 }, { "epoch": 1.97, "learning_rate": 0.0001815236903752283, "lm_loss": 0.0037994384765625, "loss": 0.0081, "step": 4809, "total_loss": 0.0037994384765625 }, { "epoch": 1.97, "learning_rate": 0.00018151625150556475, "lm_loss": 0.00616455078125, "loss": 0.0083, "step": 4810, "total_loss": 0.00616455078125 }, { "epoch": 1.97, "learning_rate": 0.00018150881129118377, "lm_loss": 0.00732421875, "loss": 0.0063, "step": 4811, "total_loss": 0.00732421875 }, { "epoch": 1.97, "learning_rate": 0.00018150136973220805, "lm_loss": 0.00677490234375, "loss": 0.0079, "step": 4812, "total_loss": 0.00677490234375 }, { "epoch": 1.97, "learning_rate": 0.0001814939268287604, "lm_loss": 0.0081787109375, "loss": 0.0074, "step": 4813, "total_loss": 0.0081787109375 }, { "epoch": 1.97, "learning_rate": 0.00018148648258096357, "lm_loss": 0.0050048828125, "loss": 0.0072, "step": 4814, "total_loss": 0.0050048828125 }, { "epoch": 1.97, "learning_rate": 0.00018147903698894038, "lm_loss": 0.005889892578125, "loss": 0.0092, "step": 4815, "total_loss": 0.005889892578125 }, { "epoch": 1.97, "learning_rate": 0.00018147159005281363, "lm_loss": 0.01239013671875, "loss": 0.008, "step": 4816, "total_loss": 0.01239013671875 }, { "epoch": 1.97, "learning_rate": 0.00018146414177270617, "lm_loss": 0.0054931640625, "loss": 0.0085, "step": 4817, "total_loss": 0.0054931640625 }, { "epoch": 1.97, "learning_rate": 0.00018145669214874092, "lm_loss": 0.0108642578125, "loss": 0.0077, "step": 4818, "total_loss": 0.0108642578125 }, { "epoch": 1.97, "learning_rate": 0.00018144924118104072, "lm_loss": 0.01055908203125, "loss": 0.0082, "step": 4819, "total_loss": 0.01055908203125 }, { "epoch": 1.97, "learning_rate": 0.00018144178886972848, "lm_loss": 0.00579833984375, "loss": 0.0079, "step": 4820, "total_loss": 0.00579833984375 }, { "epoch": 1.97, "learning_rate": 0.00018143433521492717, "lm_loss": 0.00506591796875, "loss": 0.007, "step": 4821, "total_loss": 0.00506591796875 }, { "epoch": 1.97, "learning_rate": 0.00018142688021675972, "lm_loss": 0.005126953125, "loss": 0.0081, "step": 4822, "total_loss": 0.005126953125 }, { "epoch": 1.97, "learning_rate": 0.0001814194238753491, "lm_loss": 0.00921630859375, "loss": 0.0102, "step": 4823, "total_loss": 0.00921630859375 }, { "epoch": 1.97, "learning_rate": 0.00018141196619081833, "lm_loss": 0.005950927734375, "loss": 0.0073, "step": 4824, "total_loss": 0.005950927734375 }, { "epoch": 1.97, "learning_rate": 0.00018140450716329045, "lm_loss": 0.0079345703125, "loss": 0.0075, "step": 4825, "total_loss": 0.0079345703125 }, { "epoch": 1.97, "learning_rate": 0.0001813970467928885, "lm_loss": 0.006500244140625, "loss": 0.0059, "step": 4826, "total_loss": 0.006500244140625 }, { "epoch": 1.97, "learning_rate": 0.00018138958507973554, "lm_loss": 0.0047607421875, "loss": 0.0062, "step": 4827, "total_loss": 0.0047607421875 }, { "epoch": 1.97, "learning_rate": 0.0001813821220239546, "lm_loss": 0.0101318359375, "loss": 0.0074, "step": 4828, "total_loss": 0.0101318359375 }, { "epoch": 1.97, "learning_rate": 0.00018137465762566894, "lm_loss": 0.002685546875, "loss": 0.0061, "step": 4829, "total_loss": 0.002685546875 }, { "epoch": 1.97, "learning_rate": 0.00018136719188500158, "lm_loss": 0.0067138671875, "loss": 0.0072, "step": 4830, "total_loss": 0.0067138671875 }, { "epoch": 1.98, "learning_rate": 0.0001813597248020757, "lm_loss": 0.010498046875, "loss": 0.0082, "step": 4831, "total_loss": 0.010498046875 }, { "epoch": 1.98, "learning_rate": 0.00018135225637701447, "lm_loss": 0.010986328125, "loss": 0.0066, "step": 4832, "total_loss": 0.010986328125 }, { "epoch": 1.98, "learning_rate": 0.00018134478660994114, "lm_loss": 0.006683349609375, "loss": 0.0047, "step": 4833, "total_loss": 0.006683349609375 }, { "epoch": 1.98, "learning_rate": 0.00018133731550097886, "lm_loss": 0.005218505859375, "loss": 0.0065, "step": 4834, "total_loss": 0.005218505859375 }, { "epoch": 1.98, "learning_rate": 0.00018132984305025093, "lm_loss": 0.01171875, "loss": 0.0077, "step": 4835, "total_loss": 0.01171875 }, { "epoch": 1.98, "learning_rate": 0.00018132236925788063, "lm_loss": 0.006591796875, "loss": 0.008, "step": 4836, "total_loss": 0.006591796875 }, { "epoch": 1.98, "learning_rate": 0.00018131489412399117, "lm_loss": 0.007720947265625, "loss": 0.0054, "step": 4837, "total_loss": 0.007720947265625 }, { "epoch": 1.98, "learning_rate": 0.000181307417648706, "lm_loss": 0.006500244140625, "loss": 0.0074, "step": 4838, "total_loss": 0.006500244140625 }, { "epoch": 1.98, "learning_rate": 0.0001812999398321483, "lm_loss": 0.0032501220703125, "loss": 0.0058, "step": 4839, "total_loss": 0.0032501220703125 }, { "epoch": 1.98, "learning_rate": 0.00018129246067444152, "lm_loss": 0.00787353515625, "loss": 0.0077, "step": 4840, "total_loss": 0.00787353515625 }, { "epoch": 1.98, "learning_rate": 0.000181284980175709, "lm_loss": 0.0162353515625, "loss": 0.0092, "step": 4841, "total_loss": 0.0162353515625 }, { "epoch": 1.98, "learning_rate": 0.00018127749833607418, "lm_loss": 0.0050048828125, "loss": 0.0085, "step": 4842, "total_loss": 0.0050048828125 }, { "epoch": 1.98, "learning_rate": 0.00018127001515566048, "lm_loss": 0.0024566650390625, "loss": 0.0065, "step": 4843, "total_loss": 0.0024566650390625 }, { "epoch": 1.98, "learning_rate": 0.00018126253063459127, "lm_loss": 0.00830078125, "loss": 0.0086, "step": 4844, "total_loss": 0.00830078125 }, { "epoch": 1.98, "learning_rate": 0.00018125504477299007, "lm_loss": 0.0142822265625, "loss": 0.0076, "step": 4845, "total_loss": 0.0142822265625 }, { "epoch": 1.98, "learning_rate": 0.0001812475575709804, "lm_loss": 0.007049560546875, "loss": 0.0067, "step": 4846, "total_loss": 0.007049560546875 }, { "epoch": 1.98, "learning_rate": 0.00018124006902868573, "lm_loss": 0.00634765625, "loss": 0.0082, "step": 4847, "total_loss": 0.00634765625 }, { "epoch": 1.98, "learning_rate": 0.00018123257914622962, "lm_loss": 0.00506591796875, "loss": 0.0066, "step": 4848, "total_loss": 0.00506591796875 }, { "epoch": 1.98, "learning_rate": 0.0001812250879237356, "lm_loss": 0.004730224609375, "loss": 0.0084, "step": 4849, "total_loss": 0.004730224609375 }, { "epoch": 1.98, "learning_rate": 0.00018121759536132722, "lm_loss": 0.01025390625, "loss": 0.0079, "step": 4850, "total_loss": 0.01025390625 }, { "epoch": 1.98, "learning_rate": 0.0001812101014591282, "lm_loss": 0.005340576171875, "loss": 0.0078, "step": 4851, "total_loss": 0.005340576171875 }, { "epoch": 1.98, "learning_rate": 0.00018120260621726198, "lm_loss": 0.0027618408203125, "loss": 0.007, "step": 4852, "total_loss": 0.0027618408203125 }, { "epoch": 1.98, "learning_rate": 0.0001811951096358524, "lm_loss": 0.004425048828125, "loss": 0.0052, "step": 4853, "total_loss": 0.004425048828125 }, { "epoch": 1.98, "learning_rate": 0.00018118761171502295, "lm_loss": 0.0118408203125, "loss": 0.0078, "step": 4854, "total_loss": 0.0118408203125 }, { "epoch": 1.98, "learning_rate": 0.00018118011245489744, "lm_loss": 0.0030517578125, "loss": 0.0072, "step": 4855, "total_loss": 0.0030517578125 }, { "epoch": 1.99, "learning_rate": 0.0001811726118555995, "lm_loss": 0.012939453125, "loss": 0.0082, "step": 4856, "total_loss": 0.012939453125 }, { "epoch": 1.99, "learning_rate": 0.00018116510991725293, "lm_loss": 0.00457763671875, "loss": 0.006, "step": 4857, "total_loss": 0.00457763671875 }, { "epoch": 1.99, "learning_rate": 0.00018115760663998143, "lm_loss": 0.0091552734375, "loss": 0.0084, "step": 4858, "total_loss": 0.0091552734375 }, { "epoch": 1.99, "learning_rate": 0.0001811501020239088, "lm_loss": 0.006011962890625, "loss": 0.0049, "step": 4859, "total_loss": 0.006011962890625 }, { "epoch": 1.99, "learning_rate": 0.00018114259606915887, "lm_loss": 0.0120849609375, "loss": 0.0083, "step": 4860, "total_loss": 0.0120849609375 }, { "epoch": 1.99, "learning_rate": 0.0001811350887758554, "lm_loss": 0.00830078125, "loss": 0.0084, "step": 4861, "total_loss": 0.00830078125 }, { "epoch": 1.99, "learning_rate": 0.00018112758014412228, "lm_loss": 0.007232666015625, "loss": 0.0084, "step": 4862, "total_loss": 0.007232666015625 }, { "epoch": 1.99, "learning_rate": 0.00018112007017408334, "lm_loss": 0.00537109375, "loss": 0.0064, "step": 4863, "total_loss": 0.00537109375 }, { "epoch": 1.99, "learning_rate": 0.00018111255886586248, "lm_loss": 0.004669189453125, "loss": 0.006, "step": 4864, "total_loss": 0.004669189453125 }, { "epoch": 1.99, "learning_rate": 0.00018110504621958362, "lm_loss": 0.01171875, "loss": 0.0079, "step": 4865, "total_loss": 0.01171875 }, { "epoch": 1.99, "learning_rate": 0.0001810975322353707, "lm_loss": 0.00689697265625, "loss": 0.0076, "step": 4866, "total_loss": 0.00689697265625 }, { "epoch": 1.99, "learning_rate": 0.0001810900169133476, "lm_loss": 0.0032806396484375, "loss": 0.0069, "step": 4867, "total_loss": 0.0032806396484375 }, { "epoch": 1.99, "learning_rate": 0.0001810825002536384, "lm_loss": 0.0029754638671875, "loss": 0.0061, "step": 4868, "total_loss": 0.0029754638671875 }, { "epoch": 1.99, "learning_rate": 0.00018107498225636704, "lm_loss": 0.00799560546875, "loss": 0.0075, "step": 4869, "total_loss": 0.00799560546875 }, { "epoch": 1.99, "learning_rate": 0.00018106746292165756, "lm_loss": 0.004150390625, "loss": 0.0052, "step": 4870, "total_loss": 0.004150390625 }, { "epoch": 1.99, "learning_rate": 0.00018105994224963396, "lm_loss": 0.004730224609375, "loss": 0.007, "step": 4871, "total_loss": 0.004730224609375 }, { "epoch": 1.99, "learning_rate": 0.00018105242024042033, "lm_loss": 0.0069580078125, "loss": 0.0068, "step": 4872, "total_loss": 0.0069580078125 }, { "epoch": 1.99, "learning_rate": 0.00018104489689414077, "lm_loss": 0.003448486328125, "loss": 0.0076, "step": 4873, "total_loss": 0.003448486328125 }, { "epoch": 1.99, "learning_rate": 0.00018103737221091936, "lm_loss": 0.00921630859375, "loss": 0.0076, "step": 4874, "total_loss": 0.00921630859375 }, { "epoch": 1.99, "learning_rate": 0.00018102984619088026, "lm_loss": 0.007171630859375, "loss": 0.0079, "step": 4875, "total_loss": 0.007171630859375 }, { "epoch": 1.99, "learning_rate": 0.0001810223188341476, "lm_loss": 0.01019287109375, "loss": 0.0067, "step": 4876, "total_loss": 0.01019287109375 }, { "epoch": 1.99, "learning_rate": 0.00018101479014084556, "lm_loss": 0.01025390625, "loss": 0.0076, "step": 4877, "total_loss": 0.01025390625 }, { "epoch": 1.99, "learning_rate": 0.00018100726011109833, "lm_loss": 0.009765625, "loss": 0.0086, "step": 4878, "total_loss": 0.009765625 }, { "epoch": 1.99, "learning_rate": 0.00018099972874503016, "lm_loss": 0.0244140625, "loss": 0.0085, "step": 4879, "total_loss": 0.0244140625 }, { "epoch": 2.0, "learning_rate": 0.00018099219604276524, "lm_loss": 0.004730224609375, "loss": 0.0071, "step": 4880, "total_loss": 0.004730224609375 }, { "epoch": 2.0, "learning_rate": 0.00018098466200442784, "lm_loss": 0.0027618408203125, "loss": 0.0069, "step": 4881, "total_loss": 0.0027618408203125 }, { "epoch": 2.0, "learning_rate": 0.00018097712663014227, "lm_loss": 0.00701904296875, "loss": 0.0065, "step": 4882, "total_loss": 0.00701904296875 }, { "epoch": 2.0, "learning_rate": 0.0001809695899200328, "lm_loss": 0.004547119140625, "loss": 0.007, "step": 4883, "total_loss": 0.004547119140625 }, { "epoch": 2.0, "learning_rate": 0.00018096205187422384, "lm_loss": 0.01165771484375, "loss": 0.0091, "step": 4884, "total_loss": 0.01165771484375 }, { "epoch": 2.0, "learning_rate": 0.00018095451249283963, "lm_loss": 0.017822265625, "loss": 0.0084, "step": 4885, "total_loss": 0.017822265625 }, { "epoch": 2.0, "learning_rate": 0.0001809469717760046, "lm_loss": 0.0038604736328125, "loss": 0.0073, "step": 4886, "total_loss": 0.0038604736328125 }, { "epoch": 2.0, "learning_rate": 0.00018093942972384318, "lm_loss": 0.01171875, "loss": 0.0065, "step": 4887, "total_loss": 0.01171875 }, { "epoch": 2.0, "learning_rate": 0.0001809318863364797, "lm_loss": 0.0079345703125, "loss": 0.0066, "step": 4888, "total_loss": 0.0079345703125 }, { "epoch": 2.0, "learning_rate": 0.00018092434161403866, "lm_loss": 0.014404296875, "loss": 0.0069, "step": 4889, "total_loss": 0.014404296875 }, { "epoch": 2.0, "learning_rate": 0.00018091679555664447, "lm_loss": 0.01458740234375, "loss": 0.0062, "step": 4890, "total_loss": 0.01458740234375 }, { "epoch": 2.0, "learning_rate": 0.00018090924816442167, "lm_loss": 0.005615234375, "loss": 0.0064, "step": 4891, "total_loss": 0.005615234375 }, { "epoch": 2.0, "learning_rate": 0.00018090169943749476, "lm_loss": 0.00145721435546875, "loss": 0.0066, "step": 4892, "total_loss": 0.00145721435546875 }, { "epoch": 2.0, "learning_rate": 0.00018089414937598822, "lm_loss": 0.01080322265625, "loss": 0.0078, "step": 4893, "total_loss": 0.01080322265625 }, { "epoch": 2.0, "learning_rate": 0.00018088659798002664, "lm_loss": 0.007049560546875, "loss": 0.0059, "step": 4894, "total_loss": 0.007049560546875 }, { "epoch": 2.0, "learning_rate": 0.00018087904524973454, "lm_loss": 0.00933837890625, "loss": 0.0078, "step": 4895, "total_loss": 0.00933837890625 }, { "epoch": 2.0, "learning_rate": 0.00018087149118523654, "lm_loss": 0.010986328125, "loss": 0.0065, "step": 4896, "total_loss": 0.010986328125 } ], "max_steps": 24460, "num_train_epochs": 10, "total_flos": 4.56740603808113e+19, "trial_name": null, "trial_params": null }