{ "best_metric": 0.8285714285714286, "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-3cls-0922/checkpoint-40", "epoch": 200.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6875032186508179, "eval_runtime": 0.8381, "eval_samples_per_second": 83.526, "eval_steps_per_second": 2.386, "step": 2 }, { "epoch": 2.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6874324083328247, "eval_runtime": 0.6285, "eval_samples_per_second": 111.384, "eval_steps_per_second": 3.182, "step": 4 }, { "epoch": 3.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6873045563697815, "eval_runtime": 0.6533, "eval_samples_per_second": 107.153, "eval_steps_per_second": 3.062, "step": 6 }, { "epoch": 4.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.687107264995575, "eval_runtime": 0.8041, "eval_samples_per_second": 87.057, "eval_steps_per_second": 2.487, "step": 8 }, { "epoch": 5.0, "learning_rate": 1.25e-05, "loss": 0.7555, "step": 10 }, { "epoch": 5.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6868652701377869, "eval_runtime": 0.6585, "eval_samples_per_second": 106.309, "eval_steps_per_second": 3.037, "step": 10 }, { "epoch": 6.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.686565101146698, "eval_runtime": 0.6372, "eval_samples_per_second": 109.86, "eval_steps_per_second": 3.139, "step": 12 }, { "epoch": 7.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6861968636512756, "eval_runtime": 0.8401, "eval_samples_per_second": 83.326, "eval_steps_per_second": 2.381, "step": 14 }, { "epoch": 8.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.685771644115448, "eval_runtime": 0.6402, "eval_samples_per_second": 109.344, "eval_steps_per_second": 3.124, "step": 16 }, { "epoch": 9.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6853042244911194, "eval_runtime": 0.638, "eval_samples_per_second": 109.711, "eval_steps_per_second": 3.135, "step": 18 }, { "epoch": 10.0, "learning_rate": 2.5e-05, "loss": 0.7576, "step": 20 }, { "epoch": 10.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6847913861274719, "eval_runtime": 0.8284, "eval_samples_per_second": 84.496, "eval_steps_per_second": 2.414, "step": 20 }, { "epoch": 11.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6842377185821533, "eval_runtime": 0.6408, "eval_samples_per_second": 109.237, "eval_steps_per_second": 3.121, "step": 22 }, { "epoch": 12.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6836268901824951, "eval_runtime": 0.6496, "eval_samples_per_second": 107.755, "eval_steps_per_second": 3.079, "step": 24 }, { "epoch": 13.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6829591393470764, "eval_runtime": 0.8145, "eval_samples_per_second": 85.938, "eval_steps_per_second": 2.455, "step": 26 }, { "epoch": 14.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6822755336761475, "eval_runtime": 0.6641, "eval_samples_per_second": 105.412, "eval_steps_per_second": 3.012, "step": 28 }, { "epoch": 15.0, "learning_rate": 3.7500000000000003e-05, "loss": 0.769, "step": 30 }, { "epoch": 15.0, "eval_accuracy": 0.8, "eval_loss": 0.6815804839134216, "eval_runtime": 0.6278, "eval_samples_per_second": 111.502, "eval_steps_per_second": 3.186, "step": 30 }, { "epoch": 16.0, "eval_accuracy": 0.8, "eval_loss": 0.6808401346206665, "eval_runtime": 0.8247, "eval_samples_per_second": 84.88, "eval_steps_per_second": 2.425, "step": 32 }, { "epoch": 17.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6800239086151123, "eval_runtime": 0.6376, "eval_samples_per_second": 109.794, "eval_steps_per_second": 3.137, "step": 34 }, { "epoch": 18.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.679133951663971, "eval_runtime": 0.6356, "eval_samples_per_second": 110.128, "eval_steps_per_second": 3.147, "step": 36 }, { "epoch": 19.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6781331896781921, "eval_runtime": 0.8136, "eval_samples_per_second": 86.039, "eval_steps_per_second": 2.458, "step": 38 }, { "epoch": 20.0, "learning_rate": 5e-05, "loss": 0.7564, "step": 40 }, { "epoch": 20.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.6770716309547424, "eval_runtime": 0.627, "eval_samples_per_second": 111.643, "eval_steps_per_second": 3.19, "step": 40 }, { "epoch": 21.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6759592294692993, "eval_runtime": 0.6244, "eval_samples_per_second": 112.113, "eval_steps_per_second": 3.203, "step": 42 }, { "epoch": 22.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.674824059009552, "eval_runtime": 0.72, "eval_samples_per_second": 97.226, "eval_steps_per_second": 2.778, "step": 44 }, { "epoch": 23.0, "eval_accuracy": 0.8, "eval_loss": 0.6736522912979126, "eval_runtime": 0.6356, "eval_samples_per_second": 110.125, "eval_steps_per_second": 3.146, "step": 46 }, { "epoch": 24.0, "eval_accuracy": 0.8, "eval_loss": 0.6724562644958496, "eval_runtime": 0.6465, "eval_samples_per_second": 108.268, "eval_steps_per_second": 3.093, "step": 48 }, { "epoch": 25.0, "learning_rate": 6.25e-05, "loss": 0.7508, "step": 50 }, { "epoch": 25.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6713314056396484, "eval_runtime": 0.6458, "eval_samples_per_second": 108.385, "eval_steps_per_second": 3.097, "step": 50 }, { "epoch": 26.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6701393723487854, "eval_runtime": 0.7532, "eval_samples_per_second": 92.934, "eval_steps_per_second": 2.655, "step": 52 }, { "epoch": 27.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6688514947891235, "eval_runtime": 0.6275, "eval_samples_per_second": 111.546, "eval_steps_per_second": 3.187, "step": 54 }, { "epoch": 28.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6674489378929138, "eval_runtime": 0.6455, "eval_samples_per_second": 108.446, "eval_steps_per_second": 3.098, "step": 56 }, { "epoch": 29.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6660061478614807, "eval_runtime": 0.7926, "eval_samples_per_second": 88.312, "eval_steps_per_second": 2.523, "step": 58 }, { "epoch": 30.0, "learning_rate": 7.500000000000001e-05, "loss": 0.747, "step": 60 }, { "epoch": 30.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6645620465278625, "eval_runtime": 0.6238, "eval_samples_per_second": 112.214, "eval_steps_per_second": 3.206, "step": 60 }, { "epoch": 31.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6631242632865906, "eval_runtime": 0.651, "eval_samples_per_second": 107.52, "eval_steps_per_second": 3.072, "step": 62 }, { "epoch": 32.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6616196036338806, "eval_runtime": 0.8036, "eval_samples_per_second": 87.111, "eval_steps_per_second": 2.489, "step": 64 }, { "epoch": 33.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6600926518440247, "eval_runtime": 0.638, "eval_samples_per_second": 109.722, "eval_steps_per_second": 3.135, "step": 66 }, { "epoch": 34.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6585766673088074, "eval_runtime": 0.6365, "eval_samples_per_second": 109.973, "eval_steps_per_second": 3.142, "step": 68 }, { "epoch": 35.0, "learning_rate": 8.75e-05, "loss": 0.7343, "step": 70 }, { "epoch": 35.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6569960117340088, "eval_runtime": 0.7913, "eval_samples_per_second": 88.467, "eval_steps_per_second": 2.528, "step": 70 }, { "epoch": 36.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6553293466567993, "eval_runtime": 0.6317, "eval_samples_per_second": 110.815, "eval_steps_per_second": 3.166, "step": 72 }, { "epoch": 37.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6535871028900146, "eval_runtime": 0.6261, "eval_samples_per_second": 111.81, "eval_steps_per_second": 3.195, "step": 74 }, { "epoch": 38.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6517333388328552, "eval_runtime": 0.7801, "eval_samples_per_second": 89.727, "eval_steps_per_second": 2.564, "step": 76 }, { "epoch": 39.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6498710513114929, "eval_runtime": 0.6688, "eval_samples_per_second": 104.667, "eval_steps_per_second": 2.99, "step": 78 }, { "epoch": 40.0, "learning_rate": 0.0001, "loss": 0.7532, "step": 80 }, { "epoch": 40.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6480462551116943, "eval_runtime": 0.6422, "eval_samples_per_second": 108.998, "eval_steps_per_second": 3.114, "step": 80 }, { "epoch": 41.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6461040377616882, "eval_runtime": 0.7878, "eval_samples_per_second": 88.86, "eval_steps_per_second": 2.539, "step": 82 }, { "epoch": 42.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6441839337348938, "eval_runtime": 0.6221, "eval_samples_per_second": 112.518, "eval_steps_per_second": 3.215, "step": 84 }, { "epoch": 43.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6423068046569824, "eval_runtime": 0.6404, "eval_samples_per_second": 109.306, "eval_steps_per_second": 3.123, "step": 86 }, { "epoch": 44.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6404834985733032, "eval_runtime": 0.8194, "eval_samples_per_second": 85.431, "eval_steps_per_second": 2.441, "step": 88 }, { "epoch": 45.0, "learning_rate": 9.687500000000001e-05, "loss": 0.7239, "step": 90 }, { "epoch": 45.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.638668417930603, "eval_runtime": 0.6293, "eval_samples_per_second": 111.227, "eval_steps_per_second": 3.178, "step": 90 }, { "epoch": 46.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6368482112884521, "eval_runtime": 0.6307, "eval_samples_per_second": 110.981, "eval_steps_per_second": 3.171, "step": 92 }, { "epoch": 47.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6351889967918396, "eval_runtime": 0.8243, "eval_samples_per_second": 84.921, "eval_steps_per_second": 2.426, "step": 94 }, { "epoch": 48.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6336590051651001, "eval_runtime": 0.6325, "eval_samples_per_second": 110.664, "eval_steps_per_second": 3.162, "step": 96 }, { "epoch": 49.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.6321325302124023, "eval_runtime": 0.6292, "eval_samples_per_second": 111.258, "eval_steps_per_second": 3.179, "step": 98 }, { "epoch": 50.0, "learning_rate": 9.375e-05, "loss": 0.7085, "step": 100 }, { "epoch": 50.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.6307134628295898, "eval_runtime": 0.8147, "eval_samples_per_second": 85.924, "eval_steps_per_second": 2.455, "step": 100 }, { "epoch": 51.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.6293519139289856, "eval_runtime": 0.6273, "eval_samples_per_second": 111.588, "eval_steps_per_second": 3.188, "step": 102 }, { "epoch": 52.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.6278188228607178, "eval_runtime": 0.6366, "eval_samples_per_second": 109.96, "eval_steps_per_second": 3.142, "step": 104 }, { "epoch": 53.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.6263061165809631, "eval_runtime": 0.8106, "eval_samples_per_second": 86.353, "eval_steps_per_second": 2.467, "step": 106 }, { "epoch": 54.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6247809529304504, "eval_runtime": 0.637, "eval_samples_per_second": 109.885, "eval_steps_per_second": 3.14, "step": 108 }, { "epoch": 55.0, "learning_rate": 9.062500000000001e-05, "loss": 0.7203, "step": 110 }, { "epoch": 55.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6232935190200806, "eval_runtime": 0.6312, "eval_samples_per_second": 110.901, "eval_steps_per_second": 3.169, "step": 110 }, { "epoch": 56.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6218679547309875, "eval_runtime": 0.8253, "eval_samples_per_second": 84.819, "eval_steps_per_second": 2.423, "step": 112 }, { "epoch": 57.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6204643845558167, "eval_runtime": 0.6393, "eval_samples_per_second": 109.495, "eval_steps_per_second": 3.128, "step": 114 }, { "epoch": 58.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6191075444221497, "eval_runtime": 0.6278, "eval_samples_per_second": 111.495, "eval_steps_per_second": 3.186, "step": 116 }, { "epoch": 59.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6178752779960632, "eval_runtime": 0.8233, "eval_samples_per_second": 85.027, "eval_steps_per_second": 2.429, "step": 118 }, { "epoch": 60.0, "learning_rate": 8.75e-05, "loss": 0.7136, "step": 120 }, { "epoch": 60.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6167242527008057, "eval_runtime": 0.6489, "eval_samples_per_second": 107.881, "eval_steps_per_second": 3.082, "step": 120 }, { "epoch": 61.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6156985759735107, "eval_runtime": 0.6472, "eval_samples_per_second": 108.154, "eval_steps_per_second": 3.09, "step": 122 }, { "epoch": 62.0, "eval_accuracy": 0.8, "eval_loss": 0.61481112241745, "eval_runtime": 0.8228, "eval_samples_per_second": 85.073, "eval_steps_per_second": 2.431, "step": 124 }, { "epoch": 63.0, "eval_accuracy": 0.8, "eval_loss": 0.6138356328010559, "eval_runtime": 0.6327, "eval_samples_per_second": 110.64, "eval_steps_per_second": 3.161, "step": 126 }, { "epoch": 64.0, "eval_accuracy": 0.8, "eval_loss": 0.6125301122665405, "eval_runtime": 0.6379, "eval_samples_per_second": 109.736, "eval_steps_per_second": 3.135, "step": 128 }, { "epoch": 65.0, "learning_rate": 8.4375e-05, "loss": 0.7123, "step": 130 }, { "epoch": 65.0, "eval_accuracy": 0.8, "eval_loss": 0.6110576391220093, "eval_runtime": 0.825, "eval_samples_per_second": 84.849, "eval_steps_per_second": 2.424, "step": 130 }, { "epoch": 66.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6096405982971191, "eval_runtime": 0.6376, "eval_samples_per_second": 109.782, "eval_steps_per_second": 3.137, "step": 132 }, { "epoch": 67.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6083278656005859, "eval_runtime": 0.8232, "eval_samples_per_second": 85.037, "eval_steps_per_second": 2.43, "step": 134 }, { "epoch": 68.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6069909930229187, "eval_runtime": 0.8193, "eval_samples_per_second": 85.437, "eval_steps_per_second": 2.441, "step": 136 }, { "epoch": 69.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6057179570198059, "eval_runtime": 0.6486, "eval_samples_per_second": 107.925, "eval_steps_per_second": 3.084, "step": 138 }, { "epoch": 70.0, "learning_rate": 8.125000000000001e-05, "loss": 0.7076, "step": 140 }, { "epoch": 70.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.604619562625885, "eval_runtime": 0.6358, "eval_samples_per_second": 110.095, "eval_steps_per_second": 3.146, "step": 140 }, { "epoch": 71.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6034784913063049, "eval_runtime": 0.8201, "eval_samples_per_second": 85.352, "eval_steps_per_second": 2.439, "step": 142 }, { "epoch": 72.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6022736430168152, "eval_runtime": 0.6311, "eval_samples_per_second": 110.91, "eval_steps_per_second": 3.169, "step": 144 }, { "epoch": 73.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6011058688163757, "eval_runtime": 0.6566, "eval_samples_per_second": 106.607, "eval_steps_per_second": 3.046, "step": 146 }, { "epoch": 74.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5999324917793274, "eval_runtime": 0.8174, "eval_samples_per_second": 85.64, "eval_steps_per_second": 2.447, "step": 148 }, { "epoch": 75.0, "learning_rate": 7.8125e-05, "loss": 0.6878, "step": 150 }, { "epoch": 75.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5987647175788879, "eval_runtime": 0.6275, "eval_samples_per_second": 111.562, "eval_steps_per_second": 3.187, "step": 150 }, { "epoch": 76.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5975351333618164, "eval_runtime": 0.6296, "eval_samples_per_second": 111.178, "eval_steps_per_second": 3.177, "step": 152 }, { "epoch": 77.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5963953137397766, "eval_runtime": 0.8044, "eval_samples_per_second": 87.018, "eval_steps_per_second": 2.486, "step": 154 }, { "epoch": 78.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5952684879302979, "eval_runtime": 0.6501, "eval_samples_per_second": 107.669, "eval_steps_per_second": 3.076, "step": 156 }, { "epoch": 79.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5942099094390869, "eval_runtime": 0.6469, "eval_samples_per_second": 108.203, "eval_steps_per_second": 3.092, "step": 158 }, { "epoch": 80.0, "learning_rate": 7.500000000000001e-05, "loss": 0.6657, "step": 160 }, { "epoch": 80.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5932222008705139, "eval_runtime": 0.8259, "eval_samples_per_second": 84.754, "eval_steps_per_second": 2.422, "step": 160 }, { "epoch": 81.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5923032760620117, "eval_runtime": 0.6393, "eval_samples_per_second": 109.49, "eval_steps_per_second": 3.128, "step": 162 }, { "epoch": 82.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5914328098297119, "eval_runtime": 0.6466, "eval_samples_per_second": 108.262, "eval_steps_per_second": 3.093, "step": 164 }, { "epoch": 83.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5905909538269043, "eval_runtime": 0.8278, "eval_samples_per_second": 84.56, "eval_steps_per_second": 2.416, "step": 166 }, { "epoch": 84.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5897351503372192, "eval_runtime": 0.6485, "eval_samples_per_second": 107.935, "eval_steps_per_second": 3.084, "step": 168 }, { "epoch": 85.0, "learning_rate": 7.1875e-05, "loss": 0.6434, "step": 170 }, { "epoch": 85.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.588803768157959, "eval_runtime": 0.6407, "eval_samples_per_second": 109.255, "eval_steps_per_second": 3.122, "step": 170 }, { "epoch": 86.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5878075361251831, "eval_runtime": 0.7846, "eval_samples_per_second": 89.216, "eval_steps_per_second": 2.549, "step": 172 }, { "epoch": 87.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5868256688117981, "eval_runtime": 0.6427, "eval_samples_per_second": 108.917, "eval_steps_per_second": 3.112, "step": 174 }, { "epoch": 88.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5859082341194153, "eval_runtime": 0.6384, "eval_samples_per_second": 109.65, "eval_steps_per_second": 3.133, "step": 176 }, { "epoch": 89.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5850787162780762, "eval_runtime": 0.7009, "eval_samples_per_second": 99.878, "eval_steps_per_second": 2.854, "step": 178 }, { "epoch": 90.0, "learning_rate": 6.875e-05, "loss": 0.6825, "step": 180 }, { "epoch": 90.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5843265652656555, "eval_runtime": 0.6343, "eval_samples_per_second": 110.361, "eval_steps_per_second": 3.153, "step": 180 }, { "epoch": 91.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5835766792297363, "eval_runtime": 0.645, "eval_samples_per_second": 108.529, "eval_steps_per_second": 3.101, "step": 182 }, { "epoch": 92.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5828419923782349, "eval_runtime": 0.6414, "eval_samples_per_second": 109.129, "eval_steps_per_second": 3.118, "step": 184 }, { "epoch": 93.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5822591781616211, "eval_runtime": 0.6506, "eval_samples_per_second": 107.585, "eval_steps_per_second": 3.074, "step": 186 }, { "epoch": 94.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5817149877548218, "eval_runtime": 0.6481, "eval_samples_per_second": 108.003, "eval_steps_per_second": 3.086, "step": 188 }, { "epoch": 95.0, "learning_rate": 6.562500000000001e-05, "loss": 0.6695, "step": 190 }, { "epoch": 95.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5809342265129089, "eval_runtime": 0.6426, "eval_samples_per_second": 108.939, "eval_steps_per_second": 3.113, "step": 190 }, { "epoch": 96.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5801157355308533, "eval_runtime": 0.7408, "eval_samples_per_second": 94.487, "eval_steps_per_second": 2.7, "step": 192 }, { "epoch": 97.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5793442130088806, "eval_runtime": 0.6328, "eval_samples_per_second": 110.628, "eval_steps_per_second": 3.161, "step": 194 }, { "epoch": 98.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5787318348884583, "eval_runtime": 0.6404, "eval_samples_per_second": 109.309, "eval_steps_per_second": 3.123, "step": 196 }, { "epoch": 99.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5780039429664612, "eval_runtime": 0.7894, "eval_samples_per_second": 88.678, "eval_steps_per_second": 2.534, "step": 198 }, { "epoch": 100.0, "learning_rate": 6.25e-05, "loss": 0.6672, "step": 200 }, { "epoch": 100.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5772114992141724, "eval_runtime": 0.6485, "eval_samples_per_second": 107.942, "eval_steps_per_second": 3.084, "step": 200 }, { "epoch": 101.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5762485265731812, "eval_runtime": 0.632, "eval_samples_per_second": 110.757, "eval_steps_per_second": 3.164, "step": 202 }, { "epoch": 102.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5753609538078308, "eval_runtime": 0.8156, "eval_samples_per_second": 85.824, "eval_steps_per_second": 2.452, "step": 204 }, { "epoch": 103.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5745884776115417, "eval_runtime": 0.641, "eval_samples_per_second": 109.197, "eval_steps_per_second": 3.12, "step": 206 }, { "epoch": 104.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.573843777179718, "eval_runtime": 0.64, "eval_samples_per_second": 109.374, "eval_steps_per_second": 3.125, "step": 208 }, { "epoch": 105.0, "learning_rate": 5.9375e-05, "loss": 0.6569, "step": 210 }, { "epoch": 105.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5731338858604431, "eval_runtime": 0.8165, "eval_samples_per_second": 85.735, "eval_steps_per_second": 2.45, "step": 210 }, { "epoch": 106.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5723776817321777, "eval_runtime": 0.6448, "eval_samples_per_second": 108.558, "eval_steps_per_second": 3.102, "step": 212 }, { "epoch": 107.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5715596675872803, "eval_runtime": 0.6552, "eval_samples_per_second": 106.837, "eval_steps_per_second": 3.052, "step": 214 }, { "epoch": 108.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5707866549491882, "eval_runtime": 0.7955, "eval_samples_per_second": 87.991, "eval_steps_per_second": 2.514, "step": 216 }, { "epoch": 109.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.570074737071991, "eval_runtime": 0.6364, "eval_samples_per_second": 109.993, "eval_steps_per_second": 3.143, "step": 218 }, { "epoch": 110.0, "learning_rate": 5.6250000000000005e-05, "loss": 0.6748, "step": 220 }, { "epoch": 110.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5693923830986023, "eval_runtime": 0.6356, "eval_samples_per_second": 110.138, "eval_steps_per_second": 3.147, "step": 220 }, { "epoch": 111.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5686994791030884, "eval_runtime": 0.8207, "eval_samples_per_second": 85.298, "eval_steps_per_second": 2.437, "step": 222 }, { "epoch": 112.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5680269598960876, "eval_runtime": 0.6498, "eval_samples_per_second": 107.722, "eval_steps_per_second": 3.078, "step": 224 }, { "epoch": 113.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5673888325691223, "eval_runtime": 0.6711, "eval_samples_per_second": 104.299, "eval_steps_per_second": 2.98, "step": 226 }, { "epoch": 114.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5668244957923889, "eval_runtime": 0.8599, "eval_samples_per_second": 81.4, "eval_steps_per_second": 2.326, "step": 228 }, { "epoch": 115.0, "learning_rate": 5.3125000000000004e-05, "loss": 0.6388, "step": 230 }, { "epoch": 115.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.56624835729599, "eval_runtime": 0.639, "eval_samples_per_second": 109.549, "eval_steps_per_second": 3.13, "step": 230 }, { "epoch": 116.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5657045841217041, "eval_runtime": 0.6452, "eval_samples_per_second": 108.495, "eval_steps_per_second": 3.1, "step": 232 }, { "epoch": 117.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5652384757995605, "eval_runtime": 0.8319, "eval_samples_per_second": 84.146, "eval_steps_per_second": 2.404, "step": 234 }, { "epoch": 118.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5648259520530701, "eval_runtime": 0.6475, "eval_samples_per_second": 108.103, "eval_steps_per_second": 3.089, "step": 236 }, { "epoch": 119.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5644696354866028, "eval_runtime": 0.6531, "eval_samples_per_second": 107.184, "eval_steps_per_second": 3.062, "step": 238 }, { "epoch": 120.0, "learning_rate": 5e-05, "loss": 0.6551, "step": 240 }, { "epoch": 120.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5640624165534973, "eval_runtime": 0.8277, "eval_samples_per_second": 84.574, "eval_steps_per_second": 2.416, "step": 240 }, { "epoch": 121.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5636399388313293, "eval_runtime": 0.6478, "eval_samples_per_second": 108.056, "eval_steps_per_second": 3.087, "step": 242 }, { "epoch": 122.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.563149094581604, "eval_runtime": 0.6338, "eval_samples_per_second": 110.453, "eval_steps_per_second": 3.156, "step": 244 }, { "epoch": 123.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5627174973487854, "eval_runtime": 0.8111, "eval_samples_per_second": 86.304, "eval_steps_per_second": 2.466, "step": 246 }, { "epoch": 124.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.562400221824646, "eval_runtime": 0.649, "eval_samples_per_second": 107.86, "eval_steps_per_second": 3.082, "step": 248 }, { "epoch": 125.0, "learning_rate": 4.6875e-05, "loss": 0.6452, "step": 250 }, { "epoch": 125.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5621911287307739, "eval_runtime": 0.6465, "eval_samples_per_second": 108.279, "eval_steps_per_second": 3.094, "step": 250 }, { "epoch": 126.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5620221495628357, "eval_runtime": 0.827, "eval_samples_per_second": 84.639, "eval_steps_per_second": 2.418, "step": 252 }, { "epoch": 127.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5617978572845459, "eval_runtime": 0.6221, "eval_samples_per_second": 112.525, "eval_steps_per_second": 3.215, "step": 254 }, { "epoch": 128.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5614616870880127, "eval_runtime": 0.6384, "eval_samples_per_second": 109.65, "eval_steps_per_second": 3.133, "step": 256 }, { "epoch": 129.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5612771511077881, "eval_runtime": 0.8188, "eval_samples_per_second": 85.487, "eval_steps_per_second": 2.442, "step": 258 }, { "epoch": 130.0, "learning_rate": 4.375e-05, "loss": 0.645, "step": 260 }, { "epoch": 130.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5610944032669067, "eval_runtime": 0.6274, "eval_samples_per_second": 111.57, "eval_steps_per_second": 3.188, "step": 260 }, { "epoch": 131.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5608205199241638, "eval_runtime": 0.6351, "eval_samples_per_second": 110.223, "eval_steps_per_second": 3.149, "step": 262 }, { "epoch": 132.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5606086254119873, "eval_runtime": 0.8451, "eval_samples_per_second": 82.832, "eval_steps_per_second": 2.367, "step": 264 }, { "epoch": 133.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5602155923843384, "eval_runtime": 0.6314, "eval_samples_per_second": 110.864, "eval_steps_per_second": 3.168, "step": 266 }, { "epoch": 134.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5596277713775635, "eval_runtime": 0.6347, "eval_samples_per_second": 110.28, "eval_steps_per_second": 3.151, "step": 268 }, { "epoch": 135.0, "learning_rate": 4.0625000000000005e-05, "loss": 0.629, "step": 270 }, { "epoch": 135.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.558956503868103, "eval_runtime": 0.8112, "eval_samples_per_second": 86.289, "eval_steps_per_second": 2.465, "step": 270 }, { "epoch": 136.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5582412481307983, "eval_runtime": 0.6394, "eval_samples_per_second": 109.485, "eval_steps_per_second": 3.128, "step": 272 }, { "epoch": 137.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5576009750366211, "eval_runtime": 0.6293, "eval_samples_per_second": 111.232, "eval_steps_per_second": 3.178, "step": 274 }, { "epoch": 138.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5571399927139282, "eval_runtime": 0.8108, "eval_samples_per_second": 86.33, "eval_steps_per_second": 2.467, "step": 276 }, { "epoch": 139.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5567926168441772, "eval_runtime": 0.6262, "eval_samples_per_second": 111.788, "eval_steps_per_second": 3.194, "step": 278 }, { "epoch": 140.0, "learning_rate": 3.7500000000000003e-05, "loss": 0.7126, "step": 280 }, { "epoch": 140.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.556534469127655, "eval_runtime": 0.6392, "eval_samples_per_second": 109.51, "eval_steps_per_second": 3.129, "step": 280 }, { "epoch": 141.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5563255548477173, "eval_runtime": 0.8384, "eval_samples_per_second": 83.488, "eval_steps_per_second": 2.385, "step": 282 }, { "epoch": 142.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5561147928237915, "eval_runtime": 0.6382, "eval_samples_per_second": 109.687, "eval_steps_per_second": 3.134, "step": 284 }, { "epoch": 143.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5559044480323792, "eval_runtime": 0.647, "eval_samples_per_second": 108.191, "eval_steps_per_second": 3.091, "step": 286 }, { "epoch": 144.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.555549681186676, "eval_runtime": 0.8257, "eval_samples_per_second": 84.779, "eval_steps_per_second": 2.422, "step": 288 }, { "epoch": 145.0, "learning_rate": 3.4375e-05, "loss": 0.669, "step": 290 }, { "epoch": 145.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5551820397377014, "eval_runtime": 0.6311, "eval_samples_per_second": 110.921, "eval_steps_per_second": 3.169, "step": 290 }, { "epoch": 146.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.55474454164505, "eval_runtime": 0.653, "eval_samples_per_second": 107.193, "eval_steps_per_second": 3.063, "step": 292 }, { "epoch": 147.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5542392134666443, "eval_runtime": 0.8093, "eval_samples_per_second": 86.493, "eval_steps_per_second": 2.471, "step": 294 }, { "epoch": 148.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5537976622581482, "eval_runtime": 0.6472, "eval_samples_per_second": 108.161, "eval_steps_per_second": 3.09, "step": 296 }, { "epoch": 149.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5534089803695679, "eval_runtime": 0.6283, "eval_samples_per_second": 111.403, "eval_steps_per_second": 3.183, "step": 298 }, { "epoch": 150.0, "learning_rate": 3.125e-05, "loss": 0.6481, "step": 300 }, { "epoch": 150.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5530030727386475, "eval_runtime": 0.8284, "eval_samples_per_second": 84.505, "eval_steps_per_second": 2.414, "step": 300 }, { "epoch": 151.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5526387095451355, "eval_runtime": 0.6358, "eval_samples_per_second": 110.105, "eval_steps_per_second": 3.146, "step": 302 }, { "epoch": 152.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5522416830062866, "eval_runtime": 0.6285, "eval_samples_per_second": 111.384, "eval_steps_per_second": 3.182, "step": 304 }, { "epoch": 153.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5518553853034973, "eval_runtime": 0.7207, "eval_samples_per_second": 97.122, "eval_steps_per_second": 2.775, "step": 306 }, { "epoch": 154.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5514690279960632, "eval_runtime": 0.6359, "eval_samples_per_second": 110.085, "eval_steps_per_second": 3.145, "step": 308 }, { "epoch": 155.0, "learning_rate": 2.8125000000000003e-05, "loss": 0.6211, "step": 310 }, { "epoch": 155.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5510378479957581, "eval_runtime": 0.636, "eval_samples_per_second": 110.056, "eval_steps_per_second": 3.144, "step": 310 }, { "epoch": 156.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5506120920181274, "eval_runtime": 0.6297, "eval_samples_per_second": 111.157, "eval_steps_per_second": 3.176, "step": 312 }, { "epoch": 157.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5502142906188965, "eval_runtime": 0.6795, "eval_samples_per_second": 103.02, "eval_steps_per_second": 2.943, "step": 314 }, { "epoch": 158.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5498998761177063, "eval_runtime": 0.6321, "eval_samples_per_second": 110.745, "eval_steps_per_second": 3.164, "step": 316 }, { "epoch": 159.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5495581030845642, "eval_runtime": 0.6392, "eval_samples_per_second": 109.52, "eval_steps_per_second": 3.129, "step": 318 }, { "epoch": 160.0, "learning_rate": 2.5e-05, "loss": 0.6458, "step": 320 }, { "epoch": 160.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5492438077926636, "eval_runtime": 0.8024, "eval_samples_per_second": 87.237, "eval_steps_per_second": 2.492, "step": 320 }, { "epoch": 161.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5489979982376099, "eval_runtime": 0.6575, "eval_samples_per_second": 106.471, "eval_steps_per_second": 3.042, "step": 322 }, { "epoch": 162.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5487762689590454, "eval_runtime": 0.6515, "eval_samples_per_second": 107.45, "eval_steps_per_second": 3.07, "step": 324 }, { "epoch": 163.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.548595130443573, "eval_runtime": 0.8069, "eval_samples_per_second": 86.752, "eval_steps_per_second": 2.479, "step": 326 }, { "epoch": 164.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5483713746070862, "eval_runtime": 0.6447, "eval_samples_per_second": 108.582, "eval_steps_per_second": 3.102, "step": 328 }, { "epoch": 165.0, "learning_rate": 2.1875e-05, "loss": 0.6317, "step": 330 }, { "epoch": 165.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.5481104254722595, "eval_runtime": 0.6486, "eval_samples_per_second": 107.926, "eval_steps_per_second": 3.084, "step": 330 }, { "epoch": 166.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5478586554527283, "eval_runtime": 0.8209, "eval_samples_per_second": 85.268, "eval_steps_per_second": 2.436, "step": 332 }, { "epoch": 167.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5475797653198242, "eval_runtime": 0.7417, "eval_samples_per_second": 94.381, "eval_steps_per_second": 2.697, "step": 334 }, { "epoch": 168.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5473471879959106, "eval_runtime": 0.6501, "eval_samples_per_second": 107.671, "eval_steps_per_second": 3.076, "step": 336 }, { "epoch": 169.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5471236705780029, "eval_runtime": 0.7944, "eval_samples_per_second": 88.115, "eval_steps_per_second": 2.518, "step": 338 }, { "epoch": 170.0, "learning_rate": 1.8750000000000002e-05, "loss": 0.6154, "step": 340 }, { "epoch": 170.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5469514727592468, "eval_runtime": 0.6378, "eval_samples_per_second": 109.76, "eval_steps_per_second": 3.136, "step": 340 }, { "epoch": 171.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5467889308929443, "eval_runtime": 0.6433, "eval_samples_per_second": 108.819, "eval_steps_per_second": 3.109, "step": 342 }, { "epoch": 172.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5466357469558716, "eval_runtime": 0.8146, "eval_samples_per_second": 85.93, "eval_steps_per_second": 2.455, "step": 344 }, { "epoch": 173.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5464411973953247, "eval_runtime": 0.6826, "eval_samples_per_second": 102.554, "eval_steps_per_second": 2.93, "step": 346 }, { "epoch": 174.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5462457537651062, "eval_runtime": 0.6413, "eval_samples_per_second": 109.146, "eval_steps_per_second": 3.118, "step": 348 }, { "epoch": 175.0, "learning_rate": 1.5625e-05, "loss": 0.6323, "step": 350 }, { "epoch": 175.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5460384488105774, "eval_runtime": 0.8055, "eval_samples_per_second": 86.906, "eval_steps_per_second": 2.483, "step": 350 }, { "epoch": 176.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.545864999294281, "eval_runtime": 0.635, "eval_samples_per_second": 110.23, "eval_steps_per_second": 3.149, "step": 352 }, { "epoch": 177.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.54571932554245, "eval_runtime": 0.6362, "eval_samples_per_second": 110.035, "eval_steps_per_second": 3.144, "step": 354 }, { "epoch": 178.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5455992817878723, "eval_runtime": 0.8155, "eval_samples_per_second": 85.839, "eval_steps_per_second": 2.453, "step": 356 }, { "epoch": 179.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5454698801040649, "eval_runtime": 0.6543, "eval_samples_per_second": 106.992, "eval_steps_per_second": 3.057, "step": 358 }, { "epoch": 180.0, "learning_rate": 1.25e-05, "loss": 0.6331, "step": 360 }, { "epoch": 180.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5453290343284607, "eval_runtime": 0.6499, "eval_samples_per_second": 107.716, "eval_steps_per_second": 3.078, "step": 360 }, { "epoch": 181.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5451884865760803, "eval_runtime": 0.8265, "eval_samples_per_second": 84.691, "eval_steps_per_second": 2.42, "step": 362 }, { "epoch": 182.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5450613498687744, "eval_runtime": 0.6389, "eval_samples_per_second": 109.556, "eval_steps_per_second": 3.13, "step": 364 }, { "epoch": 183.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5449284911155701, "eval_runtime": 0.6467, "eval_samples_per_second": 108.243, "eval_steps_per_second": 3.093, "step": 366 }, { "epoch": 184.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5448177456855774, "eval_runtime": 0.825, "eval_samples_per_second": 84.846, "eval_steps_per_second": 2.424, "step": 368 }, { "epoch": 185.0, "learning_rate": 9.375000000000001e-06, "loss": 0.6333, "step": 370 }, { "epoch": 185.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.544733464717865, "eval_runtime": 0.6541, "eval_samples_per_second": 107.012, "eval_steps_per_second": 3.057, "step": 370 }, { "epoch": 186.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5446553230285645, "eval_runtime": 0.6491, "eval_samples_per_second": 107.838, "eval_steps_per_second": 3.081, "step": 372 }, { "epoch": 187.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5445802211761475, "eval_runtime": 0.8184, "eval_samples_per_second": 85.533, "eval_steps_per_second": 2.444, "step": 374 }, { "epoch": 188.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5445207357406616, "eval_runtime": 0.6378, "eval_samples_per_second": 109.754, "eval_steps_per_second": 3.136, "step": 376 }, { "epoch": 189.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5444640517234802, "eval_runtime": 0.6708, "eval_samples_per_second": 104.36, "eval_steps_per_second": 2.982, "step": 378 }, { "epoch": 190.0, "learning_rate": 6.25e-06, "loss": 0.608, "step": 380 }, { "epoch": 190.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.544407069683075, "eval_runtime": 0.8392, "eval_samples_per_second": 83.416, "eval_steps_per_second": 2.383, "step": 380 }, { "epoch": 191.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5443536043167114, "eval_runtime": 0.6405, "eval_samples_per_second": 109.293, "eval_steps_per_second": 3.123, "step": 382 }, { "epoch": 192.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5443087816238403, "eval_runtime": 0.6431, "eval_samples_per_second": 108.85, "eval_steps_per_second": 3.11, "step": 384 }, { "epoch": 193.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5442724823951721, "eval_runtime": 0.8311, "eval_samples_per_second": 84.221, "eval_steps_per_second": 2.406, "step": 386 }, { "epoch": 194.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5442416667938232, "eval_runtime": 0.6416, "eval_samples_per_second": 109.095, "eval_steps_per_second": 3.117, "step": 388 }, { "epoch": 195.0, "learning_rate": 3.125e-06, "loss": 0.6155, "step": 390 }, { "epoch": 195.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5442100763320923, "eval_runtime": 0.6472, "eval_samples_per_second": 108.158, "eval_steps_per_second": 3.09, "step": 390 }, { "epoch": 196.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5441816449165344, "eval_runtime": 0.8234, "eval_samples_per_second": 85.016, "eval_steps_per_second": 2.429, "step": 392 }, { "epoch": 197.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5441582202911377, "eval_runtime": 0.6411, "eval_samples_per_second": 109.183, "eval_steps_per_second": 3.12, "step": 394 }, { "epoch": 198.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5441429615020752, "eval_runtime": 0.6367, "eval_samples_per_second": 109.941, "eval_steps_per_second": 3.141, "step": 396 }, { "epoch": 199.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5441319346427917, "eval_runtime": 0.8204, "eval_samples_per_second": 85.32, "eval_steps_per_second": 2.438, "step": 398 }, { "epoch": 200.0, "learning_rate": 0.0, "loss": 0.6272, "step": 400 }, { "epoch": 200.0, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5441268086433411, "eval_runtime": 0.646, "eval_samples_per_second": 108.365, "eval_steps_per_second": 3.096, "step": 400 }, { "epoch": 200.0, "step": 400, "total_flos": 2.23710151698432e+18, "train_loss": 0.6791047298908234, "train_runtime": 1022.1437, "train_samples_per_second": 88.05, "train_steps_per_second": 0.391 } ], "logging_steps": 10, "max_steps": 400, "num_train_epochs": 200, "save_steps": 500, "total_flos": 2.23710151698432e+18, "trial_name": null, "trial_params": null }