|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06, |
|
"eval_steps": 1000, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 2.3598, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 2.1113, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0722, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019863945578231293, |
|
"loss": 2.1798, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019727891156462587, |
|
"loss": 1.9333, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001959183673469388, |
|
"loss": 2.4093, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001945578231292517, |
|
"loss": 2.6837, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019319727891156462, |
|
"loss": 1.5956, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019183673469387756, |
|
"loss": 2.02, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019047619047619048, |
|
"loss": 2.0534, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018911564625850343, |
|
"loss": 1.804, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018775510204081634, |
|
"loss": 2.0297, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018639455782312926, |
|
"loss": 1.8164, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001850340136054422, |
|
"loss": 1.8929, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018367346938775512, |
|
"loss": 1.7967, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018231292517006804, |
|
"loss": 1.5366, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018095238095238095, |
|
"loss": 1.7421, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001795918367346939, |
|
"loss": 2.5308, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017823129251700681, |
|
"loss": 1.5277, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017687074829931973, |
|
"loss": 1.9084, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017551020408163265, |
|
"loss": 1.9708, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001741496598639456, |
|
"loss": 1.8567, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001727891156462585, |
|
"loss": 1.5694, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 1.8519, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017006802721088434, |
|
"loss": 1.663, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016870748299319729, |
|
"loss": 1.9309, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016734693877551023, |
|
"loss": 1.551, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016598639455782315, |
|
"loss": 1.4782, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016462585034013606, |
|
"loss": 1.6643, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016326530612244898, |
|
"loss": 1.5571, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016190476190476192, |
|
"loss": 1.6747, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016054421768707484, |
|
"loss": 1.3539, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015918367346938776, |
|
"loss": 2.0579, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015782312925170067, |
|
"loss": 1.5418, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015646258503401362, |
|
"loss": 1.8159, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015510204081632654, |
|
"loss": 1.6081, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015374149659863945, |
|
"loss": 1.5529, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00015238095238095237, |
|
"loss": 1.5456, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001510204081632653, |
|
"loss": 2.1016, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014965986394557826, |
|
"loss": 1.4394, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014829931972789117, |
|
"loss": 1.4033, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001469387755102041, |
|
"loss": 1.5887, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000145578231292517, |
|
"loss": 1.8761, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014421768707482995, |
|
"loss": 1.67, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 1.8064, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014149659863945578, |
|
"loss": 1.4283, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001401360544217687, |
|
"loss": 1.8028, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013877551020408165, |
|
"loss": 1.5101, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013741496598639456, |
|
"loss": 1.7825, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013605442176870748, |
|
"loss": 1.3797, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001346938775510204, |
|
"loss": 1.7408, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 1.6008, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013197278911564626, |
|
"loss": 1.5121, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013061224489795917, |
|
"loss": 1.3599, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012925170068027212, |
|
"loss": 1.4465, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012789115646258506, |
|
"loss": 1.6332, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012653061224489798, |
|
"loss": 1.6305, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001251700680272109, |
|
"loss": 1.5599, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001238095238095238, |
|
"loss": 1.8265, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012244897959183676, |
|
"loss": 1.7149, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012108843537414967, |
|
"loss": 1.7577, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00011972789115646259, |
|
"loss": 1.5519, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011836734693877552, |
|
"loss": 1.4681, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011700680272108844, |
|
"loss": 1.3328, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011564625850340137, |
|
"loss": 1.4249, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 1.8161, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011292517006802721, |
|
"loss": 1.4775, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011156462585034013, |
|
"loss": 1.5223, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011020408163265306, |
|
"loss": 1.5734, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000108843537414966, |
|
"loss": 1.5901, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010748299319727892, |
|
"loss": 2.1238, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010612244897959185, |
|
"loss": 1.8808, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010476190476190477, |
|
"loss": 1.3648, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001034013605442177, |
|
"loss": 1.3667, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010204081632653062, |
|
"loss": 1.3925, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010068027210884355, |
|
"loss": 1.8534, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.931972789115646e-05, |
|
"loss": 1.6686, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.79591836734694e-05, |
|
"loss": 1.5057, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.659863945578231e-05, |
|
"loss": 1.4539, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.523809523809524e-05, |
|
"loss": 1.6102, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.387755102040817e-05, |
|
"loss": 1.3404, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.25170068027211e-05, |
|
"loss": 1.5923, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.115646258503402e-05, |
|
"loss": 1.34, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.979591836734695e-05, |
|
"loss": 1.9159, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.843537414965987e-05, |
|
"loss": 1.5353, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.70748299319728e-05, |
|
"loss": 1.5158, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 1.3898, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.435374149659864e-05, |
|
"loss": 1.6535, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.299319727891157e-05, |
|
"loss": 1.6544, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.163265306122449e-05, |
|
"loss": 1.8656, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.027210884353742e-05, |
|
"loss": 1.465, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.891156462585034e-05, |
|
"loss": 1.0163, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.755102040816327e-05, |
|
"loss": 1.61, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.619047619047618e-05, |
|
"loss": 2.0532, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.482993197278913e-05, |
|
"loss": 1.4248, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.346938775510205e-05, |
|
"loss": 1.4999, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.210884353741498e-05, |
|
"loss": 1.6352, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.074829931972789e-05, |
|
"loss": 1.6345, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.938775510204082e-05, |
|
"loss": 1.6019, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.802721088435374e-05, |
|
"loss": 1.4834, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 1.4514, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.530612244897959e-05, |
|
"loss": 1.5484, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.394557823129253e-05, |
|
"loss": 1.6164, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.258503401360545e-05, |
|
"loss": 1.6819, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.122448979591838e-05, |
|
"loss": 1.8219, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.9863945578231295e-05, |
|
"loss": 1.385, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.850340136054422e-05, |
|
"loss": 1.6358, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 1.5059, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.5782312925170065e-05, |
|
"loss": 1.6794, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.4421768707483e-05, |
|
"loss": 1.4888, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.3061224489795926e-05, |
|
"loss": 1.3177, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.170068027210885e-05, |
|
"loss": 1.5026, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.034013605442177e-05, |
|
"loss": 1.8063, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.89795918367347e-05, |
|
"loss": 1.8037, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 1.6401, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.625850340136055e-05, |
|
"loss": 1.7781, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.4897959183673474e-05, |
|
"loss": 1.7268, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.35374149659864e-05, |
|
"loss": 1.6482, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.217687074829932e-05, |
|
"loss": 1.5628, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.0816326530612245e-05, |
|
"loss": 1.3526, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.945578231292517e-05, |
|
"loss": 1.3096, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 1.6429, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.673469387755102e-05, |
|
"loss": 1.4119, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.5374149659863946e-05, |
|
"loss": 1.4932, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.401360544217687e-05, |
|
"loss": 1.8325, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.265306122448979e-05, |
|
"loss": 1.6309, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.1292517006802724e-05, |
|
"loss": 1.6754, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9931972789115647e-05, |
|
"loss": 1.1972, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.6411, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.72108843537415e-05, |
|
"loss": 2.031, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5850340136054425e-05, |
|
"loss": 1.726, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.448979591836735e-05, |
|
"loss": 1.0537, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3129251700680275e-05, |
|
"loss": 1.2133, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.17687074829932e-05, |
|
"loss": 1.4264, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.0408163265306123e-05, |
|
"loss": 1.8015, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 1.6847, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.7687074829931973e-05, |
|
"loss": 1.6395, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.6326530612244897e-05, |
|
"loss": 1.3069, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.4965986394557824e-05, |
|
"loss": 1.7292, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.360544217687075e-05, |
|
"loss": 1.5996, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.2244897959183674e-05, |
|
"loss": 1.5048, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.08843537414966e-05, |
|
"loss": 1.5075, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 1.4126, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.163265306122448e-06, |
|
"loss": 1.4858, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.802721088435375e-06, |
|
"loss": 1.4445, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.4421768707483e-06, |
|
"loss": 1.2193, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.081632653061224e-06, |
|
"loss": 1.5606, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.72108843537415e-06, |
|
"loss": 1.5721, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.360544217687075e-06, |
|
"loss": 1.8318, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0, |
|
"loss": 1.4401, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 150, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"total_flos": 1.863424689143808e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|