diff --git a/checkpoint-1134/config.json b/checkpoint-1134/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-1134/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-1134/optimizer.pt b/checkpoint-1134/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3526c39b7b88f8c8258e0fe471dd70aef7e169cb --- /dev/null +++ b/checkpoint-1134/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:288a3a2153c0322b4276e669c96adb82618323aa14dd22a877cdc10d5ff281fc +size 526325317 diff --git a/checkpoint-1134/pytorch_model.bin b/checkpoint-1134/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a85f24c2286078a01bbe6c225457a7800c87332e --- /dev/null +++ b/checkpoint-1134/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf607644ed3f532ddfcf22bf3b6b096f6d857686842d3ded36af6cf515d24b7c +size 263167661 diff --git a/checkpoint-1134/rng_state.pth b/checkpoint-1134/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fedb0bf825e85c9a7b905bb7ef1385d31bc0687 --- /dev/null +++ b/checkpoint-1134/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589ad13af77107a97b9c7365d87d9e8ab0ae8d444ec08c6f1b85dafe8552374c +size 14575 diff --git a/checkpoint-1134/scheduler.pt b/checkpoint-1134/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..17b9588071a0e8e210c6dd4781e7d2d459b8a942 --- /dev/null +++ b/checkpoint-1134/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c1f9635b56ab51528d4beb03bca865e6a9a33739312547fb70a929ef992f64 +size 627 diff --git a/checkpoint-1134/trainer_state.json b/checkpoint-1134/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c51dbf2d8b9d9ed5c401814f01f0c278ce78df06 --- /dev/null +++ b/checkpoint-1134/trainer_state.json @@ -0,0 +1,6847 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 3.0, + "global_step": 1134, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 291221296732800.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1134/training_args.bin b/checkpoint-1134/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-1134/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-1512/config.json b/checkpoint-1512/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-1512/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-1512/optimizer.pt b/checkpoint-1512/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3437e12ba1519d5bad854afad5339b263fe7e7d --- /dev/null +++ b/checkpoint-1512/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ce52713400f327c5fd03cd733de4a3969a3062515aba0904a292f02e98fe96 +size 526325317 diff --git a/checkpoint-1512/pytorch_model.bin b/checkpoint-1512/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6022c80729ba964a080ed2d0de3e635911301926 --- /dev/null +++ b/checkpoint-1512/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c453f3f6a59f2ec8da836d5fdb537bd870363dc090f30d9341e30fb9a06987b +size 263167661 diff --git a/checkpoint-1512/rng_state.pth b/checkpoint-1512/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a99be7bd3bffc94baf8636dfc3de7a5ff8327fb9 --- /dev/null +++ b/checkpoint-1512/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4652bd48bb7c106c661fd71929aac1388a67e4cd1e78571ff090909c5e7dc7d0 +size 14575 diff --git a/checkpoint-1512/scheduler.pt b/checkpoint-1512/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36d19fb7b58d33b1570bf0fd1b5dbe22fe674110 --- /dev/null +++ b/checkpoint-1512/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08dda3a4f935a7a4b13455d1b343f791c393a17cccb53664b36097e59734998c +size 627 diff --git a/checkpoint-1512/trainer_state.json b/checkpoint-1512/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d5f1fcbd7fc655d7512fb84ce02b848ffaff0e09 --- /dev/null +++ b/checkpoint-1512/trainer_state.json @@ -0,0 +1,9124 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 4.0, + "global_step": 1512, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + }, + { + "epoch": 3.0, + "learning_rate": 3.545576407506703e-05, + "loss": 0.0111, + "step": 1135 + }, + { + "epoch": 3.01, + "learning_rate": 3.544235924932976e-05, + "loss": 0.0166, + "step": 1136 + }, + { + "epoch": 3.01, + "learning_rate": 3.54289544235925e-05, + "loss": 0.2159, + "step": 1137 + }, + { + "epoch": 3.01, + "learning_rate": 3.541554959785523e-05, + "loss": 0.0096, + "step": 1138 + }, + { + "epoch": 3.01, + "learning_rate": 3.540214477211796e-05, + "loss": 0.1352, + "step": 1139 + }, + { + "epoch": 3.02, + "learning_rate": 3.53887399463807e-05, + "loss": 0.0195, + "step": 1140 + }, + { + "epoch": 3.02, + "learning_rate": 3.5375335120643434e-05, + "loss": 0.1579, + "step": 1141 + }, + { + "epoch": 3.02, + "learning_rate": 3.536193029490617e-05, + "loss": 0.0078, + "step": 1142 + }, + { + "epoch": 3.02, + "learning_rate": 3.5348525469168904e-05, + "loss": 0.0111, + "step": 1143 + }, + { + "epoch": 3.03, + "learning_rate": 3.533512064343163e-05, + "loss": 0.2457, + "step": 1144 + }, + { + "epoch": 3.03, + "learning_rate": 3.5321715817694374e-05, + "loss": 0.014, + "step": 1145 + }, + { + "epoch": 3.03, + "learning_rate": 3.53083109919571e-05, + "loss": 0.2021, + "step": 1146 + }, + { + "epoch": 3.03, + "learning_rate": 3.5294906166219844e-05, + "loss": 0.5334, + "step": 1147 + }, + { + "epoch": 3.04, + "learning_rate": 3.528150134048257e-05, + "loss": 0.0116, + "step": 1148 + }, + { + "epoch": 3.04, + "learning_rate": 3.526809651474531e-05, + "loss": 0.0099, + "step": 1149 + }, + { + "epoch": 3.04, + "learning_rate": 3.525469168900804e-05, + "loss": 0.2102, + "step": 1150 + }, + { + "epoch": 3.04, + "learning_rate": 3.524128686327078e-05, + "loss": 0.0093, + "step": 1151 + }, + { + "epoch": 3.05, + "learning_rate": 3.522788203753351e-05, + "loss": 0.0112, + "step": 1152 + }, + { + "epoch": 3.05, + "learning_rate": 3.521447721179625e-05, + "loss": 0.1761, + "step": 1153 + }, + { + "epoch": 3.05, + "learning_rate": 3.5201072386058984e-05, + "loss": 0.1608, + "step": 1154 + }, + { + "epoch": 3.06, + "learning_rate": 3.518766756032172e-05, + "loss": 0.2883, + "step": 1155 + }, + { + "epoch": 3.06, + "learning_rate": 3.5174262734584454e-05, + "loss": 0.0304, + "step": 1156 + }, + { + "epoch": 3.06, + "learning_rate": 3.516085790884719e-05, + "loss": 0.0623, + "step": 1157 + }, + { + "epoch": 3.06, + "learning_rate": 3.5147453083109924e-05, + "loss": 0.1824, + "step": 1158 + }, + { + "epoch": 3.07, + "learning_rate": 3.513404825737265e-05, + "loss": 0.2527, + "step": 1159 + }, + { + "epoch": 3.07, + "learning_rate": 3.5120643431635394e-05, + "loss": 0.0877, + "step": 1160 + }, + { + "epoch": 3.07, + "learning_rate": 3.510723860589812e-05, + "loss": 0.2735, + "step": 1161 + }, + { + "epoch": 3.07, + "learning_rate": 3.5093833780160865e-05, + "loss": 0.1126, + "step": 1162 + }, + { + "epoch": 3.08, + "learning_rate": 3.508042895442359e-05, + "loss": 0.2498, + "step": 1163 + }, + { + "epoch": 3.08, + "learning_rate": 3.506702412868633e-05, + "loss": 0.022, + "step": 1164 + }, + { + "epoch": 3.08, + "learning_rate": 3.505361930294906e-05, + "loss": 0.2768, + "step": 1165 + }, + { + "epoch": 3.08, + "learning_rate": 3.50402144772118e-05, + "loss": 0.0429, + "step": 1166 + }, + { + "epoch": 3.09, + "learning_rate": 3.5026809651474533e-05, + "loss": 0.0198, + "step": 1167 + }, + { + "epoch": 3.09, + "learning_rate": 3.501340482573727e-05, + "loss": 0.0097, + "step": 1168 + }, + { + "epoch": 3.09, + "learning_rate": 3.5e-05, + "loss": 0.0276, + "step": 1169 + }, + { + "epoch": 3.1, + "learning_rate": 3.498659517426274e-05, + "loss": 0.2276, + "step": 1170 + }, + { + "epoch": 3.1, + "learning_rate": 3.497319034852547e-05, + "loss": 0.0461, + "step": 1171 + }, + { + "epoch": 3.1, + "learning_rate": 3.495978552278821e-05, + "loss": 0.0103, + "step": 1172 + }, + { + "epoch": 3.1, + "learning_rate": 3.494638069705094e-05, + "loss": 0.1455, + "step": 1173 + }, + { + "epoch": 3.11, + "learning_rate": 3.493297587131367e-05, + "loss": 0.0865, + "step": 1174 + }, + { + "epoch": 3.11, + "learning_rate": 3.491957104557641e-05, + "loss": 0.3226, + "step": 1175 + }, + { + "epoch": 3.11, + "learning_rate": 3.490616621983914e-05, + "loss": 0.1744, + "step": 1176 + }, + { + "epoch": 3.11, + "learning_rate": 3.489276139410188e-05, + "loss": 0.0148, + "step": 1177 + }, + { + "epoch": 3.12, + "learning_rate": 3.487935656836461e-05, + "loss": 0.2582, + "step": 1178 + }, + { + "epoch": 3.12, + "learning_rate": 3.486595174262735e-05, + "loss": 0.2782, + "step": 1179 + }, + { + "epoch": 3.12, + "learning_rate": 3.485254691689008e-05, + "loss": 0.143, + "step": 1180 + }, + { + "epoch": 3.12, + "learning_rate": 3.483914209115282e-05, + "loss": 0.0853, + "step": 1181 + }, + { + "epoch": 3.13, + "learning_rate": 3.4825737265415554e-05, + "loss": 0.1361, + "step": 1182 + }, + { + "epoch": 3.13, + "learning_rate": 3.481233243967829e-05, + "loss": 0.0883, + "step": 1183 + }, + { + "epoch": 3.13, + "learning_rate": 3.479892761394102e-05, + "loss": 0.0116, + "step": 1184 + }, + { + "epoch": 3.13, + "learning_rate": 3.478552278820376e-05, + "loss": 0.0531, + "step": 1185 + }, + { + "epoch": 3.14, + "learning_rate": 3.477211796246649e-05, + "loss": 0.0184, + "step": 1186 + }, + { + "epoch": 3.14, + "learning_rate": 3.475871313672923e-05, + "loss": 0.1601, + "step": 1187 + }, + { + "epoch": 3.14, + "learning_rate": 3.474530831099196e-05, + "loss": 0.007, + "step": 1188 + }, + { + "epoch": 3.15, + "learning_rate": 3.473190348525469e-05, + "loss": 0.0101, + "step": 1189 + }, + { + "epoch": 3.15, + "learning_rate": 3.471849865951743e-05, + "loss": 0.2385, + "step": 1190 + }, + { + "epoch": 3.15, + "learning_rate": 3.470509383378016e-05, + "loss": 0.0075, + "step": 1191 + }, + { + "epoch": 3.15, + "learning_rate": 3.46916890080429e-05, + "loss": 0.0919, + "step": 1192 + }, + { + "epoch": 3.16, + "learning_rate": 3.467828418230563e-05, + "loss": 0.0162, + "step": 1193 + }, + { + "epoch": 3.16, + "learning_rate": 3.466487935656836e-05, + "loss": 0.2239, + "step": 1194 + }, + { + "epoch": 3.16, + "learning_rate": 3.4651474530831104e-05, + "loss": 0.5757, + "step": 1195 + }, + { + "epoch": 3.16, + "learning_rate": 3.463806970509383e-05, + "loss": 0.0774, + "step": 1196 + }, + { + "epoch": 3.17, + "learning_rate": 3.4624664879356574e-05, + "loss": 0.2124, + "step": 1197 + }, + { + "epoch": 3.17, + "learning_rate": 3.46112600536193e-05, + "loss": 0.0107, + "step": 1198 + }, + { + "epoch": 3.17, + "learning_rate": 3.459785522788204e-05, + "loss": 0.3179, + "step": 1199 + }, + { + "epoch": 3.17, + "learning_rate": 3.458445040214477e-05, + "loss": 0.0138, + "step": 1200 + }, + { + "epoch": 3.18, + "learning_rate": 3.457104557640751e-05, + "loss": 0.0094, + "step": 1201 + }, + { + "epoch": 3.18, + "learning_rate": 3.455764075067024e-05, + "loss": 0.0039, + "step": 1202 + }, + { + "epoch": 3.18, + "learning_rate": 3.454423592493298e-05, + "loss": 0.0745, + "step": 1203 + }, + { + "epoch": 3.19, + "learning_rate": 3.453083109919571e-05, + "loss": 0.0387, + "step": 1204 + }, + { + "epoch": 3.19, + "learning_rate": 3.451742627345845e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 3.19, + "learning_rate": 3.450402144772118e-05, + "loss": 0.1299, + "step": 1206 + }, + { + "epoch": 3.19, + "learning_rate": 3.449061662198392e-05, + "loss": 0.2821, + "step": 1207 + }, + { + "epoch": 3.2, + "learning_rate": 3.4477211796246653e-05, + "loss": 0.2236, + "step": 1208 + }, + { + "epoch": 3.2, + "learning_rate": 3.446380697050938e-05, + "loss": 0.1436, + "step": 1209 + }, + { + "epoch": 3.2, + "learning_rate": 3.4450402144772124e-05, + "loss": 0.1504, + "step": 1210 + }, + { + "epoch": 3.2, + "learning_rate": 3.443699731903485e-05, + "loss": 0.0415, + "step": 1211 + }, + { + "epoch": 3.21, + "learning_rate": 3.4423592493297594e-05, + "loss": 0.023, + "step": 1212 + }, + { + "epoch": 3.21, + "learning_rate": 3.441018766756032e-05, + "loss": 0.2128, + "step": 1213 + }, + { + "epoch": 3.21, + "learning_rate": 3.439678284182306e-05, + "loss": 0.0066, + "step": 1214 + }, + { + "epoch": 3.21, + "learning_rate": 3.438337801608579e-05, + "loss": 0.4345, + "step": 1215 + }, + { + "epoch": 3.22, + "learning_rate": 3.436997319034853e-05, + "loss": 0.0214, + "step": 1216 + }, + { + "epoch": 3.22, + "learning_rate": 3.435656836461126e-05, + "loss": 0.2094, + "step": 1217 + }, + { + "epoch": 3.22, + "learning_rate": 3.4343163538874e-05, + "loss": 0.0822, + "step": 1218 + }, + { + "epoch": 3.22, + "learning_rate": 3.4329758713136726e-05, + "loss": 0.1153, + "step": 1219 + }, + { + "epoch": 3.23, + "learning_rate": 3.431635388739947e-05, + "loss": 0.0059, + "step": 1220 + }, + { + "epoch": 3.23, + "learning_rate": 3.43029490616622e-05, + "loss": 0.0069, + "step": 1221 + }, + { + "epoch": 3.23, + "learning_rate": 3.428954423592494e-05, + "loss": 0.044, + "step": 1222 + }, + { + "epoch": 3.24, + "learning_rate": 3.427613941018767e-05, + "loss": 0.1975, + "step": 1223 + }, + { + "epoch": 3.24, + "learning_rate": 3.42627345844504e-05, + "loss": 0.3294, + "step": 1224 + }, + { + "epoch": 3.24, + "learning_rate": 3.424932975871314e-05, + "loss": 0.026, + "step": 1225 + }, + { + "epoch": 3.24, + "learning_rate": 3.423592493297587e-05, + "loss": 0.2666, + "step": 1226 + }, + { + "epoch": 3.25, + "learning_rate": 3.422252010723861e-05, + "loss": 0.0628, + "step": 1227 + }, + { + "epoch": 3.25, + "learning_rate": 3.420911528150134e-05, + "loss": 0.0068, + "step": 1228 + }, + { + "epoch": 3.25, + "learning_rate": 3.419571045576407e-05, + "loss": 0.0144, + "step": 1229 + }, + { + "epoch": 3.25, + "learning_rate": 3.418230563002681e-05, + "loss": 0.0029, + "step": 1230 + }, + { + "epoch": 3.26, + "learning_rate": 3.416890080428954e-05, + "loss": 0.606, + "step": 1231 + }, + { + "epoch": 3.26, + "learning_rate": 3.415549597855228e-05, + "loss": 0.2162, + "step": 1232 + }, + { + "epoch": 3.26, + "learning_rate": 3.414209115281501e-05, + "loss": 0.146, + "step": 1233 + }, + { + "epoch": 3.26, + "learning_rate": 3.412868632707775e-05, + "loss": 0.3649, + "step": 1234 + }, + { + "epoch": 3.27, + "learning_rate": 3.411528150134048e-05, + "loss": 0.0062, + "step": 1235 + }, + { + "epoch": 3.27, + "learning_rate": 3.410187667560322e-05, + "loss": 0.4097, + "step": 1236 + }, + { + "epoch": 3.27, + "learning_rate": 3.408847184986595e-05, + "loss": 0.5354, + "step": 1237 + }, + { + "epoch": 3.28, + "learning_rate": 3.407506702412869e-05, + "loss": 0.6222, + "step": 1238 + }, + { + "epoch": 3.28, + "learning_rate": 3.406166219839142e-05, + "loss": 0.0023, + "step": 1239 + }, + { + "epoch": 3.28, + "learning_rate": 3.404825737265416e-05, + "loss": 0.0247, + "step": 1240 + }, + { + "epoch": 3.28, + "learning_rate": 3.403485254691689e-05, + "loss": 0.0051, + "step": 1241 + }, + { + "epoch": 3.29, + "learning_rate": 3.402144772117963e-05, + "loss": 0.2504, + "step": 1242 + }, + { + "epoch": 3.29, + "learning_rate": 3.400804289544236e-05, + "loss": 0.0195, + "step": 1243 + }, + { + "epoch": 3.29, + "learning_rate": 3.39946380697051e-05, + "loss": 0.3706, + "step": 1244 + }, + { + "epoch": 3.29, + "learning_rate": 3.398123324396783e-05, + "loss": 0.0174, + "step": 1245 + }, + { + "epoch": 3.3, + "learning_rate": 3.396782841823056e-05, + "loss": 0.0068, + "step": 1246 + }, + { + "epoch": 3.3, + "learning_rate": 3.39544235924933e-05, + "loss": 0.3938, + "step": 1247 + }, + { + "epoch": 3.3, + "learning_rate": 3.394101876675603e-05, + "loss": 0.0114, + "step": 1248 + }, + { + "epoch": 3.3, + "learning_rate": 3.3927613941018774e-05, + "loss": 0.0088, + "step": 1249 + }, + { + "epoch": 3.31, + "learning_rate": 3.39142091152815e-05, + "loss": 0.0126, + "step": 1250 + }, + { + "epoch": 3.31, + "learning_rate": 3.390080428954424e-05, + "loss": 0.0091, + "step": 1251 + }, + { + "epoch": 3.31, + "learning_rate": 3.388739946380697e-05, + "loss": 0.0232, + "step": 1252 + }, + { + "epoch": 3.31, + "learning_rate": 3.387399463806971e-05, + "loss": 0.3704, + "step": 1253 + }, + { + "epoch": 3.32, + "learning_rate": 3.386058981233244e-05, + "loss": 0.0112, + "step": 1254 + }, + { + "epoch": 3.32, + "learning_rate": 3.384718498659518e-05, + "loss": 0.1709, + "step": 1255 + }, + { + "epoch": 3.32, + "learning_rate": 3.3833780160857906e-05, + "loss": 0.0109, + "step": 1256 + }, + { + "epoch": 3.33, + "learning_rate": 3.382037533512065e-05, + "loss": 0.2874, + "step": 1257 + }, + { + "epoch": 3.33, + "learning_rate": 3.3806970509383376e-05, + "loss": 0.024, + "step": 1258 + }, + { + "epoch": 3.33, + "learning_rate": 3.379356568364612e-05, + "loss": 0.0131, + "step": 1259 + }, + { + "epoch": 3.33, + "learning_rate": 3.3780160857908846e-05, + "loss": 0.2076, + "step": 1260 + }, + { + "epoch": 3.34, + "learning_rate": 3.376675603217158e-05, + "loss": 0.0083, + "step": 1261 + }, + { + "epoch": 3.34, + "learning_rate": 3.375335120643432e-05, + "loss": 0.0234, + "step": 1262 + }, + { + "epoch": 3.34, + "learning_rate": 3.373994638069705e-05, + "loss": 0.0066, + "step": 1263 + }, + { + "epoch": 3.34, + "learning_rate": 3.372654155495979e-05, + "loss": 0.3983, + "step": 1264 + }, + { + "epoch": 3.35, + "learning_rate": 3.371313672922252e-05, + "loss": 0.0648, + "step": 1265 + }, + { + "epoch": 3.35, + "learning_rate": 3.369973190348526e-05, + "loss": 0.006, + "step": 1266 + }, + { + "epoch": 3.35, + "learning_rate": 3.368632707774799e-05, + "loss": 0.0807, + "step": 1267 + }, + { + "epoch": 3.35, + "learning_rate": 3.367292225201073e-05, + "loss": 0.0975, + "step": 1268 + }, + { + "epoch": 3.36, + "learning_rate": 3.365951742627346e-05, + "loss": 0.2934, + "step": 1269 + }, + { + "epoch": 3.36, + "learning_rate": 3.36461126005362e-05, + "loss": 0.0869, + "step": 1270 + }, + { + "epoch": 3.36, + "learning_rate": 3.3632707774798926e-05, + "loss": 0.1374, + "step": 1271 + }, + { + "epoch": 3.37, + "learning_rate": 3.361930294906167e-05, + "loss": 0.3314, + "step": 1272 + }, + { + "epoch": 3.37, + "learning_rate": 3.3605898123324396e-05, + "loss": 0.0045, + "step": 1273 + }, + { + "epoch": 3.37, + "learning_rate": 3.359249329758714e-05, + "loss": 0.0536, + "step": 1274 + }, + { + "epoch": 3.37, + "learning_rate": 3.3579088471849867e-05, + "loss": 0.0564, + "step": 1275 + }, + { + "epoch": 3.38, + "learning_rate": 3.35656836461126e-05, + "loss": 0.0689, + "step": 1276 + }, + { + "epoch": 3.38, + "learning_rate": 3.355227882037534e-05, + "loss": 0.5177, + "step": 1277 + }, + { + "epoch": 3.38, + "learning_rate": 3.353887399463807e-05, + "loss": 0.0689, + "step": 1278 + }, + { + "epoch": 3.38, + "learning_rate": 3.352546916890081e-05, + "loss": 0.0664, + "step": 1279 + }, + { + "epoch": 3.39, + "learning_rate": 3.351206434316354e-05, + "loss": 0.0614, + "step": 1280 + }, + { + "epoch": 3.39, + "learning_rate": 3.349865951742627e-05, + "loss": 0.1994, + "step": 1281 + }, + { + "epoch": 3.39, + "learning_rate": 3.348525469168901e-05, + "loss": 0.4769, + "step": 1282 + }, + { + "epoch": 3.39, + "learning_rate": 3.347184986595174e-05, + "loss": 0.1851, + "step": 1283 + }, + { + "epoch": 3.4, + "learning_rate": 3.345844504021448e-05, + "loss": 0.0092, + "step": 1284 + }, + { + "epoch": 3.4, + "learning_rate": 3.344504021447721e-05, + "loss": 0.0052, + "step": 1285 + }, + { + "epoch": 3.4, + "learning_rate": 3.3431635388739946e-05, + "loss": 0.0095, + "step": 1286 + }, + { + "epoch": 3.4, + "learning_rate": 3.341823056300268e-05, + "loss": 0.0242, + "step": 1287 + }, + { + "epoch": 3.41, + "learning_rate": 3.3404825737265416e-05, + "loss": 0.0565, + "step": 1288 + }, + { + "epoch": 3.41, + "learning_rate": 3.339142091152815e-05, + "loss": 0.2645, + "step": 1289 + }, + { + "epoch": 3.41, + "learning_rate": 3.337801608579089e-05, + "loss": 0.0049, + "step": 1290 + }, + { + "epoch": 3.42, + "learning_rate": 3.336461126005362e-05, + "loss": 0.0929, + "step": 1291 + }, + { + "epoch": 3.42, + "learning_rate": 3.335120643431636e-05, + "loss": 0.3968, + "step": 1292 + }, + { + "epoch": 3.42, + "learning_rate": 3.333780160857909e-05, + "loss": 0.033, + "step": 1293 + }, + { + "epoch": 3.42, + "learning_rate": 3.332439678284183e-05, + "loss": 0.007, + "step": 1294 + }, + { + "epoch": 3.43, + "learning_rate": 3.331099195710456e-05, + "loss": 0.2552, + "step": 1295 + }, + { + "epoch": 3.43, + "learning_rate": 3.329758713136729e-05, + "loss": 0.004, + "step": 1296 + }, + { + "epoch": 3.43, + "learning_rate": 3.328418230563003e-05, + "loss": 0.136, + "step": 1297 + }, + { + "epoch": 3.43, + "learning_rate": 3.327077747989276e-05, + "loss": 0.1407, + "step": 1298 + }, + { + "epoch": 3.44, + "learning_rate": 3.32573726541555e-05, + "loss": 0.0354, + "step": 1299 + }, + { + "epoch": 3.44, + "learning_rate": 3.324396782841823e-05, + "loss": 0.6141, + "step": 1300 + }, + { + "epoch": 3.44, + "learning_rate": 3.3230563002680966e-05, + "loss": 0.2544, + "step": 1301 + }, + { + "epoch": 3.44, + "learning_rate": 3.32171581769437e-05, + "loss": 0.0046, + "step": 1302 + }, + { + "epoch": 3.45, + "learning_rate": 3.320375335120644e-05, + "loss": 0.0126, + "step": 1303 + }, + { + "epoch": 3.45, + "learning_rate": 3.319034852546917e-05, + "loss": 0.3506, + "step": 1304 + }, + { + "epoch": 3.45, + "learning_rate": 3.317694369973191e-05, + "loss": 0.3512, + "step": 1305 + }, + { + "epoch": 3.46, + "learning_rate": 3.3163538873994635e-05, + "loss": 0.3675, + "step": 1306 + }, + { + "epoch": 3.46, + "learning_rate": 3.315013404825738e-05, + "loss": 0.1676, + "step": 1307 + }, + { + "epoch": 3.46, + "learning_rate": 3.3136729222520106e-05, + "loss": 0.0307, + "step": 1308 + }, + { + "epoch": 3.46, + "learning_rate": 3.312332439678285e-05, + "loss": 0.0084, + "step": 1309 + }, + { + "epoch": 3.47, + "learning_rate": 3.3109919571045576e-05, + "loss": 0.1977, + "step": 1310 + }, + { + "epoch": 3.47, + "learning_rate": 3.309651474530831e-05, + "loss": 0.1645, + "step": 1311 + }, + { + "epoch": 3.47, + "learning_rate": 3.3083109919571046e-05, + "loss": 0.2579, + "step": 1312 + }, + { + "epoch": 3.47, + "learning_rate": 3.306970509383378e-05, + "loss": 0.1656, + "step": 1313 + }, + { + "epoch": 3.48, + "learning_rate": 3.3056300268096516e-05, + "loss": 0.0168, + "step": 1314 + }, + { + "epoch": 3.48, + "learning_rate": 3.304289544235925e-05, + "loss": 0.0291, + "step": 1315 + }, + { + "epoch": 3.48, + "learning_rate": 3.302949061662198e-05, + "loss": 0.0146, + "step": 1316 + }, + { + "epoch": 3.48, + "learning_rate": 3.301608579088472e-05, + "loss": 0.0037, + "step": 1317 + }, + { + "epoch": 3.49, + "learning_rate": 3.300268096514745e-05, + "loss": 0.0113, + "step": 1318 + }, + { + "epoch": 3.49, + "learning_rate": 3.298927613941019e-05, + "loss": 0.0734, + "step": 1319 + }, + { + "epoch": 3.49, + "learning_rate": 3.297587131367292e-05, + "loss": 0.0292, + "step": 1320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2962466487935655e-05, + "loss": 0.3875, + "step": 1321 + }, + { + "epoch": 3.5, + "learning_rate": 3.294906166219839e-05, + "loss": 0.0138, + "step": 1322 + }, + { + "epoch": 3.5, + "learning_rate": 3.2935656836461126e-05, + "loss": 0.4653, + "step": 1323 + }, + { + "epoch": 3.5, + "learning_rate": 3.292225201072386e-05, + "loss": 0.1864, + "step": 1324 + }, + { + "epoch": 3.51, + "learning_rate": 3.2908847184986596e-05, + "loss": 0.0116, + "step": 1325 + }, + { + "epoch": 3.51, + "learning_rate": 3.289544235924933e-05, + "loss": 0.014, + "step": 1326 + }, + { + "epoch": 3.51, + "learning_rate": 3.2882037533512066e-05, + "loss": 0.3344, + "step": 1327 + }, + { + "epoch": 3.51, + "learning_rate": 3.28686327077748e-05, + "loss": 0.1544, + "step": 1328 + }, + { + "epoch": 3.52, + "learning_rate": 3.2855227882037537e-05, + "loss": 0.0065, + "step": 1329 + }, + { + "epoch": 3.52, + "learning_rate": 3.284182305630027e-05, + "loss": 0.0041, + "step": 1330 + }, + { + "epoch": 3.52, + "learning_rate": 3.2828418230563e-05, + "loss": 0.0044, + "step": 1331 + }, + { + "epoch": 3.52, + "learning_rate": 3.281501340482574e-05, + "loss": 0.1808, + "step": 1332 + }, + { + "epoch": 3.53, + "learning_rate": 3.280160857908847e-05, + "loss": 0.0521, + "step": 1333 + }, + { + "epoch": 3.53, + "learning_rate": 3.278820375335121e-05, + "loss": 0.3505, + "step": 1334 + }, + { + "epoch": 3.53, + "learning_rate": 3.277479892761394e-05, + "loss": 0.2032, + "step": 1335 + }, + { + "epoch": 3.53, + "learning_rate": 3.2761394101876676e-05, + "loss": 0.004, + "step": 1336 + }, + { + "epoch": 3.54, + "learning_rate": 3.274798927613941e-05, + "loss": 0.0343, + "step": 1337 + }, + { + "epoch": 3.54, + "learning_rate": 3.2734584450402146e-05, + "loss": 0.278, + "step": 1338 + }, + { + "epoch": 3.54, + "learning_rate": 3.272117962466488e-05, + "loss": 0.0056, + "step": 1339 + }, + { + "epoch": 3.54, + "learning_rate": 3.2707774798927616e-05, + "loss": 0.1673, + "step": 1340 + }, + { + "epoch": 3.55, + "learning_rate": 3.2694369973190345e-05, + "loss": 0.0092, + "step": 1341 + }, + { + "epoch": 3.55, + "learning_rate": 3.2680965147453086e-05, + "loss": 0.0058, + "step": 1342 + }, + { + "epoch": 3.55, + "learning_rate": 3.2667560321715815e-05, + "loss": 0.097, + "step": 1343 + }, + { + "epoch": 3.56, + "learning_rate": 3.265415549597856e-05, + "loss": 0.2138, + "step": 1344 + }, + { + "epoch": 3.56, + "learning_rate": 3.2640750670241285e-05, + "loss": 0.0077, + "step": 1345 + }, + { + "epoch": 3.56, + "learning_rate": 3.262734584450402e-05, + "loss": 0.2294, + "step": 1346 + }, + { + "epoch": 3.56, + "learning_rate": 3.2613941018766755e-05, + "loss": 0.3282, + "step": 1347 + }, + { + "epoch": 3.57, + "learning_rate": 3.260053619302949e-05, + "loss": 0.233, + "step": 1348 + }, + { + "epoch": 3.57, + "learning_rate": 3.2587131367292226e-05, + "loss": 0.0379, + "step": 1349 + }, + { + "epoch": 3.57, + "learning_rate": 3.257372654155496e-05, + "loss": 0.2168, + "step": 1350 + }, + { + "epoch": 3.57, + "learning_rate": 3.2560321715817696e-05, + "loss": 0.0443, + "step": 1351 + }, + { + "epoch": 3.58, + "learning_rate": 3.254691689008043e-05, + "loss": 0.2665, + "step": 1352 + }, + { + "epoch": 3.58, + "learning_rate": 3.2533512064343166e-05, + "loss": 0.0136, + "step": 1353 + }, + { + "epoch": 3.58, + "learning_rate": 3.25201072386059e-05, + "loss": 0.0035, + "step": 1354 + }, + { + "epoch": 3.58, + "learning_rate": 3.2506702412868636e-05, + "loss": 0.2153, + "step": 1355 + }, + { + "epoch": 3.59, + "learning_rate": 3.249329758713137e-05, + "loss": 0.088, + "step": 1356 + }, + { + "epoch": 3.59, + "learning_rate": 3.247989276139411e-05, + "loss": 0.0074, + "step": 1357 + }, + { + "epoch": 3.59, + "learning_rate": 3.2466487935656835e-05, + "loss": 0.0924, + "step": 1358 + }, + { + "epoch": 3.6, + "learning_rate": 3.245308310991958e-05, + "loss": 0.0171, + "step": 1359 + }, + { + "epoch": 3.6, + "learning_rate": 3.2439678284182305e-05, + "loss": 0.0132, + "step": 1360 + }, + { + "epoch": 3.6, + "learning_rate": 3.242627345844505e-05, + "loss": 0.0583, + "step": 1361 + }, + { + "epoch": 3.6, + "learning_rate": 3.2412868632707776e-05, + "loss": 0.0038, + "step": 1362 + }, + { + "epoch": 3.61, + "learning_rate": 3.239946380697051e-05, + "loss": 0.0846, + "step": 1363 + }, + { + "epoch": 3.61, + "learning_rate": 3.2386058981233246e-05, + "loss": 0.0058, + "step": 1364 + }, + { + "epoch": 3.61, + "learning_rate": 3.237265415549598e-05, + "loss": 0.4456, + "step": 1365 + }, + { + "epoch": 3.61, + "learning_rate": 3.2359249329758716e-05, + "loss": 0.0029, + "step": 1366 + }, + { + "epoch": 3.62, + "learning_rate": 3.234584450402145e-05, + "loss": 0.2553, + "step": 1367 + }, + { + "epoch": 3.62, + "learning_rate": 3.233243967828418e-05, + "loss": 0.0936, + "step": 1368 + }, + { + "epoch": 3.62, + "learning_rate": 3.231903485254692e-05, + "loss": 0.1017, + "step": 1369 + }, + { + "epoch": 3.62, + "learning_rate": 3.230563002680965e-05, + "loss": 0.0379, + "step": 1370 + }, + { + "epoch": 3.63, + "learning_rate": 3.229222520107239e-05, + "loss": 0.0069, + "step": 1371 + }, + { + "epoch": 3.63, + "learning_rate": 3.227882037533512e-05, + "loss": 0.3235, + "step": 1372 + }, + { + "epoch": 3.63, + "learning_rate": 3.2265415549597855e-05, + "loss": 0.3796, + "step": 1373 + }, + { + "epoch": 3.63, + "learning_rate": 3.225201072386059e-05, + "loss": 0.3246, + "step": 1374 + }, + { + "epoch": 3.64, + "learning_rate": 3.2238605898123325e-05, + "loss": 0.0059, + "step": 1375 + }, + { + "epoch": 3.64, + "learning_rate": 3.222520107238606e-05, + "loss": 0.0405, + "step": 1376 + }, + { + "epoch": 3.64, + "learning_rate": 3.2211796246648796e-05, + "loss": 0.0142, + "step": 1377 + }, + { + "epoch": 3.65, + "learning_rate": 3.219839142091153e-05, + "loss": 0.4426, + "step": 1378 + }, + { + "epoch": 3.65, + "learning_rate": 3.2184986595174266e-05, + "loss": 0.0249, + "step": 1379 + }, + { + "epoch": 3.65, + "learning_rate": 3.2171581769437e-05, + "loss": 0.1053, + "step": 1380 + }, + { + "epoch": 3.65, + "learning_rate": 3.2158176943699736e-05, + "loss": 0.0179, + "step": 1381 + }, + { + "epoch": 3.66, + "learning_rate": 3.214477211796247e-05, + "loss": 0.0718, + "step": 1382 + }, + { + "epoch": 3.66, + "learning_rate": 3.21313672922252e-05, + "loss": 0.1431, + "step": 1383 + }, + { + "epoch": 3.66, + "learning_rate": 3.211796246648794e-05, + "loss": 0.2391, + "step": 1384 + }, + { + "epoch": 3.66, + "learning_rate": 3.210455764075067e-05, + "loss": 0.0053, + "step": 1385 + }, + { + "epoch": 3.67, + "learning_rate": 3.209115281501341e-05, + "loss": 0.2935, + "step": 1386 + }, + { + "epoch": 3.67, + "learning_rate": 3.207774798927614e-05, + "loss": 0.0071, + "step": 1387 + }, + { + "epoch": 3.67, + "learning_rate": 3.2064343163538875e-05, + "loss": 0.031, + "step": 1388 + }, + { + "epoch": 3.67, + "learning_rate": 3.205093833780161e-05, + "loss": 0.1989, + "step": 1389 + }, + { + "epoch": 3.68, + "learning_rate": 3.2037533512064346e-05, + "loss": 0.0533, + "step": 1390 + }, + { + "epoch": 3.68, + "learning_rate": 3.202412868632708e-05, + "loss": 0.2408, + "step": 1391 + }, + { + "epoch": 3.68, + "learning_rate": 3.2010723860589816e-05, + "loss": 0.3158, + "step": 1392 + }, + { + "epoch": 3.69, + "learning_rate": 3.1997319034852544e-05, + "loss": 0.3629, + "step": 1393 + }, + { + "epoch": 3.69, + "learning_rate": 3.1983914209115286e-05, + "loss": 0.0122, + "step": 1394 + }, + { + "epoch": 3.69, + "learning_rate": 3.1970509383378014e-05, + "loss": 0.0449, + "step": 1395 + }, + { + "epoch": 3.69, + "learning_rate": 3.1957104557640756e-05, + "loss": 0.1273, + "step": 1396 + }, + { + "epoch": 3.7, + "learning_rate": 3.1943699731903485e-05, + "loss": 0.3401, + "step": 1397 + }, + { + "epoch": 3.7, + "learning_rate": 3.193029490616622e-05, + "loss": 0.0183, + "step": 1398 + }, + { + "epoch": 3.7, + "learning_rate": 3.1916890080428955e-05, + "loss": 0.0526, + "step": 1399 + }, + { + "epoch": 3.7, + "learning_rate": 3.190348525469169e-05, + "loss": 0.5037, + "step": 1400 + }, + { + "epoch": 3.71, + "learning_rate": 3.1890080428954425e-05, + "loss": 0.0059, + "step": 1401 + }, + { + "epoch": 3.71, + "learning_rate": 3.187667560321716e-05, + "loss": 0.0266, + "step": 1402 + }, + { + "epoch": 3.71, + "learning_rate": 3.1863270777479896e-05, + "loss": 0.4095, + "step": 1403 + }, + { + "epoch": 3.71, + "learning_rate": 3.184986595174263e-05, + "loss": 0.1802, + "step": 1404 + }, + { + "epoch": 3.72, + "learning_rate": 3.1836461126005366e-05, + "loss": 0.3586, + "step": 1405 + }, + { + "epoch": 3.72, + "learning_rate": 3.18230563002681e-05, + "loss": 0.2058, + "step": 1406 + }, + { + "epoch": 3.72, + "learning_rate": 3.1809651474530836e-05, + "loss": 0.008, + "step": 1407 + }, + { + "epoch": 3.72, + "learning_rate": 3.1796246648793564e-05, + "loss": 0.0282, + "step": 1408 + }, + { + "epoch": 3.73, + "learning_rate": 3.1782841823056306e-05, + "loss": 0.0077, + "step": 1409 + }, + { + "epoch": 3.73, + "learning_rate": 3.1769436997319035e-05, + "loss": 0.3461, + "step": 1410 + }, + { + "epoch": 3.73, + "learning_rate": 3.1756032171581777e-05, + "loss": 0.0038, + "step": 1411 + }, + { + "epoch": 3.74, + "learning_rate": 3.1742627345844505e-05, + "loss": 0.0087, + "step": 1412 + }, + { + "epoch": 3.74, + "learning_rate": 3.172922252010724e-05, + "loss": 0.8254, + "step": 1413 + }, + { + "epoch": 3.74, + "learning_rate": 3.1715817694369975e-05, + "loss": 0.017, + "step": 1414 + }, + { + "epoch": 3.74, + "learning_rate": 3.170241286863271e-05, + "loss": 0.2954, + "step": 1415 + }, + { + "epoch": 3.75, + "learning_rate": 3.1689008042895445e-05, + "loss": 0.0286, + "step": 1416 + }, + { + "epoch": 3.75, + "learning_rate": 3.167560321715818e-05, + "loss": 0.0454, + "step": 1417 + }, + { + "epoch": 3.75, + "learning_rate": 3.166219839142091e-05, + "loss": 0.222, + "step": 1418 + }, + { + "epoch": 3.75, + "learning_rate": 3.164879356568365e-05, + "loss": 0.0225, + "step": 1419 + }, + { + "epoch": 3.76, + "learning_rate": 3.163538873994638e-05, + "loss": 0.2599, + "step": 1420 + }, + { + "epoch": 3.76, + "learning_rate": 3.162198391420912e-05, + "loss": 0.2343, + "step": 1421 + }, + { + "epoch": 3.76, + "learning_rate": 3.160857908847185e-05, + "loss": 0.0274, + "step": 1422 + }, + { + "epoch": 3.76, + "learning_rate": 3.1595174262734585e-05, + "loss": 0.0109, + "step": 1423 + }, + { + "epoch": 3.77, + "learning_rate": 3.158176943699732e-05, + "loss": 0.012, + "step": 1424 + }, + { + "epoch": 3.77, + "learning_rate": 3.1568364611260055e-05, + "loss": 0.0267, + "step": 1425 + }, + { + "epoch": 3.77, + "learning_rate": 3.155495978552279e-05, + "loss": 0.0116, + "step": 1426 + }, + { + "epoch": 3.78, + "learning_rate": 3.1541554959785525e-05, + "loss": 0.2563, + "step": 1427 + }, + { + "epoch": 3.78, + "learning_rate": 3.1528150134048253e-05, + "loss": 0.2149, + "step": 1428 + }, + { + "epoch": 3.78, + "learning_rate": 3.1514745308310995e-05, + "loss": 0.2099, + "step": 1429 + }, + { + "epoch": 3.78, + "learning_rate": 3.1501340482573724e-05, + "loss": 0.1445, + "step": 1430 + }, + { + "epoch": 3.79, + "learning_rate": 3.1487935656836466e-05, + "loss": 0.0069, + "step": 1431 + }, + { + "epoch": 3.79, + "learning_rate": 3.1474530831099194e-05, + "loss": 0.3583, + "step": 1432 + }, + { + "epoch": 3.79, + "learning_rate": 3.146112600536193e-05, + "loss": 0.1112, + "step": 1433 + }, + { + "epoch": 3.79, + "learning_rate": 3.1447721179624664e-05, + "loss": 0.5379, + "step": 1434 + }, + { + "epoch": 3.8, + "learning_rate": 3.14343163538874e-05, + "loss": 0.0248, + "step": 1435 + }, + { + "epoch": 3.8, + "learning_rate": 3.1420911528150135e-05, + "loss": 0.0255, + "step": 1436 + }, + { + "epoch": 3.8, + "learning_rate": 3.140750670241287e-05, + "loss": 0.3363, + "step": 1437 + }, + { + "epoch": 3.8, + "learning_rate": 3.1394101876675605e-05, + "loss": 0.2952, + "step": 1438 + }, + { + "epoch": 3.81, + "learning_rate": 3.138069705093834e-05, + "loss": 0.0337, + "step": 1439 + }, + { + "epoch": 3.81, + "learning_rate": 3.1367292225201075e-05, + "loss": 0.0157, + "step": 1440 + }, + { + "epoch": 3.81, + "learning_rate": 3.135388739946381e-05, + "loss": 0.0204, + "step": 1441 + }, + { + "epoch": 3.81, + "learning_rate": 3.1340482573726545e-05, + "loss": 0.7707, + "step": 1442 + }, + { + "epoch": 3.82, + "learning_rate": 3.1327077747989274e-05, + "loss": 0.4232, + "step": 1443 + }, + { + "epoch": 3.82, + "learning_rate": 3.1313672922252016e-05, + "loss": 0.116, + "step": 1444 + }, + { + "epoch": 3.82, + "learning_rate": 3.1300268096514744e-05, + "loss": 0.421, + "step": 1445 + }, + { + "epoch": 3.83, + "learning_rate": 3.1286863270777486e-05, + "loss": 0.0267, + "step": 1446 + }, + { + "epoch": 3.83, + "learning_rate": 3.1273458445040214e-05, + "loss": 0.0078, + "step": 1447 + }, + { + "epoch": 3.83, + "learning_rate": 3.126005361930295e-05, + "loss": 0.0996, + "step": 1448 + }, + { + "epoch": 3.83, + "learning_rate": 3.1246648793565684e-05, + "loss": 0.0389, + "step": 1449 + }, + { + "epoch": 3.84, + "learning_rate": 3.123324396782842e-05, + "loss": 0.0482, + "step": 1450 + }, + { + "epoch": 3.84, + "learning_rate": 3.1219839142091155e-05, + "loss": 0.0053, + "step": 1451 + }, + { + "epoch": 3.84, + "learning_rate": 3.120643431635389e-05, + "loss": 0.0153, + "step": 1452 + }, + { + "epoch": 3.84, + "learning_rate": 3.119302949061662e-05, + "loss": 0.008, + "step": 1453 + }, + { + "epoch": 3.85, + "learning_rate": 3.117962466487936e-05, + "loss": 0.0166, + "step": 1454 + }, + { + "epoch": 3.85, + "learning_rate": 3.116621983914209e-05, + "loss": 0.0889, + "step": 1455 + }, + { + "epoch": 3.85, + "learning_rate": 3.115281501340483e-05, + "loss": 0.0695, + "step": 1456 + }, + { + "epoch": 3.85, + "learning_rate": 3.113941018766756e-05, + "loss": 0.3353, + "step": 1457 + }, + { + "epoch": 3.86, + "learning_rate": 3.1126005361930294e-05, + "loss": 0.0729, + "step": 1458 + }, + { + "epoch": 3.86, + "learning_rate": 3.111260053619303e-05, + "loss": 0.0187, + "step": 1459 + }, + { + "epoch": 3.86, + "learning_rate": 3.1099195710455764e-05, + "loss": 0.2512, + "step": 1460 + }, + { + "epoch": 3.87, + "learning_rate": 3.10857908847185e-05, + "loss": 0.3837, + "step": 1461 + }, + { + "epoch": 3.87, + "learning_rate": 3.1072386058981234e-05, + "loss": 0.2543, + "step": 1462 + }, + { + "epoch": 3.87, + "learning_rate": 3.105898123324397e-05, + "loss": 0.1797, + "step": 1463 + }, + { + "epoch": 3.87, + "learning_rate": 3.1045576407506705e-05, + "loss": 0.3097, + "step": 1464 + }, + { + "epoch": 3.88, + "learning_rate": 3.103217158176944e-05, + "loss": 0.268, + "step": 1465 + }, + { + "epoch": 3.88, + "learning_rate": 3.1018766756032175e-05, + "loss": 0.1773, + "step": 1466 + }, + { + "epoch": 3.88, + "learning_rate": 3.100536193029491e-05, + "loss": 0.2055, + "step": 1467 + }, + { + "epoch": 3.88, + "learning_rate": 3.099195710455764e-05, + "loss": 0.0279, + "step": 1468 + }, + { + "epoch": 3.89, + "learning_rate": 3.097855227882038e-05, + "loss": 0.1263, + "step": 1469 + }, + { + "epoch": 3.89, + "learning_rate": 3.096514745308311e-05, + "loss": 0.0449, + "step": 1470 + }, + { + "epoch": 3.89, + "learning_rate": 3.095174262734585e-05, + "loss": 0.2429, + "step": 1471 + }, + { + "epoch": 3.89, + "learning_rate": 3.093833780160858e-05, + "loss": 0.1245, + "step": 1472 + }, + { + "epoch": 3.9, + "learning_rate": 3.0924932975871314e-05, + "loss": 0.1303, + "step": 1473 + }, + { + "epoch": 3.9, + "learning_rate": 3.091152815013405e-05, + "loss": 0.0303, + "step": 1474 + }, + { + "epoch": 3.9, + "learning_rate": 3.0898123324396784e-05, + "loss": 0.3279, + "step": 1475 + }, + { + "epoch": 3.9, + "learning_rate": 3.088471849865952e-05, + "loss": 0.134, + "step": 1476 + }, + { + "epoch": 3.91, + "learning_rate": 3.0871313672922255e-05, + "loss": 0.5138, + "step": 1477 + }, + { + "epoch": 3.91, + "learning_rate": 3.085790884718498e-05, + "loss": 0.0476, + "step": 1478 + }, + { + "epoch": 3.91, + "learning_rate": 3.0844504021447725e-05, + "loss": 0.1956, + "step": 1479 + }, + { + "epoch": 3.92, + "learning_rate": 3.083109919571045e-05, + "loss": 0.2061, + "step": 1480 + }, + { + "epoch": 3.92, + "learning_rate": 3.0817694369973195e-05, + "loss": 0.269, + "step": 1481 + }, + { + "epoch": 3.92, + "learning_rate": 3.0804289544235923e-05, + "loss": 0.0708, + "step": 1482 + }, + { + "epoch": 3.92, + "learning_rate": 3.0790884718498665e-05, + "loss": 0.0389, + "step": 1483 + }, + { + "epoch": 3.93, + "learning_rate": 3.0777479892761394e-05, + "loss": 0.2566, + "step": 1484 + }, + { + "epoch": 3.93, + "learning_rate": 3.076407506702413e-05, + "loss": 0.0581, + "step": 1485 + }, + { + "epoch": 3.93, + "learning_rate": 3.0750670241286864e-05, + "loss": 0.1527, + "step": 1486 + }, + { + "epoch": 3.93, + "learning_rate": 3.07372654155496e-05, + "loss": 0.3963, + "step": 1487 + }, + { + "epoch": 3.94, + "learning_rate": 3.0723860589812334e-05, + "loss": 0.2241, + "step": 1488 + }, + { + "epoch": 3.94, + "learning_rate": 3.071045576407507e-05, + "loss": 0.1275, + "step": 1489 + }, + { + "epoch": 3.94, + "learning_rate": 3.0697050938337804e-05, + "loss": 0.3148, + "step": 1490 + }, + { + "epoch": 3.94, + "learning_rate": 3.068364611260054e-05, + "loss": 0.1474, + "step": 1491 + }, + { + "epoch": 3.95, + "learning_rate": 3.0670241286863275e-05, + "loss": 0.0233, + "step": 1492 + }, + { + "epoch": 3.95, + "learning_rate": 3.065683646112601e-05, + "loss": 0.1721, + "step": 1493 + }, + { + "epoch": 3.95, + "learning_rate": 3.0643431635388745e-05, + "loss": 0.6024, + "step": 1494 + }, + { + "epoch": 3.96, + "learning_rate": 3.063002680965147e-05, + "loss": 0.1425, + "step": 1495 + }, + { + "epoch": 3.96, + "learning_rate": 3.0616621983914215e-05, + "loss": 0.0311, + "step": 1496 + }, + { + "epoch": 3.96, + "learning_rate": 3.0603217158176944e-05, + "loss": 0.0197, + "step": 1497 + }, + { + "epoch": 3.96, + "learning_rate": 3.0589812332439686e-05, + "loss": 0.0406, + "step": 1498 + }, + { + "epoch": 3.97, + "learning_rate": 3.0576407506702414e-05, + "loss": 0.054, + "step": 1499 + }, + { + "epoch": 3.97, + "learning_rate": 3.056300268096515e-05, + "loss": 0.161, + "step": 1500 + }, + { + "epoch": 3.97, + "learning_rate": 3.0549597855227884e-05, + "loss": 0.0549, + "step": 1501 + }, + { + "epoch": 3.97, + "learning_rate": 3.053619302949062e-05, + "loss": 0.1667, + "step": 1502 + }, + { + "epoch": 3.98, + "learning_rate": 3.0522788203753354e-05, + "loss": 0.1264, + "step": 1503 + }, + { + "epoch": 3.98, + "learning_rate": 3.0509383378016086e-05, + "loss": 0.0133, + "step": 1504 + }, + { + "epoch": 3.98, + "learning_rate": 3.049597855227882e-05, + "loss": 0.0655, + "step": 1505 + }, + { + "epoch": 3.98, + "learning_rate": 3.0482573726541556e-05, + "loss": 0.1054, + "step": 1506 + }, + { + "epoch": 3.99, + "learning_rate": 3.046916890080429e-05, + "loss": 0.0053, + "step": 1507 + }, + { + "epoch": 3.99, + "learning_rate": 3.0455764075067027e-05, + "loss": 0.0347, + "step": 1508 + }, + { + "epoch": 3.99, + "learning_rate": 3.0442359249329762e-05, + "loss": 0.6095, + "step": 1509 + }, + { + "epoch": 3.99, + "learning_rate": 3.0428954423592494e-05, + "loss": 0.1339, + "step": 1510 + }, + { + "epoch": 4.0, + "learning_rate": 3.0415549597855232e-05, + "loss": 0.0088, + "step": 1511 + }, + { + "epoch": 4.0, + "learning_rate": 3.0402144772117964e-05, + "loss": 0.4356, + "step": 1512 + }, + { + "epoch": 4.0, + "eval_f1": 0.7822580645161291, + "eval_loss": 0.6966613531112671, + "eval_runtime": 1.8703, + "eval_samples_per_second": 808.957, + "eval_steps_per_second": 50.794, + "step": 1512 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 387814867990272.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1512/training_args.bin b/checkpoint-1512/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-1512/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-1890/config.json b/checkpoint-1890/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-1890/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-1890/optimizer.pt b/checkpoint-1890/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1218cd806aeb8964e3f4b3ff14b554f9945bda0 --- /dev/null +++ b/checkpoint-1890/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d678854dbfdcfa8b908c9332ac25c1f8ef0c3ff62cfa32314a14fdeba3398df +size 526325317 diff --git a/checkpoint-1890/pytorch_model.bin b/checkpoint-1890/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2cb1d439627f710955e15443ad0ec02c06d16ef8 --- /dev/null +++ b/checkpoint-1890/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57911e1cd736481c049e1f00b39d9c6f9570bb5e62277d1525fea2092d3ff61 +size 263167661 diff --git a/checkpoint-1890/rng_state.pth b/checkpoint-1890/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2437e9d55d63f9f268b5436998d6276ff8b9fd96 --- /dev/null +++ b/checkpoint-1890/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4736e2b29a3bb716eb6278813e759366aca017726091b3748ed8a0cbd6fca255 +size 14575 diff --git a/checkpoint-1890/scheduler.pt b/checkpoint-1890/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8d557155982f6f55a9c893c3c0f6c41634d9eea --- /dev/null +++ b/checkpoint-1890/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b47afbdd02c26529bafa5013aad97cbb6f1bc4bbbff15fd4376d3ffddaeab4 +size 627 diff --git a/checkpoint-1890/trainer_state.json b/checkpoint-1890/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cbbc868d07b92af4d2b6a443a02948a2b082493f --- /dev/null +++ b/checkpoint-1890/trainer_state.json @@ -0,0 +1,11401 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 5.0, + "global_step": 1890, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + }, + { + "epoch": 3.0, + "learning_rate": 3.545576407506703e-05, + "loss": 0.0111, + "step": 1135 + }, + { + "epoch": 3.01, + "learning_rate": 3.544235924932976e-05, + "loss": 0.0166, + "step": 1136 + }, + { + "epoch": 3.01, + "learning_rate": 3.54289544235925e-05, + "loss": 0.2159, + "step": 1137 + }, + { + "epoch": 3.01, + "learning_rate": 3.541554959785523e-05, + "loss": 0.0096, + "step": 1138 + }, + { + "epoch": 3.01, + "learning_rate": 3.540214477211796e-05, + "loss": 0.1352, + "step": 1139 + }, + { + "epoch": 3.02, + "learning_rate": 3.53887399463807e-05, + "loss": 0.0195, + "step": 1140 + }, + { + "epoch": 3.02, + "learning_rate": 3.5375335120643434e-05, + "loss": 0.1579, + "step": 1141 + }, + { + "epoch": 3.02, + "learning_rate": 3.536193029490617e-05, + "loss": 0.0078, + "step": 1142 + }, + { + "epoch": 3.02, + "learning_rate": 3.5348525469168904e-05, + "loss": 0.0111, + "step": 1143 + }, + { + "epoch": 3.03, + "learning_rate": 3.533512064343163e-05, + "loss": 0.2457, + "step": 1144 + }, + { + "epoch": 3.03, + "learning_rate": 3.5321715817694374e-05, + "loss": 0.014, + "step": 1145 + }, + { + "epoch": 3.03, + "learning_rate": 3.53083109919571e-05, + "loss": 0.2021, + "step": 1146 + }, + { + "epoch": 3.03, + "learning_rate": 3.5294906166219844e-05, + "loss": 0.5334, + "step": 1147 + }, + { + "epoch": 3.04, + "learning_rate": 3.528150134048257e-05, + "loss": 0.0116, + "step": 1148 + }, + { + "epoch": 3.04, + "learning_rate": 3.526809651474531e-05, + "loss": 0.0099, + "step": 1149 + }, + { + "epoch": 3.04, + "learning_rate": 3.525469168900804e-05, + "loss": 0.2102, + "step": 1150 + }, + { + "epoch": 3.04, + "learning_rate": 3.524128686327078e-05, + "loss": 0.0093, + "step": 1151 + }, + { + "epoch": 3.05, + "learning_rate": 3.522788203753351e-05, + "loss": 0.0112, + "step": 1152 + }, + { + "epoch": 3.05, + "learning_rate": 3.521447721179625e-05, + "loss": 0.1761, + "step": 1153 + }, + { + "epoch": 3.05, + "learning_rate": 3.5201072386058984e-05, + "loss": 0.1608, + "step": 1154 + }, + { + "epoch": 3.06, + "learning_rate": 3.518766756032172e-05, + "loss": 0.2883, + "step": 1155 + }, + { + "epoch": 3.06, + "learning_rate": 3.5174262734584454e-05, + "loss": 0.0304, + "step": 1156 + }, + { + "epoch": 3.06, + "learning_rate": 3.516085790884719e-05, + "loss": 0.0623, + "step": 1157 + }, + { + "epoch": 3.06, + "learning_rate": 3.5147453083109924e-05, + "loss": 0.1824, + "step": 1158 + }, + { + "epoch": 3.07, + "learning_rate": 3.513404825737265e-05, + "loss": 0.2527, + "step": 1159 + }, + { + "epoch": 3.07, + "learning_rate": 3.5120643431635394e-05, + "loss": 0.0877, + "step": 1160 + }, + { + "epoch": 3.07, + "learning_rate": 3.510723860589812e-05, + "loss": 0.2735, + "step": 1161 + }, + { + "epoch": 3.07, + "learning_rate": 3.5093833780160865e-05, + "loss": 0.1126, + "step": 1162 + }, + { + "epoch": 3.08, + "learning_rate": 3.508042895442359e-05, + "loss": 0.2498, + "step": 1163 + }, + { + "epoch": 3.08, + "learning_rate": 3.506702412868633e-05, + "loss": 0.022, + "step": 1164 + }, + { + "epoch": 3.08, + "learning_rate": 3.505361930294906e-05, + "loss": 0.2768, + "step": 1165 + }, + { + "epoch": 3.08, + "learning_rate": 3.50402144772118e-05, + "loss": 0.0429, + "step": 1166 + }, + { + "epoch": 3.09, + "learning_rate": 3.5026809651474533e-05, + "loss": 0.0198, + "step": 1167 + }, + { + "epoch": 3.09, + "learning_rate": 3.501340482573727e-05, + "loss": 0.0097, + "step": 1168 + }, + { + "epoch": 3.09, + "learning_rate": 3.5e-05, + "loss": 0.0276, + "step": 1169 + }, + { + "epoch": 3.1, + "learning_rate": 3.498659517426274e-05, + "loss": 0.2276, + "step": 1170 + }, + { + "epoch": 3.1, + "learning_rate": 3.497319034852547e-05, + "loss": 0.0461, + "step": 1171 + }, + { + "epoch": 3.1, + "learning_rate": 3.495978552278821e-05, + "loss": 0.0103, + "step": 1172 + }, + { + "epoch": 3.1, + "learning_rate": 3.494638069705094e-05, + "loss": 0.1455, + "step": 1173 + }, + { + "epoch": 3.11, + "learning_rate": 3.493297587131367e-05, + "loss": 0.0865, + "step": 1174 + }, + { + "epoch": 3.11, + "learning_rate": 3.491957104557641e-05, + "loss": 0.3226, + "step": 1175 + }, + { + "epoch": 3.11, + "learning_rate": 3.490616621983914e-05, + "loss": 0.1744, + "step": 1176 + }, + { + "epoch": 3.11, + "learning_rate": 3.489276139410188e-05, + "loss": 0.0148, + "step": 1177 + }, + { + "epoch": 3.12, + "learning_rate": 3.487935656836461e-05, + "loss": 0.2582, + "step": 1178 + }, + { + "epoch": 3.12, + "learning_rate": 3.486595174262735e-05, + "loss": 0.2782, + "step": 1179 + }, + { + "epoch": 3.12, + "learning_rate": 3.485254691689008e-05, + "loss": 0.143, + "step": 1180 + }, + { + "epoch": 3.12, + "learning_rate": 3.483914209115282e-05, + "loss": 0.0853, + "step": 1181 + }, + { + "epoch": 3.13, + "learning_rate": 3.4825737265415554e-05, + "loss": 0.1361, + "step": 1182 + }, + { + "epoch": 3.13, + "learning_rate": 3.481233243967829e-05, + "loss": 0.0883, + "step": 1183 + }, + { + "epoch": 3.13, + "learning_rate": 3.479892761394102e-05, + "loss": 0.0116, + "step": 1184 + }, + { + "epoch": 3.13, + "learning_rate": 3.478552278820376e-05, + "loss": 0.0531, + "step": 1185 + }, + { + "epoch": 3.14, + "learning_rate": 3.477211796246649e-05, + "loss": 0.0184, + "step": 1186 + }, + { + "epoch": 3.14, + "learning_rate": 3.475871313672923e-05, + "loss": 0.1601, + "step": 1187 + }, + { + "epoch": 3.14, + "learning_rate": 3.474530831099196e-05, + "loss": 0.007, + "step": 1188 + }, + { + "epoch": 3.15, + "learning_rate": 3.473190348525469e-05, + "loss": 0.0101, + "step": 1189 + }, + { + "epoch": 3.15, + "learning_rate": 3.471849865951743e-05, + "loss": 0.2385, + "step": 1190 + }, + { + "epoch": 3.15, + "learning_rate": 3.470509383378016e-05, + "loss": 0.0075, + "step": 1191 + }, + { + "epoch": 3.15, + "learning_rate": 3.46916890080429e-05, + "loss": 0.0919, + "step": 1192 + }, + { + "epoch": 3.16, + "learning_rate": 3.467828418230563e-05, + "loss": 0.0162, + "step": 1193 + }, + { + "epoch": 3.16, + "learning_rate": 3.466487935656836e-05, + "loss": 0.2239, + "step": 1194 + }, + { + "epoch": 3.16, + "learning_rate": 3.4651474530831104e-05, + "loss": 0.5757, + "step": 1195 + }, + { + "epoch": 3.16, + "learning_rate": 3.463806970509383e-05, + "loss": 0.0774, + "step": 1196 + }, + { + "epoch": 3.17, + "learning_rate": 3.4624664879356574e-05, + "loss": 0.2124, + "step": 1197 + }, + { + "epoch": 3.17, + "learning_rate": 3.46112600536193e-05, + "loss": 0.0107, + "step": 1198 + }, + { + "epoch": 3.17, + "learning_rate": 3.459785522788204e-05, + "loss": 0.3179, + "step": 1199 + }, + { + "epoch": 3.17, + "learning_rate": 3.458445040214477e-05, + "loss": 0.0138, + "step": 1200 + }, + { + "epoch": 3.18, + "learning_rate": 3.457104557640751e-05, + "loss": 0.0094, + "step": 1201 + }, + { + "epoch": 3.18, + "learning_rate": 3.455764075067024e-05, + "loss": 0.0039, + "step": 1202 + }, + { + "epoch": 3.18, + "learning_rate": 3.454423592493298e-05, + "loss": 0.0745, + "step": 1203 + }, + { + "epoch": 3.19, + "learning_rate": 3.453083109919571e-05, + "loss": 0.0387, + "step": 1204 + }, + { + "epoch": 3.19, + "learning_rate": 3.451742627345845e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 3.19, + "learning_rate": 3.450402144772118e-05, + "loss": 0.1299, + "step": 1206 + }, + { + "epoch": 3.19, + "learning_rate": 3.449061662198392e-05, + "loss": 0.2821, + "step": 1207 + }, + { + "epoch": 3.2, + "learning_rate": 3.4477211796246653e-05, + "loss": 0.2236, + "step": 1208 + }, + { + "epoch": 3.2, + "learning_rate": 3.446380697050938e-05, + "loss": 0.1436, + "step": 1209 + }, + { + "epoch": 3.2, + "learning_rate": 3.4450402144772124e-05, + "loss": 0.1504, + "step": 1210 + }, + { + "epoch": 3.2, + "learning_rate": 3.443699731903485e-05, + "loss": 0.0415, + "step": 1211 + }, + { + "epoch": 3.21, + "learning_rate": 3.4423592493297594e-05, + "loss": 0.023, + "step": 1212 + }, + { + "epoch": 3.21, + "learning_rate": 3.441018766756032e-05, + "loss": 0.2128, + "step": 1213 + }, + { + "epoch": 3.21, + "learning_rate": 3.439678284182306e-05, + "loss": 0.0066, + "step": 1214 + }, + { + "epoch": 3.21, + "learning_rate": 3.438337801608579e-05, + "loss": 0.4345, + "step": 1215 + }, + { + "epoch": 3.22, + "learning_rate": 3.436997319034853e-05, + "loss": 0.0214, + "step": 1216 + }, + { + "epoch": 3.22, + "learning_rate": 3.435656836461126e-05, + "loss": 0.2094, + "step": 1217 + }, + { + "epoch": 3.22, + "learning_rate": 3.4343163538874e-05, + "loss": 0.0822, + "step": 1218 + }, + { + "epoch": 3.22, + "learning_rate": 3.4329758713136726e-05, + "loss": 0.1153, + "step": 1219 + }, + { + "epoch": 3.23, + "learning_rate": 3.431635388739947e-05, + "loss": 0.0059, + "step": 1220 + }, + { + "epoch": 3.23, + "learning_rate": 3.43029490616622e-05, + "loss": 0.0069, + "step": 1221 + }, + { + "epoch": 3.23, + "learning_rate": 3.428954423592494e-05, + "loss": 0.044, + "step": 1222 + }, + { + "epoch": 3.24, + "learning_rate": 3.427613941018767e-05, + "loss": 0.1975, + "step": 1223 + }, + { + "epoch": 3.24, + "learning_rate": 3.42627345844504e-05, + "loss": 0.3294, + "step": 1224 + }, + { + "epoch": 3.24, + "learning_rate": 3.424932975871314e-05, + "loss": 0.026, + "step": 1225 + }, + { + "epoch": 3.24, + "learning_rate": 3.423592493297587e-05, + "loss": 0.2666, + "step": 1226 + }, + { + "epoch": 3.25, + "learning_rate": 3.422252010723861e-05, + "loss": 0.0628, + "step": 1227 + }, + { + "epoch": 3.25, + "learning_rate": 3.420911528150134e-05, + "loss": 0.0068, + "step": 1228 + }, + { + "epoch": 3.25, + "learning_rate": 3.419571045576407e-05, + "loss": 0.0144, + "step": 1229 + }, + { + "epoch": 3.25, + "learning_rate": 3.418230563002681e-05, + "loss": 0.0029, + "step": 1230 + }, + { + "epoch": 3.26, + "learning_rate": 3.416890080428954e-05, + "loss": 0.606, + "step": 1231 + }, + { + "epoch": 3.26, + "learning_rate": 3.415549597855228e-05, + "loss": 0.2162, + "step": 1232 + }, + { + "epoch": 3.26, + "learning_rate": 3.414209115281501e-05, + "loss": 0.146, + "step": 1233 + }, + { + "epoch": 3.26, + "learning_rate": 3.412868632707775e-05, + "loss": 0.3649, + "step": 1234 + }, + { + "epoch": 3.27, + "learning_rate": 3.411528150134048e-05, + "loss": 0.0062, + "step": 1235 + }, + { + "epoch": 3.27, + "learning_rate": 3.410187667560322e-05, + "loss": 0.4097, + "step": 1236 + }, + { + "epoch": 3.27, + "learning_rate": 3.408847184986595e-05, + "loss": 0.5354, + "step": 1237 + }, + { + "epoch": 3.28, + "learning_rate": 3.407506702412869e-05, + "loss": 0.6222, + "step": 1238 + }, + { + "epoch": 3.28, + "learning_rate": 3.406166219839142e-05, + "loss": 0.0023, + "step": 1239 + }, + { + "epoch": 3.28, + "learning_rate": 3.404825737265416e-05, + "loss": 0.0247, + "step": 1240 + }, + { + "epoch": 3.28, + "learning_rate": 3.403485254691689e-05, + "loss": 0.0051, + "step": 1241 + }, + { + "epoch": 3.29, + "learning_rate": 3.402144772117963e-05, + "loss": 0.2504, + "step": 1242 + }, + { + "epoch": 3.29, + "learning_rate": 3.400804289544236e-05, + "loss": 0.0195, + "step": 1243 + }, + { + "epoch": 3.29, + "learning_rate": 3.39946380697051e-05, + "loss": 0.3706, + "step": 1244 + }, + { + "epoch": 3.29, + "learning_rate": 3.398123324396783e-05, + "loss": 0.0174, + "step": 1245 + }, + { + "epoch": 3.3, + "learning_rate": 3.396782841823056e-05, + "loss": 0.0068, + "step": 1246 + }, + { + "epoch": 3.3, + "learning_rate": 3.39544235924933e-05, + "loss": 0.3938, + "step": 1247 + }, + { + "epoch": 3.3, + "learning_rate": 3.394101876675603e-05, + "loss": 0.0114, + "step": 1248 + }, + { + "epoch": 3.3, + "learning_rate": 3.3927613941018774e-05, + "loss": 0.0088, + "step": 1249 + }, + { + "epoch": 3.31, + "learning_rate": 3.39142091152815e-05, + "loss": 0.0126, + "step": 1250 + }, + { + "epoch": 3.31, + "learning_rate": 3.390080428954424e-05, + "loss": 0.0091, + "step": 1251 + }, + { + "epoch": 3.31, + "learning_rate": 3.388739946380697e-05, + "loss": 0.0232, + "step": 1252 + }, + { + "epoch": 3.31, + "learning_rate": 3.387399463806971e-05, + "loss": 0.3704, + "step": 1253 + }, + { + "epoch": 3.32, + "learning_rate": 3.386058981233244e-05, + "loss": 0.0112, + "step": 1254 + }, + { + "epoch": 3.32, + "learning_rate": 3.384718498659518e-05, + "loss": 0.1709, + "step": 1255 + }, + { + "epoch": 3.32, + "learning_rate": 3.3833780160857906e-05, + "loss": 0.0109, + "step": 1256 + }, + { + "epoch": 3.33, + "learning_rate": 3.382037533512065e-05, + "loss": 0.2874, + "step": 1257 + }, + { + "epoch": 3.33, + "learning_rate": 3.3806970509383376e-05, + "loss": 0.024, + "step": 1258 + }, + { + "epoch": 3.33, + "learning_rate": 3.379356568364612e-05, + "loss": 0.0131, + "step": 1259 + }, + { + "epoch": 3.33, + "learning_rate": 3.3780160857908846e-05, + "loss": 0.2076, + "step": 1260 + }, + { + "epoch": 3.34, + "learning_rate": 3.376675603217158e-05, + "loss": 0.0083, + "step": 1261 + }, + { + "epoch": 3.34, + "learning_rate": 3.375335120643432e-05, + "loss": 0.0234, + "step": 1262 + }, + { + "epoch": 3.34, + "learning_rate": 3.373994638069705e-05, + "loss": 0.0066, + "step": 1263 + }, + { + "epoch": 3.34, + "learning_rate": 3.372654155495979e-05, + "loss": 0.3983, + "step": 1264 + }, + { + "epoch": 3.35, + "learning_rate": 3.371313672922252e-05, + "loss": 0.0648, + "step": 1265 + }, + { + "epoch": 3.35, + "learning_rate": 3.369973190348526e-05, + "loss": 0.006, + "step": 1266 + }, + { + "epoch": 3.35, + "learning_rate": 3.368632707774799e-05, + "loss": 0.0807, + "step": 1267 + }, + { + "epoch": 3.35, + "learning_rate": 3.367292225201073e-05, + "loss": 0.0975, + "step": 1268 + }, + { + "epoch": 3.36, + "learning_rate": 3.365951742627346e-05, + "loss": 0.2934, + "step": 1269 + }, + { + "epoch": 3.36, + "learning_rate": 3.36461126005362e-05, + "loss": 0.0869, + "step": 1270 + }, + { + "epoch": 3.36, + "learning_rate": 3.3632707774798926e-05, + "loss": 0.1374, + "step": 1271 + }, + { + "epoch": 3.37, + "learning_rate": 3.361930294906167e-05, + "loss": 0.3314, + "step": 1272 + }, + { + "epoch": 3.37, + "learning_rate": 3.3605898123324396e-05, + "loss": 0.0045, + "step": 1273 + }, + { + "epoch": 3.37, + "learning_rate": 3.359249329758714e-05, + "loss": 0.0536, + "step": 1274 + }, + { + "epoch": 3.37, + "learning_rate": 3.3579088471849867e-05, + "loss": 0.0564, + "step": 1275 + }, + { + "epoch": 3.38, + "learning_rate": 3.35656836461126e-05, + "loss": 0.0689, + "step": 1276 + }, + { + "epoch": 3.38, + "learning_rate": 3.355227882037534e-05, + "loss": 0.5177, + "step": 1277 + }, + { + "epoch": 3.38, + "learning_rate": 3.353887399463807e-05, + "loss": 0.0689, + "step": 1278 + }, + { + "epoch": 3.38, + "learning_rate": 3.352546916890081e-05, + "loss": 0.0664, + "step": 1279 + }, + { + "epoch": 3.39, + "learning_rate": 3.351206434316354e-05, + "loss": 0.0614, + "step": 1280 + }, + { + "epoch": 3.39, + "learning_rate": 3.349865951742627e-05, + "loss": 0.1994, + "step": 1281 + }, + { + "epoch": 3.39, + "learning_rate": 3.348525469168901e-05, + "loss": 0.4769, + "step": 1282 + }, + { + "epoch": 3.39, + "learning_rate": 3.347184986595174e-05, + "loss": 0.1851, + "step": 1283 + }, + { + "epoch": 3.4, + "learning_rate": 3.345844504021448e-05, + "loss": 0.0092, + "step": 1284 + }, + { + "epoch": 3.4, + "learning_rate": 3.344504021447721e-05, + "loss": 0.0052, + "step": 1285 + }, + { + "epoch": 3.4, + "learning_rate": 3.3431635388739946e-05, + "loss": 0.0095, + "step": 1286 + }, + { + "epoch": 3.4, + "learning_rate": 3.341823056300268e-05, + "loss": 0.0242, + "step": 1287 + }, + { + "epoch": 3.41, + "learning_rate": 3.3404825737265416e-05, + "loss": 0.0565, + "step": 1288 + }, + { + "epoch": 3.41, + "learning_rate": 3.339142091152815e-05, + "loss": 0.2645, + "step": 1289 + }, + { + "epoch": 3.41, + "learning_rate": 3.337801608579089e-05, + "loss": 0.0049, + "step": 1290 + }, + { + "epoch": 3.42, + "learning_rate": 3.336461126005362e-05, + "loss": 0.0929, + "step": 1291 + }, + { + "epoch": 3.42, + "learning_rate": 3.335120643431636e-05, + "loss": 0.3968, + "step": 1292 + }, + { + "epoch": 3.42, + "learning_rate": 3.333780160857909e-05, + "loss": 0.033, + "step": 1293 + }, + { + "epoch": 3.42, + "learning_rate": 3.332439678284183e-05, + "loss": 0.007, + "step": 1294 + }, + { + "epoch": 3.43, + "learning_rate": 3.331099195710456e-05, + "loss": 0.2552, + "step": 1295 + }, + { + "epoch": 3.43, + "learning_rate": 3.329758713136729e-05, + "loss": 0.004, + "step": 1296 + }, + { + "epoch": 3.43, + "learning_rate": 3.328418230563003e-05, + "loss": 0.136, + "step": 1297 + }, + { + "epoch": 3.43, + "learning_rate": 3.327077747989276e-05, + "loss": 0.1407, + "step": 1298 + }, + { + "epoch": 3.44, + "learning_rate": 3.32573726541555e-05, + "loss": 0.0354, + "step": 1299 + }, + { + "epoch": 3.44, + "learning_rate": 3.324396782841823e-05, + "loss": 0.6141, + "step": 1300 + }, + { + "epoch": 3.44, + "learning_rate": 3.3230563002680966e-05, + "loss": 0.2544, + "step": 1301 + }, + { + "epoch": 3.44, + "learning_rate": 3.32171581769437e-05, + "loss": 0.0046, + "step": 1302 + }, + { + "epoch": 3.45, + "learning_rate": 3.320375335120644e-05, + "loss": 0.0126, + "step": 1303 + }, + { + "epoch": 3.45, + "learning_rate": 3.319034852546917e-05, + "loss": 0.3506, + "step": 1304 + }, + { + "epoch": 3.45, + "learning_rate": 3.317694369973191e-05, + "loss": 0.3512, + "step": 1305 + }, + { + "epoch": 3.46, + "learning_rate": 3.3163538873994635e-05, + "loss": 0.3675, + "step": 1306 + }, + { + "epoch": 3.46, + "learning_rate": 3.315013404825738e-05, + "loss": 0.1676, + "step": 1307 + }, + { + "epoch": 3.46, + "learning_rate": 3.3136729222520106e-05, + "loss": 0.0307, + "step": 1308 + }, + { + "epoch": 3.46, + "learning_rate": 3.312332439678285e-05, + "loss": 0.0084, + "step": 1309 + }, + { + "epoch": 3.47, + "learning_rate": 3.3109919571045576e-05, + "loss": 0.1977, + "step": 1310 + }, + { + "epoch": 3.47, + "learning_rate": 3.309651474530831e-05, + "loss": 0.1645, + "step": 1311 + }, + { + "epoch": 3.47, + "learning_rate": 3.3083109919571046e-05, + "loss": 0.2579, + "step": 1312 + }, + { + "epoch": 3.47, + "learning_rate": 3.306970509383378e-05, + "loss": 0.1656, + "step": 1313 + }, + { + "epoch": 3.48, + "learning_rate": 3.3056300268096516e-05, + "loss": 0.0168, + "step": 1314 + }, + { + "epoch": 3.48, + "learning_rate": 3.304289544235925e-05, + "loss": 0.0291, + "step": 1315 + }, + { + "epoch": 3.48, + "learning_rate": 3.302949061662198e-05, + "loss": 0.0146, + "step": 1316 + }, + { + "epoch": 3.48, + "learning_rate": 3.301608579088472e-05, + "loss": 0.0037, + "step": 1317 + }, + { + "epoch": 3.49, + "learning_rate": 3.300268096514745e-05, + "loss": 0.0113, + "step": 1318 + }, + { + "epoch": 3.49, + "learning_rate": 3.298927613941019e-05, + "loss": 0.0734, + "step": 1319 + }, + { + "epoch": 3.49, + "learning_rate": 3.297587131367292e-05, + "loss": 0.0292, + "step": 1320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2962466487935655e-05, + "loss": 0.3875, + "step": 1321 + }, + { + "epoch": 3.5, + "learning_rate": 3.294906166219839e-05, + "loss": 0.0138, + "step": 1322 + }, + { + "epoch": 3.5, + "learning_rate": 3.2935656836461126e-05, + "loss": 0.4653, + "step": 1323 + }, + { + "epoch": 3.5, + "learning_rate": 3.292225201072386e-05, + "loss": 0.1864, + "step": 1324 + }, + { + "epoch": 3.51, + "learning_rate": 3.2908847184986596e-05, + "loss": 0.0116, + "step": 1325 + }, + { + "epoch": 3.51, + "learning_rate": 3.289544235924933e-05, + "loss": 0.014, + "step": 1326 + }, + { + "epoch": 3.51, + "learning_rate": 3.2882037533512066e-05, + "loss": 0.3344, + "step": 1327 + }, + { + "epoch": 3.51, + "learning_rate": 3.28686327077748e-05, + "loss": 0.1544, + "step": 1328 + }, + { + "epoch": 3.52, + "learning_rate": 3.2855227882037537e-05, + "loss": 0.0065, + "step": 1329 + }, + { + "epoch": 3.52, + "learning_rate": 3.284182305630027e-05, + "loss": 0.0041, + "step": 1330 + }, + { + "epoch": 3.52, + "learning_rate": 3.2828418230563e-05, + "loss": 0.0044, + "step": 1331 + }, + { + "epoch": 3.52, + "learning_rate": 3.281501340482574e-05, + "loss": 0.1808, + "step": 1332 + }, + { + "epoch": 3.53, + "learning_rate": 3.280160857908847e-05, + "loss": 0.0521, + "step": 1333 + }, + { + "epoch": 3.53, + "learning_rate": 3.278820375335121e-05, + "loss": 0.3505, + "step": 1334 + }, + { + "epoch": 3.53, + "learning_rate": 3.277479892761394e-05, + "loss": 0.2032, + "step": 1335 + }, + { + "epoch": 3.53, + "learning_rate": 3.2761394101876676e-05, + "loss": 0.004, + "step": 1336 + }, + { + "epoch": 3.54, + "learning_rate": 3.274798927613941e-05, + "loss": 0.0343, + "step": 1337 + }, + { + "epoch": 3.54, + "learning_rate": 3.2734584450402146e-05, + "loss": 0.278, + "step": 1338 + }, + { + "epoch": 3.54, + "learning_rate": 3.272117962466488e-05, + "loss": 0.0056, + "step": 1339 + }, + { + "epoch": 3.54, + "learning_rate": 3.2707774798927616e-05, + "loss": 0.1673, + "step": 1340 + }, + { + "epoch": 3.55, + "learning_rate": 3.2694369973190345e-05, + "loss": 0.0092, + "step": 1341 + }, + { + "epoch": 3.55, + "learning_rate": 3.2680965147453086e-05, + "loss": 0.0058, + "step": 1342 + }, + { + "epoch": 3.55, + "learning_rate": 3.2667560321715815e-05, + "loss": 0.097, + "step": 1343 + }, + { + "epoch": 3.56, + "learning_rate": 3.265415549597856e-05, + "loss": 0.2138, + "step": 1344 + }, + { + "epoch": 3.56, + "learning_rate": 3.2640750670241285e-05, + "loss": 0.0077, + "step": 1345 + }, + { + "epoch": 3.56, + "learning_rate": 3.262734584450402e-05, + "loss": 0.2294, + "step": 1346 + }, + { + "epoch": 3.56, + "learning_rate": 3.2613941018766755e-05, + "loss": 0.3282, + "step": 1347 + }, + { + "epoch": 3.57, + "learning_rate": 3.260053619302949e-05, + "loss": 0.233, + "step": 1348 + }, + { + "epoch": 3.57, + "learning_rate": 3.2587131367292226e-05, + "loss": 0.0379, + "step": 1349 + }, + { + "epoch": 3.57, + "learning_rate": 3.257372654155496e-05, + "loss": 0.2168, + "step": 1350 + }, + { + "epoch": 3.57, + "learning_rate": 3.2560321715817696e-05, + "loss": 0.0443, + "step": 1351 + }, + { + "epoch": 3.58, + "learning_rate": 3.254691689008043e-05, + "loss": 0.2665, + "step": 1352 + }, + { + "epoch": 3.58, + "learning_rate": 3.2533512064343166e-05, + "loss": 0.0136, + "step": 1353 + }, + { + "epoch": 3.58, + "learning_rate": 3.25201072386059e-05, + "loss": 0.0035, + "step": 1354 + }, + { + "epoch": 3.58, + "learning_rate": 3.2506702412868636e-05, + "loss": 0.2153, + "step": 1355 + }, + { + "epoch": 3.59, + "learning_rate": 3.249329758713137e-05, + "loss": 0.088, + "step": 1356 + }, + { + "epoch": 3.59, + "learning_rate": 3.247989276139411e-05, + "loss": 0.0074, + "step": 1357 + }, + { + "epoch": 3.59, + "learning_rate": 3.2466487935656835e-05, + "loss": 0.0924, + "step": 1358 + }, + { + "epoch": 3.6, + "learning_rate": 3.245308310991958e-05, + "loss": 0.0171, + "step": 1359 + }, + { + "epoch": 3.6, + "learning_rate": 3.2439678284182305e-05, + "loss": 0.0132, + "step": 1360 + }, + { + "epoch": 3.6, + "learning_rate": 3.242627345844505e-05, + "loss": 0.0583, + "step": 1361 + }, + { + "epoch": 3.6, + "learning_rate": 3.2412868632707776e-05, + "loss": 0.0038, + "step": 1362 + }, + { + "epoch": 3.61, + "learning_rate": 3.239946380697051e-05, + "loss": 0.0846, + "step": 1363 + }, + { + "epoch": 3.61, + "learning_rate": 3.2386058981233246e-05, + "loss": 0.0058, + "step": 1364 + }, + { + "epoch": 3.61, + "learning_rate": 3.237265415549598e-05, + "loss": 0.4456, + "step": 1365 + }, + { + "epoch": 3.61, + "learning_rate": 3.2359249329758716e-05, + "loss": 0.0029, + "step": 1366 + }, + { + "epoch": 3.62, + "learning_rate": 3.234584450402145e-05, + "loss": 0.2553, + "step": 1367 + }, + { + "epoch": 3.62, + "learning_rate": 3.233243967828418e-05, + "loss": 0.0936, + "step": 1368 + }, + { + "epoch": 3.62, + "learning_rate": 3.231903485254692e-05, + "loss": 0.1017, + "step": 1369 + }, + { + "epoch": 3.62, + "learning_rate": 3.230563002680965e-05, + "loss": 0.0379, + "step": 1370 + }, + { + "epoch": 3.63, + "learning_rate": 3.229222520107239e-05, + "loss": 0.0069, + "step": 1371 + }, + { + "epoch": 3.63, + "learning_rate": 3.227882037533512e-05, + "loss": 0.3235, + "step": 1372 + }, + { + "epoch": 3.63, + "learning_rate": 3.2265415549597855e-05, + "loss": 0.3796, + "step": 1373 + }, + { + "epoch": 3.63, + "learning_rate": 3.225201072386059e-05, + "loss": 0.3246, + "step": 1374 + }, + { + "epoch": 3.64, + "learning_rate": 3.2238605898123325e-05, + "loss": 0.0059, + "step": 1375 + }, + { + "epoch": 3.64, + "learning_rate": 3.222520107238606e-05, + "loss": 0.0405, + "step": 1376 + }, + { + "epoch": 3.64, + "learning_rate": 3.2211796246648796e-05, + "loss": 0.0142, + "step": 1377 + }, + { + "epoch": 3.65, + "learning_rate": 3.219839142091153e-05, + "loss": 0.4426, + "step": 1378 + }, + { + "epoch": 3.65, + "learning_rate": 3.2184986595174266e-05, + "loss": 0.0249, + "step": 1379 + }, + { + "epoch": 3.65, + "learning_rate": 3.2171581769437e-05, + "loss": 0.1053, + "step": 1380 + }, + { + "epoch": 3.65, + "learning_rate": 3.2158176943699736e-05, + "loss": 0.0179, + "step": 1381 + }, + { + "epoch": 3.66, + "learning_rate": 3.214477211796247e-05, + "loss": 0.0718, + "step": 1382 + }, + { + "epoch": 3.66, + "learning_rate": 3.21313672922252e-05, + "loss": 0.1431, + "step": 1383 + }, + { + "epoch": 3.66, + "learning_rate": 3.211796246648794e-05, + "loss": 0.2391, + "step": 1384 + }, + { + "epoch": 3.66, + "learning_rate": 3.210455764075067e-05, + "loss": 0.0053, + "step": 1385 + }, + { + "epoch": 3.67, + "learning_rate": 3.209115281501341e-05, + "loss": 0.2935, + "step": 1386 + }, + { + "epoch": 3.67, + "learning_rate": 3.207774798927614e-05, + "loss": 0.0071, + "step": 1387 + }, + { + "epoch": 3.67, + "learning_rate": 3.2064343163538875e-05, + "loss": 0.031, + "step": 1388 + }, + { + "epoch": 3.67, + "learning_rate": 3.205093833780161e-05, + "loss": 0.1989, + "step": 1389 + }, + { + "epoch": 3.68, + "learning_rate": 3.2037533512064346e-05, + "loss": 0.0533, + "step": 1390 + }, + { + "epoch": 3.68, + "learning_rate": 3.202412868632708e-05, + "loss": 0.2408, + "step": 1391 + }, + { + "epoch": 3.68, + "learning_rate": 3.2010723860589816e-05, + "loss": 0.3158, + "step": 1392 + }, + { + "epoch": 3.69, + "learning_rate": 3.1997319034852544e-05, + "loss": 0.3629, + "step": 1393 + }, + { + "epoch": 3.69, + "learning_rate": 3.1983914209115286e-05, + "loss": 0.0122, + "step": 1394 + }, + { + "epoch": 3.69, + "learning_rate": 3.1970509383378014e-05, + "loss": 0.0449, + "step": 1395 + }, + { + "epoch": 3.69, + "learning_rate": 3.1957104557640756e-05, + "loss": 0.1273, + "step": 1396 + }, + { + "epoch": 3.7, + "learning_rate": 3.1943699731903485e-05, + "loss": 0.3401, + "step": 1397 + }, + { + "epoch": 3.7, + "learning_rate": 3.193029490616622e-05, + "loss": 0.0183, + "step": 1398 + }, + { + "epoch": 3.7, + "learning_rate": 3.1916890080428955e-05, + "loss": 0.0526, + "step": 1399 + }, + { + "epoch": 3.7, + "learning_rate": 3.190348525469169e-05, + "loss": 0.5037, + "step": 1400 + }, + { + "epoch": 3.71, + "learning_rate": 3.1890080428954425e-05, + "loss": 0.0059, + "step": 1401 + }, + { + "epoch": 3.71, + "learning_rate": 3.187667560321716e-05, + "loss": 0.0266, + "step": 1402 + }, + { + "epoch": 3.71, + "learning_rate": 3.1863270777479896e-05, + "loss": 0.4095, + "step": 1403 + }, + { + "epoch": 3.71, + "learning_rate": 3.184986595174263e-05, + "loss": 0.1802, + "step": 1404 + }, + { + "epoch": 3.72, + "learning_rate": 3.1836461126005366e-05, + "loss": 0.3586, + "step": 1405 + }, + { + "epoch": 3.72, + "learning_rate": 3.18230563002681e-05, + "loss": 0.2058, + "step": 1406 + }, + { + "epoch": 3.72, + "learning_rate": 3.1809651474530836e-05, + "loss": 0.008, + "step": 1407 + }, + { + "epoch": 3.72, + "learning_rate": 3.1796246648793564e-05, + "loss": 0.0282, + "step": 1408 + }, + { + "epoch": 3.73, + "learning_rate": 3.1782841823056306e-05, + "loss": 0.0077, + "step": 1409 + }, + { + "epoch": 3.73, + "learning_rate": 3.1769436997319035e-05, + "loss": 0.3461, + "step": 1410 + }, + { + "epoch": 3.73, + "learning_rate": 3.1756032171581777e-05, + "loss": 0.0038, + "step": 1411 + }, + { + "epoch": 3.74, + "learning_rate": 3.1742627345844505e-05, + "loss": 0.0087, + "step": 1412 + }, + { + "epoch": 3.74, + "learning_rate": 3.172922252010724e-05, + "loss": 0.8254, + "step": 1413 + }, + { + "epoch": 3.74, + "learning_rate": 3.1715817694369975e-05, + "loss": 0.017, + "step": 1414 + }, + { + "epoch": 3.74, + "learning_rate": 3.170241286863271e-05, + "loss": 0.2954, + "step": 1415 + }, + { + "epoch": 3.75, + "learning_rate": 3.1689008042895445e-05, + "loss": 0.0286, + "step": 1416 + }, + { + "epoch": 3.75, + "learning_rate": 3.167560321715818e-05, + "loss": 0.0454, + "step": 1417 + }, + { + "epoch": 3.75, + "learning_rate": 3.166219839142091e-05, + "loss": 0.222, + "step": 1418 + }, + { + "epoch": 3.75, + "learning_rate": 3.164879356568365e-05, + "loss": 0.0225, + "step": 1419 + }, + { + "epoch": 3.76, + "learning_rate": 3.163538873994638e-05, + "loss": 0.2599, + "step": 1420 + }, + { + "epoch": 3.76, + "learning_rate": 3.162198391420912e-05, + "loss": 0.2343, + "step": 1421 + }, + { + "epoch": 3.76, + "learning_rate": 3.160857908847185e-05, + "loss": 0.0274, + "step": 1422 + }, + { + "epoch": 3.76, + "learning_rate": 3.1595174262734585e-05, + "loss": 0.0109, + "step": 1423 + }, + { + "epoch": 3.77, + "learning_rate": 3.158176943699732e-05, + "loss": 0.012, + "step": 1424 + }, + { + "epoch": 3.77, + "learning_rate": 3.1568364611260055e-05, + "loss": 0.0267, + "step": 1425 + }, + { + "epoch": 3.77, + "learning_rate": 3.155495978552279e-05, + "loss": 0.0116, + "step": 1426 + }, + { + "epoch": 3.78, + "learning_rate": 3.1541554959785525e-05, + "loss": 0.2563, + "step": 1427 + }, + { + "epoch": 3.78, + "learning_rate": 3.1528150134048253e-05, + "loss": 0.2149, + "step": 1428 + }, + { + "epoch": 3.78, + "learning_rate": 3.1514745308310995e-05, + "loss": 0.2099, + "step": 1429 + }, + { + "epoch": 3.78, + "learning_rate": 3.1501340482573724e-05, + "loss": 0.1445, + "step": 1430 + }, + { + "epoch": 3.79, + "learning_rate": 3.1487935656836466e-05, + "loss": 0.0069, + "step": 1431 + }, + { + "epoch": 3.79, + "learning_rate": 3.1474530831099194e-05, + "loss": 0.3583, + "step": 1432 + }, + { + "epoch": 3.79, + "learning_rate": 3.146112600536193e-05, + "loss": 0.1112, + "step": 1433 + }, + { + "epoch": 3.79, + "learning_rate": 3.1447721179624664e-05, + "loss": 0.5379, + "step": 1434 + }, + { + "epoch": 3.8, + "learning_rate": 3.14343163538874e-05, + "loss": 0.0248, + "step": 1435 + }, + { + "epoch": 3.8, + "learning_rate": 3.1420911528150135e-05, + "loss": 0.0255, + "step": 1436 + }, + { + "epoch": 3.8, + "learning_rate": 3.140750670241287e-05, + "loss": 0.3363, + "step": 1437 + }, + { + "epoch": 3.8, + "learning_rate": 3.1394101876675605e-05, + "loss": 0.2952, + "step": 1438 + }, + { + "epoch": 3.81, + "learning_rate": 3.138069705093834e-05, + "loss": 0.0337, + "step": 1439 + }, + { + "epoch": 3.81, + "learning_rate": 3.1367292225201075e-05, + "loss": 0.0157, + "step": 1440 + }, + { + "epoch": 3.81, + "learning_rate": 3.135388739946381e-05, + "loss": 0.0204, + "step": 1441 + }, + { + "epoch": 3.81, + "learning_rate": 3.1340482573726545e-05, + "loss": 0.7707, + "step": 1442 + }, + { + "epoch": 3.82, + "learning_rate": 3.1327077747989274e-05, + "loss": 0.4232, + "step": 1443 + }, + { + "epoch": 3.82, + "learning_rate": 3.1313672922252016e-05, + "loss": 0.116, + "step": 1444 + }, + { + "epoch": 3.82, + "learning_rate": 3.1300268096514744e-05, + "loss": 0.421, + "step": 1445 + }, + { + "epoch": 3.83, + "learning_rate": 3.1286863270777486e-05, + "loss": 0.0267, + "step": 1446 + }, + { + "epoch": 3.83, + "learning_rate": 3.1273458445040214e-05, + "loss": 0.0078, + "step": 1447 + }, + { + "epoch": 3.83, + "learning_rate": 3.126005361930295e-05, + "loss": 0.0996, + "step": 1448 + }, + { + "epoch": 3.83, + "learning_rate": 3.1246648793565684e-05, + "loss": 0.0389, + "step": 1449 + }, + { + "epoch": 3.84, + "learning_rate": 3.123324396782842e-05, + "loss": 0.0482, + "step": 1450 + }, + { + "epoch": 3.84, + "learning_rate": 3.1219839142091155e-05, + "loss": 0.0053, + "step": 1451 + }, + { + "epoch": 3.84, + "learning_rate": 3.120643431635389e-05, + "loss": 0.0153, + "step": 1452 + }, + { + "epoch": 3.84, + "learning_rate": 3.119302949061662e-05, + "loss": 0.008, + "step": 1453 + }, + { + "epoch": 3.85, + "learning_rate": 3.117962466487936e-05, + "loss": 0.0166, + "step": 1454 + }, + { + "epoch": 3.85, + "learning_rate": 3.116621983914209e-05, + "loss": 0.0889, + "step": 1455 + }, + { + "epoch": 3.85, + "learning_rate": 3.115281501340483e-05, + "loss": 0.0695, + "step": 1456 + }, + { + "epoch": 3.85, + "learning_rate": 3.113941018766756e-05, + "loss": 0.3353, + "step": 1457 + }, + { + "epoch": 3.86, + "learning_rate": 3.1126005361930294e-05, + "loss": 0.0729, + "step": 1458 + }, + { + "epoch": 3.86, + "learning_rate": 3.111260053619303e-05, + "loss": 0.0187, + "step": 1459 + }, + { + "epoch": 3.86, + "learning_rate": 3.1099195710455764e-05, + "loss": 0.2512, + "step": 1460 + }, + { + "epoch": 3.87, + "learning_rate": 3.10857908847185e-05, + "loss": 0.3837, + "step": 1461 + }, + { + "epoch": 3.87, + "learning_rate": 3.1072386058981234e-05, + "loss": 0.2543, + "step": 1462 + }, + { + "epoch": 3.87, + "learning_rate": 3.105898123324397e-05, + "loss": 0.1797, + "step": 1463 + }, + { + "epoch": 3.87, + "learning_rate": 3.1045576407506705e-05, + "loss": 0.3097, + "step": 1464 + }, + { + "epoch": 3.88, + "learning_rate": 3.103217158176944e-05, + "loss": 0.268, + "step": 1465 + }, + { + "epoch": 3.88, + "learning_rate": 3.1018766756032175e-05, + "loss": 0.1773, + "step": 1466 + }, + { + "epoch": 3.88, + "learning_rate": 3.100536193029491e-05, + "loss": 0.2055, + "step": 1467 + }, + { + "epoch": 3.88, + "learning_rate": 3.099195710455764e-05, + "loss": 0.0279, + "step": 1468 + }, + { + "epoch": 3.89, + "learning_rate": 3.097855227882038e-05, + "loss": 0.1263, + "step": 1469 + }, + { + "epoch": 3.89, + "learning_rate": 3.096514745308311e-05, + "loss": 0.0449, + "step": 1470 + }, + { + "epoch": 3.89, + "learning_rate": 3.095174262734585e-05, + "loss": 0.2429, + "step": 1471 + }, + { + "epoch": 3.89, + "learning_rate": 3.093833780160858e-05, + "loss": 0.1245, + "step": 1472 + }, + { + "epoch": 3.9, + "learning_rate": 3.0924932975871314e-05, + "loss": 0.1303, + "step": 1473 + }, + { + "epoch": 3.9, + "learning_rate": 3.091152815013405e-05, + "loss": 0.0303, + "step": 1474 + }, + { + "epoch": 3.9, + "learning_rate": 3.0898123324396784e-05, + "loss": 0.3279, + "step": 1475 + }, + { + "epoch": 3.9, + "learning_rate": 3.088471849865952e-05, + "loss": 0.134, + "step": 1476 + }, + { + "epoch": 3.91, + "learning_rate": 3.0871313672922255e-05, + "loss": 0.5138, + "step": 1477 + }, + { + "epoch": 3.91, + "learning_rate": 3.085790884718498e-05, + "loss": 0.0476, + "step": 1478 + }, + { + "epoch": 3.91, + "learning_rate": 3.0844504021447725e-05, + "loss": 0.1956, + "step": 1479 + }, + { + "epoch": 3.92, + "learning_rate": 3.083109919571045e-05, + "loss": 0.2061, + "step": 1480 + }, + { + "epoch": 3.92, + "learning_rate": 3.0817694369973195e-05, + "loss": 0.269, + "step": 1481 + }, + { + "epoch": 3.92, + "learning_rate": 3.0804289544235923e-05, + "loss": 0.0708, + "step": 1482 + }, + { + "epoch": 3.92, + "learning_rate": 3.0790884718498665e-05, + "loss": 0.0389, + "step": 1483 + }, + { + "epoch": 3.93, + "learning_rate": 3.0777479892761394e-05, + "loss": 0.2566, + "step": 1484 + }, + { + "epoch": 3.93, + "learning_rate": 3.076407506702413e-05, + "loss": 0.0581, + "step": 1485 + }, + { + "epoch": 3.93, + "learning_rate": 3.0750670241286864e-05, + "loss": 0.1527, + "step": 1486 + }, + { + "epoch": 3.93, + "learning_rate": 3.07372654155496e-05, + "loss": 0.3963, + "step": 1487 + }, + { + "epoch": 3.94, + "learning_rate": 3.0723860589812334e-05, + "loss": 0.2241, + "step": 1488 + }, + { + "epoch": 3.94, + "learning_rate": 3.071045576407507e-05, + "loss": 0.1275, + "step": 1489 + }, + { + "epoch": 3.94, + "learning_rate": 3.0697050938337804e-05, + "loss": 0.3148, + "step": 1490 + }, + { + "epoch": 3.94, + "learning_rate": 3.068364611260054e-05, + "loss": 0.1474, + "step": 1491 + }, + { + "epoch": 3.95, + "learning_rate": 3.0670241286863275e-05, + "loss": 0.0233, + "step": 1492 + }, + { + "epoch": 3.95, + "learning_rate": 3.065683646112601e-05, + "loss": 0.1721, + "step": 1493 + }, + { + "epoch": 3.95, + "learning_rate": 3.0643431635388745e-05, + "loss": 0.6024, + "step": 1494 + }, + { + "epoch": 3.96, + "learning_rate": 3.063002680965147e-05, + "loss": 0.1425, + "step": 1495 + }, + { + "epoch": 3.96, + "learning_rate": 3.0616621983914215e-05, + "loss": 0.0311, + "step": 1496 + }, + { + "epoch": 3.96, + "learning_rate": 3.0603217158176944e-05, + "loss": 0.0197, + "step": 1497 + }, + { + "epoch": 3.96, + "learning_rate": 3.0589812332439686e-05, + "loss": 0.0406, + "step": 1498 + }, + { + "epoch": 3.97, + "learning_rate": 3.0576407506702414e-05, + "loss": 0.054, + "step": 1499 + }, + { + "epoch": 3.97, + "learning_rate": 3.056300268096515e-05, + "loss": 0.161, + "step": 1500 + }, + { + "epoch": 3.97, + "learning_rate": 3.0549597855227884e-05, + "loss": 0.0549, + "step": 1501 + }, + { + "epoch": 3.97, + "learning_rate": 3.053619302949062e-05, + "loss": 0.1667, + "step": 1502 + }, + { + "epoch": 3.98, + "learning_rate": 3.0522788203753354e-05, + "loss": 0.1264, + "step": 1503 + }, + { + "epoch": 3.98, + "learning_rate": 3.0509383378016086e-05, + "loss": 0.0133, + "step": 1504 + }, + { + "epoch": 3.98, + "learning_rate": 3.049597855227882e-05, + "loss": 0.0655, + "step": 1505 + }, + { + "epoch": 3.98, + "learning_rate": 3.0482573726541556e-05, + "loss": 0.1054, + "step": 1506 + }, + { + "epoch": 3.99, + "learning_rate": 3.046916890080429e-05, + "loss": 0.0053, + "step": 1507 + }, + { + "epoch": 3.99, + "learning_rate": 3.0455764075067027e-05, + "loss": 0.0347, + "step": 1508 + }, + { + "epoch": 3.99, + "learning_rate": 3.0442359249329762e-05, + "loss": 0.6095, + "step": 1509 + }, + { + "epoch": 3.99, + "learning_rate": 3.0428954423592494e-05, + "loss": 0.1339, + "step": 1510 + }, + { + "epoch": 4.0, + "learning_rate": 3.0415549597855232e-05, + "loss": 0.0088, + "step": 1511 + }, + { + "epoch": 4.0, + "learning_rate": 3.0402144772117964e-05, + "loss": 0.4356, + "step": 1512 + }, + { + "epoch": 4.0, + "eval_f1": 0.7822580645161291, + "eval_loss": 0.6966613531112671, + "eval_runtime": 1.8703, + "eval_samples_per_second": 808.957, + "eval_steps_per_second": 50.794, + "step": 1512 + }, + { + "epoch": 4.0, + "learning_rate": 3.0388739946380702e-05, + "loss": 0.003, + "step": 1513 + }, + { + "epoch": 4.01, + "learning_rate": 3.0375335120643434e-05, + "loss": 0.0067, + "step": 1514 + }, + { + "epoch": 4.01, + "learning_rate": 3.0361930294906166e-05, + "loss": 0.0488, + "step": 1515 + }, + { + "epoch": 4.01, + "learning_rate": 3.0348525469168904e-05, + "loss": 0.0106, + "step": 1516 + }, + { + "epoch": 4.01, + "learning_rate": 3.0335120643431636e-05, + "loss": 0.0098, + "step": 1517 + }, + { + "epoch": 4.02, + "learning_rate": 3.0321715817694375e-05, + "loss": 0.274, + "step": 1518 + }, + { + "epoch": 4.02, + "learning_rate": 3.0308310991957106e-05, + "loss": 0.2007, + "step": 1519 + }, + { + "epoch": 4.02, + "learning_rate": 3.0294906166219838e-05, + "loss": 0.0121, + "step": 1520 + }, + { + "epoch": 4.02, + "learning_rate": 3.0281501340482577e-05, + "loss": 0.0632, + "step": 1521 + }, + { + "epoch": 4.03, + "learning_rate": 3.026809651474531e-05, + "loss": 0.0062, + "step": 1522 + }, + { + "epoch": 4.03, + "learning_rate": 3.0254691689008047e-05, + "loss": 0.0123, + "step": 1523 + }, + { + "epoch": 4.03, + "learning_rate": 3.024128686327078e-05, + "loss": 0.0063, + "step": 1524 + }, + { + "epoch": 4.03, + "learning_rate": 3.022788203753351e-05, + "loss": 0.0102, + "step": 1525 + }, + { + "epoch": 4.04, + "learning_rate": 3.021447721179625e-05, + "loss": 0.0082, + "step": 1526 + }, + { + "epoch": 4.04, + "learning_rate": 3.020107238605898e-05, + "loss": 0.3369, + "step": 1527 + }, + { + "epoch": 4.04, + "learning_rate": 3.018766756032172e-05, + "loss": 0.2587, + "step": 1528 + }, + { + "epoch": 4.04, + "learning_rate": 3.017426273458445e-05, + "loss": 0.0067, + "step": 1529 + }, + { + "epoch": 4.05, + "learning_rate": 3.0160857908847186e-05, + "loss": 0.0021, + "step": 1530 + }, + { + "epoch": 4.05, + "learning_rate": 3.014745308310992e-05, + "loss": 0.0724, + "step": 1531 + }, + { + "epoch": 4.05, + "learning_rate": 3.0134048257372656e-05, + "loss": 0.0074, + "step": 1532 + }, + { + "epoch": 4.06, + "learning_rate": 3.012064343163539e-05, + "loss": 0.0202, + "step": 1533 + }, + { + "epoch": 4.06, + "learning_rate": 3.0107238605898126e-05, + "loss": 0.1435, + "step": 1534 + }, + { + "epoch": 4.06, + "learning_rate": 3.0093833780160858e-05, + "loss": 0.0074, + "step": 1535 + }, + { + "epoch": 4.06, + "learning_rate": 3.0080428954423597e-05, + "loss": 0.4145, + "step": 1536 + }, + { + "epoch": 4.07, + "learning_rate": 3.006702412868633e-05, + "loss": 0.0186, + "step": 1537 + }, + { + "epoch": 4.07, + "learning_rate": 3.0053619302949067e-05, + "loss": 0.1648, + "step": 1538 + }, + { + "epoch": 4.07, + "learning_rate": 3.00402144772118e-05, + "loss": 0.2545, + "step": 1539 + }, + { + "epoch": 4.07, + "learning_rate": 3.002680965147453e-05, + "loss": 0.0016, + "step": 1540 + }, + { + "epoch": 4.08, + "learning_rate": 3.001340482573727e-05, + "loss": 0.0184, + "step": 1541 + }, + { + "epoch": 4.08, + "learning_rate": 3e-05, + "loss": 0.1208, + "step": 1542 + }, + { + "epoch": 4.08, + "learning_rate": 2.998659517426274e-05, + "loss": 0.0021, + "step": 1543 + }, + { + "epoch": 4.08, + "learning_rate": 2.997319034852547e-05, + "loss": 0.0092, + "step": 1544 + }, + { + "epoch": 4.09, + "learning_rate": 2.9959785522788203e-05, + "loss": 0.1514, + "step": 1545 + }, + { + "epoch": 4.09, + "learning_rate": 2.994638069705094e-05, + "loss": 0.0773, + "step": 1546 + }, + { + "epoch": 4.09, + "learning_rate": 2.9932975871313673e-05, + "loss": 0.0093, + "step": 1547 + }, + { + "epoch": 4.1, + "learning_rate": 2.991957104557641e-05, + "loss": 0.0022, + "step": 1548 + }, + { + "epoch": 4.1, + "learning_rate": 2.9906166219839143e-05, + "loss": 0.1765, + "step": 1549 + }, + { + "epoch": 4.1, + "learning_rate": 2.9892761394101875e-05, + "loss": 0.1766, + "step": 1550 + }, + { + "epoch": 4.1, + "learning_rate": 2.9879356568364614e-05, + "loss": 0.0024, + "step": 1551 + }, + { + "epoch": 4.11, + "learning_rate": 2.9865951742627345e-05, + "loss": 0.012, + "step": 1552 + }, + { + "epoch": 4.11, + "learning_rate": 2.9852546916890084e-05, + "loss": 0.0055, + "step": 1553 + }, + { + "epoch": 4.11, + "learning_rate": 2.9839142091152816e-05, + "loss": 0.0088, + "step": 1554 + }, + { + "epoch": 4.11, + "learning_rate": 2.9825737265415547e-05, + "loss": 0.0019, + "step": 1555 + }, + { + "epoch": 4.12, + "learning_rate": 2.9812332439678286e-05, + "loss": 0.0186, + "step": 1556 + }, + { + "epoch": 4.12, + "learning_rate": 2.9798927613941018e-05, + "loss": 0.25, + "step": 1557 + }, + { + "epoch": 4.12, + "learning_rate": 2.9785522788203756e-05, + "loss": 0.0129, + "step": 1558 + }, + { + "epoch": 4.12, + "learning_rate": 2.9772117962466488e-05, + "loss": 0.0048, + "step": 1559 + }, + { + "epoch": 4.13, + "learning_rate": 2.9758713136729223e-05, + "loss": 0.1153, + "step": 1560 + }, + { + "epoch": 4.13, + "learning_rate": 2.9745308310991958e-05, + "loss": 0.1871, + "step": 1561 + }, + { + "epoch": 4.13, + "learning_rate": 2.9731903485254693e-05, + "loss": 0.0087, + "step": 1562 + }, + { + "epoch": 4.13, + "learning_rate": 2.971849865951743e-05, + "loss": 0.0048, + "step": 1563 + }, + { + "epoch": 4.14, + "learning_rate": 2.9705093833780163e-05, + "loss": 0.026, + "step": 1564 + }, + { + "epoch": 4.14, + "learning_rate": 2.9691689008042895e-05, + "loss": 0.3336, + "step": 1565 + }, + { + "epoch": 4.14, + "learning_rate": 2.9678284182305634e-05, + "loss": 0.0015, + "step": 1566 + }, + { + "epoch": 4.15, + "learning_rate": 2.9664879356568365e-05, + "loss": 0.0044, + "step": 1567 + }, + { + "epoch": 4.15, + "learning_rate": 2.9651474530831104e-05, + "loss": 0.0035, + "step": 1568 + }, + { + "epoch": 4.15, + "learning_rate": 2.9638069705093836e-05, + "loss": 0.1206, + "step": 1569 + }, + { + "epoch": 4.15, + "learning_rate": 2.9624664879356567e-05, + "loss": 0.1247, + "step": 1570 + }, + { + "epoch": 4.16, + "learning_rate": 2.9611260053619306e-05, + "loss": 0.0011, + "step": 1571 + }, + { + "epoch": 4.16, + "learning_rate": 2.9597855227882038e-05, + "loss": 0.0023, + "step": 1572 + }, + { + "epoch": 4.16, + "learning_rate": 2.9584450402144776e-05, + "loss": 0.0014, + "step": 1573 + }, + { + "epoch": 4.16, + "learning_rate": 2.9571045576407508e-05, + "loss": 0.2967, + "step": 1574 + }, + { + "epoch": 4.17, + "learning_rate": 2.955764075067024e-05, + "loss": 0.0373, + "step": 1575 + }, + { + "epoch": 4.17, + "learning_rate": 2.9544235924932978e-05, + "loss": 0.3351, + "step": 1576 + }, + { + "epoch": 4.17, + "learning_rate": 2.953083109919571e-05, + "loss": 0.0025, + "step": 1577 + }, + { + "epoch": 4.17, + "learning_rate": 2.951742627345845e-05, + "loss": 0.0025, + "step": 1578 + }, + { + "epoch": 4.18, + "learning_rate": 2.950402144772118e-05, + "loss": 0.0182, + "step": 1579 + }, + { + "epoch": 4.18, + "learning_rate": 2.9490616621983912e-05, + "loss": 0.001, + "step": 1580 + }, + { + "epoch": 4.18, + "learning_rate": 2.947721179624665e-05, + "loss": 0.003, + "step": 1581 + }, + { + "epoch": 4.19, + "learning_rate": 2.9463806970509382e-05, + "loss": 0.0038, + "step": 1582 + }, + { + "epoch": 4.19, + "learning_rate": 2.945040214477212e-05, + "loss": 0.002, + "step": 1583 + }, + { + "epoch": 4.19, + "learning_rate": 2.9436997319034853e-05, + "loss": 0.1688, + "step": 1584 + }, + { + "epoch": 4.19, + "learning_rate": 2.9423592493297584e-05, + "loss": 0.0014, + "step": 1585 + }, + { + "epoch": 4.2, + "learning_rate": 2.9410187667560323e-05, + "loss": 0.2664, + "step": 1586 + }, + { + "epoch": 4.2, + "learning_rate": 2.9396782841823055e-05, + "loss": 0.0012, + "step": 1587 + }, + { + "epoch": 4.2, + "learning_rate": 2.9383378016085793e-05, + "loss": 0.0022, + "step": 1588 + }, + { + "epoch": 4.2, + "learning_rate": 2.9369973190348525e-05, + "loss": 0.0959, + "step": 1589 + }, + { + "epoch": 4.21, + "learning_rate": 2.935656836461126e-05, + "loss": 0.0839, + "step": 1590 + }, + { + "epoch": 4.21, + "learning_rate": 2.9343163538873995e-05, + "loss": 0.7405, + "step": 1591 + }, + { + "epoch": 4.21, + "learning_rate": 2.932975871313673e-05, + "loss": 0.0351, + "step": 1592 + }, + { + "epoch": 4.21, + "learning_rate": 2.9316353887399465e-05, + "loss": 0.0025, + "step": 1593 + }, + { + "epoch": 4.22, + "learning_rate": 2.93029490616622e-05, + "loss": 0.0054, + "step": 1594 + }, + { + "epoch": 4.22, + "learning_rate": 2.9289544235924932e-05, + "loss": 0.0043, + "step": 1595 + }, + { + "epoch": 4.22, + "learning_rate": 2.927613941018767e-05, + "loss": 0.1828, + "step": 1596 + }, + { + "epoch": 4.22, + "learning_rate": 2.9262734584450402e-05, + "loss": 0.0022, + "step": 1597 + }, + { + "epoch": 4.23, + "learning_rate": 2.924932975871314e-05, + "loss": 0.0051, + "step": 1598 + }, + { + "epoch": 4.23, + "learning_rate": 2.9235924932975873e-05, + "loss": 0.0025, + "step": 1599 + }, + { + "epoch": 4.23, + "learning_rate": 2.9222520107238604e-05, + "loss": 0.0018, + "step": 1600 + }, + { + "epoch": 4.24, + "learning_rate": 2.9209115281501343e-05, + "loss": 0.0348, + "step": 1601 + }, + { + "epoch": 4.24, + "learning_rate": 2.9195710455764075e-05, + "loss": 0.207, + "step": 1602 + }, + { + "epoch": 4.24, + "learning_rate": 2.9182305630026813e-05, + "loss": 0.0249, + "step": 1603 + }, + { + "epoch": 4.24, + "learning_rate": 2.9168900804289545e-05, + "loss": 0.0028, + "step": 1604 + }, + { + "epoch": 4.25, + "learning_rate": 2.9155495978552283e-05, + "loss": 0.2604, + "step": 1605 + }, + { + "epoch": 4.25, + "learning_rate": 2.9142091152815015e-05, + "loss": 0.2808, + "step": 1606 + }, + { + "epoch": 4.25, + "learning_rate": 2.9128686327077747e-05, + "loss": 0.0289, + "step": 1607 + }, + { + "epoch": 4.25, + "learning_rate": 2.9115281501340486e-05, + "loss": 0.005, + "step": 1608 + }, + { + "epoch": 4.26, + "learning_rate": 2.9101876675603217e-05, + "loss": 0.7931, + "step": 1609 + }, + { + "epoch": 4.26, + "learning_rate": 2.9088471849865956e-05, + "loss": 0.335, + "step": 1610 + }, + { + "epoch": 4.26, + "learning_rate": 2.9075067024128688e-05, + "loss": 0.2779, + "step": 1611 + }, + { + "epoch": 4.26, + "learning_rate": 2.906166219839142e-05, + "loss": 0.1649, + "step": 1612 + }, + { + "epoch": 4.27, + "learning_rate": 2.9048257372654158e-05, + "loss": 0.0081, + "step": 1613 + }, + { + "epoch": 4.27, + "learning_rate": 2.903485254691689e-05, + "loss": 0.0638, + "step": 1614 + }, + { + "epoch": 4.27, + "learning_rate": 2.9021447721179628e-05, + "loss": 0.016, + "step": 1615 + }, + { + "epoch": 4.28, + "learning_rate": 2.900804289544236e-05, + "loss": 0.0025, + "step": 1616 + }, + { + "epoch": 4.28, + "learning_rate": 2.8994638069705095e-05, + "loss": 0.0249, + "step": 1617 + }, + { + "epoch": 4.28, + "learning_rate": 2.898123324396783e-05, + "loss": 0.0291, + "step": 1618 + }, + { + "epoch": 4.28, + "learning_rate": 2.8967828418230565e-05, + "loss": 0.1773, + "step": 1619 + }, + { + "epoch": 4.29, + "learning_rate": 2.89544235924933e-05, + "loss": 0.3452, + "step": 1620 + }, + { + "epoch": 4.29, + "learning_rate": 2.8941018766756035e-05, + "loss": 0.006, + "step": 1621 + }, + { + "epoch": 4.29, + "learning_rate": 2.8927613941018767e-05, + "loss": 0.0054, + "step": 1622 + }, + { + "epoch": 4.29, + "learning_rate": 2.8914209115281506e-05, + "loss": 0.1852, + "step": 1623 + }, + { + "epoch": 4.3, + "learning_rate": 2.8900804289544237e-05, + "loss": 0.4424, + "step": 1624 + }, + { + "epoch": 4.3, + "learning_rate": 2.8887399463806976e-05, + "loss": 0.0063, + "step": 1625 + }, + { + "epoch": 4.3, + "learning_rate": 2.8873994638069708e-05, + "loss": 0.43, + "step": 1626 + }, + { + "epoch": 4.3, + "learning_rate": 2.886058981233244e-05, + "loss": 0.2283, + "step": 1627 + }, + { + "epoch": 4.31, + "learning_rate": 2.8847184986595178e-05, + "loss": 0.0519, + "step": 1628 + }, + { + "epoch": 4.31, + "learning_rate": 2.883378016085791e-05, + "loss": 0.1797, + "step": 1629 + }, + { + "epoch": 4.31, + "learning_rate": 2.8820375335120648e-05, + "loss": 0.2569, + "step": 1630 + }, + { + "epoch": 4.31, + "learning_rate": 2.880697050938338e-05, + "loss": 0.0024, + "step": 1631 + }, + { + "epoch": 4.32, + "learning_rate": 2.8793565683646112e-05, + "loss": 0.1727, + "step": 1632 + }, + { + "epoch": 4.32, + "learning_rate": 2.878016085790885e-05, + "loss": 0.0091, + "step": 1633 + }, + { + "epoch": 4.32, + "learning_rate": 2.8766756032171582e-05, + "loss": 0.2002, + "step": 1634 + }, + { + "epoch": 4.33, + "learning_rate": 2.875335120643432e-05, + "loss": 0.0217, + "step": 1635 + }, + { + "epoch": 4.33, + "learning_rate": 2.8739946380697052e-05, + "loss": 0.2163, + "step": 1636 + }, + { + "epoch": 4.33, + "learning_rate": 2.8726541554959784e-05, + "loss": 0.0065, + "step": 1637 + }, + { + "epoch": 4.33, + "learning_rate": 2.8713136729222522e-05, + "loss": 0.1567, + "step": 1638 + }, + { + "epoch": 4.34, + "learning_rate": 2.8699731903485254e-05, + "loss": 0.1775, + "step": 1639 + }, + { + "epoch": 4.34, + "learning_rate": 2.8686327077747993e-05, + "loss": 0.0116, + "step": 1640 + }, + { + "epoch": 4.34, + "learning_rate": 2.8672922252010724e-05, + "loss": 0.0114, + "step": 1641 + }, + { + "epoch": 4.34, + "learning_rate": 2.8659517426273456e-05, + "loss": 0.0264, + "step": 1642 + }, + { + "epoch": 4.35, + "learning_rate": 2.8646112600536195e-05, + "loss": 0.0172, + "step": 1643 + }, + { + "epoch": 4.35, + "learning_rate": 2.8632707774798926e-05, + "loss": 0.187, + "step": 1644 + }, + { + "epoch": 4.35, + "learning_rate": 2.8619302949061665e-05, + "loss": 0.009, + "step": 1645 + }, + { + "epoch": 4.35, + "learning_rate": 2.8605898123324397e-05, + "loss": 0.014, + "step": 1646 + }, + { + "epoch": 4.36, + "learning_rate": 2.8592493297587132e-05, + "loss": 0.1643, + "step": 1647 + }, + { + "epoch": 4.36, + "learning_rate": 2.8579088471849867e-05, + "loss": 0.2763, + "step": 1648 + }, + { + "epoch": 4.36, + "learning_rate": 2.8565683646112602e-05, + "loss": 0.0641, + "step": 1649 + }, + { + "epoch": 4.37, + "learning_rate": 2.8552278820375337e-05, + "loss": 0.6128, + "step": 1650 + }, + { + "epoch": 4.37, + "learning_rate": 2.8538873994638072e-05, + "loss": 0.0229, + "step": 1651 + }, + { + "epoch": 4.37, + "learning_rate": 2.8525469168900804e-05, + "loss": 0.0344, + "step": 1652 + }, + { + "epoch": 4.37, + "learning_rate": 2.8512064343163543e-05, + "loss": 0.018, + "step": 1653 + }, + { + "epoch": 4.38, + "learning_rate": 2.8498659517426274e-05, + "loss": 0.191, + "step": 1654 + }, + { + "epoch": 4.38, + "learning_rate": 2.8485254691689013e-05, + "loss": 0.0397, + "step": 1655 + }, + { + "epoch": 4.38, + "learning_rate": 2.8471849865951745e-05, + "loss": 0.0029, + "step": 1656 + }, + { + "epoch": 4.38, + "learning_rate": 2.8458445040214476e-05, + "loss": 0.0034, + "step": 1657 + }, + { + "epoch": 4.39, + "learning_rate": 2.8445040214477215e-05, + "loss": 0.0031, + "step": 1658 + }, + { + "epoch": 4.39, + "learning_rate": 2.8431635388739947e-05, + "loss": 0.4272, + "step": 1659 + }, + { + "epoch": 4.39, + "learning_rate": 2.8418230563002685e-05, + "loss": 0.0042, + "step": 1660 + }, + { + "epoch": 4.39, + "learning_rate": 2.8404825737265417e-05, + "loss": 0.0224, + "step": 1661 + }, + { + "epoch": 4.4, + "learning_rate": 2.839142091152815e-05, + "loss": 0.1021, + "step": 1662 + }, + { + "epoch": 4.4, + "learning_rate": 2.8378016085790887e-05, + "loss": 0.0076, + "step": 1663 + }, + { + "epoch": 4.4, + "learning_rate": 2.836461126005362e-05, + "loss": 0.084, + "step": 1664 + }, + { + "epoch": 4.4, + "learning_rate": 2.8351206434316357e-05, + "loss": 0.0321, + "step": 1665 + }, + { + "epoch": 4.41, + "learning_rate": 2.833780160857909e-05, + "loss": 0.1369, + "step": 1666 + }, + { + "epoch": 4.41, + "learning_rate": 2.832439678284182e-05, + "loss": 0.018, + "step": 1667 + }, + { + "epoch": 4.41, + "learning_rate": 2.831099195710456e-05, + "loss": 0.1886, + "step": 1668 + }, + { + "epoch": 4.42, + "learning_rate": 2.829758713136729e-05, + "loss": 0.0016, + "step": 1669 + }, + { + "epoch": 4.42, + "learning_rate": 2.828418230563003e-05, + "loss": 0.0031, + "step": 1670 + }, + { + "epoch": 4.42, + "learning_rate": 2.827077747989276e-05, + "loss": 0.0043, + "step": 1671 + }, + { + "epoch": 4.42, + "learning_rate": 2.8257372654155497e-05, + "loss": 0.1202, + "step": 1672 + }, + { + "epoch": 4.43, + "learning_rate": 2.8243967828418232e-05, + "loss": 0.1409, + "step": 1673 + }, + { + "epoch": 4.43, + "learning_rate": 2.8230563002680967e-05, + "loss": 0.0821, + "step": 1674 + }, + { + "epoch": 4.43, + "learning_rate": 2.8217158176943702e-05, + "loss": 0.0468, + "step": 1675 + }, + { + "epoch": 4.43, + "learning_rate": 2.8203753351206437e-05, + "loss": 0.0559, + "step": 1676 + }, + { + "epoch": 4.44, + "learning_rate": 2.819034852546917e-05, + "loss": 0.0192, + "step": 1677 + }, + { + "epoch": 4.44, + "learning_rate": 2.8176943699731907e-05, + "loss": 0.0024, + "step": 1678 + }, + { + "epoch": 4.44, + "learning_rate": 2.816353887399464e-05, + "loss": 0.0021, + "step": 1679 + }, + { + "epoch": 4.44, + "learning_rate": 2.8150134048257378e-05, + "loss": 0.0139, + "step": 1680 + }, + { + "epoch": 4.45, + "learning_rate": 2.813672922252011e-05, + "loss": 0.0042, + "step": 1681 + }, + { + "epoch": 4.45, + "learning_rate": 2.812332439678284e-05, + "loss": 0.1666, + "step": 1682 + }, + { + "epoch": 4.45, + "learning_rate": 2.810991957104558e-05, + "loss": 0.5925, + "step": 1683 + }, + { + "epoch": 4.46, + "learning_rate": 2.809651474530831e-05, + "loss": 0.1689, + "step": 1684 + }, + { + "epoch": 4.46, + "learning_rate": 2.808310991957105e-05, + "loss": 0.0053, + "step": 1685 + }, + { + "epoch": 4.46, + "learning_rate": 2.806970509383378e-05, + "loss": 0.0019, + "step": 1686 + }, + { + "epoch": 4.46, + "learning_rate": 2.8056300268096513e-05, + "loss": 0.0632, + "step": 1687 + }, + { + "epoch": 4.47, + "learning_rate": 2.8042895442359252e-05, + "loss": 0.0115, + "step": 1688 + }, + { + "epoch": 4.47, + "learning_rate": 2.8029490616621984e-05, + "loss": 0.002, + "step": 1689 + }, + { + "epoch": 4.47, + "learning_rate": 2.8016085790884722e-05, + "loss": 0.0021, + "step": 1690 + }, + { + "epoch": 4.47, + "learning_rate": 2.8002680965147454e-05, + "loss": 0.0079, + "step": 1691 + }, + { + "epoch": 4.48, + "learning_rate": 2.7989276139410186e-05, + "loss": 0.0016, + "step": 1692 + }, + { + "epoch": 4.48, + "learning_rate": 2.7975871313672924e-05, + "loss": 0.1824, + "step": 1693 + }, + { + "epoch": 4.48, + "learning_rate": 2.7962466487935656e-05, + "loss": 0.1025, + "step": 1694 + }, + { + "epoch": 4.48, + "learning_rate": 2.7949061662198394e-05, + "loss": 0.4274, + "step": 1695 + }, + { + "epoch": 4.49, + "learning_rate": 2.7935656836461126e-05, + "loss": 0.0834, + "step": 1696 + }, + { + "epoch": 4.49, + "learning_rate": 2.7922252010723858e-05, + "loss": 0.6412, + "step": 1697 + }, + { + "epoch": 4.49, + "learning_rate": 2.7908847184986596e-05, + "loss": 0.3051, + "step": 1698 + }, + { + "epoch": 4.49, + "learning_rate": 2.7895442359249328e-05, + "loss": 0.0909, + "step": 1699 + }, + { + "epoch": 4.5, + "learning_rate": 2.7882037533512067e-05, + "loss": 0.2655, + "step": 1700 + }, + { + "epoch": 4.5, + "learning_rate": 2.78686327077748e-05, + "loss": 0.305, + "step": 1701 + }, + { + "epoch": 4.5, + "learning_rate": 2.7855227882037534e-05, + "loss": 0.2733, + "step": 1702 + }, + { + "epoch": 4.51, + "learning_rate": 2.784182305630027e-05, + "loss": 0.0021, + "step": 1703 + }, + { + "epoch": 4.51, + "learning_rate": 2.7828418230563004e-05, + "loss": 0.0072, + "step": 1704 + }, + { + "epoch": 4.51, + "learning_rate": 2.781501340482574e-05, + "loss": 0.0027, + "step": 1705 + }, + { + "epoch": 4.51, + "learning_rate": 2.7801608579088474e-05, + "loss": 0.184, + "step": 1706 + }, + { + "epoch": 4.52, + "learning_rate": 2.7788203753351206e-05, + "loss": 0.0143, + "step": 1707 + }, + { + "epoch": 4.52, + "learning_rate": 2.7774798927613944e-05, + "loss": 0.0297, + "step": 1708 + }, + { + "epoch": 4.52, + "learning_rate": 2.7761394101876676e-05, + "loss": 0.0739, + "step": 1709 + }, + { + "epoch": 4.52, + "learning_rate": 2.7747989276139415e-05, + "loss": 0.0188, + "step": 1710 + }, + { + "epoch": 4.53, + "learning_rate": 2.7734584450402146e-05, + "loss": 0.2487, + "step": 1711 + }, + { + "epoch": 4.53, + "learning_rate": 2.7721179624664878e-05, + "loss": 0.0222, + "step": 1712 + }, + { + "epoch": 4.53, + "learning_rate": 2.7707774798927617e-05, + "loss": 0.0041, + "step": 1713 + }, + { + "epoch": 4.53, + "learning_rate": 2.769436997319035e-05, + "loss": 0.0164, + "step": 1714 + }, + { + "epoch": 4.54, + "learning_rate": 2.7680965147453087e-05, + "loss": 0.0985, + "step": 1715 + }, + { + "epoch": 4.54, + "learning_rate": 2.766756032171582e-05, + "loss": 0.0067, + "step": 1716 + }, + { + "epoch": 4.54, + "learning_rate": 2.765415549597855e-05, + "loss": 0.3304, + "step": 1717 + }, + { + "epoch": 4.54, + "learning_rate": 2.764075067024129e-05, + "loss": 0.006, + "step": 1718 + }, + { + "epoch": 4.55, + "learning_rate": 2.762734584450402e-05, + "loss": 0.0142, + "step": 1719 + }, + { + "epoch": 4.55, + "learning_rate": 2.761394101876676e-05, + "loss": 0.2205, + "step": 1720 + }, + { + "epoch": 4.55, + "learning_rate": 2.760053619302949e-05, + "loss": 0.298, + "step": 1721 + }, + { + "epoch": 4.56, + "learning_rate": 2.7587131367292223e-05, + "loss": 0.0041, + "step": 1722 + }, + { + "epoch": 4.56, + "learning_rate": 2.757372654155496e-05, + "loss": 0.0018, + "step": 1723 + }, + { + "epoch": 4.56, + "learning_rate": 2.7560321715817693e-05, + "loss": 0.0185, + "step": 1724 + }, + { + "epoch": 4.56, + "learning_rate": 2.754691689008043e-05, + "loss": 0.0042, + "step": 1725 + }, + { + "epoch": 4.57, + "learning_rate": 2.7533512064343163e-05, + "loss": 0.036, + "step": 1726 + }, + { + "epoch": 4.57, + "learning_rate": 2.7520107238605898e-05, + "loss": 0.2593, + "step": 1727 + }, + { + "epoch": 4.57, + "learning_rate": 2.7506702412868633e-05, + "loss": 0.0062, + "step": 1728 + }, + { + "epoch": 4.57, + "learning_rate": 2.749329758713137e-05, + "loss": 0.1759, + "step": 1729 + }, + { + "epoch": 4.58, + "learning_rate": 2.7479892761394104e-05, + "loss": 0.0202, + "step": 1730 + }, + { + "epoch": 4.58, + "learning_rate": 2.746648793565684e-05, + "loss": 0.2156, + "step": 1731 + }, + { + "epoch": 4.58, + "learning_rate": 2.7453083109919574e-05, + "loss": 0.4112, + "step": 1732 + }, + { + "epoch": 4.58, + "learning_rate": 2.743967828418231e-05, + "loss": 0.0037, + "step": 1733 + }, + { + "epoch": 4.59, + "learning_rate": 2.742627345844504e-05, + "loss": 0.0186, + "step": 1734 + }, + { + "epoch": 4.59, + "learning_rate": 2.741286863270778e-05, + "loss": 0.0117, + "step": 1735 + }, + { + "epoch": 4.59, + "learning_rate": 2.739946380697051e-05, + "loss": 0.0039, + "step": 1736 + }, + { + "epoch": 4.6, + "learning_rate": 2.738605898123325e-05, + "loss": 0.1185, + "step": 1737 + }, + { + "epoch": 4.6, + "learning_rate": 2.737265415549598e-05, + "loss": 0.0276, + "step": 1738 + }, + { + "epoch": 4.6, + "learning_rate": 2.7359249329758713e-05, + "loss": 0.0041, + "step": 1739 + }, + { + "epoch": 4.6, + "learning_rate": 2.734584450402145e-05, + "loss": 0.0133, + "step": 1740 + }, + { + "epoch": 4.61, + "learning_rate": 2.7332439678284183e-05, + "loss": 0.1042, + "step": 1741 + }, + { + "epoch": 4.61, + "learning_rate": 2.7319034852546922e-05, + "loss": 0.0023, + "step": 1742 + }, + { + "epoch": 4.61, + "learning_rate": 2.7305630026809654e-05, + "loss": 0.1586, + "step": 1743 + }, + { + "epoch": 4.61, + "learning_rate": 2.7292225201072385e-05, + "loss": 0.0258, + "step": 1744 + }, + { + "epoch": 4.62, + "learning_rate": 2.7278820375335124e-05, + "loss": 0.1119, + "step": 1745 + }, + { + "epoch": 4.62, + "learning_rate": 2.7265415549597856e-05, + "loss": 0.1115, + "step": 1746 + }, + { + "epoch": 4.62, + "learning_rate": 2.7252010723860594e-05, + "loss": 0.4607, + "step": 1747 + }, + { + "epoch": 4.62, + "learning_rate": 2.7238605898123326e-05, + "loss": 0.0296, + "step": 1748 + }, + { + "epoch": 4.63, + "learning_rate": 2.7225201072386058e-05, + "loss": 0.0277, + "step": 1749 + }, + { + "epoch": 4.63, + "learning_rate": 2.7211796246648796e-05, + "loss": 0.0777, + "step": 1750 + }, + { + "epoch": 4.63, + "learning_rate": 2.7198391420911528e-05, + "loss": 0.0031, + "step": 1751 + }, + { + "epoch": 4.63, + "learning_rate": 2.7184986595174266e-05, + "loss": 0.2238, + "step": 1752 + }, + { + "epoch": 4.64, + "learning_rate": 2.7171581769436998e-05, + "loss": 0.0409, + "step": 1753 + }, + { + "epoch": 4.64, + "learning_rate": 2.715817694369973e-05, + "loss": 0.0032, + "step": 1754 + }, + { + "epoch": 4.64, + "learning_rate": 2.714477211796247e-05, + "loss": 0.0113, + "step": 1755 + }, + { + "epoch": 4.65, + "learning_rate": 2.71313672922252e-05, + "loss": 0.0204, + "step": 1756 + }, + { + "epoch": 4.65, + "learning_rate": 2.711796246648794e-05, + "loss": 0.0022, + "step": 1757 + }, + { + "epoch": 4.65, + "learning_rate": 2.710455764075067e-05, + "loss": 0.0018, + "step": 1758 + }, + { + "epoch": 4.65, + "learning_rate": 2.7091152815013406e-05, + "loss": 0.263, + "step": 1759 + }, + { + "epoch": 4.66, + "learning_rate": 2.707774798927614e-05, + "loss": 0.0109, + "step": 1760 + }, + { + "epoch": 4.66, + "learning_rate": 2.7064343163538876e-05, + "loss": 0.0653, + "step": 1761 + }, + { + "epoch": 4.66, + "learning_rate": 2.705093833780161e-05, + "loss": 0.0116, + "step": 1762 + }, + { + "epoch": 4.66, + "learning_rate": 2.7037533512064346e-05, + "loss": 0.0063, + "step": 1763 + }, + { + "epoch": 4.67, + "learning_rate": 2.7024128686327078e-05, + "loss": 0.0034, + "step": 1764 + }, + { + "epoch": 4.67, + "learning_rate": 2.7010723860589816e-05, + "loss": 0.0395, + "step": 1765 + }, + { + "epoch": 4.67, + "learning_rate": 2.6997319034852548e-05, + "loss": 0.0014, + "step": 1766 + }, + { + "epoch": 4.67, + "learning_rate": 2.6983914209115287e-05, + "loss": 0.0057, + "step": 1767 + }, + { + "epoch": 4.68, + "learning_rate": 2.697050938337802e-05, + "loss": 0.0018, + "step": 1768 + }, + { + "epoch": 4.68, + "learning_rate": 2.695710455764075e-05, + "loss": 0.012, + "step": 1769 + }, + { + "epoch": 4.68, + "learning_rate": 2.694369973190349e-05, + "loss": 0.0017, + "step": 1770 + }, + { + "epoch": 4.69, + "learning_rate": 2.693029490616622e-05, + "loss": 0.0654, + "step": 1771 + }, + { + "epoch": 4.69, + "learning_rate": 2.691689008042896e-05, + "loss": 0.8002, + "step": 1772 + }, + { + "epoch": 4.69, + "learning_rate": 2.690348525469169e-05, + "loss": 0.0035, + "step": 1773 + }, + { + "epoch": 4.69, + "learning_rate": 2.6890080428954422e-05, + "loss": 0.0051, + "step": 1774 + }, + { + "epoch": 4.7, + "learning_rate": 2.687667560321716e-05, + "loss": 0.0031, + "step": 1775 + }, + { + "epoch": 4.7, + "learning_rate": 2.6863270777479893e-05, + "loss": 0.0142, + "step": 1776 + }, + { + "epoch": 4.7, + "learning_rate": 2.684986595174263e-05, + "loss": 0.0009, + "step": 1777 + }, + { + "epoch": 4.7, + "learning_rate": 2.6836461126005363e-05, + "loss": 0.0015, + "step": 1778 + }, + { + "epoch": 4.71, + "learning_rate": 2.6823056300268095e-05, + "loss": 0.3481, + "step": 1779 + }, + { + "epoch": 4.71, + "learning_rate": 2.6809651474530833e-05, + "loss": 0.3095, + "step": 1780 + }, + { + "epoch": 4.71, + "learning_rate": 2.6796246648793565e-05, + "loss": 0.2567, + "step": 1781 + }, + { + "epoch": 4.71, + "learning_rate": 2.6782841823056303e-05, + "loss": 0.0037, + "step": 1782 + }, + { + "epoch": 4.72, + "learning_rate": 2.6769436997319035e-05, + "loss": 0.001, + "step": 1783 + }, + { + "epoch": 4.72, + "learning_rate": 2.675603217158177e-05, + "loss": 0.0065, + "step": 1784 + }, + { + "epoch": 4.72, + "learning_rate": 2.6742627345844505e-05, + "loss": 0.0029, + "step": 1785 + }, + { + "epoch": 4.72, + "learning_rate": 2.672922252010724e-05, + "loss": 0.6096, + "step": 1786 + }, + { + "epoch": 4.73, + "learning_rate": 2.6715817694369976e-05, + "loss": 0.0127, + "step": 1787 + }, + { + "epoch": 4.73, + "learning_rate": 2.670241286863271e-05, + "loss": 0.0031, + "step": 1788 + }, + { + "epoch": 4.73, + "learning_rate": 2.6689008042895443e-05, + "loss": 0.2463, + "step": 1789 + }, + { + "epoch": 4.74, + "learning_rate": 2.667560321715818e-05, + "loss": 0.1022, + "step": 1790 + }, + { + "epoch": 4.74, + "learning_rate": 2.6662198391420913e-05, + "loss": 0.002, + "step": 1791 + }, + { + "epoch": 4.74, + "learning_rate": 2.664879356568365e-05, + "loss": 0.1576, + "step": 1792 + }, + { + "epoch": 4.74, + "learning_rate": 2.6635388739946383e-05, + "loss": 0.1099, + "step": 1793 + }, + { + "epoch": 4.75, + "learning_rate": 2.6621983914209115e-05, + "loss": 0.1482, + "step": 1794 + }, + { + "epoch": 4.75, + "learning_rate": 2.6608579088471853e-05, + "loss": 0.0007, + "step": 1795 + }, + { + "epoch": 4.75, + "learning_rate": 2.6595174262734585e-05, + "loss": 0.0009, + "step": 1796 + }, + { + "epoch": 4.75, + "learning_rate": 2.6581769436997324e-05, + "loss": 0.005, + "step": 1797 + }, + { + "epoch": 4.76, + "learning_rate": 2.6568364611260055e-05, + "loss": 0.1808, + "step": 1798 + }, + { + "epoch": 4.76, + "learning_rate": 2.6554959785522787e-05, + "loss": 0.0351, + "step": 1799 + }, + { + "epoch": 4.76, + "learning_rate": 2.6541554959785526e-05, + "loss": 0.2555, + "step": 1800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6528150134048257e-05, + "loss": 0.2236, + "step": 1801 + }, + { + "epoch": 4.77, + "learning_rate": 2.6514745308310996e-05, + "loss": 0.3208, + "step": 1802 + }, + { + "epoch": 4.77, + "learning_rate": 2.6501340482573728e-05, + "loss": 0.0202, + "step": 1803 + }, + { + "epoch": 4.77, + "learning_rate": 2.648793565683646e-05, + "loss": 0.0033, + "step": 1804 + }, + { + "epoch": 4.78, + "learning_rate": 2.6474530831099198e-05, + "loss": 0.001, + "step": 1805 + }, + { + "epoch": 4.78, + "learning_rate": 2.646112600536193e-05, + "loss": 0.0019, + "step": 1806 + }, + { + "epoch": 4.78, + "learning_rate": 2.6447721179624668e-05, + "loss": 0.0027, + "step": 1807 + }, + { + "epoch": 4.78, + "learning_rate": 2.64343163538874e-05, + "loss": 0.0051, + "step": 1808 + }, + { + "epoch": 4.79, + "learning_rate": 2.642091152815013e-05, + "loss": 0.1994, + "step": 1809 + }, + { + "epoch": 4.79, + "learning_rate": 2.640750670241287e-05, + "loss": 0.0372, + "step": 1810 + }, + { + "epoch": 4.79, + "learning_rate": 2.6394101876675602e-05, + "loss": 0.0678, + "step": 1811 + }, + { + "epoch": 4.79, + "learning_rate": 2.638069705093834e-05, + "loss": 0.0252, + "step": 1812 + }, + { + "epoch": 4.8, + "learning_rate": 2.6367292225201072e-05, + "loss": 0.0065, + "step": 1813 + }, + { + "epoch": 4.8, + "learning_rate": 2.6353887399463807e-05, + "loss": 0.0045, + "step": 1814 + }, + { + "epoch": 4.8, + "learning_rate": 2.6340482573726542e-05, + "loss": 0.0037, + "step": 1815 + }, + { + "epoch": 4.8, + "learning_rate": 2.6327077747989277e-05, + "loss": 0.0251, + "step": 1816 + }, + { + "epoch": 4.81, + "learning_rate": 2.6313672922252013e-05, + "loss": 0.4196, + "step": 1817 + }, + { + "epoch": 4.81, + "learning_rate": 2.6300268096514748e-05, + "loss": 0.0071, + "step": 1818 + }, + { + "epoch": 4.81, + "learning_rate": 2.628686327077748e-05, + "loss": 0.0787, + "step": 1819 + }, + { + "epoch": 4.81, + "learning_rate": 2.6273458445040218e-05, + "loss": 0.0145, + "step": 1820 + }, + { + "epoch": 4.82, + "learning_rate": 2.626005361930295e-05, + "loss": 0.009, + "step": 1821 + }, + { + "epoch": 4.82, + "learning_rate": 2.6246648793565688e-05, + "loss": 0.0027, + "step": 1822 + }, + { + "epoch": 4.82, + "learning_rate": 2.623324396782842e-05, + "loss": 0.0017, + "step": 1823 + }, + { + "epoch": 4.83, + "learning_rate": 2.6219839142091152e-05, + "loss": 0.4824, + "step": 1824 + }, + { + "epoch": 4.83, + "learning_rate": 2.620643431635389e-05, + "loss": 0.0022, + "step": 1825 + }, + { + "epoch": 4.83, + "learning_rate": 2.6193029490616622e-05, + "loss": 0.3223, + "step": 1826 + }, + { + "epoch": 4.83, + "learning_rate": 2.617962466487936e-05, + "loss": 0.2195, + "step": 1827 + }, + { + "epoch": 4.84, + "learning_rate": 2.6166219839142092e-05, + "loss": 0.0013, + "step": 1828 + }, + { + "epoch": 4.84, + "learning_rate": 2.6152815013404824e-05, + "loss": 0.0343, + "step": 1829 + }, + { + "epoch": 4.84, + "learning_rate": 2.6139410187667563e-05, + "loss": 0.0022, + "step": 1830 + }, + { + "epoch": 4.84, + "learning_rate": 2.6126005361930294e-05, + "loss": 0.0022, + "step": 1831 + }, + { + "epoch": 4.85, + "learning_rate": 2.6112600536193033e-05, + "loss": 0.4116, + "step": 1832 + }, + { + "epoch": 4.85, + "learning_rate": 2.6099195710455765e-05, + "loss": 0.0048, + "step": 1833 + }, + { + "epoch": 4.85, + "learning_rate": 2.6085790884718496e-05, + "loss": 0.5819, + "step": 1834 + }, + { + "epoch": 4.85, + "learning_rate": 2.6072386058981235e-05, + "loss": 0.1985, + "step": 1835 + }, + { + "epoch": 4.86, + "learning_rate": 2.6058981233243967e-05, + "loss": 0.0989, + "step": 1836 + }, + { + "epoch": 4.86, + "learning_rate": 2.6045576407506705e-05, + "loss": 0.341, + "step": 1837 + }, + { + "epoch": 4.86, + "learning_rate": 2.6032171581769437e-05, + "loss": 0.0044, + "step": 1838 + }, + { + "epoch": 4.87, + "learning_rate": 2.601876675603217e-05, + "loss": 0.004, + "step": 1839 + }, + { + "epoch": 4.87, + "learning_rate": 2.6005361930294907e-05, + "loss": 0.2858, + "step": 1840 + }, + { + "epoch": 4.87, + "learning_rate": 2.599195710455764e-05, + "loss": 0.0009, + "step": 1841 + }, + { + "epoch": 4.87, + "learning_rate": 2.5978552278820377e-05, + "loss": 0.0042, + "step": 1842 + }, + { + "epoch": 4.88, + "learning_rate": 2.596514745308311e-05, + "loss": 0.0045, + "step": 1843 + }, + { + "epoch": 4.88, + "learning_rate": 2.5951742627345844e-05, + "loss": 0.0144, + "step": 1844 + }, + { + "epoch": 4.88, + "learning_rate": 2.593833780160858e-05, + "loss": 0.0084, + "step": 1845 + }, + { + "epoch": 4.88, + "learning_rate": 2.5924932975871314e-05, + "loss": 0.4276, + "step": 1846 + }, + { + "epoch": 4.89, + "learning_rate": 2.591152815013405e-05, + "loss": 0.0122, + "step": 1847 + }, + { + "epoch": 4.89, + "learning_rate": 2.5898123324396785e-05, + "loss": 0.0776, + "step": 1848 + }, + { + "epoch": 4.89, + "learning_rate": 2.5884718498659516e-05, + "loss": 0.0117, + "step": 1849 + }, + { + "epoch": 4.89, + "learning_rate": 2.5871313672922255e-05, + "loss": 0.2809, + "step": 1850 + }, + { + "epoch": 4.9, + "learning_rate": 2.5857908847184987e-05, + "loss": 0.0413, + "step": 1851 + }, + { + "epoch": 4.9, + "learning_rate": 2.5844504021447725e-05, + "loss": 0.0187, + "step": 1852 + }, + { + "epoch": 4.9, + "learning_rate": 2.5831099195710457e-05, + "loss": 0.452, + "step": 1853 + }, + { + "epoch": 4.9, + "learning_rate": 2.5817694369973195e-05, + "loss": 0.0206, + "step": 1854 + }, + { + "epoch": 4.91, + "learning_rate": 2.5804289544235927e-05, + "loss": 0.1639, + "step": 1855 + }, + { + "epoch": 4.91, + "learning_rate": 2.579088471849866e-05, + "loss": 0.1865, + "step": 1856 + }, + { + "epoch": 4.91, + "learning_rate": 2.5777479892761398e-05, + "loss": 0.0022, + "step": 1857 + }, + { + "epoch": 4.92, + "learning_rate": 2.576407506702413e-05, + "loss": 0.1167, + "step": 1858 + }, + { + "epoch": 4.92, + "learning_rate": 2.5750670241286868e-05, + "loss": 0.4013, + "step": 1859 + }, + { + "epoch": 4.92, + "learning_rate": 2.57372654155496e-05, + "loss": 0.2355, + "step": 1860 + }, + { + "epoch": 4.92, + "learning_rate": 2.572386058981233e-05, + "loss": 0.0076, + "step": 1861 + }, + { + "epoch": 4.93, + "learning_rate": 2.571045576407507e-05, + "loss": 0.1612, + "step": 1862 + }, + { + "epoch": 4.93, + "learning_rate": 2.56970509383378e-05, + "loss": 0.0047, + "step": 1863 + }, + { + "epoch": 4.93, + "learning_rate": 2.568364611260054e-05, + "loss": 0.1511, + "step": 1864 + }, + { + "epoch": 4.93, + "learning_rate": 2.5670241286863272e-05, + "loss": 0.011, + "step": 1865 + }, + { + "epoch": 4.94, + "learning_rate": 2.5656836461126004e-05, + "loss": 0.1761, + "step": 1866 + }, + { + "epoch": 4.94, + "learning_rate": 2.5643431635388742e-05, + "loss": 0.004, + "step": 1867 + }, + { + "epoch": 4.94, + "learning_rate": 2.5630026809651474e-05, + "loss": 0.0036, + "step": 1868 + }, + { + "epoch": 4.94, + "learning_rate": 2.5616621983914212e-05, + "loss": 0.4345, + "step": 1869 + }, + { + "epoch": 4.95, + "learning_rate": 2.5603217158176944e-05, + "loss": 0.0034, + "step": 1870 + }, + { + "epoch": 4.95, + "learning_rate": 2.558981233243968e-05, + "loss": 0.1269, + "step": 1871 + }, + { + "epoch": 4.95, + "learning_rate": 2.5576407506702414e-05, + "loss": 0.183, + "step": 1872 + }, + { + "epoch": 4.96, + "learning_rate": 2.556300268096515e-05, + "loss": 0.008, + "step": 1873 + }, + { + "epoch": 4.96, + "learning_rate": 2.5549597855227885e-05, + "loss": 0.0035, + "step": 1874 + }, + { + "epoch": 4.96, + "learning_rate": 2.553619302949062e-05, + "loss": 0.0133, + "step": 1875 + }, + { + "epoch": 4.96, + "learning_rate": 2.552278820375335e-05, + "loss": 0.2156, + "step": 1876 + }, + { + "epoch": 4.97, + "learning_rate": 2.550938337801609e-05, + "loss": 0.0043, + "step": 1877 + }, + { + "epoch": 4.97, + "learning_rate": 2.549597855227882e-05, + "loss": 0.2614, + "step": 1878 + }, + { + "epoch": 4.97, + "learning_rate": 2.548257372654156e-05, + "loss": 0.0208, + "step": 1879 + }, + { + "epoch": 4.97, + "learning_rate": 2.5469168900804292e-05, + "loss": 0.0228, + "step": 1880 + }, + { + "epoch": 4.98, + "learning_rate": 2.5455764075067024e-05, + "loss": 0.0105, + "step": 1881 + }, + { + "epoch": 4.98, + "learning_rate": 2.5442359249329762e-05, + "loss": 0.0108, + "step": 1882 + }, + { + "epoch": 4.98, + "learning_rate": 2.5428954423592494e-05, + "loss": 0.3828, + "step": 1883 + }, + { + "epoch": 4.98, + "learning_rate": 2.5415549597855232e-05, + "loss": 0.0093, + "step": 1884 + }, + { + "epoch": 4.99, + "learning_rate": 2.5402144772117964e-05, + "loss": 0.0231, + "step": 1885 + }, + { + "epoch": 4.99, + "learning_rate": 2.5388739946380696e-05, + "loss": 0.0082, + "step": 1886 + }, + { + "epoch": 4.99, + "learning_rate": 2.5375335120643434e-05, + "loss": 0.1796, + "step": 1887 + }, + { + "epoch": 4.99, + "learning_rate": 2.5361930294906166e-05, + "loss": 0.0753, + "step": 1888 + }, + { + "epoch": 5.0, + "learning_rate": 2.5348525469168905e-05, + "loss": 0.0142, + "step": 1889 + }, + { + "epoch": 5.0, + "learning_rate": 2.5335120643431636e-05, + "loss": 0.0047, + "step": 1890 + }, + { + "epoch": 5.0, + "eval_f1": 0.7775974025974025, + "eval_loss": 0.953689694404602, + "eval_runtime": 1.8696, + "eval_samples_per_second": 809.285, + "eval_steps_per_second": 50.814, + "step": 1890 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 484242854999424.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1890/training_args.bin b/checkpoint-1890/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-1890/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-2268/config.json b/checkpoint-2268/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-2268/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-2268/optimizer.pt b/checkpoint-2268/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6933fcc0224f39b51e49445f47fbffb49e98ee8 --- /dev/null +++ b/checkpoint-2268/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb00801aa9d975c647b6e878986a419cd3cade5e19595bca603265b4e575932c +size 526325317 diff --git a/checkpoint-2268/pytorch_model.bin b/checkpoint-2268/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..964cd7c7e2b6c22222c85877cc4bdc869d4b282e --- /dev/null +++ b/checkpoint-2268/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aecf74abbd12126248d1ab6b0343cb7f1a8e8ea550da4711db32b4c27f1a460b +size 263167661 diff --git a/checkpoint-2268/rng_state.pth b/checkpoint-2268/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc16d44736dcbc13d247bd7a2d942bef880ab685 --- /dev/null +++ b/checkpoint-2268/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a0a36e41b0f6ae09acc41f6d5ee6a72f9ef8ae9bbab3384bc65d0290dfa4975 +size 14575 diff --git a/checkpoint-2268/scheduler.pt b/checkpoint-2268/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d00e540529585d98615368179023a2b3fcf5fb01 --- /dev/null +++ b/checkpoint-2268/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:587b3803bf070c7fd1a5809542f456d09391b47ebc2a66313fffa72515a3cc84 +size 627 diff --git a/checkpoint-2268/trainer_state.json b/checkpoint-2268/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..997b99380fa900498bf0417e6a05c9c54b757e71 --- /dev/null +++ b/checkpoint-2268/trainer_state.json @@ -0,0 +1,13678 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 6.0, + "global_step": 2268, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + }, + { + "epoch": 3.0, + "learning_rate": 3.545576407506703e-05, + "loss": 0.0111, + "step": 1135 + }, + { + "epoch": 3.01, + "learning_rate": 3.544235924932976e-05, + "loss": 0.0166, + "step": 1136 + }, + { + "epoch": 3.01, + "learning_rate": 3.54289544235925e-05, + "loss": 0.2159, + "step": 1137 + }, + { + "epoch": 3.01, + "learning_rate": 3.541554959785523e-05, + "loss": 0.0096, + "step": 1138 + }, + { + "epoch": 3.01, + "learning_rate": 3.540214477211796e-05, + "loss": 0.1352, + "step": 1139 + }, + { + "epoch": 3.02, + "learning_rate": 3.53887399463807e-05, + "loss": 0.0195, + "step": 1140 + }, + { + "epoch": 3.02, + "learning_rate": 3.5375335120643434e-05, + "loss": 0.1579, + "step": 1141 + }, + { + "epoch": 3.02, + "learning_rate": 3.536193029490617e-05, + "loss": 0.0078, + "step": 1142 + }, + { + "epoch": 3.02, + "learning_rate": 3.5348525469168904e-05, + "loss": 0.0111, + "step": 1143 + }, + { + "epoch": 3.03, + "learning_rate": 3.533512064343163e-05, + "loss": 0.2457, + "step": 1144 + }, + { + "epoch": 3.03, + "learning_rate": 3.5321715817694374e-05, + "loss": 0.014, + "step": 1145 + }, + { + "epoch": 3.03, + "learning_rate": 3.53083109919571e-05, + "loss": 0.2021, + "step": 1146 + }, + { + "epoch": 3.03, + "learning_rate": 3.5294906166219844e-05, + "loss": 0.5334, + "step": 1147 + }, + { + "epoch": 3.04, + "learning_rate": 3.528150134048257e-05, + "loss": 0.0116, + "step": 1148 + }, + { + "epoch": 3.04, + "learning_rate": 3.526809651474531e-05, + "loss": 0.0099, + "step": 1149 + }, + { + "epoch": 3.04, + "learning_rate": 3.525469168900804e-05, + "loss": 0.2102, + "step": 1150 + }, + { + "epoch": 3.04, + "learning_rate": 3.524128686327078e-05, + "loss": 0.0093, + "step": 1151 + }, + { + "epoch": 3.05, + "learning_rate": 3.522788203753351e-05, + "loss": 0.0112, + "step": 1152 + }, + { + "epoch": 3.05, + "learning_rate": 3.521447721179625e-05, + "loss": 0.1761, + "step": 1153 + }, + { + "epoch": 3.05, + "learning_rate": 3.5201072386058984e-05, + "loss": 0.1608, + "step": 1154 + }, + { + "epoch": 3.06, + "learning_rate": 3.518766756032172e-05, + "loss": 0.2883, + "step": 1155 + }, + { + "epoch": 3.06, + "learning_rate": 3.5174262734584454e-05, + "loss": 0.0304, + "step": 1156 + }, + { + "epoch": 3.06, + "learning_rate": 3.516085790884719e-05, + "loss": 0.0623, + "step": 1157 + }, + { + "epoch": 3.06, + "learning_rate": 3.5147453083109924e-05, + "loss": 0.1824, + "step": 1158 + }, + { + "epoch": 3.07, + "learning_rate": 3.513404825737265e-05, + "loss": 0.2527, + "step": 1159 + }, + { + "epoch": 3.07, + "learning_rate": 3.5120643431635394e-05, + "loss": 0.0877, + "step": 1160 + }, + { + "epoch": 3.07, + "learning_rate": 3.510723860589812e-05, + "loss": 0.2735, + "step": 1161 + }, + { + "epoch": 3.07, + "learning_rate": 3.5093833780160865e-05, + "loss": 0.1126, + "step": 1162 + }, + { + "epoch": 3.08, + "learning_rate": 3.508042895442359e-05, + "loss": 0.2498, + "step": 1163 + }, + { + "epoch": 3.08, + "learning_rate": 3.506702412868633e-05, + "loss": 0.022, + "step": 1164 + }, + { + "epoch": 3.08, + "learning_rate": 3.505361930294906e-05, + "loss": 0.2768, + "step": 1165 + }, + { + "epoch": 3.08, + "learning_rate": 3.50402144772118e-05, + "loss": 0.0429, + "step": 1166 + }, + { + "epoch": 3.09, + "learning_rate": 3.5026809651474533e-05, + "loss": 0.0198, + "step": 1167 + }, + { + "epoch": 3.09, + "learning_rate": 3.501340482573727e-05, + "loss": 0.0097, + "step": 1168 + }, + { + "epoch": 3.09, + "learning_rate": 3.5e-05, + "loss": 0.0276, + "step": 1169 + }, + { + "epoch": 3.1, + "learning_rate": 3.498659517426274e-05, + "loss": 0.2276, + "step": 1170 + }, + { + "epoch": 3.1, + "learning_rate": 3.497319034852547e-05, + "loss": 0.0461, + "step": 1171 + }, + { + "epoch": 3.1, + "learning_rate": 3.495978552278821e-05, + "loss": 0.0103, + "step": 1172 + }, + { + "epoch": 3.1, + "learning_rate": 3.494638069705094e-05, + "loss": 0.1455, + "step": 1173 + }, + { + "epoch": 3.11, + "learning_rate": 3.493297587131367e-05, + "loss": 0.0865, + "step": 1174 + }, + { + "epoch": 3.11, + "learning_rate": 3.491957104557641e-05, + "loss": 0.3226, + "step": 1175 + }, + { + "epoch": 3.11, + "learning_rate": 3.490616621983914e-05, + "loss": 0.1744, + "step": 1176 + }, + { + "epoch": 3.11, + "learning_rate": 3.489276139410188e-05, + "loss": 0.0148, + "step": 1177 + }, + { + "epoch": 3.12, + "learning_rate": 3.487935656836461e-05, + "loss": 0.2582, + "step": 1178 + }, + { + "epoch": 3.12, + "learning_rate": 3.486595174262735e-05, + "loss": 0.2782, + "step": 1179 + }, + { + "epoch": 3.12, + "learning_rate": 3.485254691689008e-05, + "loss": 0.143, + "step": 1180 + }, + { + "epoch": 3.12, + "learning_rate": 3.483914209115282e-05, + "loss": 0.0853, + "step": 1181 + }, + { + "epoch": 3.13, + "learning_rate": 3.4825737265415554e-05, + "loss": 0.1361, + "step": 1182 + }, + { + "epoch": 3.13, + "learning_rate": 3.481233243967829e-05, + "loss": 0.0883, + "step": 1183 + }, + { + "epoch": 3.13, + "learning_rate": 3.479892761394102e-05, + "loss": 0.0116, + "step": 1184 + }, + { + "epoch": 3.13, + "learning_rate": 3.478552278820376e-05, + "loss": 0.0531, + "step": 1185 + }, + { + "epoch": 3.14, + "learning_rate": 3.477211796246649e-05, + "loss": 0.0184, + "step": 1186 + }, + { + "epoch": 3.14, + "learning_rate": 3.475871313672923e-05, + "loss": 0.1601, + "step": 1187 + }, + { + "epoch": 3.14, + "learning_rate": 3.474530831099196e-05, + "loss": 0.007, + "step": 1188 + }, + { + "epoch": 3.15, + "learning_rate": 3.473190348525469e-05, + "loss": 0.0101, + "step": 1189 + }, + { + "epoch": 3.15, + "learning_rate": 3.471849865951743e-05, + "loss": 0.2385, + "step": 1190 + }, + { + "epoch": 3.15, + "learning_rate": 3.470509383378016e-05, + "loss": 0.0075, + "step": 1191 + }, + { + "epoch": 3.15, + "learning_rate": 3.46916890080429e-05, + "loss": 0.0919, + "step": 1192 + }, + { + "epoch": 3.16, + "learning_rate": 3.467828418230563e-05, + "loss": 0.0162, + "step": 1193 + }, + { + "epoch": 3.16, + "learning_rate": 3.466487935656836e-05, + "loss": 0.2239, + "step": 1194 + }, + { + "epoch": 3.16, + "learning_rate": 3.4651474530831104e-05, + "loss": 0.5757, + "step": 1195 + }, + { + "epoch": 3.16, + "learning_rate": 3.463806970509383e-05, + "loss": 0.0774, + "step": 1196 + }, + { + "epoch": 3.17, + "learning_rate": 3.4624664879356574e-05, + "loss": 0.2124, + "step": 1197 + }, + { + "epoch": 3.17, + "learning_rate": 3.46112600536193e-05, + "loss": 0.0107, + "step": 1198 + }, + { + "epoch": 3.17, + "learning_rate": 3.459785522788204e-05, + "loss": 0.3179, + "step": 1199 + }, + { + "epoch": 3.17, + "learning_rate": 3.458445040214477e-05, + "loss": 0.0138, + "step": 1200 + }, + { + "epoch": 3.18, + "learning_rate": 3.457104557640751e-05, + "loss": 0.0094, + "step": 1201 + }, + { + "epoch": 3.18, + "learning_rate": 3.455764075067024e-05, + "loss": 0.0039, + "step": 1202 + }, + { + "epoch": 3.18, + "learning_rate": 3.454423592493298e-05, + "loss": 0.0745, + "step": 1203 + }, + { + "epoch": 3.19, + "learning_rate": 3.453083109919571e-05, + "loss": 0.0387, + "step": 1204 + }, + { + "epoch": 3.19, + "learning_rate": 3.451742627345845e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 3.19, + "learning_rate": 3.450402144772118e-05, + "loss": 0.1299, + "step": 1206 + }, + { + "epoch": 3.19, + "learning_rate": 3.449061662198392e-05, + "loss": 0.2821, + "step": 1207 + }, + { + "epoch": 3.2, + "learning_rate": 3.4477211796246653e-05, + "loss": 0.2236, + "step": 1208 + }, + { + "epoch": 3.2, + "learning_rate": 3.446380697050938e-05, + "loss": 0.1436, + "step": 1209 + }, + { + "epoch": 3.2, + "learning_rate": 3.4450402144772124e-05, + "loss": 0.1504, + "step": 1210 + }, + { + "epoch": 3.2, + "learning_rate": 3.443699731903485e-05, + "loss": 0.0415, + "step": 1211 + }, + { + "epoch": 3.21, + "learning_rate": 3.4423592493297594e-05, + "loss": 0.023, + "step": 1212 + }, + { + "epoch": 3.21, + "learning_rate": 3.441018766756032e-05, + "loss": 0.2128, + "step": 1213 + }, + { + "epoch": 3.21, + "learning_rate": 3.439678284182306e-05, + "loss": 0.0066, + "step": 1214 + }, + { + "epoch": 3.21, + "learning_rate": 3.438337801608579e-05, + "loss": 0.4345, + "step": 1215 + }, + { + "epoch": 3.22, + "learning_rate": 3.436997319034853e-05, + "loss": 0.0214, + "step": 1216 + }, + { + "epoch": 3.22, + "learning_rate": 3.435656836461126e-05, + "loss": 0.2094, + "step": 1217 + }, + { + "epoch": 3.22, + "learning_rate": 3.4343163538874e-05, + "loss": 0.0822, + "step": 1218 + }, + { + "epoch": 3.22, + "learning_rate": 3.4329758713136726e-05, + "loss": 0.1153, + "step": 1219 + }, + { + "epoch": 3.23, + "learning_rate": 3.431635388739947e-05, + "loss": 0.0059, + "step": 1220 + }, + { + "epoch": 3.23, + "learning_rate": 3.43029490616622e-05, + "loss": 0.0069, + "step": 1221 + }, + { + "epoch": 3.23, + "learning_rate": 3.428954423592494e-05, + "loss": 0.044, + "step": 1222 + }, + { + "epoch": 3.24, + "learning_rate": 3.427613941018767e-05, + "loss": 0.1975, + "step": 1223 + }, + { + "epoch": 3.24, + "learning_rate": 3.42627345844504e-05, + "loss": 0.3294, + "step": 1224 + }, + { + "epoch": 3.24, + "learning_rate": 3.424932975871314e-05, + "loss": 0.026, + "step": 1225 + }, + { + "epoch": 3.24, + "learning_rate": 3.423592493297587e-05, + "loss": 0.2666, + "step": 1226 + }, + { + "epoch": 3.25, + "learning_rate": 3.422252010723861e-05, + "loss": 0.0628, + "step": 1227 + }, + { + "epoch": 3.25, + "learning_rate": 3.420911528150134e-05, + "loss": 0.0068, + "step": 1228 + }, + { + "epoch": 3.25, + "learning_rate": 3.419571045576407e-05, + "loss": 0.0144, + "step": 1229 + }, + { + "epoch": 3.25, + "learning_rate": 3.418230563002681e-05, + "loss": 0.0029, + "step": 1230 + }, + { + "epoch": 3.26, + "learning_rate": 3.416890080428954e-05, + "loss": 0.606, + "step": 1231 + }, + { + "epoch": 3.26, + "learning_rate": 3.415549597855228e-05, + "loss": 0.2162, + "step": 1232 + }, + { + "epoch": 3.26, + "learning_rate": 3.414209115281501e-05, + "loss": 0.146, + "step": 1233 + }, + { + "epoch": 3.26, + "learning_rate": 3.412868632707775e-05, + "loss": 0.3649, + "step": 1234 + }, + { + "epoch": 3.27, + "learning_rate": 3.411528150134048e-05, + "loss": 0.0062, + "step": 1235 + }, + { + "epoch": 3.27, + "learning_rate": 3.410187667560322e-05, + "loss": 0.4097, + "step": 1236 + }, + { + "epoch": 3.27, + "learning_rate": 3.408847184986595e-05, + "loss": 0.5354, + "step": 1237 + }, + { + "epoch": 3.28, + "learning_rate": 3.407506702412869e-05, + "loss": 0.6222, + "step": 1238 + }, + { + "epoch": 3.28, + "learning_rate": 3.406166219839142e-05, + "loss": 0.0023, + "step": 1239 + }, + { + "epoch": 3.28, + "learning_rate": 3.404825737265416e-05, + "loss": 0.0247, + "step": 1240 + }, + { + "epoch": 3.28, + "learning_rate": 3.403485254691689e-05, + "loss": 0.0051, + "step": 1241 + }, + { + "epoch": 3.29, + "learning_rate": 3.402144772117963e-05, + "loss": 0.2504, + "step": 1242 + }, + { + "epoch": 3.29, + "learning_rate": 3.400804289544236e-05, + "loss": 0.0195, + "step": 1243 + }, + { + "epoch": 3.29, + "learning_rate": 3.39946380697051e-05, + "loss": 0.3706, + "step": 1244 + }, + { + "epoch": 3.29, + "learning_rate": 3.398123324396783e-05, + "loss": 0.0174, + "step": 1245 + }, + { + "epoch": 3.3, + "learning_rate": 3.396782841823056e-05, + "loss": 0.0068, + "step": 1246 + }, + { + "epoch": 3.3, + "learning_rate": 3.39544235924933e-05, + "loss": 0.3938, + "step": 1247 + }, + { + "epoch": 3.3, + "learning_rate": 3.394101876675603e-05, + "loss": 0.0114, + "step": 1248 + }, + { + "epoch": 3.3, + "learning_rate": 3.3927613941018774e-05, + "loss": 0.0088, + "step": 1249 + }, + { + "epoch": 3.31, + "learning_rate": 3.39142091152815e-05, + "loss": 0.0126, + "step": 1250 + }, + { + "epoch": 3.31, + "learning_rate": 3.390080428954424e-05, + "loss": 0.0091, + "step": 1251 + }, + { + "epoch": 3.31, + "learning_rate": 3.388739946380697e-05, + "loss": 0.0232, + "step": 1252 + }, + { + "epoch": 3.31, + "learning_rate": 3.387399463806971e-05, + "loss": 0.3704, + "step": 1253 + }, + { + "epoch": 3.32, + "learning_rate": 3.386058981233244e-05, + "loss": 0.0112, + "step": 1254 + }, + { + "epoch": 3.32, + "learning_rate": 3.384718498659518e-05, + "loss": 0.1709, + "step": 1255 + }, + { + "epoch": 3.32, + "learning_rate": 3.3833780160857906e-05, + "loss": 0.0109, + "step": 1256 + }, + { + "epoch": 3.33, + "learning_rate": 3.382037533512065e-05, + "loss": 0.2874, + "step": 1257 + }, + { + "epoch": 3.33, + "learning_rate": 3.3806970509383376e-05, + "loss": 0.024, + "step": 1258 + }, + { + "epoch": 3.33, + "learning_rate": 3.379356568364612e-05, + "loss": 0.0131, + "step": 1259 + }, + { + "epoch": 3.33, + "learning_rate": 3.3780160857908846e-05, + "loss": 0.2076, + "step": 1260 + }, + { + "epoch": 3.34, + "learning_rate": 3.376675603217158e-05, + "loss": 0.0083, + "step": 1261 + }, + { + "epoch": 3.34, + "learning_rate": 3.375335120643432e-05, + "loss": 0.0234, + "step": 1262 + }, + { + "epoch": 3.34, + "learning_rate": 3.373994638069705e-05, + "loss": 0.0066, + "step": 1263 + }, + { + "epoch": 3.34, + "learning_rate": 3.372654155495979e-05, + "loss": 0.3983, + "step": 1264 + }, + { + "epoch": 3.35, + "learning_rate": 3.371313672922252e-05, + "loss": 0.0648, + "step": 1265 + }, + { + "epoch": 3.35, + "learning_rate": 3.369973190348526e-05, + "loss": 0.006, + "step": 1266 + }, + { + "epoch": 3.35, + "learning_rate": 3.368632707774799e-05, + "loss": 0.0807, + "step": 1267 + }, + { + "epoch": 3.35, + "learning_rate": 3.367292225201073e-05, + "loss": 0.0975, + "step": 1268 + }, + { + "epoch": 3.36, + "learning_rate": 3.365951742627346e-05, + "loss": 0.2934, + "step": 1269 + }, + { + "epoch": 3.36, + "learning_rate": 3.36461126005362e-05, + "loss": 0.0869, + "step": 1270 + }, + { + "epoch": 3.36, + "learning_rate": 3.3632707774798926e-05, + "loss": 0.1374, + "step": 1271 + }, + { + "epoch": 3.37, + "learning_rate": 3.361930294906167e-05, + "loss": 0.3314, + "step": 1272 + }, + { + "epoch": 3.37, + "learning_rate": 3.3605898123324396e-05, + "loss": 0.0045, + "step": 1273 + }, + { + "epoch": 3.37, + "learning_rate": 3.359249329758714e-05, + "loss": 0.0536, + "step": 1274 + }, + { + "epoch": 3.37, + "learning_rate": 3.3579088471849867e-05, + "loss": 0.0564, + "step": 1275 + }, + { + "epoch": 3.38, + "learning_rate": 3.35656836461126e-05, + "loss": 0.0689, + "step": 1276 + }, + { + "epoch": 3.38, + "learning_rate": 3.355227882037534e-05, + "loss": 0.5177, + "step": 1277 + }, + { + "epoch": 3.38, + "learning_rate": 3.353887399463807e-05, + "loss": 0.0689, + "step": 1278 + }, + { + "epoch": 3.38, + "learning_rate": 3.352546916890081e-05, + "loss": 0.0664, + "step": 1279 + }, + { + "epoch": 3.39, + "learning_rate": 3.351206434316354e-05, + "loss": 0.0614, + "step": 1280 + }, + { + "epoch": 3.39, + "learning_rate": 3.349865951742627e-05, + "loss": 0.1994, + "step": 1281 + }, + { + "epoch": 3.39, + "learning_rate": 3.348525469168901e-05, + "loss": 0.4769, + "step": 1282 + }, + { + "epoch": 3.39, + "learning_rate": 3.347184986595174e-05, + "loss": 0.1851, + "step": 1283 + }, + { + "epoch": 3.4, + "learning_rate": 3.345844504021448e-05, + "loss": 0.0092, + "step": 1284 + }, + { + "epoch": 3.4, + "learning_rate": 3.344504021447721e-05, + "loss": 0.0052, + "step": 1285 + }, + { + "epoch": 3.4, + "learning_rate": 3.3431635388739946e-05, + "loss": 0.0095, + "step": 1286 + }, + { + "epoch": 3.4, + "learning_rate": 3.341823056300268e-05, + "loss": 0.0242, + "step": 1287 + }, + { + "epoch": 3.41, + "learning_rate": 3.3404825737265416e-05, + "loss": 0.0565, + "step": 1288 + }, + { + "epoch": 3.41, + "learning_rate": 3.339142091152815e-05, + "loss": 0.2645, + "step": 1289 + }, + { + "epoch": 3.41, + "learning_rate": 3.337801608579089e-05, + "loss": 0.0049, + "step": 1290 + }, + { + "epoch": 3.42, + "learning_rate": 3.336461126005362e-05, + "loss": 0.0929, + "step": 1291 + }, + { + "epoch": 3.42, + "learning_rate": 3.335120643431636e-05, + "loss": 0.3968, + "step": 1292 + }, + { + "epoch": 3.42, + "learning_rate": 3.333780160857909e-05, + "loss": 0.033, + "step": 1293 + }, + { + "epoch": 3.42, + "learning_rate": 3.332439678284183e-05, + "loss": 0.007, + "step": 1294 + }, + { + "epoch": 3.43, + "learning_rate": 3.331099195710456e-05, + "loss": 0.2552, + "step": 1295 + }, + { + "epoch": 3.43, + "learning_rate": 3.329758713136729e-05, + "loss": 0.004, + "step": 1296 + }, + { + "epoch": 3.43, + "learning_rate": 3.328418230563003e-05, + "loss": 0.136, + "step": 1297 + }, + { + "epoch": 3.43, + "learning_rate": 3.327077747989276e-05, + "loss": 0.1407, + "step": 1298 + }, + { + "epoch": 3.44, + "learning_rate": 3.32573726541555e-05, + "loss": 0.0354, + "step": 1299 + }, + { + "epoch": 3.44, + "learning_rate": 3.324396782841823e-05, + "loss": 0.6141, + "step": 1300 + }, + { + "epoch": 3.44, + "learning_rate": 3.3230563002680966e-05, + "loss": 0.2544, + "step": 1301 + }, + { + "epoch": 3.44, + "learning_rate": 3.32171581769437e-05, + "loss": 0.0046, + "step": 1302 + }, + { + "epoch": 3.45, + "learning_rate": 3.320375335120644e-05, + "loss": 0.0126, + "step": 1303 + }, + { + "epoch": 3.45, + "learning_rate": 3.319034852546917e-05, + "loss": 0.3506, + "step": 1304 + }, + { + "epoch": 3.45, + "learning_rate": 3.317694369973191e-05, + "loss": 0.3512, + "step": 1305 + }, + { + "epoch": 3.46, + "learning_rate": 3.3163538873994635e-05, + "loss": 0.3675, + "step": 1306 + }, + { + "epoch": 3.46, + "learning_rate": 3.315013404825738e-05, + "loss": 0.1676, + "step": 1307 + }, + { + "epoch": 3.46, + "learning_rate": 3.3136729222520106e-05, + "loss": 0.0307, + "step": 1308 + }, + { + "epoch": 3.46, + "learning_rate": 3.312332439678285e-05, + "loss": 0.0084, + "step": 1309 + }, + { + "epoch": 3.47, + "learning_rate": 3.3109919571045576e-05, + "loss": 0.1977, + "step": 1310 + }, + { + "epoch": 3.47, + "learning_rate": 3.309651474530831e-05, + "loss": 0.1645, + "step": 1311 + }, + { + "epoch": 3.47, + "learning_rate": 3.3083109919571046e-05, + "loss": 0.2579, + "step": 1312 + }, + { + "epoch": 3.47, + "learning_rate": 3.306970509383378e-05, + "loss": 0.1656, + "step": 1313 + }, + { + "epoch": 3.48, + "learning_rate": 3.3056300268096516e-05, + "loss": 0.0168, + "step": 1314 + }, + { + "epoch": 3.48, + "learning_rate": 3.304289544235925e-05, + "loss": 0.0291, + "step": 1315 + }, + { + "epoch": 3.48, + "learning_rate": 3.302949061662198e-05, + "loss": 0.0146, + "step": 1316 + }, + { + "epoch": 3.48, + "learning_rate": 3.301608579088472e-05, + "loss": 0.0037, + "step": 1317 + }, + { + "epoch": 3.49, + "learning_rate": 3.300268096514745e-05, + "loss": 0.0113, + "step": 1318 + }, + { + "epoch": 3.49, + "learning_rate": 3.298927613941019e-05, + "loss": 0.0734, + "step": 1319 + }, + { + "epoch": 3.49, + "learning_rate": 3.297587131367292e-05, + "loss": 0.0292, + "step": 1320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2962466487935655e-05, + "loss": 0.3875, + "step": 1321 + }, + { + "epoch": 3.5, + "learning_rate": 3.294906166219839e-05, + "loss": 0.0138, + "step": 1322 + }, + { + "epoch": 3.5, + "learning_rate": 3.2935656836461126e-05, + "loss": 0.4653, + "step": 1323 + }, + { + "epoch": 3.5, + "learning_rate": 3.292225201072386e-05, + "loss": 0.1864, + "step": 1324 + }, + { + "epoch": 3.51, + "learning_rate": 3.2908847184986596e-05, + "loss": 0.0116, + "step": 1325 + }, + { + "epoch": 3.51, + "learning_rate": 3.289544235924933e-05, + "loss": 0.014, + "step": 1326 + }, + { + "epoch": 3.51, + "learning_rate": 3.2882037533512066e-05, + "loss": 0.3344, + "step": 1327 + }, + { + "epoch": 3.51, + "learning_rate": 3.28686327077748e-05, + "loss": 0.1544, + "step": 1328 + }, + { + "epoch": 3.52, + "learning_rate": 3.2855227882037537e-05, + "loss": 0.0065, + "step": 1329 + }, + { + "epoch": 3.52, + "learning_rate": 3.284182305630027e-05, + "loss": 0.0041, + "step": 1330 + }, + { + "epoch": 3.52, + "learning_rate": 3.2828418230563e-05, + "loss": 0.0044, + "step": 1331 + }, + { + "epoch": 3.52, + "learning_rate": 3.281501340482574e-05, + "loss": 0.1808, + "step": 1332 + }, + { + "epoch": 3.53, + "learning_rate": 3.280160857908847e-05, + "loss": 0.0521, + "step": 1333 + }, + { + "epoch": 3.53, + "learning_rate": 3.278820375335121e-05, + "loss": 0.3505, + "step": 1334 + }, + { + "epoch": 3.53, + "learning_rate": 3.277479892761394e-05, + "loss": 0.2032, + "step": 1335 + }, + { + "epoch": 3.53, + "learning_rate": 3.2761394101876676e-05, + "loss": 0.004, + "step": 1336 + }, + { + "epoch": 3.54, + "learning_rate": 3.274798927613941e-05, + "loss": 0.0343, + "step": 1337 + }, + { + "epoch": 3.54, + "learning_rate": 3.2734584450402146e-05, + "loss": 0.278, + "step": 1338 + }, + { + "epoch": 3.54, + "learning_rate": 3.272117962466488e-05, + "loss": 0.0056, + "step": 1339 + }, + { + "epoch": 3.54, + "learning_rate": 3.2707774798927616e-05, + "loss": 0.1673, + "step": 1340 + }, + { + "epoch": 3.55, + "learning_rate": 3.2694369973190345e-05, + "loss": 0.0092, + "step": 1341 + }, + { + "epoch": 3.55, + "learning_rate": 3.2680965147453086e-05, + "loss": 0.0058, + "step": 1342 + }, + { + "epoch": 3.55, + "learning_rate": 3.2667560321715815e-05, + "loss": 0.097, + "step": 1343 + }, + { + "epoch": 3.56, + "learning_rate": 3.265415549597856e-05, + "loss": 0.2138, + "step": 1344 + }, + { + "epoch": 3.56, + "learning_rate": 3.2640750670241285e-05, + "loss": 0.0077, + "step": 1345 + }, + { + "epoch": 3.56, + "learning_rate": 3.262734584450402e-05, + "loss": 0.2294, + "step": 1346 + }, + { + "epoch": 3.56, + "learning_rate": 3.2613941018766755e-05, + "loss": 0.3282, + "step": 1347 + }, + { + "epoch": 3.57, + "learning_rate": 3.260053619302949e-05, + "loss": 0.233, + "step": 1348 + }, + { + "epoch": 3.57, + "learning_rate": 3.2587131367292226e-05, + "loss": 0.0379, + "step": 1349 + }, + { + "epoch": 3.57, + "learning_rate": 3.257372654155496e-05, + "loss": 0.2168, + "step": 1350 + }, + { + "epoch": 3.57, + "learning_rate": 3.2560321715817696e-05, + "loss": 0.0443, + "step": 1351 + }, + { + "epoch": 3.58, + "learning_rate": 3.254691689008043e-05, + "loss": 0.2665, + "step": 1352 + }, + { + "epoch": 3.58, + "learning_rate": 3.2533512064343166e-05, + "loss": 0.0136, + "step": 1353 + }, + { + "epoch": 3.58, + "learning_rate": 3.25201072386059e-05, + "loss": 0.0035, + "step": 1354 + }, + { + "epoch": 3.58, + "learning_rate": 3.2506702412868636e-05, + "loss": 0.2153, + "step": 1355 + }, + { + "epoch": 3.59, + "learning_rate": 3.249329758713137e-05, + "loss": 0.088, + "step": 1356 + }, + { + "epoch": 3.59, + "learning_rate": 3.247989276139411e-05, + "loss": 0.0074, + "step": 1357 + }, + { + "epoch": 3.59, + "learning_rate": 3.2466487935656835e-05, + "loss": 0.0924, + "step": 1358 + }, + { + "epoch": 3.6, + "learning_rate": 3.245308310991958e-05, + "loss": 0.0171, + "step": 1359 + }, + { + "epoch": 3.6, + "learning_rate": 3.2439678284182305e-05, + "loss": 0.0132, + "step": 1360 + }, + { + "epoch": 3.6, + "learning_rate": 3.242627345844505e-05, + "loss": 0.0583, + "step": 1361 + }, + { + "epoch": 3.6, + "learning_rate": 3.2412868632707776e-05, + "loss": 0.0038, + "step": 1362 + }, + { + "epoch": 3.61, + "learning_rate": 3.239946380697051e-05, + "loss": 0.0846, + "step": 1363 + }, + { + "epoch": 3.61, + "learning_rate": 3.2386058981233246e-05, + "loss": 0.0058, + "step": 1364 + }, + { + "epoch": 3.61, + "learning_rate": 3.237265415549598e-05, + "loss": 0.4456, + "step": 1365 + }, + { + "epoch": 3.61, + "learning_rate": 3.2359249329758716e-05, + "loss": 0.0029, + "step": 1366 + }, + { + "epoch": 3.62, + "learning_rate": 3.234584450402145e-05, + "loss": 0.2553, + "step": 1367 + }, + { + "epoch": 3.62, + "learning_rate": 3.233243967828418e-05, + "loss": 0.0936, + "step": 1368 + }, + { + "epoch": 3.62, + "learning_rate": 3.231903485254692e-05, + "loss": 0.1017, + "step": 1369 + }, + { + "epoch": 3.62, + "learning_rate": 3.230563002680965e-05, + "loss": 0.0379, + "step": 1370 + }, + { + "epoch": 3.63, + "learning_rate": 3.229222520107239e-05, + "loss": 0.0069, + "step": 1371 + }, + { + "epoch": 3.63, + "learning_rate": 3.227882037533512e-05, + "loss": 0.3235, + "step": 1372 + }, + { + "epoch": 3.63, + "learning_rate": 3.2265415549597855e-05, + "loss": 0.3796, + "step": 1373 + }, + { + "epoch": 3.63, + "learning_rate": 3.225201072386059e-05, + "loss": 0.3246, + "step": 1374 + }, + { + "epoch": 3.64, + "learning_rate": 3.2238605898123325e-05, + "loss": 0.0059, + "step": 1375 + }, + { + "epoch": 3.64, + "learning_rate": 3.222520107238606e-05, + "loss": 0.0405, + "step": 1376 + }, + { + "epoch": 3.64, + "learning_rate": 3.2211796246648796e-05, + "loss": 0.0142, + "step": 1377 + }, + { + "epoch": 3.65, + "learning_rate": 3.219839142091153e-05, + "loss": 0.4426, + "step": 1378 + }, + { + "epoch": 3.65, + "learning_rate": 3.2184986595174266e-05, + "loss": 0.0249, + "step": 1379 + }, + { + "epoch": 3.65, + "learning_rate": 3.2171581769437e-05, + "loss": 0.1053, + "step": 1380 + }, + { + "epoch": 3.65, + "learning_rate": 3.2158176943699736e-05, + "loss": 0.0179, + "step": 1381 + }, + { + "epoch": 3.66, + "learning_rate": 3.214477211796247e-05, + "loss": 0.0718, + "step": 1382 + }, + { + "epoch": 3.66, + "learning_rate": 3.21313672922252e-05, + "loss": 0.1431, + "step": 1383 + }, + { + "epoch": 3.66, + "learning_rate": 3.211796246648794e-05, + "loss": 0.2391, + "step": 1384 + }, + { + "epoch": 3.66, + "learning_rate": 3.210455764075067e-05, + "loss": 0.0053, + "step": 1385 + }, + { + "epoch": 3.67, + "learning_rate": 3.209115281501341e-05, + "loss": 0.2935, + "step": 1386 + }, + { + "epoch": 3.67, + "learning_rate": 3.207774798927614e-05, + "loss": 0.0071, + "step": 1387 + }, + { + "epoch": 3.67, + "learning_rate": 3.2064343163538875e-05, + "loss": 0.031, + "step": 1388 + }, + { + "epoch": 3.67, + "learning_rate": 3.205093833780161e-05, + "loss": 0.1989, + "step": 1389 + }, + { + "epoch": 3.68, + "learning_rate": 3.2037533512064346e-05, + "loss": 0.0533, + "step": 1390 + }, + { + "epoch": 3.68, + "learning_rate": 3.202412868632708e-05, + "loss": 0.2408, + "step": 1391 + }, + { + "epoch": 3.68, + "learning_rate": 3.2010723860589816e-05, + "loss": 0.3158, + "step": 1392 + }, + { + "epoch": 3.69, + "learning_rate": 3.1997319034852544e-05, + "loss": 0.3629, + "step": 1393 + }, + { + "epoch": 3.69, + "learning_rate": 3.1983914209115286e-05, + "loss": 0.0122, + "step": 1394 + }, + { + "epoch": 3.69, + "learning_rate": 3.1970509383378014e-05, + "loss": 0.0449, + "step": 1395 + }, + { + "epoch": 3.69, + "learning_rate": 3.1957104557640756e-05, + "loss": 0.1273, + "step": 1396 + }, + { + "epoch": 3.7, + "learning_rate": 3.1943699731903485e-05, + "loss": 0.3401, + "step": 1397 + }, + { + "epoch": 3.7, + "learning_rate": 3.193029490616622e-05, + "loss": 0.0183, + "step": 1398 + }, + { + "epoch": 3.7, + "learning_rate": 3.1916890080428955e-05, + "loss": 0.0526, + "step": 1399 + }, + { + "epoch": 3.7, + "learning_rate": 3.190348525469169e-05, + "loss": 0.5037, + "step": 1400 + }, + { + "epoch": 3.71, + "learning_rate": 3.1890080428954425e-05, + "loss": 0.0059, + "step": 1401 + }, + { + "epoch": 3.71, + "learning_rate": 3.187667560321716e-05, + "loss": 0.0266, + "step": 1402 + }, + { + "epoch": 3.71, + "learning_rate": 3.1863270777479896e-05, + "loss": 0.4095, + "step": 1403 + }, + { + "epoch": 3.71, + "learning_rate": 3.184986595174263e-05, + "loss": 0.1802, + "step": 1404 + }, + { + "epoch": 3.72, + "learning_rate": 3.1836461126005366e-05, + "loss": 0.3586, + "step": 1405 + }, + { + "epoch": 3.72, + "learning_rate": 3.18230563002681e-05, + "loss": 0.2058, + "step": 1406 + }, + { + "epoch": 3.72, + "learning_rate": 3.1809651474530836e-05, + "loss": 0.008, + "step": 1407 + }, + { + "epoch": 3.72, + "learning_rate": 3.1796246648793564e-05, + "loss": 0.0282, + "step": 1408 + }, + { + "epoch": 3.73, + "learning_rate": 3.1782841823056306e-05, + "loss": 0.0077, + "step": 1409 + }, + { + "epoch": 3.73, + "learning_rate": 3.1769436997319035e-05, + "loss": 0.3461, + "step": 1410 + }, + { + "epoch": 3.73, + "learning_rate": 3.1756032171581777e-05, + "loss": 0.0038, + "step": 1411 + }, + { + "epoch": 3.74, + "learning_rate": 3.1742627345844505e-05, + "loss": 0.0087, + "step": 1412 + }, + { + "epoch": 3.74, + "learning_rate": 3.172922252010724e-05, + "loss": 0.8254, + "step": 1413 + }, + { + "epoch": 3.74, + "learning_rate": 3.1715817694369975e-05, + "loss": 0.017, + "step": 1414 + }, + { + "epoch": 3.74, + "learning_rate": 3.170241286863271e-05, + "loss": 0.2954, + "step": 1415 + }, + { + "epoch": 3.75, + "learning_rate": 3.1689008042895445e-05, + "loss": 0.0286, + "step": 1416 + }, + { + "epoch": 3.75, + "learning_rate": 3.167560321715818e-05, + "loss": 0.0454, + "step": 1417 + }, + { + "epoch": 3.75, + "learning_rate": 3.166219839142091e-05, + "loss": 0.222, + "step": 1418 + }, + { + "epoch": 3.75, + "learning_rate": 3.164879356568365e-05, + "loss": 0.0225, + "step": 1419 + }, + { + "epoch": 3.76, + "learning_rate": 3.163538873994638e-05, + "loss": 0.2599, + "step": 1420 + }, + { + "epoch": 3.76, + "learning_rate": 3.162198391420912e-05, + "loss": 0.2343, + "step": 1421 + }, + { + "epoch": 3.76, + "learning_rate": 3.160857908847185e-05, + "loss": 0.0274, + "step": 1422 + }, + { + "epoch": 3.76, + "learning_rate": 3.1595174262734585e-05, + "loss": 0.0109, + "step": 1423 + }, + { + "epoch": 3.77, + "learning_rate": 3.158176943699732e-05, + "loss": 0.012, + "step": 1424 + }, + { + "epoch": 3.77, + "learning_rate": 3.1568364611260055e-05, + "loss": 0.0267, + "step": 1425 + }, + { + "epoch": 3.77, + "learning_rate": 3.155495978552279e-05, + "loss": 0.0116, + "step": 1426 + }, + { + "epoch": 3.78, + "learning_rate": 3.1541554959785525e-05, + "loss": 0.2563, + "step": 1427 + }, + { + "epoch": 3.78, + "learning_rate": 3.1528150134048253e-05, + "loss": 0.2149, + "step": 1428 + }, + { + "epoch": 3.78, + "learning_rate": 3.1514745308310995e-05, + "loss": 0.2099, + "step": 1429 + }, + { + "epoch": 3.78, + "learning_rate": 3.1501340482573724e-05, + "loss": 0.1445, + "step": 1430 + }, + { + "epoch": 3.79, + "learning_rate": 3.1487935656836466e-05, + "loss": 0.0069, + "step": 1431 + }, + { + "epoch": 3.79, + "learning_rate": 3.1474530831099194e-05, + "loss": 0.3583, + "step": 1432 + }, + { + "epoch": 3.79, + "learning_rate": 3.146112600536193e-05, + "loss": 0.1112, + "step": 1433 + }, + { + "epoch": 3.79, + "learning_rate": 3.1447721179624664e-05, + "loss": 0.5379, + "step": 1434 + }, + { + "epoch": 3.8, + "learning_rate": 3.14343163538874e-05, + "loss": 0.0248, + "step": 1435 + }, + { + "epoch": 3.8, + "learning_rate": 3.1420911528150135e-05, + "loss": 0.0255, + "step": 1436 + }, + { + "epoch": 3.8, + "learning_rate": 3.140750670241287e-05, + "loss": 0.3363, + "step": 1437 + }, + { + "epoch": 3.8, + "learning_rate": 3.1394101876675605e-05, + "loss": 0.2952, + "step": 1438 + }, + { + "epoch": 3.81, + "learning_rate": 3.138069705093834e-05, + "loss": 0.0337, + "step": 1439 + }, + { + "epoch": 3.81, + "learning_rate": 3.1367292225201075e-05, + "loss": 0.0157, + "step": 1440 + }, + { + "epoch": 3.81, + "learning_rate": 3.135388739946381e-05, + "loss": 0.0204, + "step": 1441 + }, + { + "epoch": 3.81, + "learning_rate": 3.1340482573726545e-05, + "loss": 0.7707, + "step": 1442 + }, + { + "epoch": 3.82, + "learning_rate": 3.1327077747989274e-05, + "loss": 0.4232, + "step": 1443 + }, + { + "epoch": 3.82, + "learning_rate": 3.1313672922252016e-05, + "loss": 0.116, + "step": 1444 + }, + { + "epoch": 3.82, + "learning_rate": 3.1300268096514744e-05, + "loss": 0.421, + "step": 1445 + }, + { + "epoch": 3.83, + "learning_rate": 3.1286863270777486e-05, + "loss": 0.0267, + "step": 1446 + }, + { + "epoch": 3.83, + "learning_rate": 3.1273458445040214e-05, + "loss": 0.0078, + "step": 1447 + }, + { + "epoch": 3.83, + "learning_rate": 3.126005361930295e-05, + "loss": 0.0996, + "step": 1448 + }, + { + "epoch": 3.83, + "learning_rate": 3.1246648793565684e-05, + "loss": 0.0389, + "step": 1449 + }, + { + "epoch": 3.84, + "learning_rate": 3.123324396782842e-05, + "loss": 0.0482, + "step": 1450 + }, + { + "epoch": 3.84, + "learning_rate": 3.1219839142091155e-05, + "loss": 0.0053, + "step": 1451 + }, + { + "epoch": 3.84, + "learning_rate": 3.120643431635389e-05, + "loss": 0.0153, + "step": 1452 + }, + { + "epoch": 3.84, + "learning_rate": 3.119302949061662e-05, + "loss": 0.008, + "step": 1453 + }, + { + "epoch": 3.85, + "learning_rate": 3.117962466487936e-05, + "loss": 0.0166, + "step": 1454 + }, + { + "epoch": 3.85, + "learning_rate": 3.116621983914209e-05, + "loss": 0.0889, + "step": 1455 + }, + { + "epoch": 3.85, + "learning_rate": 3.115281501340483e-05, + "loss": 0.0695, + "step": 1456 + }, + { + "epoch": 3.85, + "learning_rate": 3.113941018766756e-05, + "loss": 0.3353, + "step": 1457 + }, + { + "epoch": 3.86, + "learning_rate": 3.1126005361930294e-05, + "loss": 0.0729, + "step": 1458 + }, + { + "epoch": 3.86, + "learning_rate": 3.111260053619303e-05, + "loss": 0.0187, + "step": 1459 + }, + { + "epoch": 3.86, + "learning_rate": 3.1099195710455764e-05, + "loss": 0.2512, + "step": 1460 + }, + { + "epoch": 3.87, + "learning_rate": 3.10857908847185e-05, + "loss": 0.3837, + "step": 1461 + }, + { + "epoch": 3.87, + "learning_rate": 3.1072386058981234e-05, + "loss": 0.2543, + "step": 1462 + }, + { + "epoch": 3.87, + "learning_rate": 3.105898123324397e-05, + "loss": 0.1797, + "step": 1463 + }, + { + "epoch": 3.87, + "learning_rate": 3.1045576407506705e-05, + "loss": 0.3097, + "step": 1464 + }, + { + "epoch": 3.88, + "learning_rate": 3.103217158176944e-05, + "loss": 0.268, + "step": 1465 + }, + { + "epoch": 3.88, + "learning_rate": 3.1018766756032175e-05, + "loss": 0.1773, + "step": 1466 + }, + { + "epoch": 3.88, + "learning_rate": 3.100536193029491e-05, + "loss": 0.2055, + "step": 1467 + }, + { + "epoch": 3.88, + "learning_rate": 3.099195710455764e-05, + "loss": 0.0279, + "step": 1468 + }, + { + "epoch": 3.89, + "learning_rate": 3.097855227882038e-05, + "loss": 0.1263, + "step": 1469 + }, + { + "epoch": 3.89, + "learning_rate": 3.096514745308311e-05, + "loss": 0.0449, + "step": 1470 + }, + { + "epoch": 3.89, + "learning_rate": 3.095174262734585e-05, + "loss": 0.2429, + "step": 1471 + }, + { + "epoch": 3.89, + "learning_rate": 3.093833780160858e-05, + "loss": 0.1245, + "step": 1472 + }, + { + "epoch": 3.9, + "learning_rate": 3.0924932975871314e-05, + "loss": 0.1303, + "step": 1473 + }, + { + "epoch": 3.9, + "learning_rate": 3.091152815013405e-05, + "loss": 0.0303, + "step": 1474 + }, + { + "epoch": 3.9, + "learning_rate": 3.0898123324396784e-05, + "loss": 0.3279, + "step": 1475 + }, + { + "epoch": 3.9, + "learning_rate": 3.088471849865952e-05, + "loss": 0.134, + "step": 1476 + }, + { + "epoch": 3.91, + "learning_rate": 3.0871313672922255e-05, + "loss": 0.5138, + "step": 1477 + }, + { + "epoch": 3.91, + "learning_rate": 3.085790884718498e-05, + "loss": 0.0476, + "step": 1478 + }, + { + "epoch": 3.91, + "learning_rate": 3.0844504021447725e-05, + "loss": 0.1956, + "step": 1479 + }, + { + "epoch": 3.92, + "learning_rate": 3.083109919571045e-05, + "loss": 0.2061, + "step": 1480 + }, + { + "epoch": 3.92, + "learning_rate": 3.0817694369973195e-05, + "loss": 0.269, + "step": 1481 + }, + { + "epoch": 3.92, + "learning_rate": 3.0804289544235923e-05, + "loss": 0.0708, + "step": 1482 + }, + { + "epoch": 3.92, + "learning_rate": 3.0790884718498665e-05, + "loss": 0.0389, + "step": 1483 + }, + { + "epoch": 3.93, + "learning_rate": 3.0777479892761394e-05, + "loss": 0.2566, + "step": 1484 + }, + { + "epoch": 3.93, + "learning_rate": 3.076407506702413e-05, + "loss": 0.0581, + "step": 1485 + }, + { + "epoch": 3.93, + "learning_rate": 3.0750670241286864e-05, + "loss": 0.1527, + "step": 1486 + }, + { + "epoch": 3.93, + "learning_rate": 3.07372654155496e-05, + "loss": 0.3963, + "step": 1487 + }, + { + "epoch": 3.94, + "learning_rate": 3.0723860589812334e-05, + "loss": 0.2241, + "step": 1488 + }, + { + "epoch": 3.94, + "learning_rate": 3.071045576407507e-05, + "loss": 0.1275, + "step": 1489 + }, + { + "epoch": 3.94, + "learning_rate": 3.0697050938337804e-05, + "loss": 0.3148, + "step": 1490 + }, + { + "epoch": 3.94, + "learning_rate": 3.068364611260054e-05, + "loss": 0.1474, + "step": 1491 + }, + { + "epoch": 3.95, + "learning_rate": 3.0670241286863275e-05, + "loss": 0.0233, + "step": 1492 + }, + { + "epoch": 3.95, + "learning_rate": 3.065683646112601e-05, + "loss": 0.1721, + "step": 1493 + }, + { + "epoch": 3.95, + "learning_rate": 3.0643431635388745e-05, + "loss": 0.6024, + "step": 1494 + }, + { + "epoch": 3.96, + "learning_rate": 3.063002680965147e-05, + "loss": 0.1425, + "step": 1495 + }, + { + "epoch": 3.96, + "learning_rate": 3.0616621983914215e-05, + "loss": 0.0311, + "step": 1496 + }, + { + "epoch": 3.96, + "learning_rate": 3.0603217158176944e-05, + "loss": 0.0197, + "step": 1497 + }, + { + "epoch": 3.96, + "learning_rate": 3.0589812332439686e-05, + "loss": 0.0406, + "step": 1498 + }, + { + "epoch": 3.97, + "learning_rate": 3.0576407506702414e-05, + "loss": 0.054, + "step": 1499 + }, + { + "epoch": 3.97, + "learning_rate": 3.056300268096515e-05, + "loss": 0.161, + "step": 1500 + }, + { + "epoch": 3.97, + "learning_rate": 3.0549597855227884e-05, + "loss": 0.0549, + "step": 1501 + }, + { + "epoch": 3.97, + "learning_rate": 3.053619302949062e-05, + "loss": 0.1667, + "step": 1502 + }, + { + "epoch": 3.98, + "learning_rate": 3.0522788203753354e-05, + "loss": 0.1264, + "step": 1503 + }, + { + "epoch": 3.98, + "learning_rate": 3.0509383378016086e-05, + "loss": 0.0133, + "step": 1504 + }, + { + "epoch": 3.98, + "learning_rate": 3.049597855227882e-05, + "loss": 0.0655, + "step": 1505 + }, + { + "epoch": 3.98, + "learning_rate": 3.0482573726541556e-05, + "loss": 0.1054, + "step": 1506 + }, + { + "epoch": 3.99, + "learning_rate": 3.046916890080429e-05, + "loss": 0.0053, + "step": 1507 + }, + { + "epoch": 3.99, + "learning_rate": 3.0455764075067027e-05, + "loss": 0.0347, + "step": 1508 + }, + { + "epoch": 3.99, + "learning_rate": 3.0442359249329762e-05, + "loss": 0.6095, + "step": 1509 + }, + { + "epoch": 3.99, + "learning_rate": 3.0428954423592494e-05, + "loss": 0.1339, + "step": 1510 + }, + { + "epoch": 4.0, + "learning_rate": 3.0415549597855232e-05, + "loss": 0.0088, + "step": 1511 + }, + { + "epoch": 4.0, + "learning_rate": 3.0402144772117964e-05, + "loss": 0.4356, + "step": 1512 + }, + { + "epoch": 4.0, + "eval_f1": 0.7822580645161291, + "eval_loss": 0.6966613531112671, + "eval_runtime": 1.8703, + "eval_samples_per_second": 808.957, + "eval_steps_per_second": 50.794, + "step": 1512 + }, + { + "epoch": 4.0, + "learning_rate": 3.0388739946380702e-05, + "loss": 0.003, + "step": 1513 + }, + { + "epoch": 4.01, + "learning_rate": 3.0375335120643434e-05, + "loss": 0.0067, + "step": 1514 + }, + { + "epoch": 4.01, + "learning_rate": 3.0361930294906166e-05, + "loss": 0.0488, + "step": 1515 + }, + { + "epoch": 4.01, + "learning_rate": 3.0348525469168904e-05, + "loss": 0.0106, + "step": 1516 + }, + { + "epoch": 4.01, + "learning_rate": 3.0335120643431636e-05, + "loss": 0.0098, + "step": 1517 + }, + { + "epoch": 4.02, + "learning_rate": 3.0321715817694375e-05, + "loss": 0.274, + "step": 1518 + }, + { + "epoch": 4.02, + "learning_rate": 3.0308310991957106e-05, + "loss": 0.2007, + "step": 1519 + }, + { + "epoch": 4.02, + "learning_rate": 3.0294906166219838e-05, + "loss": 0.0121, + "step": 1520 + }, + { + "epoch": 4.02, + "learning_rate": 3.0281501340482577e-05, + "loss": 0.0632, + "step": 1521 + }, + { + "epoch": 4.03, + "learning_rate": 3.026809651474531e-05, + "loss": 0.0062, + "step": 1522 + }, + { + "epoch": 4.03, + "learning_rate": 3.0254691689008047e-05, + "loss": 0.0123, + "step": 1523 + }, + { + "epoch": 4.03, + "learning_rate": 3.024128686327078e-05, + "loss": 0.0063, + "step": 1524 + }, + { + "epoch": 4.03, + "learning_rate": 3.022788203753351e-05, + "loss": 0.0102, + "step": 1525 + }, + { + "epoch": 4.04, + "learning_rate": 3.021447721179625e-05, + "loss": 0.0082, + "step": 1526 + }, + { + "epoch": 4.04, + "learning_rate": 3.020107238605898e-05, + "loss": 0.3369, + "step": 1527 + }, + { + "epoch": 4.04, + "learning_rate": 3.018766756032172e-05, + "loss": 0.2587, + "step": 1528 + }, + { + "epoch": 4.04, + "learning_rate": 3.017426273458445e-05, + "loss": 0.0067, + "step": 1529 + }, + { + "epoch": 4.05, + "learning_rate": 3.0160857908847186e-05, + "loss": 0.0021, + "step": 1530 + }, + { + "epoch": 4.05, + "learning_rate": 3.014745308310992e-05, + "loss": 0.0724, + "step": 1531 + }, + { + "epoch": 4.05, + "learning_rate": 3.0134048257372656e-05, + "loss": 0.0074, + "step": 1532 + }, + { + "epoch": 4.06, + "learning_rate": 3.012064343163539e-05, + "loss": 0.0202, + "step": 1533 + }, + { + "epoch": 4.06, + "learning_rate": 3.0107238605898126e-05, + "loss": 0.1435, + "step": 1534 + }, + { + "epoch": 4.06, + "learning_rate": 3.0093833780160858e-05, + "loss": 0.0074, + "step": 1535 + }, + { + "epoch": 4.06, + "learning_rate": 3.0080428954423597e-05, + "loss": 0.4145, + "step": 1536 + }, + { + "epoch": 4.07, + "learning_rate": 3.006702412868633e-05, + "loss": 0.0186, + "step": 1537 + }, + { + "epoch": 4.07, + "learning_rate": 3.0053619302949067e-05, + "loss": 0.1648, + "step": 1538 + }, + { + "epoch": 4.07, + "learning_rate": 3.00402144772118e-05, + "loss": 0.2545, + "step": 1539 + }, + { + "epoch": 4.07, + "learning_rate": 3.002680965147453e-05, + "loss": 0.0016, + "step": 1540 + }, + { + "epoch": 4.08, + "learning_rate": 3.001340482573727e-05, + "loss": 0.0184, + "step": 1541 + }, + { + "epoch": 4.08, + "learning_rate": 3e-05, + "loss": 0.1208, + "step": 1542 + }, + { + "epoch": 4.08, + "learning_rate": 2.998659517426274e-05, + "loss": 0.0021, + "step": 1543 + }, + { + "epoch": 4.08, + "learning_rate": 2.997319034852547e-05, + "loss": 0.0092, + "step": 1544 + }, + { + "epoch": 4.09, + "learning_rate": 2.9959785522788203e-05, + "loss": 0.1514, + "step": 1545 + }, + { + "epoch": 4.09, + "learning_rate": 2.994638069705094e-05, + "loss": 0.0773, + "step": 1546 + }, + { + "epoch": 4.09, + "learning_rate": 2.9932975871313673e-05, + "loss": 0.0093, + "step": 1547 + }, + { + "epoch": 4.1, + "learning_rate": 2.991957104557641e-05, + "loss": 0.0022, + "step": 1548 + }, + { + "epoch": 4.1, + "learning_rate": 2.9906166219839143e-05, + "loss": 0.1765, + "step": 1549 + }, + { + "epoch": 4.1, + "learning_rate": 2.9892761394101875e-05, + "loss": 0.1766, + "step": 1550 + }, + { + "epoch": 4.1, + "learning_rate": 2.9879356568364614e-05, + "loss": 0.0024, + "step": 1551 + }, + { + "epoch": 4.11, + "learning_rate": 2.9865951742627345e-05, + "loss": 0.012, + "step": 1552 + }, + { + "epoch": 4.11, + "learning_rate": 2.9852546916890084e-05, + "loss": 0.0055, + "step": 1553 + }, + { + "epoch": 4.11, + "learning_rate": 2.9839142091152816e-05, + "loss": 0.0088, + "step": 1554 + }, + { + "epoch": 4.11, + "learning_rate": 2.9825737265415547e-05, + "loss": 0.0019, + "step": 1555 + }, + { + "epoch": 4.12, + "learning_rate": 2.9812332439678286e-05, + "loss": 0.0186, + "step": 1556 + }, + { + "epoch": 4.12, + "learning_rate": 2.9798927613941018e-05, + "loss": 0.25, + "step": 1557 + }, + { + "epoch": 4.12, + "learning_rate": 2.9785522788203756e-05, + "loss": 0.0129, + "step": 1558 + }, + { + "epoch": 4.12, + "learning_rate": 2.9772117962466488e-05, + "loss": 0.0048, + "step": 1559 + }, + { + "epoch": 4.13, + "learning_rate": 2.9758713136729223e-05, + "loss": 0.1153, + "step": 1560 + }, + { + "epoch": 4.13, + "learning_rate": 2.9745308310991958e-05, + "loss": 0.1871, + "step": 1561 + }, + { + "epoch": 4.13, + "learning_rate": 2.9731903485254693e-05, + "loss": 0.0087, + "step": 1562 + }, + { + "epoch": 4.13, + "learning_rate": 2.971849865951743e-05, + "loss": 0.0048, + "step": 1563 + }, + { + "epoch": 4.14, + "learning_rate": 2.9705093833780163e-05, + "loss": 0.026, + "step": 1564 + }, + { + "epoch": 4.14, + "learning_rate": 2.9691689008042895e-05, + "loss": 0.3336, + "step": 1565 + }, + { + "epoch": 4.14, + "learning_rate": 2.9678284182305634e-05, + "loss": 0.0015, + "step": 1566 + }, + { + "epoch": 4.15, + "learning_rate": 2.9664879356568365e-05, + "loss": 0.0044, + "step": 1567 + }, + { + "epoch": 4.15, + "learning_rate": 2.9651474530831104e-05, + "loss": 0.0035, + "step": 1568 + }, + { + "epoch": 4.15, + "learning_rate": 2.9638069705093836e-05, + "loss": 0.1206, + "step": 1569 + }, + { + "epoch": 4.15, + "learning_rate": 2.9624664879356567e-05, + "loss": 0.1247, + "step": 1570 + }, + { + "epoch": 4.16, + "learning_rate": 2.9611260053619306e-05, + "loss": 0.0011, + "step": 1571 + }, + { + "epoch": 4.16, + "learning_rate": 2.9597855227882038e-05, + "loss": 0.0023, + "step": 1572 + }, + { + "epoch": 4.16, + "learning_rate": 2.9584450402144776e-05, + "loss": 0.0014, + "step": 1573 + }, + { + "epoch": 4.16, + "learning_rate": 2.9571045576407508e-05, + "loss": 0.2967, + "step": 1574 + }, + { + "epoch": 4.17, + "learning_rate": 2.955764075067024e-05, + "loss": 0.0373, + "step": 1575 + }, + { + "epoch": 4.17, + "learning_rate": 2.9544235924932978e-05, + "loss": 0.3351, + "step": 1576 + }, + { + "epoch": 4.17, + "learning_rate": 2.953083109919571e-05, + "loss": 0.0025, + "step": 1577 + }, + { + "epoch": 4.17, + "learning_rate": 2.951742627345845e-05, + "loss": 0.0025, + "step": 1578 + }, + { + "epoch": 4.18, + "learning_rate": 2.950402144772118e-05, + "loss": 0.0182, + "step": 1579 + }, + { + "epoch": 4.18, + "learning_rate": 2.9490616621983912e-05, + "loss": 0.001, + "step": 1580 + }, + { + "epoch": 4.18, + "learning_rate": 2.947721179624665e-05, + "loss": 0.003, + "step": 1581 + }, + { + "epoch": 4.19, + "learning_rate": 2.9463806970509382e-05, + "loss": 0.0038, + "step": 1582 + }, + { + "epoch": 4.19, + "learning_rate": 2.945040214477212e-05, + "loss": 0.002, + "step": 1583 + }, + { + "epoch": 4.19, + "learning_rate": 2.9436997319034853e-05, + "loss": 0.1688, + "step": 1584 + }, + { + "epoch": 4.19, + "learning_rate": 2.9423592493297584e-05, + "loss": 0.0014, + "step": 1585 + }, + { + "epoch": 4.2, + "learning_rate": 2.9410187667560323e-05, + "loss": 0.2664, + "step": 1586 + }, + { + "epoch": 4.2, + "learning_rate": 2.9396782841823055e-05, + "loss": 0.0012, + "step": 1587 + }, + { + "epoch": 4.2, + "learning_rate": 2.9383378016085793e-05, + "loss": 0.0022, + "step": 1588 + }, + { + "epoch": 4.2, + "learning_rate": 2.9369973190348525e-05, + "loss": 0.0959, + "step": 1589 + }, + { + "epoch": 4.21, + "learning_rate": 2.935656836461126e-05, + "loss": 0.0839, + "step": 1590 + }, + { + "epoch": 4.21, + "learning_rate": 2.9343163538873995e-05, + "loss": 0.7405, + "step": 1591 + }, + { + "epoch": 4.21, + "learning_rate": 2.932975871313673e-05, + "loss": 0.0351, + "step": 1592 + }, + { + "epoch": 4.21, + "learning_rate": 2.9316353887399465e-05, + "loss": 0.0025, + "step": 1593 + }, + { + "epoch": 4.22, + "learning_rate": 2.93029490616622e-05, + "loss": 0.0054, + "step": 1594 + }, + { + "epoch": 4.22, + "learning_rate": 2.9289544235924932e-05, + "loss": 0.0043, + "step": 1595 + }, + { + "epoch": 4.22, + "learning_rate": 2.927613941018767e-05, + "loss": 0.1828, + "step": 1596 + }, + { + "epoch": 4.22, + "learning_rate": 2.9262734584450402e-05, + "loss": 0.0022, + "step": 1597 + }, + { + "epoch": 4.23, + "learning_rate": 2.924932975871314e-05, + "loss": 0.0051, + "step": 1598 + }, + { + "epoch": 4.23, + "learning_rate": 2.9235924932975873e-05, + "loss": 0.0025, + "step": 1599 + }, + { + "epoch": 4.23, + "learning_rate": 2.9222520107238604e-05, + "loss": 0.0018, + "step": 1600 + }, + { + "epoch": 4.24, + "learning_rate": 2.9209115281501343e-05, + "loss": 0.0348, + "step": 1601 + }, + { + "epoch": 4.24, + "learning_rate": 2.9195710455764075e-05, + "loss": 0.207, + "step": 1602 + }, + { + "epoch": 4.24, + "learning_rate": 2.9182305630026813e-05, + "loss": 0.0249, + "step": 1603 + }, + { + "epoch": 4.24, + "learning_rate": 2.9168900804289545e-05, + "loss": 0.0028, + "step": 1604 + }, + { + "epoch": 4.25, + "learning_rate": 2.9155495978552283e-05, + "loss": 0.2604, + "step": 1605 + }, + { + "epoch": 4.25, + "learning_rate": 2.9142091152815015e-05, + "loss": 0.2808, + "step": 1606 + }, + { + "epoch": 4.25, + "learning_rate": 2.9128686327077747e-05, + "loss": 0.0289, + "step": 1607 + }, + { + "epoch": 4.25, + "learning_rate": 2.9115281501340486e-05, + "loss": 0.005, + "step": 1608 + }, + { + "epoch": 4.26, + "learning_rate": 2.9101876675603217e-05, + "loss": 0.7931, + "step": 1609 + }, + { + "epoch": 4.26, + "learning_rate": 2.9088471849865956e-05, + "loss": 0.335, + "step": 1610 + }, + { + "epoch": 4.26, + "learning_rate": 2.9075067024128688e-05, + "loss": 0.2779, + "step": 1611 + }, + { + "epoch": 4.26, + "learning_rate": 2.906166219839142e-05, + "loss": 0.1649, + "step": 1612 + }, + { + "epoch": 4.27, + "learning_rate": 2.9048257372654158e-05, + "loss": 0.0081, + "step": 1613 + }, + { + "epoch": 4.27, + "learning_rate": 2.903485254691689e-05, + "loss": 0.0638, + "step": 1614 + }, + { + "epoch": 4.27, + "learning_rate": 2.9021447721179628e-05, + "loss": 0.016, + "step": 1615 + }, + { + "epoch": 4.28, + "learning_rate": 2.900804289544236e-05, + "loss": 0.0025, + "step": 1616 + }, + { + "epoch": 4.28, + "learning_rate": 2.8994638069705095e-05, + "loss": 0.0249, + "step": 1617 + }, + { + "epoch": 4.28, + "learning_rate": 2.898123324396783e-05, + "loss": 0.0291, + "step": 1618 + }, + { + "epoch": 4.28, + "learning_rate": 2.8967828418230565e-05, + "loss": 0.1773, + "step": 1619 + }, + { + "epoch": 4.29, + "learning_rate": 2.89544235924933e-05, + "loss": 0.3452, + "step": 1620 + }, + { + "epoch": 4.29, + "learning_rate": 2.8941018766756035e-05, + "loss": 0.006, + "step": 1621 + }, + { + "epoch": 4.29, + "learning_rate": 2.8927613941018767e-05, + "loss": 0.0054, + "step": 1622 + }, + { + "epoch": 4.29, + "learning_rate": 2.8914209115281506e-05, + "loss": 0.1852, + "step": 1623 + }, + { + "epoch": 4.3, + "learning_rate": 2.8900804289544237e-05, + "loss": 0.4424, + "step": 1624 + }, + { + "epoch": 4.3, + "learning_rate": 2.8887399463806976e-05, + "loss": 0.0063, + "step": 1625 + }, + { + "epoch": 4.3, + "learning_rate": 2.8873994638069708e-05, + "loss": 0.43, + "step": 1626 + }, + { + "epoch": 4.3, + "learning_rate": 2.886058981233244e-05, + "loss": 0.2283, + "step": 1627 + }, + { + "epoch": 4.31, + "learning_rate": 2.8847184986595178e-05, + "loss": 0.0519, + "step": 1628 + }, + { + "epoch": 4.31, + "learning_rate": 2.883378016085791e-05, + "loss": 0.1797, + "step": 1629 + }, + { + "epoch": 4.31, + "learning_rate": 2.8820375335120648e-05, + "loss": 0.2569, + "step": 1630 + }, + { + "epoch": 4.31, + "learning_rate": 2.880697050938338e-05, + "loss": 0.0024, + "step": 1631 + }, + { + "epoch": 4.32, + "learning_rate": 2.8793565683646112e-05, + "loss": 0.1727, + "step": 1632 + }, + { + "epoch": 4.32, + "learning_rate": 2.878016085790885e-05, + "loss": 0.0091, + "step": 1633 + }, + { + "epoch": 4.32, + "learning_rate": 2.8766756032171582e-05, + "loss": 0.2002, + "step": 1634 + }, + { + "epoch": 4.33, + "learning_rate": 2.875335120643432e-05, + "loss": 0.0217, + "step": 1635 + }, + { + "epoch": 4.33, + "learning_rate": 2.8739946380697052e-05, + "loss": 0.2163, + "step": 1636 + }, + { + "epoch": 4.33, + "learning_rate": 2.8726541554959784e-05, + "loss": 0.0065, + "step": 1637 + }, + { + "epoch": 4.33, + "learning_rate": 2.8713136729222522e-05, + "loss": 0.1567, + "step": 1638 + }, + { + "epoch": 4.34, + "learning_rate": 2.8699731903485254e-05, + "loss": 0.1775, + "step": 1639 + }, + { + "epoch": 4.34, + "learning_rate": 2.8686327077747993e-05, + "loss": 0.0116, + "step": 1640 + }, + { + "epoch": 4.34, + "learning_rate": 2.8672922252010724e-05, + "loss": 0.0114, + "step": 1641 + }, + { + "epoch": 4.34, + "learning_rate": 2.8659517426273456e-05, + "loss": 0.0264, + "step": 1642 + }, + { + "epoch": 4.35, + "learning_rate": 2.8646112600536195e-05, + "loss": 0.0172, + "step": 1643 + }, + { + "epoch": 4.35, + "learning_rate": 2.8632707774798926e-05, + "loss": 0.187, + "step": 1644 + }, + { + "epoch": 4.35, + "learning_rate": 2.8619302949061665e-05, + "loss": 0.009, + "step": 1645 + }, + { + "epoch": 4.35, + "learning_rate": 2.8605898123324397e-05, + "loss": 0.014, + "step": 1646 + }, + { + "epoch": 4.36, + "learning_rate": 2.8592493297587132e-05, + "loss": 0.1643, + "step": 1647 + }, + { + "epoch": 4.36, + "learning_rate": 2.8579088471849867e-05, + "loss": 0.2763, + "step": 1648 + }, + { + "epoch": 4.36, + "learning_rate": 2.8565683646112602e-05, + "loss": 0.0641, + "step": 1649 + }, + { + "epoch": 4.37, + "learning_rate": 2.8552278820375337e-05, + "loss": 0.6128, + "step": 1650 + }, + { + "epoch": 4.37, + "learning_rate": 2.8538873994638072e-05, + "loss": 0.0229, + "step": 1651 + }, + { + "epoch": 4.37, + "learning_rate": 2.8525469168900804e-05, + "loss": 0.0344, + "step": 1652 + }, + { + "epoch": 4.37, + "learning_rate": 2.8512064343163543e-05, + "loss": 0.018, + "step": 1653 + }, + { + "epoch": 4.38, + "learning_rate": 2.8498659517426274e-05, + "loss": 0.191, + "step": 1654 + }, + { + "epoch": 4.38, + "learning_rate": 2.8485254691689013e-05, + "loss": 0.0397, + "step": 1655 + }, + { + "epoch": 4.38, + "learning_rate": 2.8471849865951745e-05, + "loss": 0.0029, + "step": 1656 + }, + { + "epoch": 4.38, + "learning_rate": 2.8458445040214476e-05, + "loss": 0.0034, + "step": 1657 + }, + { + "epoch": 4.39, + "learning_rate": 2.8445040214477215e-05, + "loss": 0.0031, + "step": 1658 + }, + { + "epoch": 4.39, + "learning_rate": 2.8431635388739947e-05, + "loss": 0.4272, + "step": 1659 + }, + { + "epoch": 4.39, + "learning_rate": 2.8418230563002685e-05, + "loss": 0.0042, + "step": 1660 + }, + { + "epoch": 4.39, + "learning_rate": 2.8404825737265417e-05, + "loss": 0.0224, + "step": 1661 + }, + { + "epoch": 4.4, + "learning_rate": 2.839142091152815e-05, + "loss": 0.1021, + "step": 1662 + }, + { + "epoch": 4.4, + "learning_rate": 2.8378016085790887e-05, + "loss": 0.0076, + "step": 1663 + }, + { + "epoch": 4.4, + "learning_rate": 2.836461126005362e-05, + "loss": 0.084, + "step": 1664 + }, + { + "epoch": 4.4, + "learning_rate": 2.8351206434316357e-05, + "loss": 0.0321, + "step": 1665 + }, + { + "epoch": 4.41, + "learning_rate": 2.833780160857909e-05, + "loss": 0.1369, + "step": 1666 + }, + { + "epoch": 4.41, + "learning_rate": 2.832439678284182e-05, + "loss": 0.018, + "step": 1667 + }, + { + "epoch": 4.41, + "learning_rate": 2.831099195710456e-05, + "loss": 0.1886, + "step": 1668 + }, + { + "epoch": 4.42, + "learning_rate": 2.829758713136729e-05, + "loss": 0.0016, + "step": 1669 + }, + { + "epoch": 4.42, + "learning_rate": 2.828418230563003e-05, + "loss": 0.0031, + "step": 1670 + }, + { + "epoch": 4.42, + "learning_rate": 2.827077747989276e-05, + "loss": 0.0043, + "step": 1671 + }, + { + "epoch": 4.42, + "learning_rate": 2.8257372654155497e-05, + "loss": 0.1202, + "step": 1672 + }, + { + "epoch": 4.43, + "learning_rate": 2.8243967828418232e-05, + "loss": 0.1409, + "step": 1673 + }, + { + "epoch": 4.43, + "learning_rate": 2.8230563002680967e-05, + "loss": 0.0821, + "step": 1674 + }, + { + "epoch": 4.43, + "learning_rate": 2.8217158176943702e-05, + "loss": 0.0468, + "step": 1675 + }, + { + "epoch": 4.43, + "learning_rate": 2.8203753351206437e-05, + "loss": 0.0559, + "step": 1676 + }, + { + "epoch": 4.44, + "learning_rate": 2.819034852546917e-05, + "loss": 0.0192, + "step": 1677 + }, + { + "epoch": 4.44, + "learning_rate": 2.8176943699731907e-05, + "loss": 0.0024, + "step": 1678 + }, + { + "epoch": 4.44, + "learning_rate": 2.816353887399464e-05, + "loss": 0.0021, + "step": 1679 + }, + { + "epoch": 4.44, + "learning_rate": 2.8150134048257378e-05, + "loss": 0.0139, + "step": 1680 + }, + { + "epoch": 4.45, + "learning_rate": 2.813672922252011e-05, + "loss": 0.0042, + "step": 1681 + }, + { + "epoch": 4.45, + "learning_rate": 2.812332439678284e-05, + "loss": 0.1666, + "step": 1682 + }, + { + "epoch": 4.45, + "learning_rate": 2.810991957104558e-05, + "loss": 0.5925, + "step": 1683 + }, + { + "epoch": 4.46, + "learning_rate": 2.809651474530831e-05, + "loss": 0.1689, + "step": 1684 + }, + { + "epoch": 4.46, + "learning_rate": 2.808310991957105e-05, + "loss": 0.0053, + "step": 1685 + }, + { + "epoch": 4.46, + "learning_rate": 2.806970509383378e-05, + "loss": 0.0019, + "step": 1686 + }, + { + "epoch": 4.46, + "learning_rate": 2.8056300268096513e-05, + "loss": 0.0632, + "step": 1687 + }, + { + "epoch": 4.47, + "learning_rate": 2.8042895442359252e-05, + "loss": 0.0115, + "step": 1688 + }, + { + "epoch": 4.47, + "learning_rate": 2.8029490616621984e-05, + "loss": 0.002, + "step": 1689 + }, + { + "epoch": 4.47, + "learning_rate": 2.8016085790884722e-05, + "loss": 0.0021, + "step": 1690 + }, + { + "epoch": 4.47, + "learning_rate": 2.8002680965147454e-05, + "loss": 0.0079, + "step": 1691 + }, + { + "epoch": 4.48, + "learning_rate": 2.7989276139410186e-05, + "loss": 0.0016, + "step": 1692 + }, + { + "epoch": 4.48, + "learning_rate": 2.7975871313672924e-05, + "loss": 0.1824, + "step": 1693 + }, + { + "epoch": 4.48, + "learning_rate": 2.7962466487935656e-05, + "loss": 0.1025, + "step": 1694 + }, + { + "epoch": 4.48, + "learning_rate": 2.7949061662198394e-05, + "loss": 0.4274, + "step": 1695 + }, + { + "epoch": 4.49, + "learning_rate": 2.7935656836461126e-05, + "loss": 0.0834, + "step": 1696 + }, + { + "epoch": 4.49, + "learning_rate": 2.7922252010723858e-05, + "loss": 0.6412, + "step": 1697 + }, + { + "epoch": 4.49, + "learning_rate": 2.7908847184986596e-05, + "loss": 0.3051, + "step": 1698 + }, + { + "epoch": 4.49, + "learning_rate": 2.7895442359249328e-05, + "loss": 0.0909, + "step": 1699 + }, + { + "epoch": 4.5, + "learning_rate": 2.7882037533512067e-05, + "loss": 0.2655, + "step": 1700 + }, + { + "epoch": 4.5, + "learning_rate": 2.78686327077748e-05, + "loss": 0.305, + "step": 1701 + }, + { + "epoch": 4.5, + "learning_rate": 2.7855227882037534e-05, + "loss": 0.2733, + "step": 1702 + }, + { + "epoch": 4.51, + "learning_rate": 2.784182305630027e-05, + "loss": 0.0021, + "step": 1703 + }, + { + "epoch": 4.51, + "learning_rate": 2.7828418230563004e-05, + "loss": 0.0072, + "step": 1704 + }, + { + "epoch": 4.51, + "learning_rate": 2.781501340482574e-05, + "loss": 0.0027, + "step": 1705 + }, + { + "epoch": 4.51, + "learning_rate": 2.7801608579088474e-05, + "loss": 0.184, + "step": 1706 + }, + { + "epoch": 4.52, + "learning_rate": 2.7788203753351206e-05, + "loss": 0.0143, + "step": 1707 + }, + { + "epoch": 4.52, + "learning_rate": 2.7774798927613944e-05, + "loss": 0.0297, + "step": 1708 + }, + { + "epoch": 4.52, + "learning_rate": 2.7761394101876676e-05, + "loss": 0.0739, + "step": 1709 + }, + { + "epoch": 4.52, + "learning_rate": 2.7747989276139415e-05, + "loss": 0.0188, + "step": 1710 + }, + { + "epoch": 4.53, + "learning_rate": 2.7734584450402146e-05, + "loss": 0.2487, + "step": 1711 + }, + { + "epoch": 4.53, + "learning_rate": 2.7721179624664878e-05, + "loss": 0.0222, + "step": 1712 + }, + { + "epoch": 4.53, + "learning_rate": 2.7707774798927617e-05, + "loss": 0.0041, + "step": 1713 + }, + { + "epoch": 4.53, + "learning_rate": 2.769436997319035e-05, + "loss": 0.0164, + "step": 1714 + }, + { + "epoch": 4.54, + "learning_rate": 2.7680965147453087e-05, + "loss": 0.0985, + "step": 1715 + }, + { + "epoch": 4.54, + "learning_rate": 2.766756032171582e-05, + "loss": 0.0067, + "step": 1716 + }, + { + "epoch": 4.54, + "learning_rate": 2.765415549597855e-05, + "loss": 0.3304, + "step": 1717 + }, + { + "epoch": 4.54, + "learning_rate": 2.764075067024129e-05, + "loss": 0.006, + "step": 1718 + }, + { + "epoch": 4.55, + "learning_rate": 2.762734584450402e-05, + "loss": 0.0142, + "step": 1719 + }, + { + "epoch": 4.55, + "learning_rate": 2.761394101876676e-05, + "loss": 0.2205, + "step": 1720 + }, + { + "epoch": 4.55, + "learning_rate": 2.760053619302949e-05, + "loss": 0.298, + "step": 1721 + }, + { + "epoch": 4.56, + "learning_rate": 2.7587131367292223e-05, + "loss": 0.0041, + "step": 1722 + }, + { + "epoch": 4.56, + "learning_rate": 2.757372654155496e-05, + "loss": 0.0018, + "step": 1723 + }, + { + "epoch": 4.56, + "learning_rate": 2.7560321715817693e-05, + "loss": 0.0185, + "step": 1724 + }, + { + "epoch": 4.56, + "learning_rate": 2.754691689008043e-05, + "loss": 0.0042, + "step": 1725 + }, + { + "epoch": 4.57, + "learning_rate": 2.7533512064343163e-05, + "loss": 0.036, + "step": 1726 + }, + { + "epoch": 4.57, + "learning_rate": 2.7520107238605898e-05, + "loss": 0.2593, + "step": 1727 + }, + { + "epoch": 4.57, + "learning_rate": 2.7506702412868633e-05, + "loss": 0.0062, + "step": 1728 + }, + { + "epoch": 4.57, + "learning_rate": 2.749329758713137e-05, + "loss": 0.1759, + "step": 1729 + }, + { + "epoch": 4.58, + "learning_rate": 2.7479892761394104e-05, + "loss": 0.0202, + "step": 1730 + }, + { + "epoch": 4.58, + "learning_rate": 2.746648793565684e-05, + "loss": 0.2156, + "step": 1731 + }, + { + "epoch": 4.58, + "learning_rate": 2.7453083109919574e-05, + "loss": 0.4112, + "step": 1732 + }, + { + "epoch": 4.58, + "learning_rate": 2.743967828418231e-05, + "loss": 0.0037, + "step": 1733 + }, + { + "epoch": 4.59, + "learning_rate": 2.742627345844504e-05, + "loss": 0.0186, + "step": 1734 + }, + { + "epoch": 4.59, + "learning_rate": 2.741286863270778e-05, + "loss": 0.0117, + "step": 1735 + }, + { + "epoch": 4.59, + "learning_rate": 2.739946380697051e-05, + "loss": 0.0039, + "step": 1736 + }, + { + "epoch": 4.6, + "learning_rate": 2.738605898123325e-05, + "loss": 0.1185, + "step": 1737 + }, + { + "epoch": 4.6, + "learning_rate": 2.737265415549598e-05, + "loss": 0.0276, + "step": 1738 + }, + { + "epoch": 4.6, + "learning_rate": 2.7359249329758713e-05, + "loss": 0.0041, + "step": 1739 + }, + { + "epoch": 4.6, + "learning_rate": 2.734584450402145e-05, + "loss": 0.0133, + "step": 1740 + }, + { + "epoch": 4.61, + "learning_rate": 2.7332439678284183e-05, + "loss": 0.1042, + "step": 1741 + }, + { + "epoch": 4.61, + "learning_rate": 2.7319034852546922e-05, + "loss": 0.0023, + "step": 1742 + }, + { + "epoch": 4.61, + "learning_rate": 2.7305630026809654e-05, + "loss": 0.1586, + "step": 1743 + }, + { + "epoch": 4.61, + "learning_rate": 2.7292225201072385e-05, + "loss": 0.0258, + "step": 1744 + }, + { + "epoch": 4.62, + "learning_rate": 2.7278820375335124e-05, + "loss": 0.1119, + "step": 1745 + }, + { + "epoch": 4.62, + "learning_rate": 2.7265415549597856e-05, + "loss": 0.1115, + "step": 1746 + }, + { + "epoch": 4.62, + "learning_rate": 2.7252010723860594e-05, + "loss": 0.4607, + "step": 1747 + }, + { + "epoch": 4.62, + "learning_rate": 2.7238605898123326e-05, + "loss": 0.0296, + "step": 1748 + }, + { + "epoch": 4.63, + "learning_rate": 2.7225201072386058e-05, + "loss": 0.0277, + "step": 1749 + }, + { + "epoch": 4.63, + "learning_rate": 2.7211796246648796e-05, + "loss": 0.0777, + "step": 1750 + }, + { + "epoch": 4.63, + "learning_rate": 2.7198391420911528e-05, + "loss": 0.0031, + "step": 1751 + }, + { + "epoch": 4.63, + "learning_rate": 2.7184986595174266e-05, + "loss": 0.2238, + "step": 1752 + }, + { + "epoch": 4.64, + "learning_rate": 2.7171581769436998e-05, + "loss": 0.0409, + "step": 1753 + }, + { + "epoch": 4.64, + "learning_rate": 2.715817694369973e-05, + "loss": 0.0032, + "step": 1754 + }, + { + "epoch": 4.64, + "learning_rate": 2.714477211796247e-05, + "loss": 0.0113, + "step": 1755 + }, + { + "epoch": 4.65, + "learning_rate": 2.71313672922252e-05, + "loss": 0.0204, + "step": 1756 + }, + { + "epoch": 4.65, + "learning_rate": 2.711796246648794e-05, + "loss": 0.0022, + "step": 1757 + }, + { + "epoch": 4.65, + "learning_rate": 2.710455764075067e-05, + "loss": 0.0018, + "step": 1758 + }, + { + "epoch": 4.65, + "learning_rate": 2.7091152815013406e-05, + "loss": 0.263, + "step": 1759 + }, + { + "epoch": 4.66, + "learning_rate": 2.707774798927614e-05, + "loss": 0.0109, + "step": 1760 + }, + { + "epoch": 4.66, + "learning_rate": 2.7064343163538876e-05, + "loss": 0.0653, + "step": 1761 + }, + { + "epoch": 4.66, + "learning_rate": 2.705093833780161e-05, + "loss": 0.0116, + "step": 1762 + }, + { + "epoch": 4.66, + "learning_rate": 2.7037533512064346e-05, + "loss": 0.0063, + "step": 1763 + }, + { + "epoch": 4.67, + "learning_rate": 2.7024128686327078e-05, + "loss": 0.0034, + "step": 1764 + }, + { + "epoch": 4.67, + "learning_rate": 2.7010723860589816e-05, + "loss": 0.0395, + "step": 1765 + }, + { + "epoch": 4.67, + "learning_rate": 2.6997319034852548e-05, + "loss": 0.0014, + "step": 1766 + }, + { + "epoch": 4.67, + "learning_rate": 2.6983914209115287e-05, + "loss": 0.0057, + "step": 1767 + }, + { + "epoch": 4.68, + "learning_rate": 2.697050938337802e-05, + "loss": 0.0018, + "step": 1768 + }, + { + "epoch": 4.68, + "learning_rate": 2.695710455764075e-05, + "loss": 0.012, + "step": 1769 + }, + { + "epoch": 4.68, + "learning_rate": 2.694369973190349e-05, + "loss": 0.0017, + "step": 1770 + }, + { + "epoch": 4.69, + "learning_rate": 2.693029490616622e-05, + "loss": 0.0654, + "step": 1771 + }, + { + "epoch": 4.69, + "learning_rate": 2.691689008042896e-05, + "loss": 0.8002, + "step": 1772 + }, + { + "epoch": 4.69, + "learning_rate": 2.690348525469169e-05, + "loss": 0.0035, + "step": 1773 + }, + { + "epoch": 4.69, + "learning_rate": 2.6890080428954422e-05, + "loss": 0.0051, + "step": 1774 + }, + { + "epoch": 4.7, + "learning_rate": 2.687667560321716e-05, + "loss": 0.0031, + "step": 1775 + }, + { + "epoch": 4.7, + "learning_rate": 2.6863270777479893e-05, + "loss": 0.0142, + "step": 1776 + }, + { + "epoch": 4.7, + "learning_rate": 2.684986595174263e-05, + "loss": 0.0009, + "step": 1777 + }, + { + "epoch": 4.7, + "learning_rate": 2.6836461126005363e-05, + "loss": 0.0015, + "step": 1778 + }, + { + "epoch": 4.71, + "learning_rate": 2.6823056300268095e-05, + "loss": 0.3481, + "step": 1779 + }, + { + "epoch": 4.71, + "learning_rate": 2.6809651474530833e-05, + "loss": 0.3095, + "step": 1780 + }, + { + "epoch": 4.71, + "learning_rate": 2.6796246648793565e-05, + "loss": 0.2567, + "step": 1781 + }, + { + "epoch": 4.71, + "learning_rate": 2.6782841823056303e-05, + "loss": 0.0037, + "step": 1782 + }, + { + "epoch": 4.72, + "learning_rate": 2.6769436997319035e-05, + "loss": 0.001, + "step": 1783 + }, + { + "epoch": 4.72, + "learning_rate": 2.675603217158177e-05, + "loss": 0.0065, + "step": 1784 + }, + { + "epoch": 4.72, + "learning_rate": 2.6742627345844505e-05, + "loss": 0.0029, + "step": 1785 + }, + { + "epoch": 4.72, + "learning_rate": 2.672922252010724e-05, + "loss": 0.6096, + "step": 1786 + }, + { + "epoch": 4.73, + "learning_rate": 2.6715817694369976e-05, + "loss": 0.0127, + "step": 1787 + }, + { + "epoch": 4.73, + "learning_rate": 2.670241286863271e-05, + "loss": 0.0031, + "step": 1788 + }, + { + "epoch": 4.73, + "learning_rate": 2.6689008042895443e-05, + "loss": 0.2463, + "step": 1789 + }, + { + "epoch": 4.74, + "learning_rate": 2.667560321715818e-05, + "loss": 0.1022, + "step": 1790 + }, + { + "epoch": 4.74, + "learning_rate": 2.6662198391420913e-05, + "loss": 0.002, + "step": 1791 + }, + { + "epoch": 4.74, + "learning_rate": 2.664879356568365e-05, + "loss": 0.1576, + "step": 1792 + }, + { + "epoch": 4.74, + "learning_rate": 2.6635388739946383e-05, + "loss": 0.1099, + "step": 1793 + }, + { + "epoch": 4.75, + "learning_rate": 2.6621983914209115e-05, + "loss": 0.1482, + "step": 1794 + }, + { + "epoch": 4.75, + "learning_rate": 2.6608579088471853e-05, + "loss": 0.0007, + "step": 1795 + }, + { + "epoch": 4.75, + "learning_rate": 2.6595174262734585e-05, + "loss": 0.0009, + "step": 1796 + }, + { + "epoch": 4.75, + "learning_rate": 2.6581769436997324e-05, + "loss": 0.005, + "step": 1797 + }, + { + "epoch": 4.76, + "learning_rate": 2.6568364611260055e-05, + "loss": 0.1808, + "step": 1798 + }, + { + "epoch": 4.76, + "learning_rate": 2.6554959785522787e-05, + "loss": 0.0351, + "step": 1799 + }, + { + "epoch": 4.76, + "learning_rate": 2.6541554959785526e-05, + "loss": 0.2555, + "step": 1800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6528150134048257e-05, + "loss": 0.2236, + "step": 1801 + }, + { + "epoch": 4.77, + "learning_rate": 2.6514745308310996e-05, + "loss": 0.3208, + "step": 1802 + }, + { + "epoch": 4.77, + "learning_rate": 2.6501340482573728e-05, + "loss": 0.0202, + "step": 1803 + }, + { + "epoch": 4.77, + "learning_rate": 2.648793565683646e-05, + "loss": 0.0033, + "step": 1804 + }, + { + "epoch": 4.78, + "learning_rate": 2.6474530831099198e-05, + "loss": 0.001, + "step": 1805 + }, + { + "epoch": 4.78, + "learning_rate": 2.646112600536193e-05, + "loss": 0.0019, + "step": 1806 + }, + { + "epoch": 4.78, + "learning_rate": 2.6447721179624668e-05, + "loss": 0.0027, + "step": 1807 + }, + { + "epoch": 4.78, + "learning_rate": 2.64343163538874e-05, + "loss": 0.0051, + "step": 1808 + }, + { + "epoch": 4.79, + "learning_rate": 2.642091152815013e-05, + "loss": 0.1994, + "step": 1809 + }, + { + "epoch": 4.79, + "learning_rate": 2.640750670241287e-05, + "loss": 0.0372, + "step": 1810 + }, + { + "epoch": 4.79, + "learning_rate": 2.6394101876675602e-05, + "loss": 0.0678, + "step": 1811 + }, + { + "epoch": 4.79, + "learning_rate": 2.638069705093834e-05, + "loss": 0.0252, + "step": 1812 + }, + { + "epoch": 4.8, + "learning_rate": 2.6367292225201072e-05, + "loss": 0.0065, + "step": 1813 + }, + { + "epoch": 4.8, + "learning_rate": 2.6353887399463807e-05, + "loss": 0.0045, + "step": 1814 + }, + { + "epoch": 4.8, + "learning_rate": 2.6340482573726542e-05, + "loss": 0.0037, + "step": 1815 + }, + { + "epoch": 4.8, + "learning_rate": 2.6327077747989277e-05, + "loss": 0.0251, + "step": 1816 + }, + { + "epoch": 4.81, + "learning_rate": 2.6313672922252013e-05, + "loss": 0.4196, + "step": 1817 + }, + { + "epoch": 4.81, + "learning_rate": 2.6300268096514748e-05, + "loss": 0.0071, + "step": 1818 + }, + { + "epoch": 4.81, + "learning_rate": 2.628686327077748e-05, + "loss": 0.0787, + "step": 1819 + }, + { + "epoch": 4.81, + "learning_rate": 2.6273458445040218e-05, + "loss": 0.0145, + "step": 1820 + }, + { + "epoch": 4.82, + "learning_rate": 2.626005361930295e-05, + "loss": 0.009, + "step": 1821 + }, + { + "epoch": 4.82, + "learning_rate": 2.6246648793565688e-05, + "loss": 0.0027, + "step": 1822 + }, + { + "epoch": 4.82, + "learning_rate": 2.623324396782842e-05, + "loss": 0.0017, + "step": 1823 + }, + { + "epoch": 4.83, + "learning_rate": 2.6219839142091152e-05, + "loss": 0.4824, + "step": 1824 + }, + { + "epoch": 4.83, + "learning_rate": 2.620643431635389e-05, + "loss": 0.0022, + "step": 1825 + }, + { + "epoch": 4.83, + "learning_rate": 2.6193029490616622e-05, + "loss": 0.3223, + "step": 1826 + }, + { + "epoch": 4.83, + "learning_rate": 2.617962466487936e-05, + "loss": 0.2195, + "step": 1827 + }, + { + "epoch": 4.84, + "learning_rate": 2.6166219839142092e-05, + "loss": 0.0013, + "step": 1828 + }, + { + "epoch": 4.84, + "learning_rate": 2.6152815013404824e-05, + "loss": 0.0343, + "step": 1829 + }, + { + "epoch": 4.84, + "learning_rate": 2.6139410187667563e-05, + "loss": 0.0022, + "step": 1830 + }, + { + "epoch": 4.84, + "learning_rate": 2.6126005361930294e-05, + "loss": 0.0022, + "step": 1831 + }, + { + "epoch": 4.85, + "learning_rate": 2.6112600536193033e-05, + "loss": 0.4116, + "step": 1832 + }, + { + "epoch": 4.85, + "learning_rate": 2.6099195710455765e-05, + "loss": 0.0048, + "step": 1833 + }, + { + "epoch": 4.85, + "learning_rate": 2.6085790884718496e-05, + "loss": 0.5819, + "step": 1834 + }, + { + "epoch": 4.85, + "learning_rate": 2.6072386058981235e-05, + "loss": 0.1985, + "step": 1835 + }, + { + "epoch": 4.86, + "learning_rate": 2.6058981233243967e-05, + "loss": 0.0989, + "step": 1836 + }, + { + "epoch": 4.86, + "learning_rate": 2.6045576407506705e-05, + "loss": 0.341, + "step": 1837 + }, + { + "epoch": 4.86, + "learning_rate": 2.6032171581769437e-05, + "loss": 0.0044, + "step": 1838 + }, + { + "epoch": 4.87, + "learning_rate": 2.601876675603217e-05, + "loss": 0.004, + "step": 1839 + }, + { + "epoch": 4.87, + "learning_rate": 2.6005361930294907e-05, + "loss": 0.2858, + "step": 1840 + }, + { + "epoch": 4.87, + "learning_rate": 2.599195710455764e-05, + "loss": 0.0009, + "step": 1841 + }, + { + "epoch": 4.87, + "learning_rate": 2.5978552278820377e-05, + "loss": 0.0042, + "step": 1842 + }, + { + "epoch": 4.88, + "learning_rate": 2.596514745308311e-05, + "loss": 0.0045, + "step": 1843 + }, + { + "epoch": 4.88, + "learning_rate": 2.5951742627345844e-05, + "loss": 0.0144, + "step": 1844 + }, + { + "epoch": 4.88, + "learning_rate": 2.593833780160858e-05, + "loss": 0.0084, + "step": 1845 + }, + { + "epoch": 4.88, + "learning_rate": 2.5924932975871314e-05, + "loss": 0.4276, + "step": 1846 + }, + { + "epoch": 4.89, + "learning_rate": 2.591152815013405e-05, + "loss": 0.0122, + "step": 1847 + }, + { + "epoch": 4.89, + "learning_rate": 2.5898123324396785e-05, + "loss": 0.0776, + "step": 1848 + }, + { + "epoch": 4.89, + "learning_rate": 2.5884718498659516e-05, + "loss": 0.0117, + "step": 1849 + }, + { + "epoch": 4.89, + "learning_rate": 2.5871313672922255e-05, + "loss": 0.2809, + "step": 1850 + }, + { + "epoch": 4.9, + "learning_rate": 2.5857908847184987e-05, + "loss": 0.0413, + "step": 1851 + }, + { + "epoch": 4.9, + "learning_rate": 2.5844504021447725e-05, + "loss": 0.0187, + "step": 1852 + }, + { + "epoch": 4.9, + "learning_rate": 2.5831099195710457e-05, + "loss": 0.452, + "step": 1853 + }, + { + "epoch": 4.9, + "learning_rate": 2.5817694369973195e-05, + "loss": 0.0206, + "step": 1854 + }, + { + "epoch": 4.91, + "learning_rate": 2.5804289544235927e-05, + "loss": 0.1639, + "step": 1855 + }, + { + "epoch": 4.91, + "learning_rate": 2.579088471849866e-05, + "loss": 0.1865, + "step": 1856 + }, + { + "epoch": 4.91, + "learning_rate": 2.5777479892761398e-05, + "loss": 0.0022, + "step": 1857 + }, + { + "epoch": 4.92, + "learning_rate": 2.576407506702413e-05, + "loss": 0.1167, + "step": 1858 + }, + { + "epoch": 4.92, + "learning_rate": 2.5750670241286868e-05, + "loss": 0.4013, + "step": 1859 + }, + { + "epoch": 4.92, + "learning_rate": 2.57372654155496e-05, + "loss": 0.2355, + "step": 1860 + }, + { + "epoch": 4.92, + "learning_rate": 2.572386058981233e-05, + "loss": 0.0076, + "step": 1861 + }, + { + "epoch": 4.93, + "learning_rate": 2.571045576407507e-05, + "loss": 0.1612, + "step": 1862 + }, + { + "epoch": 4.93, + "learning_rate": 2.56970509383378e-05, + "loss": 0.0047, + "step": 1863 + }, + { + "epoch": 4.93, + "learning_rate": 2.568364611260054e-05, + "loss": 0.1511, + "step": 1864 + }, + { + "epoch": 4.93, + "learning_rate": 2.5670241286863272e-05, + "loss": 0.011, + "step": 1865 + }, + { + "epoch": 4.94, + "learning_rate": 2.5656836461126004e-05, + "loss": 0.1761, + "step": 1866 + }, + { + "epoch": 4.94, + "learning_rate": 2.5643431635388742e-05, + "loss": 0.004, + "step": 1867 + }, + { + "epoch": 4.94, + "learning_rate": 2.5630026809651474e-05, + "loss": 0.0036, + "step": 1868 + }, + { + "epoch": 4.94, + "learning_rate": 2.5616621983914212e-05, + "loss": 0.4345, + "step": 1869 + }, + { + "epoch": 4.95, + "learning_rate": 2.5603217158176944e-05, + "loss": 0.0034, + "step": 1870 + }, + { + "epoch": 4.95, + "learning_rate": 2.558981233243968e-05, + "loss": 0.1269, + "step": 1871 + }, + { + "epoch": 4.95, + "learning_rate": 2.5576407506702414e-05, + "loss": 0.183, + "step": 1872 + }, + { + "epoch": 4.96, + "learning_rate": 2.556300268096515e-05, + "loss": 0.008, + "step": 1873 + }, + { + "epoch": 4.96, + "learning_rate": 2.5549597855227885e-05, + "loss": 0.0035, + "step": 1874 + }, + { + "epoch": 4.96, + "learning_rate": 2.553619302949062e-05, + "loss": 0.0133, + "step": 1875 + }, + { + "epoch": 4.96, + "learning_rate": 2.552278820375335e-05, + "loss": 0.2156, + "step": 1876 + }, + { + "epoch": 4.97, + "learning_rate": 2.550938337801609e-05, + "loss": 0.0043, + "step": 1877 + }, + { + "epoch": 4.97, + "learning_rate": 2.549597855227882e-05, + "loss": 0.2614, + "step": 1878 + }, + { + "epoch": 4.97, + "learning_rate": 2.548257372654156e-05, + "loss": 0.0208, + "step": 1879 + }, + { + "epoch": 4.97, + "learning_rate": 2.5469168900804292e-05, + "loss": 0.0228, + "step": 1880 + }, + { + "epoch": 4.98, + "learning_rate": 2.5455764075067024e-05, + "loss": 0.0105, + "step": 1881 + }, + { + "epoch": 4.98, + "learning_rate": 2.5442359249329762e-05, + "loss": 0.0108, + "step": 1882 + }, + { + "epoch": 4.98, + "learning_rate": 2.5428954423592494e-05, + "loss": 0.3828, + "step": 1883 + }, + { + "epoch": 4.98, + "learning_rate": 2.5415549597855232e-05, + "loss": 0.0093, + "step": 1884 + }, + { + "epoch": 4.99, + "learning_rate": 2.5402144772117964e-05, + "loss": 0.0231, + "step": 1885 + }, + { + "epoch": 4.99, + "learning_rate": 2.5388739946380696e-05, + "loss": 0.0082, + "step": 1886 + }, + { + "epoch": 4.99, + "learning_rate": 2.5375335120643434e-05, + "loss": 0.1796, + "step": 1887 + }, + { + "epoch": 4.99, + "learning_rate": 2.5361930294906166e-05, + "loss": 0.0753, + "step": 1888 + }, + { + "epoch": 5.0, + "learning_rate": 2.5348525469168905e-05, + "loss": 0.0142, + "step": 1889 + }, + { + "epoch": 5.0, + "learning_rate": 2.5335120643431636e-05, + "loss": 0.0047, + "step": 1890 + }, + { + "epoch": 5.0, + "eval_f1": 0.7775974025974025, + "eval_loss": 0.953689694404602, + "eval_runtime": 1.8696, + "eval_samples_per_second": 809.285, + "eval_steps_per_second": 50.814, + "step": 1890 + }, + { + "epoch": 5.0, + "learning_rate": 2.5321715817694368e-05, + "loss": 0.0014, + "step": 1891 + }, + { + "epoch": 5.01, + "learning_rate": 2.5308310991957107e-05, + "loss": 0.0487, + "step": 1892 + }, + { + "epoch": 5.01, + "learning_rate": 2.529490616621984e-05, + "loss": 0.0037, + "step": 1893 + }, + { + "epoch": 5.01, + "learning_rate": 2.5281501340482577e-05, + "loss": 0.0512, + "step": 1894 + }, + { + "epoch": 5.01, + "learning_rate": 2.526809651474531e-05, + "loss": 0.134, + "step": 1895 + }, + { + "epoch": 5.02, + "learning_rate": 2.525469168900804e-05, + "loss": 0.3762, + "step": 1896 + }, + { + "epoch": 5.02, + "learning_rate": 2.524128686327078e-05, + "loss": 0.0011, + "step": 1897 + }, + { + "epoch": 5.02, + "learning_rate": 2.522788203753351e-05, + "loss": 0.0023, + "step": 1898 + }, + { + "epoch": 5.02, + "learning_rate": 2.521447721179625e-05, + "loss": 0.0526, + "step": 1899 + }, + { + "epoch": 5.03, + "learning_rate": 2.520107238605898e-05, + "loss": 0.0553, + "step": 1900 + }, + { + "epoch": 5.03, + "learning_rate": 2.5187667560321716e-05, + "loss": 0.1773, + "step": 1901 + }, + { + "epoch": 5.03, + "learning_rate": 2.517426273458445e-05, + "loss": 0.451, + "step": 1902 + }, + { + "epoch": 5.03, + "learning_rate": 2.5160857908847186e-05, + "loss": 0.0217, + "step": 1903 + }, + { + "epoch": 5.04, + "learning_rate": 2.514745308310992e-05, + "loss": 0.0728, + "step": 1904 + }, + { + "epoch": 5.04, + "learning_rate": 2.5134048257372657e-05, + "loss": 0.0009, + "step": 1905 + }, + { + "epoch": 5.04, + "learning_rate": 2.512064343163539e-05, + "loss": 0.1018, + "step": 1906 + }, + { + "epoch": 5.04, + "learning_rate": 2.5107238605898127e-05, + "loss": 0.0012, + "step": 1907 + }, + { + "epoch": 5.05, + "learning_rate": 2.509383378016086e-05, + "loss": 0.004, + "step": 1908 + }, + { + "epoch": 5.05, + "learning_rate": 2.5080428954423597e-05, + "loss": 0.0012, + "step": 1909 + }, + { + "epoch": 5.05, + "learning_rate": 2.506702412868633e-05, + "loss": 0.0128, + "step": 1910 + }, + { + "epoch": 5.06, + "learning_rate": 2.505361930294906e-05, + "loss": 0.1116, + "step": 1911 + }, + { + "epoch": 5.06, + "learning_rate": 2.50402144772118e-05, + "loss": 0.0011, + "step": 1912 + }, + { + "epoch": 5.06, + "learning_rate": 2.502680965147453e-05, + "loss": 0.0011, + "step": 1913 + }, + { + "epoch": 5.06, + "learning_rate": 2.501340482573727e-05, + "loss": 0.0897, + "step": 1914 + }, + { + "epoch": 5.07, + "learning_rate": 2.5e-05, + "loss": 0.0014, + "step": 1915 + }, + { + "epoch": 5.07, + "learning_rate": 2.4986595174262736e-05, + "loss": 0.0918, + "step": 1916 + }, + { + "epoch": 5.07, + "learning_rate": 2.497319034852547e-05, + "loss": 0.0026, + "step": 1917 + }, + { + "epoch": 5.07, + "learning_rate": 2.4959785522788203e-05, + "loss": 0.0225, + "step": 1918 + }, + { + "epoch": 5.08, + "learning_rate": 2.494638069705094e-05, + "loss": 0.2655, + "step": 1919 + }, + { + "epoch": 5.08, + "learning_rate": 2.4932975871313673e-05, + "loss": 0.0029, + "step": 1920 + }, + { + "epoch": 5.08, + "learning_rate": 2.491957104557641e-05, + "loss": 0.0006, + "step": 1921 + }, + { + "epoch": 5.08, + "learning_rate": 2.4906166219839144e-05, + "loss": 0.0008, + "step": 1922 + }, + { + "epoch": 5.09, + "learning_rate": 2.4892761394101875e-05, + "loss": 0.0012, + "step": 1923 + }, + { + "epoch": 5.09, + "learning_rate": 2.487935656836461e-05, + "loss": 0.0013, + "step": 1924 + }, + { + "epoch": 5.09, + "learning_rate": 2.4865951742627346e-05, + "loss": 0.0524, + "step": 1925 + }, + { + "epoch": 5.1, + "learning_rate": 2.485254691689008e-05, + "loss": 0.0059, + "step": 1926 + }, + { + "epoch": 5.1, + "learning_rate": 2.4839142091152816e-05, + "loss": 0.0026, + "step": 1927 + }, + { + "epoch": 5.1, + "learning_rate": 2.482573726541555e-05, + "loss": 0.0015, + "step": 1928 + }, + { + "epoch": 5.1, + "learning_rate": 2.4812332439678286e-05, + "loss": 0.0073, + "step": 1929 + }, + { + "epoch": 5.11, + "learning_rate": 2.479892761394102e-05, + "loss": 0.0008, + "step": 1930 + }, + { + "epoch": 5.11, + "learning_rate": 2.4785522788203757e-05, + "loss": 0.1519, + "step": 1931 + }, + { + "epoch": 5.11, + "learning_rate": 2.477211796246649e-05, + "loss": 0.008, + "step": 1932 + }, + { + "epoch": 5.11, + "learning_rate": 2.4758713136729223e-05, + "loss": 0.0009, + "step": 1933 + }, + { + "epoch": 5.12, + "learning_rate": 2.474530831099196e-05, + "loss": 0.0299, + "step": 1934 + }, + { + "epoch": 5.12, + "learning_rate": 2.4731903485254694e-05, + "loss": 0.1637, + "step": 1935 + }, + { + "epoch": 5.12, + "learning_rate": 2.471849865951743e-05, + "loss": 0.0369, + "step": 1936 + }, + { + "epoch": 5.12, + "learning_rate": 2.4705093833780164e-05, + "loss": 0.0057, + "step": 1937 + }, + { + "epoch": 5.13, + "learning_rate": 2.4691689008042896e-05, + "loss": 0.0035, + "step": 1938 + }, + { + "epoch": 5.13, + "learning_rate": 2.467828418230563e-05, + "loss": 0.0011, + "step": 1939 + }, + { + "epoch": 5.13, + "learning_rate": 2.4664879356568366e-05, + "loss": 0.0018, + "step": 1940 + }, + { + "epoch": 5.13, + "learning_rate": 2.46514745308311e-05, + "loss": 0.0013, + "step": 1941 + }, + { + "epoch": 5.14, + "learning_rate": 2.4638069705093836e-05, + "loss": 0.0779, + "step": 1942 + }, + { + "epoch": 5.14, + "learning_rate": 2.4624664879356568e-05, + "loss": 0.1762, + "step": 1943 + }, + { + "epoch": 5.14, + "learning_rate": 2.4611260053619303e-05, + "loss": 0.0006, + "step": 1944 + }, + { + "epoch": 5.15, + "learning_rate": 2.4597855227882038e-05, + "loss": 0.0037, + "step": 1945 + }, + { + "epoch": 5.15, + "learning_rate": 2.4584450402144773e-05, + "loss": 0.0005, + "step": 1946 + }, + { + "epoch": 5.15, + "learning_rate": 2.457104557640751e-05, + "loss": 0.0397, + "step": 1947 + }, + { + "epoch": 5.15, + "learning_rate": 2.4557640750670244e-05, + "loss": 0.0082, + "step": 1948 + }, + { + "epoch": 5.16, + "learning_rate": 2.4544235924932975e-05, + "loss": 0.0008, + "step": 1949 + }, + { + "epoch": 5.16, + "learning_rate": 2.453083109919571e-05, + "loss": 0.0219, + "step": 1950 + }, + { + "epoch": 5.16, + "learning_rate": 2.4517426273458446e-05, + "loss": 0.3966, + "step": 1951 + }, + { + "epoch": 5.16, + "learning_rate": 2.450402144772118e-05, + "loss": 0.0011, + "step": 1952 + }, + { + "epoch": 5.17, + "learning_rate": 2.4490616621983916e-05, + "loss": 0.3447, + "step": 1953 + }, + { + "epoch": 5.17, + "learning_rate": 2.4477211796246648e-05, + "loss": 0.0006, + "step": 1954 + }, + { + "epoch": 5.17, + "learning_rate": 2.4463806970509383e-05, + "loss": 0.0011, + "step": 1955 + }, + { + "epoch": 5.17, + "learning_rate": 2.4450402144772118e-05, + "loss": 0.0013, + "step": 1956 + }, + { + "epoch": 5.18, + "learning_rate": 2.4436997319034853e-05, + "loss": 0.1495, + "step": 1957 + }, + { + "epoch": 5.18, + "learning_rate": 2.4423592493297588e-05, + "loss": 0.0005, + "step": 1958 + }, + { + "epoch": 5.18, + "learning_rate": 2.4410187667560323e-05, + "loss": 0.3345, + "step": 1959 + }, + { + "epoch": 5.19, + "learning_rate": 2.439678284182306e-05, + "loss": 0.0048, + "step": 1960 + }, + { + "epoch": 5.19, + "learning_rate": 2.4383378016085793e-05, + "loss": 0.001, + "step": 1961 + }, + { + "epoch": 5.19, + "learning_rate": 2.436997319034853e-05, + "loss": 0.0025, + "step": 1962 + }, + { + "epoch": 5.19, + "learning_rate": 2.4356568364611264e-05, + "loss": 0.3215, + "step": 1963 + }, + { + "epoch": 5.2, + "learning_rate": 2.4343163538873995e-05, + "loss": 0.0197, + "step": 1964 + }, + { + "epoch": 5.2, + "learning_rate": 2.432975871313673e-05, + "loss": 0.0018, + "step": 1965 + }, + { + "epoch": 5.2, + "learning_rate": 2.4316353887399466e-05, + "loss": 0.1012, + "step": 1966 + }, + { + "epoch": 5.2, + "learning_rate": 2.43029490616622e-05, + "loss": 0.0179, + "step": 1967 + }, + { + "epoch": 5.21, + "learning_rate": 2.4289544235924936e-05, + "loss": 0.0032, + "step": 1968 + }, + { + "epoch": 5.21, + "learning_rate": 2.4276139410187668e-05, + "loss": 0.0011, + "step": 1969 + }, + { + "epoch": 5.21, + "learning_rate": 2.4262734584450403e-05, + "loss": 0.4875, + "step": 1970 + }, + { + "epoch": 5.21, + "learning_rate": 2.4249329758713138e-05, + "loss": 0.2791, + "step": 1971 + }, + { + "epoch": 5.22, + "learning_rate": 2.4235924932975873e-05, + "loss": 0.0011, + "step": 1972 + }, + { + "epoch": 5.22, + "learning_rate": 2.4222520107238608e-05, + "loss": 0.0011, + "step": 1973 + }, + { + "epoch": 5.22, + "learning_rate": 2.420911528150134e-05, + "loss": 0.0976, + "step": 1974 + }, + { + "epoch": 5.22, + "learning_rate": 2.4195710455764075e-05, + "loss": 0.3669, + "step": 1975 + }, + { + "epoch": 5.23, + "learning_rate": 2.418230563002681e-05, + "loss": 0.0022, + "step": 1976 + }, + { + "epoch": 5.23, + "learning_rate": 2.4168900804289545e-05, + "loss": 0.0015, + "step": 1977 + }, + { + "epoch": 5.23, + "learning_rate": 2.415549597855228e-05, + "loss": 0.0014, + "step": 1978 + }, + { + "epoch": 5.24, + "learning_rate": 2.4142091152815012e-05, + "loss": 0.0433, + "step": 1979 + }, + { + "epoch": 5.24, + "learning_rate": 2.4128686327077747e-05, + "loss": 0.0019, + "step": 1980 + }, + { + "epoch": 5.24, + "learning_rate": 2.4115281501340483e-05, + "loss": 0.0007, + "step": 1981 + }, + { + "epoch": 5.24, + "learning_rate": 2.4101876675603218e-05, + "loss": 0.0136, + "step": 1982 + }, + { + "epoch": 5.25, + "learning_rate": 2.4088471849865953e-05, + "loss": 0.1744, + "step": 1983 + }, + { + "epoch": 5.25, + "learning_rate": 2.4075067024128688e-05, + "loss": 0.1557, + "step": 1984 + }, + { + "epoch": 5.25, + "learning_rate": 2.4061662198391423e-05, + "loss": 0.1192, + "step": 1985 + }, + { + "epoch": 5.25, + "learning_rate": 2.4048257372654158e-05, + "loss": 0.0406, + "step": 1986 + }, + { + "epoch": 5.26, + "learning_rate": 2.4034852546916893e-05, + "loss": 0.2243, + "step": 1987 + }, + { + "epoch": 5.26, + "learning_rate": 2.402144772117963e-05, + "loss": 0.0021, + "step": 1988 + }, + { + "epoch": 5.26, + "learning_rate": 2.400804289544236e-05, + "loss": 0.002, + "step": 1989 + }, + { + "epoch": 5.26, + "learning_rate": 2.3994638069705095e-05, + "loss": 0.077, + "step": 1990 + }, + { + "epoch": 5.27, + "learning_rate": 2.398123324396783e-05, + "loss": 0.0378, + "step": 1991 + }, + { + "epoch": 5.27, + "learning_rate": 2.3967828418230566e-05, + "loss": 0.012, + "step": 1992 + }, + { + "epoch": 5.27, + "learning_rate": 2.39544235924933e-05, + "loss": 0.1386, + "step": 1993 + }, + { + "epoch": 5.28, + "learning_rate": 2.3941018766756032e-05, + "loss": 0.002, + "step": 1994 + }, + { + "epoch": 5.28, + "learning_rate": 2.3927613941018768e-05, + "loss": 0.0008, + "step": 1995 + }, + { + "epoch": 5.28, + "learning_rate": 2.3914209115281503e-05, + "loss": 0.0021, + "step": 1996 + }, + { + "epoch": 5.28, + "learning_rate": 2.3900804289544238e-05, + "loss": 0.022, + "step": 1997 + }, + { + "epoch": 5.29, + "learning_rate": 2.3887399463806973e-05, + "loss": 0.0015, + "step": 1998 + }, + { + "epoch": 5.29, + "learning_rate": 2.3873994638069705e-05, + "loss": 0.1486, + "step": 1999 + }, + { + "epoch": 5.29, + "learning_rate": 2.386058981233244e-05, + "loss": 0.2586, + "step": 2000 + }, + { + "epoch": 5.29, + "learning_rate": 2.3847184986595175e-05, + "loss": 0.0088, + "step": 2001 + }, + { + "epoch": 5.3, + "learning_rate": 2.383378016085791e-05, + "loss": 0.0044, + "step": 2002 + }, + { + "epoch": 5.3, + "learning_rate": 2.3820375335120645e-05, + "loss": 0.0015, + "step": 2003 + }, + { + "epoch": 5.3, + "learning_rate": 2.3806970509383377e-05, + "loss": 0.0008, + "step": 2004 + }, + { + "epoch": 5.3, + "learning_rate": 2.3793565683646112e-05, + "loss": 0.212, + "step": 2005 + }, + { + "epoch": 5.31, + "learning_rate": 2.3780160857908847e-05, + "loss": 0.0005, + "step": 2006 + }, + { + "epoch": 5.31, + "learning_rate": 2.3766756032171582e-05, + "loss": 0.1511, + "step": 2007 + }, + { + "epoch": 5.31, + "learning_rate": 2.3753351206434318e-05, + "loss": 0.0023, + "step": 2008 + }, + { + "epoch": 5.31, + "learning_rate": 2.3739946380697053e-05, + "loss": 0.1544, + "step": 2009 + }, + { + "epoch": 5.32, + "learning_rate": 2.3726541554959784e-05, + "loss": 0.0306, + "step": 2010 + }, + { + "epoch": 5.32, + "learning_rate": 2.371313672922252e-05, + "loss": 0.1005, + "step": 2011 + }, + { + "epoch": 5.32, + "learning_rate": 2.3699731903485255e-05, + "loss": 0.0744, + "step": 2012 + }, + { + "epoch": 5.33, + "learning_rate": 2.368632707774799e-05, + "loss": 0.0622, + "step": 2013 + }, + { + "epoch": 5.33, + "learning_rate": 2.3672922252010725e-05, + "loss": 0.0192, + "step": 2014 + }, + { + "epoch": 5.33, + "learning_rate": 2.365951742627346e-05, + "loss": 0.016, + "step": 2015 + }, + { + "epoch": 5.33, + "learning_rate": 2.3646112600536195e-05, + "loss": 0.293, + "step": 2016 + }, + { + "epoch": 5.34, + "learning_rate": 2.363270777479893e-05, + "loss": 0.0114, + "step": 2017 + }, + { + "epoch": 5.34, + "learning_rate": 2.3619302949061665e-05, + "loss": 0.1254, + "step": 2018 + }, + { + "epoch": 5.34, + "learning_rate": 2.36058981233244e-05, + "loss": 0.0638, + "step": 2019 + }, + { + "epoch": 5.34, + "learning_rate": 2.3592493297587132e-05, + "loss": 0.0192, + "step": 2020 + }, + { + "epoch": 5.35, + "learning_rate": 2.3579088471849867e-05, + "loss": 0.0069, + "step": 2021 + }, + { + "epoch": 5.35, + "learning_rate": 2.3565683646112603e-05, + "loss": 0.0573, + "step": 2022 + }, + { + "epoch": 5.35, + "learning_rate": 2.3552278820375338e-05, + "loss": 0.0039, + "step": 2023 + }, + { + "epoch": 5.35, + "learning_rate": 2.3538873994638073e-05, + "loss": 0.0079, + "step": 2024 + }, + { + "epoch": 5.36, + "learning_rate": 2.3525469168900805e-05, + "loss": 0.0063, + "step": 2025 + }, + { + "epoch": 5.36, + "learning_rate": 2.351206434316354e-05, + "loss": 0.0032, + "step": 2026 + }, + { + "epoch": 5.36, + "learning_rate": 2.3498659517426275e-05, + "loss": 0.1018, + "step": 2027 + }, + { + "epoch": 5.37, + "learning_rate": 2.348525469168901e-05, + "loss": 0.0017, + "step": 2028 + }, + { + "epoch": 5.37, + "learning_rate": 2.3471849865951745e-05, + "loss": 0.0203, + "step": 2029 + }, + { + "epoch": 5.37, + "learning_rate": 2.3458445040214477e-05, + "loss": 0.0008, + "step": 2030 + }, + { + "epoch": 5.37, + "learning_rate": 2.3445040214477212e-05, + "loss": 0.3145, + "step": 2031 + }, + { + "epoch": 5.38, + "learning_rate": 2.3431635388739947e-05, + "loss": 0.2476, + "step": 2032 + }, + { + "epoch": 5.38, + "learning_rate": 2.3418230563002682e-05, + "loss": 0.0573, + "step": 2033 + }, + { + "epoch": 5.38, + "learning_rate": 2.3404825737265417e-05, + "loss": 0.0008, + "step": 2034 + }, + { + "epoch": 5.38, + "learning_rate": 2.339142091152815e-05, + "loss": 0.0636, + "step": 2035 + }, + { + "epoch": 5.39, + "learning_rate": 2.3378016085790884e-05, + "loss": 0.001, + "step": 2036 + }, + { + "epoch": 5.39, + "learning_rate": 2.336461126005362e-05, + "loss": 0.0079, + "step": 2037 + }, + { + "epoch": 5.39, + "learning_rate": 2.3351206434316355e-05, + "loss": 0.0752, + "step": 2038 + }, + { + "epoch": 5.39, + "learning_rate": 2.333780160857909e-05, + "loss": 0.0024, + "step": 2039 + }, + { + "epoch": 5.4, + "learning_rate": 2.332439678284182e-05, + "loss": 0.0061, + "step": 2040 + }, + { + "epoch": 5.4, + "learning_rate": 2.3310991957104557e-05, + "loss": 0.2882, + "step": 2041 + }, + { + "epoch": 5.4, + "learning_rate": 2.329758713136729e-05, + "loss": 0.02, + "step": 2042 + }, + { + "epoch": 5.4, + "learning_rate": 2.3284182305630027e-05, + "loss": 0.018, + "step": 2043 + }, + { + "epoch": 5.41, + "learning_rate": 2.3270777479892762e-05, + "loss": 0.0125, + "step": 2044 + }, + { + "epoch": 5.41, + "learning_rate": 2.3257372654155497e-05, + "loss": 0.0007, + "step": 2045 + }, + { + "epoch": 5.41, + "learning_rate": 2.3243967828418232e-05, + "loss": 0.3849, + "step": 2046 + }, + { + "epoch": 5.42, + "learning_rate": 2.3230563002680967e-05, + "loss": 0.0011, + "step": 2047 + }, + { + "epoch": 5.42, + "learning_rate": 2.3217158176943702e-05, + "loss": 0.1235, + "step": 2048 + }, + { + "epoch": 5.42, + "learning_rate": 2.3203753351206438e-05, + "loss": 0.0006, + "step": 2049 + }, + { + "epoch": 5.42, + "learning_rate": 2.319034852546917e-05, + "loss": 0.0006, + "step": 2050 + }, + { + "epoch": 5.43, + "learning_rate": 2.3176943699731904e-05, + "loss": 0.0011, + "step": 2051 + }, + { + "epoch": 5.43, + "learning_rate": 2.316353887399464e-05, + "loss": 0.0011, + "step": 2052 + }, + { + "epoch": 5.43, + "learning_rate": 2.3150134048257375e-05, + "loss": 0.0032, + "step": 2053 + }, + { + "epoch": 5.43, + "learning_rate": 2.313672922252011e-05, + "loss": 0.0718, + "step": 2054 + }, + { + "epoch": 5.44, + "learning_rate": 2.312332439678284e-05, + "loss": 0.0052, + "step": 2055 + }, + { + "epoch": 5.44, + "learning_rate": 2.3109919571045577e-05, + "loss": 0.0026, + "step": 2056 + }, + { + "epoch": 5.44, + "learning_rate": 2.3096514745308312e-05, + "loss": 0.1854, + "step": 2057 + }, + { + "epoch": 5.44, + "learning_rate": 2.3083109919571047e-05, + "loss": 0.0008, + "step": 2058 + }, + { + "epoch": 5.45, + "learning_rate": 2.3069705093833782e-05, + "loss": 0.2671, + "step": 2059 + }, + { + "epoch": 5.45, + "learning_rate": 2.3056300268096514e-05, + "loss": 0.0064, + "step": 2060 + }, + { + "epoch": 5.45, + "learning_rate": 2.304289544235925e-05, + "loss": 0.0012, + "step": 2061 + }, + { + "epoch": 5.46, + "learning_rate": 2.3029490616621984e-05, + "loss": 0.0006, + "step": 2062 + }, + { + "epoch": 5.46, + "learning_rate": 2.301608579088472e-05, + "loss": 0.0008, + "step": 2063 + }, + { + "epoch": 5.46, + "learning_rate": 2.3002680965147454e-05, + "loss": 0.4998, + "step": 2064 + }, + { + "epoch": 5.46, + "learning_rate": 2.2989276139410186e-05, + "loss": 0.0037, + "step": 2065 + }, + { + "epoch": 5.47, + "learning_rate": 2.297587131367292e-05, + "loss": 0.0029, + "step": 2066 + }, + { + "epoch": 5.47, + "learning_rate": 2.2962466487935656e-05, + "loss": 0.043, + "step": 2067 + }, + { + "epoch": 5.47, + "learning_rate": 2.294906166219839e-05, + "loss": 0.0451, + "step": 2068 + }, + { + "epoch": 5.47, + "learning_rate": 2.2935656836461127e-05, + "loss": 0.0009, + "step": 2069 + }, + { + "epoch": 5.48, + "learning_rate": 2.2922252010723862e-05, + "loss": 0.002, + "step": 2070 + }, + { + "epoch": 5.48, + "learning_rate": 2.2908847184986597e-05, + "loss": 0.2744, + "step": 2071 + }, + { + "epoch": 5.48, + "learning_rate": 2.2895442359249332e-05, + "loss": 0.0146, + "step": 2072 + }, + { + "epoch": 5.48, + "learning_rate": 2.2882037533512067e-05, + "loss": 0.0011, + "step": 2073 + }, + { + "epoch": 5.49, + "learning_rate": 2.2868632707774802e-05, + "loss": 0.0421, + "step": 2074 + }, + { + "epoch": 5.49, + "learning_rate": 2.2855227882037537e-05, + "loss": 0.1518, + "step": 2075 + }, + { + "epoch": 5.49, + "learning_rate": 2.284182305630027e-05, + "loss": 0.0072, + "step": 2076 + }, + { + "epoch": 5.49, + "learning_rate": 2.2828418230563004e-05, + "loss": 0.2781, + "step": 2077 + }, + { + "epoch": 5.5, + "learning_rate": 2.281501340482574e-05, + "loss": 0.004, + "step": 2078 + }, + { + "epoch": 5.5, + "learning_rate": 2.2801608579088475e-05, + "loss": 0.1029, + "step": 2079 + }, + { + "epoch": 5.5, + "learning_rate": 2.278820375335121e-05, + "loss": 0.0526, + "step": 2080 + }, + { + "epoch": 5.51, + "learning_rate": 2.277479892761394e-05, + "loss": 0.0011, + "step": 2081 + }, + { + "epoch": 5.51, + "learning_rate": 2.2761394101876677e-05, + "loss": 0.0139, + "step": 2082 + }, + { + "epoch": 5.51, + "learning_rate": 2.274798927613941e-05, + "loss": 0.0509, + "step": 2083 + }, + { + "epoch": 5.51, + "learning_rate": 2.2734584450402147e-05, + "loss": 0.0042, + "step": 2084 + }, + { + "epoch": 5.52, + "learning_rate": 2.2721179624664882e-05, + "loss": 0.1964, + "step": 2085 + }, + { + "epoch": 5.52, + "learning_rate": 2.2707774798927614e-05, + "loss": 0.0083, + "step": 2086 + }, + { + "epoch": 5.52, + "learning_rate": 2.269436997319035e-05, + "loss": 0.101, + "step": 2087 + }, + { + "epoch": 5.52, + "learning_rate": 2.2680965147453084e-05, + "loss": 0.0094, + "step": 2088 + }, + { + "epoch": 5.53, + "learning_rate": 2.266756032171582e-05, + "loss": 0.1433, + "step": 2089 + }, + { + "epoch": 5.53, + "learning_rate": 2.2654155495978554e-05, + "loss": 0.0091, + "step": 2090 + }, + { + "epoch": 5.53, + "learning_rate": 2.2640750670241286e-05, + "loss": 0.0018, + "step": 2091 + }, + { + "epoch": 5.53, + "learning_rate": 2.262734584450402e-05, + "loss": 0.001, + "step": 2092 + }, + { + "epoch": 5.54, + "learning_rate": 2.2613941018766756e-05, + "loss": 0.3507, + "step": 2093 + }, + { + "epoch": 5.54, + "learning_rate": 2.260053619302949e-05, + "loss": 0.0527, + "step": 2094 + }, + { + "epoch": 5.54, + "learning_rate": 2.2587131367292226e-05, + "loss": 0.0015, + "step": 2095 + }, + { + "epoch": 5.54, + "learning_rate": 2.2573726541554958e-05, + "loss": 0.0195, + "step": 2096 + }, + { + "epoch": 5.55, + "learning_rate": 2.2560321715817693e-05, + "loss": 0.0007, + "step": 2097 + }, + { + "epoch": 5.55, + "learning_rate": 2.254691689008043e-05, + "loss": 0.001, + "step": 2098 + }, + { + "epoch": 5.55, + "learning_rate": 2.2533512064343164e-05, + "loss": 0.2777, + "step": 2099 + }, + { + "epoch": 5.56, + "learning_rate": 2.25201072386059e-05, + "loss": 0.0657, + "step": 2100 + }, + { + "epoch": 5.56, + "learning_rate": 2.2506702412868634e-05, + "loss": 0.159, + "step": 2101 + }, + { + "epoch": 5.56, + "learning_rate": 2.249329758713137e-05, + "loss": 0.0342, + "step": 2102 + }, + { + "epoch": 5.56, + "learning_rate": 2.2479892761394104e-05, + "loss": 0.3001, + "step": 2103 + }, + { + "epoch": 5.57, + "learning_rate": 2.246648793565684e-05, + "loss": 0.0028, + "step": 2104 + }, + { + "epoch": 5.57, + "learning_rate": 2.2453083109919574e-05, + "loss": 0.0191, + "step": 2105 + }, + { + "epoch": 5.57, + "learning_rate": 2.2439678284182306e-05, + "loss": 0.0012, + "step": 2106 + }, + { + "epoch": 5.57, + "learning_rate": 2.242627345844504e-05, + "loss": 0.2619, + "step": 2107 + }, + { + "epoch": 5.58, + "learning_rate": 2.2412868632707776e-05, + "loss": 0.001, + "step": 2108 + }, + { + "epoch": 5.58, + "learning_rate": 2.239946380697051e-05, + "loss": 0.094, + "step": 2109 + }, + { + "epoch": 5.58, + "learning_rate": 2.2386058981233247e-05, + "loss": 0.003, + "step": 2110 + }, + { + "epoch": 5.58, + "learning_rate": 2.237265415549598e-05, + "loss": 0.0528, + "step": 2111 + }, + { + "epoch": 5.59, + "learning_rate": 2.2359249329758714e-05, + "loss": 0.1252, + "step": 2112 + }, + { + "epoch": 5.59, + "learning_rate": 2.234584450402145e-05, + "loss": 0.0039, + "step": 2113 + }, + { + "epoch": 5.59, + "learning_rate": 2.2332439678284184e-05, + "loss": 0.0913, + "step": 2114 + }, + { + "epoch": 5.6, + "learning_rate": 2.231903485254692e-05, + "loss": 0.0023, + "step": 2115 + }, + { + "epoch": 5.6, + "learning_rate": 2.230563002680965e-05, + "loss": 0.0047, + "step": 2116 + }, + { + "epoch": 5.6, + "learning_rate": 2.2292225201072386e-05, + "loss": 0.0688, + "step": 2117 + }, + { + "epoch": 5.6, + "learning_rate": 2.227882037533512e-05, + "loss": 0.0013, + "step": 2118 + }, + { + "epoch": 5.61, + "learning_rate": 2.2265415549597856e-05, + "loss": 0.0012, + "step": 2119 + }, + { + "epoch": 5.61, + "learning_rate": 2.225201072386059e-05, + "loss": 0.0048, + "step": 2120 + }, + { + "epoch": 5.61, + "learning_rate": 2.2238605898123323e-05, + "loss": 0.0011, + "step": 2121 + }, + { + "epoch": 5.61, + "learning_rate": 2.2225201072386058e-05, + "loss": 0.0009, + "step": 2122 + }, + { + "epoch": 5.62, + "learning_rate": 2.2211796246648793e-05, + "loss": 0.0015, + "step": 2123 + }, + { + "epoch": 5.62, + "learning_rate": 2.2198391420911528e-05, + "loss": 0.0077, + "step": 2124 + }, + { + "epoch": 5.62, + "learning_rate": 2.2184986595174263e-05, + "loss": 0.0007, + "step": 2125 + }, + { + "epoch": 5.62, + "learning_rate": 2.2171581769437e-05, + "loss": 0.0007, + "step": 2126 + }, + { + "epoch": 5.63, + "learning_rate": 2.2158176943699734e-05, + "loss": 0.0506, + "step": 2127 + }, + { + "epoch": 5.63, + "learning_rate": 2.214477211796247e-05, + "loss": 0.0016, + "step": 2128 + }, + { + "epoch": 5.63, + "learning_rate": 2.2131367292225204e-05, + "loss": 0.0005, + "step": 2129 + }, + { + "epoch": 5.63, + "learning_rate": 2.211796246648794e-05, + "loss": 0.0049, + "step": 2130 + }, + { + "epoch": 5.64, + "learning_rate": 2.210455764075067e-05, + "loss": 0.0305, + "step": 2131 + }, + { + "epoch": 5.64, + "learning_rate": 2.2091152815013406e-05, + "loss": 0.0448, + "step": 2132 + }, + { + "epoch": 5.64, + "learning_rate": 2.207774798927614e-05, + "loss": 0.5391, + "step": 2133 + }, + { + "epoch": 5.65, + "learning_rate": 2.2064343163538876e-05, + "loss": 0.0005, + "step": 2134 + }, + { + "epoch": 5.65, + "learning_rate": 2.205093833780161e-05, + "loss": 0.0141, + "step": 2135 + }, + { + "epoch": 5.65, + "learning_rate": 2.2037533512064346e-05, + "loss": 0.3613, + "step": 2136 + }, + { + "epoch": 5.65, + "learning_rate": 2.2024128686327078e-05, + "loss": 0.0013, + "step": 2137 + }, + { + "epoch": 5.66, + "learning_rate": 2.2010723860589813e-05, + "loss": 0.002, + "step": 2138 + }, + { + "epoch": 5.66, + "learning_rate": 2.199731903485255e-05, + "loss": 0.0207, + "step": 2139 + }, + { + "epoch": 5.66, + "learning_rate": 2.1983914209115284e-05, + "loss": 0.0004, + "step": 2140 + }, + { + "epoch": 5.66, + "learning_rate": 2.197050938337802e-05, + "loss": 0.0022, + "step": 2141 + }, + { + "epoch": 5.67, + "learning_rate": 2.195710455764075e-05, + "loss": 0.5076, + "step": 2142 + }, + { + "epoch": 5.67, + "learning_rate": 2.1943699731903486e-05, + "loss": 0.0016, + "step": 2143 + }, + { + "epoch": 5.67, + "learning_rate": 2.193029490616622e-05, + "loss": 0.0014, + "step": 2144 + }, + { + "epoch": 5.67, + "learning_rate": 2.1916890080428956e-05, + "loss": 0.0101, + "step": 2145 + }, + { + "epoch": 5.68, + "learning_rate": 2.190348525469169e-05, + "loss": 0.0048, + "step": 2146 + }, + { + "epoch": 5.68, + "learning_rate": 2.1890080428954423e-05, + "loss": 0.001, + "step": 2147 + }, + { + "epoch": 5.68, + "learning_rate": 2.1876675603217158e-05, + "loss": 0.0004, + "step": 2148 + }, + { + "epoch": 5.69, + "learning_rate": 2.1863270777479893e-05, + "loss": 0.2627, + "step": 2149 + }, + { + "epoch": 5.69, + "learning_rate": 2.1849865951742628e-05, + "loss": 0.0013, + "step": 2150 + }, + { + "epoch": 5.69, + "learning_rate": 2.1836461126005363e-05, + "loss": 0.0074, + "step": 2151 + }, + { + "epoch": 5.69, + "learning_rate": 2.1823056300268095e-05, + "loss": 0.0238, + "step": 2152 + }, + { + "epoch": 5.7, + "learning_rate": 2.180965147453083e-05, + "loss": 0.0013, + "step": 2153 + }, + { + "epoch": 5.7, + "learning_rate": 2.1796246648793565e-05, + "loss": 0.0005, + "step": 2154 + }, + { + "epoch": 5.7, + "learning_rate": 2.17828418230563e-05, + "loss": 0.0006, + "step": 2155 + }, + { + "epoch": 5.7, + "learning_rate": 2.1769436997319036e-05, + "loss": 0.0442, + "step": 2156 + }, + { + "epoch": 5.71, + "learning_rate": 2.175603217158177e-05, + "loss": 0.0007, + "step": 2157 + }, + { + "epoch": 5.71, + "learning_rate": 2.1742627345844506e-05, + "loss": 0.0032, + "step": 2158 + }, + { + "epoch": 5.71, + "learning_rate": 2.172922252010724e-05, + "loss": 0.005, + "step": 2159 + }, + { + "epoch": 5.71, + "learning_rate": 2.1715817694369976e-05, + "loss": 0.0005, + "step": 2160 + }, + { + "epoch": 5.72, + "learning_rate": 2.170241286863271e-05, + "loss": 0.0005, + "step": 2161 + }, + { + "epoch": 5.72, + "learning_rate": 2.1689008042895443e-05, + "loss": 0.2023, + "step": 2162 + }, + { + "epoch": 5.72, + "learning_rate": 2.1675603217158178e-05, + "loss": 0.3146, + "step": 2163 + }, + { + "epoch": 5.72, + "learning_rate": 2.1662198391420913e-05, + "loss": 0.0035, + "step": 2164 + }, + { + "epoch": 5.73, + "learning_rate": 2.164879356568365e-05, + "loss": 0.141, + "step": 2165 + }, + { + "epoch": 5.73, + "learning_rate": 2.1635388739946383e-05, + "loss": 0.0005, + "step": 2166 + }, + { + "epoch": 5.73, + "learning_rate": 2.1621983914209115e-05, + "loss": 0.0013, + "step": 2167 + }, + { + "epoch": 5.74, + "learning_rate": 2.160857908847185e-05, + "loss": 0.0484, + "step": 2168 + }, + { + "epoch": 5.74, + "learning_rate": 2.1595174262734585e-05, + "loss": 0.0109, + "step": 2169 + }, + { + "epoch": 5.74, + "learning_rate": 2.158176943699732e-05, + "loss": 0.3307, + "step": 2170 + }, + { + "epoch": 5.74, + "learning_rate": 2.1568364611260056e-05, + "loss": 0.0013, + "step": 2171 + }, + { + "epoch": 5.75, + "learning_rate": 2.1554959785522787e-05, + "loss": 0.0005, + "step": 2172 + }, + { + "epoch": 5.75, + "learning_rate": 2.1541554959785523e-05, + "loss": 0.0004, + "step": 2173 + }, + { + "epoch": 5.75, + "learning_rate": 2.1528150134048258e-05, + "loss": 0.0744, + "step": 2174 + }, + { + "epoch": 5.75, + "learning_rate": 2.1514745308310993e-05, + "loss": 0.0008, + "step": 2175 + }, + { + "epoch": 5.76, + "learning_rate": 2.1501340482573728e-05, + "loss": 0.0013, + "step": 2176 + }, + { + "epoch": 5.76, + "learning_rate": 2.148793565683646e-05, + "loss": 0.0006, + "step": 2177 + }, + { + "epoch": 5.76, + "learning_rate": 2.1474530831099195e-05, + "loss": 0.055, + "step": 2178 + }, + { + "epoch": 5.76, + "learning_rate": 2.146112600536193e-05, + "loss": 0.0013, + "step": 2179 + }, + { + "epoch": 5.77, + "learning_rate": 2.1447721179624665e-05, + "loss": 0.0022, + "step": 2180 + }, + { + "epoch": 5.77, + "learning_rate": 2.14343163538874e-05, + "loss": 0.0009, + "step": 2181 + }, + { + "epoch": 5.77, + "learning_rate": 2.1420911528150135e-05, + "loss": 0.0011, + "step": 2182 + }, + { + "epoch": 5.78, + "learning_rate": 2.140750670241287e-05, + "loss": 0.0676, + "step": 2183 + }, + { + "epoch": 5.78, + "learning_rate": 2.1394101876675606e-05, + "loss": 0.4583, + "step": 2184 + }, + { + "epoch": 5.78, + "learning_rate": 2.138069705093834e-05, + "loss": 0.0009, + "step": 2185 + }, + { + "epoch": 5.78, + "learning_rate": 2.1367292225201076e-05, + "loss": 0.002, + "step": 2186 + }, + { + "epoch": 5.79, + "learning_rate": 2.1353887399463808e-05, + "loss": 0.7625, + "step": 2187 + }, + { + "epoch": 5.79, + "learning_rate": 2.1340482573726543e-05, + "loss": 0.4912, + "step": 2188 + }, + { + "epoch": 5.79, + "learning_rate": 2.1327077747989278e-05, + "loss": 0.0283, + "step": 2189 + }, + { + "epoch": 5.79, + "learning_rate": 2.1313672922252013e-05, + "loss": 0.0011, + "step": 2190 + }, + { + "epoch": 5.8, + "learning_rate": 2.1300268096514748e-05, + "loss": 0.0013, + "step": 2191 + }, + { + "epoch": 5.8, + "learning_rate": 2.128686327077748e-05, + "loss": 0.3384, + "step": 2192 + }, + { + "epoch": 5.8, + "learning_rate": 2.1273458445040215e-05, + "loss": 0.4533, + "step": 2193 + }, + { + "epoch": 5.8, + "learning_rate": 2.126005361930295e-05, + "loss": 0.0039, + "step": 2194 + }, + { + "epoch": 5.81, + "learning_rate": 2.1246648793565685e-05, + "loss": 0.0038, + "step": 2195 + }, + { + "epoch": 5.81, + "learning_rate": 2.123324396782842e-05, + "loss": 0.0318, + "step": 2196 + }, + { + "epoch": 5.81, + "learning_rate": 2.1219839142091156e-05, + "loss": 0.0045, + "step": 2197 + }, + { + "epoch": 5.81, + "learning_rate": 2.1206434316353887e-05, + "loss": 0.3134, + "step": 2198 + }, + { + "epoch": 5.82, + "learning_rate": 2.1193029490616622e-05, + "loss": 0.0011, + "step": 2199 + }, + { + "epoch": 5.82, + "learning_rate": 2.1179624664879358e-05, + "loss": 0.0008, + "step": 2200 + }, + { + "epoch": 5.82, + "learning_rate": 2.1166219839142093e-05, + "loss": 0.0009, + "step": 2201 + }, + { + "epoch": 5.83, + "learning_rate": 2.1152815013404828e-05, + "loss": 0.0083, + "step": 2202 + }, + { + "epoch": 5.83, + "learning_rate": 2.113941018766756e-05, + "loss": 0.0196, + "step": 2203 + }, + { + "epoch": 5.83, + "learning_rate": 2.1126005361930295e-05, + "loss": 0.0063, + "step": 2204 + }, + { + "epoch": 5.83, + "learning_rate": 2.111260053619303e-05, + "loss": 0.0064, + "step": 2205 + }, + { + "epoch": 5.84, + "learning_rate": 2.1099195710455765e-05, + "loss": 0.0143, + "step": 2206 + }, + { + "epoch": 5.84, + "learning_rate": 2.10857908847185e-05, + "loss": 0.0012, + "step": 2207 + }, + { + "epoch": 5.84, + "learning_rate": 2.1072386058981232e-05, + "loss": 0.0033, + "step": 2208 + }, + { + "epoch": 5.84, + "learning_rate": 2.1058981233243967e-05, + "loss": 0.0014, + "step": 2209 + }, + { + "epoch": 5.85, + "learning_rate": 2.1045576407506702e-05, + "loss": 0.0219, + "step": 2210 + }, + { + "epoch": 5.85, + "learning_rate": 2.1032171581769437e-05, + "loss": 0.3033, + "step": 2211 + }, + { + "epoch": 5.85, + "learning_rate": 2.1018766756032172e-05, + "loss": 0.0711, + "step": 2212 + }, + { + "epoch": 5.85, + "learning_rate": 2.1005361930294907e-05, + "loss": 0.0051, + "step": 2213 + }, + { + "epoch": 5.86, + "learning_rate": 2.0991957104557643e-05, + "loss": 0.026, + "step": 2214 + }, + { + "epoch": 5.86, + "learning_rate": 2.0978552278820378e-05, + "loss": 0.0024, + "step": 2215 + }, + { + "epoch": 5.86, + "learning_rate": 2.0965147453083113e-05, + "loss": 0.3622, + "step": 2216 + }, + { + "epoch": 5.87, + "learning_rate": 2.0951742627345848e-05, + "loss": 0.0009, + "step": 2217 + }, + { + "epoch": 5.87, + "learning_rate": 2.093833780160858e-05, + "loss": 0.0019, + "step": 2218 + }, + { + "epoch": 5.87, + "learning_rate": 2.0924932975871315e-05, + "loss": 0.3412, + "step": 2219 + }, + { + "epoch": 5.87, + "learning_rate": 2.091152815013405e-05, + "loss": 0.0197, + "step": 2220 + }, + { + "epoch": 5.88, + "learning_rate": 2.0898123324396785e-05, + "loss": 0.4229, + "step": 2221 + }, + { + "epoch": 5.88, + "learning_rate": 2.088471849865952e-05, + "loss": 0.0014, + "step": 2222 + }, + { + "epoch": 5.88, + "learning_rate": 2.0871313672922252e-05, + "loss": 0.0183, + "step": 2223 + }, + { + "epoch": 5.88, + "learning_rate": 2.0857908847184987e-05, + "loss": 0.2005, + "step": 2224 + }, + { + "epoch": 5.89, + "learning_rate": 2.0844504021447722e-05, + "loss": 0.0122, + "step": 2225 + }, + { + "epoch": 5.89, + "learning_rate": 2.0831099195710457e-05, + "loss": 0.1178, + "step": 2226 + }, + { + "epoch": 5.89, + "learning_rate": 2.0817694369973193e-05, + "loss": 0.0105, + "step": 2227 + }, + { + "epoch": 5.89, + "learning_rate": 2.0804289544235924e-05, + "loss": 0.0328, + "step": 2228 + }, + { + "epoch": 5.9, + "learning_rate": 2.079088471849866e-05, + "loss": 0.0087, + "step": 2229 + }, + { + "epoch": 5.9, + "learning_rate": 2.0777479892761395e-05, + "loss": 0.0288, + "step": 2230 + }, + { + "epoch": 5.9, + "learning_rate": 2.076407506702413e-05, + "loss": 0.0017, + "step": 2231 + }, + { + "epoch": 5.9, + "learning_rate": 2.0750670241286865e-05, + "loss": 0.002, + "step": 2232 + }, + { + "epoch": 5.91, + "learning_rate": 2.0737265415549597e-05, + "loss": 0.0024, + "step": 2233 + }, + { + "epoch": 5.91, + "learning_rate": 2.072386058981233e-05, + "loss": 0.0008, + "step": 2234 + }, + { + "epoch": 5.91, + "learning_rate": 2.0710455764075067e-05, + "loss": 0.002, + "step": 2235 + }, + { + "epoch": 5.92, + "learning_rate": 2.0697050938337802e-05, + "loss": 0.0052, + "step": 2236 + }, + { + "epoch": 5.92, + "learning_rate": 2.0683646112600537e-05, + "loss": 0.0058, + "step": 2237 + }, + { + "epoch": 5.92, + "learning_rate": 2.0670241286863272e-05, + "loss": 0.0013, + "step": 2238 + }, + { + "epoch": 5.92, + "learning_rate": 2.0656836461126007e-05, + "loss": 0.0158, + "step": 2239 + }, + { + "epoch": 5.93, + "learning_rate": 2.0643431635388742e-05, + "loss": 0.229, + "step": 2240 + }, + { + "epoch": 5.93, + "learning_rate": 2.0630026809651478e-05, + "loss": 0.1844, + "step": 2241 + }, + { + "epoch": 5.93, + "learning_rate": 2.0616621983914213e-05, + "loss": 0.2905, + "step": 2242 + }, + { + "epoch": 5.93, + "learning_rate": 2.0603217158176944e-05, + "loss": 0.0059, + "step": 2243 + }, + { + "epoch": 5.94, + "learning_rate": 2.058981233243968e-05, + "loss": 0.0007, + "step": 2244 + }, + { + "epoch": 5.94, + "learning_rate": 2.0576407506702415e-05, + "loss": 0.1638, + "step": 2245 + }, + { + "epoch": 5.94, + "learning_rate": 2.056300268096515e-05, + "loss": 0.1195, + "step": 2246 + }, + { + "epoch": 5.94, + "learning_rate": 2.0549597855227885e-05, + "loss": 0.0015, + "step": 2247 + }, + { + "epoch": 5.95, + "learning_rate": 2.0536193029490617e-05, + "loss": 0.0013, + "step": 2248 + }, + { + "epoch": 5.95, + "learning_rate": 2.0522788203753352e-05, + "loss": 0.5152, + "step": 2249 + }, + { + "epoch": 5.95, + "learning_rate": 2.0509383378016087e-05, + "loss": 0.0315, + "step": 2250 + }, + { + "epoch": 5.96, + "learning_rate": 2.0495978552278822e-05, + "loss": 0.1213, + "step": 2251 + }, + { + "epoch": 5.96, + "learning_rate": 2.0482573726541557e-05, + "loss": 0.0006, + "step": 2252 + }, + { + "epoch": 5.96, + "learning_rate": 2.046916890080429e-05, + "loss": 0.0011, + "step": 2253 + }, + { + "epoch": 5.96, + "learning_rate": 2.0455764075067024e-05, + "loss": 0.2546, + "step": 2254 + }, + { + "epoch": 5.97, + "learning_rate": 2.044235924932976e-05, + "loss": 0.1259, + "step": 2255 + }, + { + "epoch": 5.97, + "learning_rate": 2.0428954423592494e-05, + "loss": 0.0179, + "step": 2256 + }, + { + "epoch": 5.97, + "learning_rate": 2.041554959785523e-05, + "loss": 0.0257, + "step": 2257 + }, + { + "epoch": 5.97, + "learning_rate": 2.0402144772117965e-05, + "loss": 0.0092, + "step": 2258 + }, + { + "epoch": 5.98, + "learning_rate": 2.0388739946380696e-05, + "loss": 0.3231, + "step": 2259 + }, + { + "epoch": 5.98, + "learning_rate": 2.037533512064343e-05, + "loss": 0.0084, + "step": 2260 + }, + { + "epoch": 5.98, + "learning_rate": 2.0361930294906167e-05, + "loss": 0.0517, + "step": 2261 + }, + { + "epoch": 5.98, + "learning_rate": 2.0348525469168902e-05, + "loss": 0.0012, + "step": 2262 + }, + { + "epoch": 5.99, + "learning_rate": 2.0335120643431637e-05, + "loss": 0.0045, + "step": 2263 + }, + { + "epoch": 5.99, + "learning_rate": 2.032171581769437e-05, + "loss": 0.0012, + "step": 2264 + }, + { + "epoch": 5.99, + "learning_rate": 2.0308310991957104e-05, + "loss": 0.0032, + "step": 2265 + }, + { + "epoch": 5.99, + "learning_rate": 2.029490616621984e-05, + "loss": 0.0038, + "step": 2266 + }, + { + "epoch": 6.0, + "learning_rate": 2.0281501340482574e-05, + "loss": 0.2731, + "step": 2267 + }, + { + "epoch": 6.0, + "learning_rate": 2.026809651474531e-05, + "loss": 0.3122, + "step": 2268 + }, + { + "epoch": 6.0, + "eval_f1": 0.7762762762762763, + "eval_loss": 1.0977023839950562, + "eval_runtime": 1.8624, + "eval_samples_per_second": 812.383, + "eval_steps_per_second": 51.009, + "step": 2268 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 580687400433408.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2268/training_args.bin b/checkpoint-2268/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-2268/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-2646/config.json b/checkpoint-2646/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-2646/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-2646/optimizer.pt b/checkpoint-2646/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a01dde6137ce2982faed583eeb7136523b4d124 --- /dev/null +++ b/checkpoint-2646/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53912039cd2f47825838734a584e08d2a7dd2446c9cec942224fd1ae50de1e47 +size 526325317 diff --git a/checkpoint-2646/pytorch_model.bin b/checkpoint-2646/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6c8c46fe1ccf83eaffe99599664a086a4266249 --- /dev/null +++ b/checkpoint-2646/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8689e4c6d7995e2fae60d4a7f938d919a858337c1e51bab76711830eb6f9063b +size 263167661 diff --git a/checkpoint-2646/rng_state.pth b/checkpoint-2646/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..219abb4a1f1f2b18ad6f9cfc3934ffc2018cd78f --- /dev/null +++ b/checkpoint-2646/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4efd6b59e664dd5d3e5b916b9efad5576e8242df7a17314fc41a8cf175dd2d31 +size 14575 diff --git a/checkpoint-2646/scheduler.pt b/checkpoint-2646/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..421760b074cbee88fcfb4620996d83a992777530 --- /dev/null +++ b/checkpoint-2646/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad115c008906441ed2ca379a9e36021b2662b360f7030e43093fc964b8bf0d60 +size 627 diff --git a/checkpoint-2646/trainer_state.json b/checkpoint-2646/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8c15ba039d39937e6c0885bcaa0ae40605f740a5 --- /dev/null +++ b/checkpoint-2646/trainer_state.json @@ -0,0 +1,15955 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 7.0, + "global_step": 2646, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + }, + { + "epoch": 3.0, + "learning_rate": 3.545576407506703e-05, + "loss": 0.0111, + "step": 1135 + }, + { + "epoch": 3.01, + "learning_rate": 3.544235924932976e-05, + "loss": 0.0166, + "step": 1136 + }, + { + "epoch": 3.01, + "learning_rate": 3.54289544235925e-05, + "loss": 0.2159, + "step": 1137 + }, + { + "epoch": 3.01, + "learning_rate": 3.541554959785523e-05, + "loss": 0.0096, + "step": 1138 + }, + { + "epoch": 3.01, + "learning_rate": 3.540214477211796e-05, + "loss": 0.1352, + "step": 1139 + }, + { + "epoch": 3.02, + "learning_rate": 3.53887399463807e-05, + "loss": 0.0195, + "step": 1140 + }, + { + "epoch": 3.02, + "learning_rate": 3.5375335120643434e-05, + "loss": 0.1579, + "step": 1141 + }, + { + "epoch": 3.02, + "learning_rate": 3.536193029490617e-05, + "loss": 0.0078, + "step": 1142 + }, + { + "epoch": 3.02, + "learning_rate": 3.5348525469168904e-05, + "loss": 0.0111, + "step": 1143 + }, + { + "epoch": 3.03, + "learning_rate": 3.533512064343163e-05, + "loss": 0.2457, + "step": 1144 + }, + { + "epoch": 3.03, + "learning_rate": 3.5321715817694374e-05, + "loss": 0.014, + "step": 1145 + }, + { + "epoch": 3.03, + "learning_rate": 3.53083109919571e-05, + "loss": 0.2021, + "step": 1146 + }, + { + "epoch": 3.03, + "learning_rate": 3.5294906166219844e-05, + "loss": 0.5334, + "step": 1147 + }, + { + "epoch": 3.04, + "learning_rate": 3.528150134048257e-05, + "loss": 0.0116, + "step": 1148 + }, + { + "epoch": 3.04, + "learning_rate": 3.526809651474531e-05, + "loss": 0.0099, + "step": 1149 + }, + { + "epoch": 3.04, + "learning_rate": 3.525469168900804e-05, + "loss": 0.2102, + "step": 1150 + }, + { + "epoch": 3.04, + "learning_rate": 3.524128686327078e-05, + "loss": 0.0093, + "step": 1151 + }, + { + "epoch": 3.05, + "learning_rate": 3.522788203753351e-05, + "loss": 0.0112, + "step": 1152 + }, + { + "epoch": 3.05, + "learning_rate": 3.521447721179625e-05, + "loss": 0.1761, + "step": 1153 + }, + { + "epoch": 3.05, + "learning_rate": 3.5201072386058984e-05, + "loss": 0.1608, + "step": 1154 + }, + { + "epoch": 3.06, + "learning_rate": 3.518766756032172e-05, + "loss": 0.2883, + "step": 1155 + }, + { + "epoch": 3.06, + "learning_rate": 3.5174262734584454e-05, + "loss": 0.0304, + "step": 1156 + }, + { + "epoch": 3.06, + "learning_rate": 3.516085790884719e-05, + "loss": 0.0623, + "step": 1157 + }, + { + "epoch": 3.06, + "learning_rate": 3.5147453083109924e-05, + "loss": 0.1824, + "step": 1158 + }, + { + "epoch": 3.07, + "learning_rate": 3.513404825737265e-05, + "loss": 0.2527, + "step": 1159 + }, + { + "epoch": 3.07, + "learning_rate": 3.5120643431635394e-05, + "loss": 0.0877, + "step": 1160 + }, + { + "epoch": 3.07, + "learning_rate": 3.510723860589812e-05, + "loss": 0.2735, + "step": 1161 + }, + { + "epoch": 3.07, + "learning_rate": 3.5093833780160865e-05, + "loss": 0.1126, + "step": 1162 + }, + { + "epoch": 3.08, + "learning_rate": 3.508042895442359e-05, + "loss": 0.2498, + "step": 1163 + }, + { + "epoch": 3.08, + "learning_rate": 3.506702412868633e-05, + "loss": 0.022, + "step": 1164 + }, + { + "epoch": 3.08, + "learning_rate": 3.505361930294906e-05, + "loss": 0.2768, + "step": 1165 + }, + { + "epoch": 3.08, + "learning_rate": 3.50402144772118e-05, + "loss": 0.0429, + "step": 1166 + }, + { + "epoch": 3.09, + "learning_rate": 3.5026809651474533e-05, + "loss": 0.0198, + "step": 1167 + }, + { + "epoch": 3.09, + "learning_rate": 3.501340482573727e-05, + "loss": 0.0097, + "step": 1168 + }, + { + "epoch": 3.09, + "learning_rate": 3.5e-05, + "loss": 0.0276, + "step": 1169 + }, + { + "epoch": 3.1, + "learning_rate": 3.498659517426274e-05, + "loss": 0.2276, + "step": 1170 + }, + { + "epoch": 3.1, + "learning_rate": 3.497319034852547e-05, + "loss": 0.0461, + "step": 1171 + }, + { + "epoch": 3.1, + "learning_rate": 3.495978552278821e-05, + "loss": 0.0103, + "step": 1172 + }, + { + "epoch": 3.1, + "learning_rate": 3.494638069705094e-05, + "loss": 0.1455, + "step": 1173 + }, + { + "epoch": 3.11, + "learning_rate": 3.493297587131367e-05, + "loss": 0.0865, + "step": 1174 + }, + { + "epoch": 3.11, + "learning_rate": 3.491957104557641e-05, + "loss": 0.3226, + "step": 1175 + }, + { + "epoch": 3.11, + "learning_rate": 3.490616621983914e-05, + "loss": 0.1744, + "step": 1176 + }, + { + "epoch": 3.11, + "learning_rate": 3.489276139410188e-05, + "loss": 0.0148, + "step": 1177 + }, + { + "epoch": 3.12, + "learning_rate": 3.487935656836461e-05, + "loss": 0.2582, + "step": 1178 + }, + { + "epoch": 3.12, + "learning_rate": 3.486595174262735e-05, + "loss": 0.2782, + "step": 1179 + }, + { + "epoch": 3.12, + "learning_rate": 3.485254691689008e-05, + "loss": 0.143, + "step": 1180 + }, + { + "epoch": 3.12, + "learning_rate": 3.483914209115282e-05, + "loss": 0.0853, + "step": 1181 + }, + { + "epoch": 3.13, + "learning_rate": 3.4825737265415554e-05, + "loss": 0.1361, + "step": 1182 + }, + { + "epoch": 3.13, + "learning_rate": 3.481233243967829e-05, + "loss": 0.0883, + "step": 1183 + }, + { + "epoch": 3.13, + "learning_rate": 3.479892761394102e-05, + "loss": 0.0116, + "step": 1184 + }, + { + "epoch": 3.13, + "learning_rate": 3.478552278820376e-05, + "loss": 0.0531, + "step": 1185 + }, + { + "epoch": 3.14, + "learning_rate": 3.477211796246649e-05, + "loss": 0.0184, + "step": 1186 + }, + { + "epoch": 3.14, + "learning_rate": 3.475871313672923e-05, + "loss": 0.1601, + "step": 1187 + }, + { + "epoch": 3.14, + "learning_rate": 3.474530831099196e-05, + "loss": 0.007, + "step": 1188 + }, + { + "epoch": 3.15, + "learning_rate": 3.473190348525469e-05, + "loss": 0.0101, + "step": 1189 + }, + { + "epoch": 3.15, + "learning_rate": 3.471849865951743e-05, + "loss": 0.2385, + "step": 1190 + }, + { + "epoch": 3.15, + "learning_rate": 3.470509383378016e-05, + "loss": 0.0075, + "step": 1191 + }, + { + "epoch": 3.15, + "learning_rate": 3.46916890080429e-05, + "loss": 0.0919, + "step": 1192 + }, + { + "epoch": 3.16, + "learning_rate": 3.467828418230563e-05, + "loss": 0.0162, + "step": 1193 + }, + { + "epoch": 3.16, + "learning_rate": 3.466487935656836e-05, + "loss": 0.2239, + "step": 1194 + }, + { + "epoch": 3.16, + "learning_rate": 3.4651474530831104e-05, + "loss": 0.5757, + "step": 1195 + }, + { + "epoch": 3.16, + "learning_rate": 3.463806970509383e-05, + "loss": 0.0774, + "step": 1196 + }, + { + "epoch": 3.17, + "learning_rate": 3.4624664879356574e-05, + "loss": 0.2124, + "step": 1197 + }, + { + "epoch": 3.17, + "learning_rate": 3.46112600536193e-05, + "loss": 0.0107, + "step": 1198 + }, + { + "epoch": 3.17, + "learning_rate": 3.459785522788204e-05, + "loss": 0.3179, + "step": 1199 + }, + { + "epoch": 3.17, + "learning_rate": 3.458445040214477e-05, + "loss": 0.0138, + "step": 1200 + }, + { + "epoch": 3.18, + "learning_rate": 3.457104557640751e-05, + "loss": 0.0094, + "step": 1201 + }, + { + "epoch": 3.18, + "learning_rate": 3.455764075067024e-05, + "loss": 0.0039, + "step": 1202 + }, + { + "epoch": 3.18, + "learning_rate": 3.454423592493298e-05, + "loss": 0.0745, + "step": 1203 + }, + { + "epoch": 3.19, + "learning_rate": 3.453083109919571e-05, + "loss": 0.0387, + "step": 1204 + }, + { + "epoch": 3.19, + "learning_rate": 3.451742627345845e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 3.19, + "learning_rate": 3.450402144772118e-05, + "loss": 0.1299, + "step": 1206 + }, + { + "epoch": 3.19, + "learning_rate": 3.449061662198392e-05, + "loss": 0.2821, + "step": 1207 + }, + { + "epoch": 3.2, + "learning_rate": 3.4477211796246653e-05, + "loss": 0.2236, + "step": 1208 + }, + { + "epoch": 3.2, + "learning_rate": 3.446380697050938e-05, + "loss": 0.1436, + "step": 1209 + }, + { + "epoch": 3.2, + "learning_rate": 3.4450402144772124e-05, + "loss": 0.1504, + "step": 1210 + }, + { + "epoch": 3.2, + "learning_rate": 3.443699731903485e-05, + "loss": 0.0415, + "step": 1211 + }, + { + "epoch": 3.21, + "learning_rate": 3.4423592493297594e-05, + "loss": 0.023, + "step": 1212 + }, + { + "epoch": 3.21, + "learning_rate": 3.441018766756032e-05, + "loss": 0.2128, + "step": 1213 + }, + { + "epoch": 3.21, + "learning_rate": 3.439678284182306e-05, + "loss": 0.0066, + "step": 1214 + }, + { + "epoch": 3.21, + "learning_rate": 3.438337801608579e-05, + "loss": 0.4345, + "step": 1215 + }, + { + "epoch": 3.22, + "learning_rate": 3.436997319034853e-05, + "loss": 0.0214, + "step": 1216 + }, + { + "epoch": 3.22, + "learning_rate": 3.435656836461126e-05, + "loss": 0.2094, + "step": 1217 + }, + { + "epoch": 3.22, + "learning_rate": 3.4343163538874e-05, + "loss": 0.0822, + "step": 1218 + }, + { + "epoch": 3.22, + "learning_rate": 3.4329758713136726e-05, + "loss": 0.1153, + "step": 1219 + }, + { + "epoch": 3.23, + "learning_rate": 3.431635388739947e-05, + "loss": 0.0059, + "step": 1220 + }, + { + "epoch": 3.23, + "learning_rate": 3.43029490616622e-05, + "loss": 0.0069, + "step": 1221 + }, + { + "epoch": 3.23, + "learning_rate": 3.428954423592494e-05, + "loss": 0.044, + "step": 1222 + }, + { + "epoch": 3.24, + "learning_rate": 3.427613941018767e-05, + "loss": 0.1975, + "step": 1223 + }, + { + "epoch": 3.24, + "learning_rate": 3.42627345844504e-05, + "loss": 0.3294, + "step": 1224 + }, + { + "epoch": 3.24, + "learning_rate": 3.424932975871314e-05, + "loss": 0.026, + "step": 1225 + }, + { + "epoch": 3.24, + "learning_rate": 3.423592493297587e-05, + "loss": 0.2666, + "step": 1226 + }, + { + "epoch": 3.25, + "learning_rate": 3.422252010723861e-05, + "loss": 0.0628, + "step": 1227 + }, + { + "epoch": 3.25, + "learning_rate": 3.420911528150134e-05, + "loss": 0.0068, + "step": 1228 + }, + { + "epoch": 3.25, + "learning_rate": 3.419571045576407e-05, + "loss": 0.0144, + "step": 1229 + }, + { + "epoch": 3.25, + "learning_rate": 3.418230563002681e-05, + "loss": 0.0029, + "step": 1230 + }, + { + "epoch": 3.26, + "learning_rate": 3.416890080428954e-05, + "loss": 0.606, + "step": 1231 + }, + { + "epoch": 3.26, + "learning_rate": 3.415549597855228e-05, + "loss": 0.2162, + "step": 1232 + }, + { + "epoch": 3.26, + "learning_rate": 3.414209115281501e-05, + "loss": 0.146, + "step": 1233 + }, + { + "epoch": 3.26, + "learning_rate": 3.412868632707775e-05, + "loss": 0.3649, + "step": 1234 + }, + { + "epoch": 3.27, + "learning_rate": 3.411528150134048e-05, + "loss": 0.0062, + "step": 1235 + }, + { + "epoch": 3.27, + "learning_rate": 3.410187667560322e-05, + "loss": 0.4097, + "step": 1236 + }, + { + "epoch": 3.27, + "learning_rate": 3.408847184986595e-05, + "loss": 0.5354, + "step": 1237 + }, + { + "epoch": 3.28, + "learning_rate": 3.407506702412869e-05, + "loss": 0.6222, + "step": 1238 + }, + { + "epoch": 3.28, + "learning_rate": 3.406166219839142e-05, + "loss": 0.0023, + "step": 1239 + }, + { + "epoch": 3.28, + "learning_rate": 3.404825737265416e-05, + "loss": 0.0247, + "step": 1240 + }, + { + "epoch": 3.28, + "learning_rate": 3.403485254691689e-05, + "loss": 0.0051, + "step": 1241 + }, + { + "epoch": 3.29, + "learning_rate": 3.402144772117963e-05, + "loss": 0.2504, + "step": 1242 + }, + { + "epoch": 3.29, + "learning_rate": 3.400804289544236e-05, + "loss": 0.0195, + "step": 1243 + }, + { + "epoch": 3.29, + "learning_rate": 3.39946380697051e-05, + "loss": 0.3706, + "step": 1244 + }, + { + "epoch": 3.29, + "learning_rate": 3.398123324396783e-05, + "loss": 0.0174, + "step": 1245 + }, + { + "epoch": 3.3, + "learning_rate": 3.396782841823056e-05, + "loss": 0.0068, + "step": 1246 + }, + { + "epoch": 3.3, + "learning_rate": 3.39544235924933e-05, + "loss": 0.3938, + "step": 1247 + }, + { + "epoch": 3.3, + "learning_rate": 3.394101876675603e-05, + "loss": 0.0114, + "step": 1248 + }, + { + "epoch": 3.3, + "learning_rate": 3.3927613941018774e-05, + "loss": 0.0088, + "step": 1249 + }, + { + "epoch": 3.31, + "learning_rate": 3.39142091152815e-05, + "loss": 0.0126, + "step": 1250 + }, + { + "epoch": 3.31, + "learning_rate": 3.390080428954424e-05, + "loss": 0.0091, + "step": 1251 + }, + { + "epoch": 3.31, + "learning_rate": 3.388739946380697e-05, + "loss": 0.0232, + "step": 1252 + }, + { + "epoch": 3.31, + "learning_rate": 3.387399463806971e-05, + "loss": 0.3704, + "step": 1253 + }, + { + "epoch": 3.32, + "learning_rate": 3.386058981233244e-05, + "loss": 0.0112, + "step": 1254 + }, + { + "epoch": 3.32, + "learning_rate": 3.384718498659518e-05, + "loss": 0.1709, + "step": 1255 + }, + { + "epoch": 3.32, + "learning_rate": 3.3833780160857906e-05, + "loss": 0.0109, + "step": 1256 + }, + { + "epoch": 3.33, + "learning_rate": 3.382037533512065e-05, + "loss": 0.2874, + "step": 1257 + }, + { + "epoch": 3.33, + "learning_rate": 3.3806970509383376e-05, + "loss": 0.024, + "step": 1258 + }, + { + "epoch": 3.33, + "learning_rate": 3.379356568364612e-05, + "loss": 0.0131, + "step": 1259 + }, + { + "epoch": 3.33, + "learning_rate": 3.3780160857908846e-05, + "loss": 0.2076, + "step": 1260 + }, + { + "epoch": 3.34, + "learning_rate": 3.376675603217158e-05, + "loss": 0.0083, + "step": 1261 + }, + { + "epoch": 3.34, + "learning_rate": 3.375335120643432e-05, + "loss": 0.0234, + "step": 1262 + }, + { + "epoch": 3.34, + "learning_rate": 3.373994638069705e-05, + "loss": 0.0066, + "step": 1263 + }, + { + "epoch": 3.34, + "learning_rate": 3.372654155495979e-05, + "loss": 0.3983, + "step": 1264 + }, + { + "epoch": 3.35, + "learning_rate": 3.371313672922252e-05, + "loss": 0.0648, + "step": 1265 + }, + { + "epoch": 3.35, + "learning_rate": 3.369973190348526e-05, + "loss": 0.006, + "step": 1266 + }, + { + "epoch": 3.35, + "learning_rate": 3.368632707774799e-05, + "loss": 0.0807, + "step": 1267 + }, + { + "epoch": 3.35, + "learning_rate": 3.367292225201073e-05, + "loss": 0.0975, + "step": 1268 + }, + { + "epoch": 3.36, + "learning_rate": 3.365951742627346e-05, + "loss": 0.2934, + "step": 1269 + }, + { + "epoch": 3.36, + "learning_rate": 3.36461126005362e-05, + "loss": 0.0869, + "step": 1270 + }, + { + "epoch": 3.36, + "learning_rate": 3.3632707774798926e-05, + "loss": 0.1374, + "step": 1271 + }, + { + "epoch": 3.37, + "learning_rate": 3.361930294906167e-05, + "loss": 0.3314, + "step": 1272 + }, + { + "epoch": 3.37, + "learning_rate": 3.3605898123324396e-05, + "loss": 0.0045, + "step": 1273 + }, + { + "epoch": 3.37, + "learning_rate": 3.359249329758714e-05, + "loss": 0.0536, + "step": 1274 + }, + { + "epoch": 3.37, + "learning_rate": 3.3579088471849867e-05, + "loss": 0.0564, + "step": 1275 + }, + { + "epoch": 3.38, + "learning_rate": 3.35656836461126e-05, + "loss": 0.0689, + "step": 1276 + }, + { + "epoch": 3.38, + "learning_rate": 3.355227882037534e-05, + "loss": 0.5177, + "step": 1277 + }, + { + "epoch": 3.38, + "learning_rate": 3.353887399463807e-05, + "loss": 0.0689, + "step": 1278 + }, + { + "epoch": 3.38, + "learning_rate": 3.352546916890081e-05, + "loss": 0.0664, + "step": 1279 + }, + { + "epoch": 3.39, + "learning_rate": 3.351206434316354e-05, + "loss": 0.0614, + "step": 1280 + }, + { + "epoch": 3.39, + "learning_rate": 3.349865951742627e-05, + "loss": 0.1994, + "step": 1281 + }, + { + "epoch": 3.39, + "learning_rate": 3.348525469168901e-05, + "loss": 0.4769, + "step": 1282 + }, + { + "epoch": 3.39, + "learning_rate": 3.347184986595174e-05, + "loss": 0.1851, + "step": 1283 + }, + { + "epoch": 3.4, + "learning_rate": 3.345844504021448e-05, + "loss": 0.0092, + "step": 1284 + }, + { + "epoch": 3.4, + "learning_rate": 3.344504021447721e-05, + "loss": 0.0052, + "step": 1285 + }, + { + "epoch": 3.4, + "learning_rate": 3.3431635388739946e-05, + "loss": 0.0095, + "step": 1286 + }, + { + "epoch": 3.4, + "learning_rate": 3.341823056300268e-05, + "loss": 0.0242, + "step": 1287 + }, + { + "epoch": 3.41, + "learning_rate": 3.3404825737265416e-05, + "loss": 0.0565, + "step": 1288 + }, + { + "epoch": 3.41, + "learning_rate": 3.339142091152815e-05, + "loss": 0.2645, + "step": 1289 + }, + { + "epoch": 3.41, + "learning_rate": 3.337801608579089e-05, + "loss": 0.0049, + "step": 1290 + }, + { + "epoch": 3.42, + "learning_rate": 3.336461126005362e-05, + "loss": 0.0929, + "step": 1291 + }, + { + "epoch": 3.42, + "learning_rate": 3.335120643431636e-05, + "loss": 0.3968, + "step": 1292 + }, + { + "epoch": 3.42, + "learning_rate": 3.333780160857909e-05, + "loss": 0.033, + "step": 1293 + }, + { + "epoch": 3.42, + "learning_rate": 3.332439678284183e-05, + "loss": 0.007, + "step": 1294 + }, + { + "epoch": 3.43, + "learning_rate": 3.331099195710456e-05, + "loss": 0.2552, + "step": 1295 + }, + { + "epoch": 3.43, + "learning_rate": 3.329758713136729e-05, + "loss": 0.004, + "step": 1296 + }, + { + "epoch": 3.43, + "learning_rate": 3.328418230563003e-05, + "loss": 0.136, + "step": 1297 + }, + { + "epoch": 3.43, + "learning_rate": 3.327077747989276e-05, + "loss": 0.1407, + "step": 1298 + }, + { + "epoch": 3.44, + "learning_rate": 3.32573726541555e-05, + "loss": 0.0354, + "step": 1299 + }, + { + "epoch": 3.44, + "learning_rate": 3.324396782841823e-05, + "loss": 0.6141, + "step": 1300 + }, + { + "epoch": 3.44, + "learning_rate": 3.3230563002680966e-05, + "loss": 0.2544, + "step": 1301 + }, + { + "epoch": 3.44, + "learning_rate": 3.32171581769437e-05, + "loss": 0.0046, + "step": 1302 + }, + { + "epoch": 3.45, + "learning_rate": 3.320375335120644e-05, + "loss": 0.0126, + "step": 1303 + }, + { + "epoch": 3.45, + "learning_rate": 3.319034852546917e-05, + "loss": 0.3506, + "step": 1304 + }, + { + "epoch": 3.45, + "learning_rate": 3.317694369973191e-05, + "loss": 0.3512, + "step": 1305 + }, + { + "epoch": 3.46, + "learning_rate": 3.3163538873994635e-05, + "loss": 0.3675, + "step": 1306 + }, + { + "epoch": 3.46, + "learning_rate": 3.315013404825738e-05, + "loss": 0.1676, + "step": 1307 + }, + { + "epoch": 3.46, + "learning_rate": 3.3136729222520106e-05, + "loss": 0.0307, + "step": 1308 + }, + { + "epoch": 3.46, + "learning_rate": 3.312332439678285e-05, + "loss": 0.0084, + "step": 1309 + }, + { + "epoch": 3.47, + "learning_rate": 3.3109919571045576e-05, + "loss": 0.1977, + "step": 1310 + }, + { + "epoch": 3.47, + "learning_rate": 3.309651474530831e-05, + "loss": 0.1645, + "step": 1311 + }, + { + "epoch": 3.47, + "learning_rate": 3.3083109919571046e-05, + "loss": 0.2579, + "step": 1312 + }, + { + "epoch": 3.47, + "learning_rate": 3.306970509383378e-05, + "loss": 0.1656, + "step": 1313 + }, + { + "epoch": 3.48, + "learning_rate": 3.3056300268096516e-05, + "loss": 0.0168, + "step": 1314 + }, + { + "epoch": 3.48, + "learning_rate": 3.304289544235925e-05, + "loss": 0.0291, + "step": 1315 + }, + { + "epoch": 3.48, + "learning_rate": 3.302949061662198e-05, + "loss": 0.0146, + "step": 1316 + }, + { + "epoch": 3.48, + "learning_rate": 3.301608579088472e-05, + "loss": 0.0037, + "step": 1317 + }, + { + "epoch": 3.49, + "learning_rate": 3.300268096514745e-05, + "loss": 0.0113, + "step": 1318 + }, + { + "epoch": 3.49, + "learning_rate": 3.298927613941019e-05, + "loss": 0.0734, + "step": 1319 + }, + { + "epoch": 3.49, + "learning_rate": 3.297587131367292e-05, + "loss": 0.0292, + "step": 1320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2962466487935655e-05, + "loss": 0.3875, + "step": 1321 + }, + { + "epoch": 3.5, + "learning_rate": 3.294906166219839e-05, + "loss": 0.0138, + "step": 1322 + }, + { + "epoch": 3.5, + "learning_rate": 3.2935656836461126e-05, + "loss": 0.4653, + "step": 1323 + }, + { + "epoch": 3.5, + "learning_rate": 3.292225201072386e-05, + "loss": 0.1864, + "step": 1324 + }, + { + "epoch": 3.51, + "learning_rate": 3.2908847184986596e-05, + "loss": 0.0116, + "step": 1325 + }, + { + "epoch": 3.51, + "learning_rate": 3.289544235924933e-05, + "loss": 0.014, + "step": 1326 + }, + { + "epoch": 3.51, + "learning_rate": 3.2882037533512066e-05, + "loss": 0.3344, + "step": 1327 + }, + { + "epoch": 3.51, + "learning_rate": 3.28686327077748e-05, + "loss": 0.1544, + "step": 1328 + }, + { + "epoch": 3.52, + "learning_rate": 3.2855227882037537e-05, + "loss": 0.0065, + "step": 1329 + }, + { + "epoch": 3.52, + "learning_rate": 3.284182305630027e-05, + "loss": 0.0041, + "step": 1330 + }, + { + "epoch": 3.52, + "learning_rate": 3.2828418230563e-05, + "loss": 0.0044, + "step": 1331 + }, + { + "epoch": 3.52, + "learning_rate": 3.281501340482574e-05, + "loss": 0.1808, + "step": 1332 + }, + { + "epoch": 3.53, + "learning_rate": 3.280160857908847e-05, + "loss": 0.0521, + "step": 1333 + }, + { + "epoch": 3.53, + "learning_rate": 3.278820375335121e-05, + "loss": 0.3505, + "step": 1334 + }, + { + "epoch": 3.53, + "learning_rate": 3.277479892761394e-05, + "loss": 0.2032, + "step": 1335 + }, + { + "epoch": 3.53, + "learning_rate": 3.2761394101876676e-05, + "loss": 0.004, + "step": 1336 + }, + { + "epoch": 3.54, + "learning_rate": 3.274798927613941e-05, + "loss": 0.0343, + "step": 1337 + }, + { + "epoch": 3.54, + "learning_rate": 3.2734584450402146e-05, + "loss": 0.278, + "step": 1338 + }, + { + "epoch": 3.54, + "learning_rate": 3.272117962466488e-05, + "loss": 0.0056, + "step": 1339 + }, + { + "epoch": 3.54, + "learning_rate": 3.2707774798927616e-05, + "loss": 0.1673, + "step": 1340 + }, + { + "epoch": 3.55, + "learning_rate": 3.2694369973190345e-05, + "loss": 0.0092, + "step": 1341 + }, + { + "epoch": 3.55, + "learning_rate": 3.2680965147453086e-05, + "loss": 0.0058, + "step": 1342 + }, + { + "epoch": 3.55, + "learning_rate": 3.2667560321715815e-05, + "loss": 0.097, + "step": 1343 + }, + { + "epoch": 3.56, + "learning_rate": 3.265415549597856e-05, + "loss": 0.2138, + "step": 1344 + }, + { + "epoch": 3.56, + "learning_rate": 3.2640750670241285e-05, + "loss": 0.0077, + "step": 1345 + }, + { + "epoch": 3.56, + "learning_rate": 3.262734584450402e-05, + "loss": 0.2294, + "step": 1346 + }, + { + "epoch": 3.56, + "learning_rate": 3.2613941018766755e-05, + "loss": 0.3282, + "step": 1347 + }, + { + "epoch": 3.57, + "learning_rate": 3.260053619302949e-05, + "loss": 0.233, + "step": 1348 + }, + { + "epoch": 3.57, + "learning_rate": 3.2587131367292226e-05, + "loss": 0.0379, + "step": 1349 + }, + { + "epoch": 3.57, + "learning_rate": 3.257372654155496e-05, + "loss": 0.2168, + "step": 1350 + }, + { + "epoch": 3.57, + "learning_rate": 3.2560321715817696e-05, + "loss": 0.0443, + "step": 1351 + }, + { + "epoch": 3.58, + "learning_rate": 3.254691689008043e-05, + "loss": 0.2665, + "step": 1352 + }, + { + "epoch": 3.58, + "learning_rate": 3.2533512064343166e-05, + "loss": 0.0136, + "step": 1353 + }, + { + "epoch": 3.58, + "learning_rate": 3.25201072386059e-05, + "loss": 0.0035, + "step": 1354 + }, + { + "epoch": 3.58, + "learning_rate": 3.2506702412868636e-05, + "loss": 0.2153, + "step": 1355 + }, + { + "epoch": 3.59, + "learning_rate": 3.249329758713137e-05, + "loss": 0.088, + "step": 1356 + }, + { + "epoch": 3.59, + "learning_rate": 3.247989276139411e-05, + "loss": 0.0074, + "step": 1357 + }, + { + "epoch": 3.59, + "learning_rate": 3.2466487935656835e-05, + "loss": 0.0924, + "step": 1358 + }, + { + "epoch": 3.6, + "learning_rate": 3.245308310991958e-05, + "loss": 0.0171, + "step": 1359 + }, + { + "epoch": 3.6, + "learning_rate": 3.2439678284182305e-05, + "loss": 0.0132, + "step": 1360 + }, + { + "epoch": 3.6, + "learning_rate": 3.242627345844505e-05, + "loss": 0.0583, + "step": 1361 + }, + { + "epoch": 3.6, + "learning_rate": 3.2412868632707776e-05, + "loss": 0.0038, + "step": 1362 + }, + { + "epoch": 3.61, + "learning_rate": 3.239946380697051e-05, + "loss": 0.0846, + "step": 1363 + }, + { + "epoch": 3.61, + "learning_rate": 3.2386058981233246e-05, + "loss": 0.0058, + "step": 1364 + }, + { + "epoch": 3.61, + "learning_rate": 3.237265415549598e-05, + "loss": 0.4456, + "step": 1365 + }, + { + "epoch": 3.61, + "learning_rate": 3.2359249329758716e-05, + "loss": 0.0029, + "step": 1366 + }, + { + "epoch": 3.62, + "learning_rate": 3.234584450402145e-05, + "loss": 0.2553, + "step": 1367 + }, + { + "epoch": 3.62, + "learning_rate": 3.233243967828418e-05, + "loss": 0.0936, + "step": 1368 + }, + { + "epoch": 3.62, + "learning_rate": 3.231903485254692e-05, + "loss": 0.1017, + "step": 1369 + }, + { + "epoch": 3.62, + "learning_rate": 3.230563002680965e-05, + "loss": 0.0379, + "step": 1370 + }, + { + "epoch": 3.63, + "learning_rate": 3.229222520107239e-05, + "loss": 0.0069, + "step": 1371 + }, + { + "epoch": 3.63, + "learning_rate": 3.227882037533512e-05, + "loss": 0.3235, + "step": 1372 + }, + { + "epoch": 3.63, + "learning_rate": 3.2265415549597855e-05, + "loss": 0.3796, + "step": 1373 + }, + { + "epoch": 3.63, + "learning_rate": 3.225201072386059e-05, + "loss": 0.3246, + "step": 1374 + }, + { + "epoch": 3.64, + "learning_rate": 3.2238605898123325e-05, + "loss": 0.0059, + "step": 1375 + }, + { + "epoch": 3.64, + "learning_rate": 3.222520107238606e-05, + "loss": 0.0405, + "step": 1376 + }, + { + "epoch": 3.64, + "learning_rate": 3.2211796246648796e-05, + "loss": 0.0142, + "step": 1377 + }, + { + "epoch": 3.65, + "learning_rate": 3.219839142091153e-05, + "loss": 0.4426, + "step": 1378 + }, + { + "epoch": 3.65, + "learning_rate": 3.2184986595174266e-05, + "loss": 0.0249, + "step": 1379 + }, + { + "epoch": 3.65, + "learning_rate": 3.2171581769437e-05, + "loss": 0.1053, + "step": 1380 + }, + { + "epoch": 3.65, + "learning_rate": 3.2158176943699736e-05, + "loss": 0.0179, + "step": 1381 + }, + { + "epoch": 3.66, + "learning_rate": 3.214477211796247e-05, + "loss": 0.0718, + "step": 1382 + }, + { + "epoch": 3.66, + "learning_rate": 3.21313672922252e-05, + "loss": 0.1431, + "step": 1383 + }, + { + "epoch": 3.66, + "learning_rate": 3.211796246648794e-05, + "loss": 0.2391, + "step": 1384 + }, + { + "epoch": 3.66, + "learning_rate": 3.210455764075067e-05, + "loss": 0.0053, + "step": 1385 + }, + { + "epoch": 3.67, + "learning_rate": 3.209115281501341e-05, + "loss": 0.2935, + "step": 1386 + }, + { + "epoch": 3.67, + "learning_rate": 3.207774798927614e-05, + "loss": 0.0071, + "step": 1387 + }, + { + "epoch": 3.67, + "learning_rate": 3.2064343163538875e-05, + "loss": 0.031, + "step": 1388 + }, + { + "epoch": 3.67, + "learning_rate": 3.205093833780161e-05, + "loss": 0.1989, + "step": 1389 + }, + { + "epoch": 3.68, + "learning_rate": 3.2037533512064346e-05, + "loss": 0.0533, + "step": 1390 + }, + { + "epoch": 3.68, + "learning_rate": 3.202412868632708e-05, + "loss": 0.2408, + "step": 1391 + }, + { + "epoch": 3.68, + "learning_rate": 3.2010723860589816e-05, + "loss": 0.3158, + "step": 1392 + }, + { + "epoch": 3.69, + "learning_rate": 3.1997319034852544e-05, + "loss": 0.3629, + "step": 1393 + }, + { + "epoch": 3.69, + "learning_rate": 3.1983914209115286e-05, + "loss": 0.0122, + "step": 1394 + }, + { + "epoch": 3.69, + "learning_rate": 3.1970509383378014e-05, + "loss": 0.0449, + "step": 1395 + }, + { + "epoch": 3.69, + "learning_rate": 3.1957104557640756e-05, + "loss": 0.1273, + "step": 1396 + }, + { + "epoch": 3.7, + "learning_rate": 3.1943699731903485e-05, + "loss": 0.3401, + "step": 1397 + }, + { + "epoch": 3.7, + "learning_rate": 3.193029490616622e-05, + "loss": 0.0183, + "step": 1398 + }, + { + "epoch": 3.7, + "learning_rate": 3.1916890080428955e-05, + "loss": 0.0526, + "step": 1399 + }, + { + "epoch": 3.7, + "learning_rate": 3.190348525469169e-05, + "loss": 0.5037, + "step": 1400 + }, + { + "epoch": 3.71, + "learning_rate": 3.1890080428954425e-05, + "loss": 0.0059, + "step": 1401 + }, + { + "epoch": 3.71, + "learning_rate": 3.187667560321716e-05, + "loss": 0.0266, + "step": 1402 + }, + { + "epoch": 3.71, + "learning_rate": 3.1863270777479896e-05, + "loss": 0.4095, + "step": 1403 + }, + { + "epoch": 3.71, + "learning_rate": 3.184986595174263e-05, + "loss": 0.1802, + "step": 1404 + }, + { + "epoch": 3.72, + "learning_rate": 3.1836461126005366e-05, + "loss": 0.3586, + "step": 1405 + }, + { + "epoch": 3.72, + "learning_rate": 3.18230563002681e-05, + "loss": 0.2058, + "step": 1406 + }, + { + "epoch": 3.72, + "learning_rate": 3.1809651474530836e-05, + "loss": 0.008, + "step": 1407 + }, + { + "epoch": 3.72, + "learning_rate": 3.1796246648793564e-05, + "loss": 0.0282, + "step": 1408 + }, + { + "epoch": 3.73, + "learning_rate": 3.1782841823056306e-05, + "loss": 0.0077, + "step": 1409 + }, + { + "epoch": 3.73, + "learning_rate": 3.1769436997319035e-05, + "loss": 0.3461, + "step": 1410 + }, + { + "epoch": 3.73, + "learning_rate": 3.1756032171581777e-05, + "loss": 0.0038, + "step": 1411 + }, + { + "epoch": 3.74, + "learning_rate": 3.1742627345844505e-05, + "loss": 0.0087, + "step": 1412 + }, + { + "epoch": 3.74, + "learning_rate": 3.172922252010724e-05, + "loss": 0.8254, + "step": 1413 + }, + { + "epoch": 3.74, + "learning_rate": 3.1715817694369975e-05, + "loss": 0.017, + "step": 1414 + }, + { + "epoch": 3.74, + "learning_rate": 3.170241286863271e-05, + "loss": 0.2954, + "step": 1415 + }, + { + "epoch": 3.75, + "learning_rate": 3.1689008042895445e-05, + "loss": 0.0286, + "step": 1416 + }, + { + "epoch": 3.75, + "learning_rate": 3.167560321715818e-05, + "loss": 0.0454, + "step": 1417 + }, + { + "epoch": 3.75, + "learning_rate": 3.166219839142091e-05, + "loss": 0.222, + "step": 1418 + }, + { + "epoch": 3.75, + "learning_rate": 3.164879356568365e-05, + "loss": 0.0225, + "step": 1419 + }, + { + "epoch": 3.76, + "learning_rate": 3.163538873994638e-05, + "loss": 0.2599, + "step": 1420 + }, + { + "epoch": 3.76, + "learning_rate": 3.162198391420912e-05, + "loss": 0.2343, + "step": 1421 + }, + { + "epoch": 3.76, + "learning_rate": 3.160857908847185e-05, + "loss": 0.0274, + "step": 1422 + }, + { + "epoch": 3.76, + "learning_rate": 3.1595174262734585e-05, + "loss": 0.0109, + "step": 1423 + }, + { + "epoch": 3.77, + "learning_rate": 3.158176943699732e-05, + "loss": 0.012, + "step": 1424 + }, + { + "epoch": 3.77, + "learning_rate": 3.1568364611260055e-05, + "loss": 0.0267, + "step": 1425 + }, + { + "epoch": 3.77, + "learning_rate": 3.155495978552279e-05, + "loss": 0.0116, + "step": 1426 + }, + { + "epoch": 3.78, + "learning_rate": 3.1541554959785525e-05, + "loss": 0.2563, + "step": 1427 + }, + { + "epoch": 3.78, + "learning_rate": 3.1528150134048253e-05, + "loss": 0.2149, + "step": 1428 + }, + { + "epoch": 3.78, + "learning_rate": 3.1514745308310995e-05, + "loss": 0.2099, + "step": 1429 + }, + { + "epoch": 3.78, + "learning_rate": 3.1501340482573724e-05, + "loss": 0.1445, + "step": 1430 + }, + { + "epoch": 3.79, + "learning_rate": 3.1487935656836466e-05, + "loss": 0.0069, + "step": 1431 + }, + { + "epoch": 3.79, + "learning_rate": 3.1474530831099194e-05, + "loss": 0.3583, + "step": 1432 + }, + { + "epoch": 3.79, + "learning_rate": 3.146112600536193e-05, + "loss": 0.1112, + "step": 1433 + }, + { + "epoch": 3.79, + "learning_rate": 3.1447721179624664e-05, + "loss": 0.5379, + "step": 1434 + }, + { + "epoch": 3.8, + "learning_rate": 3.14343163538874e-05, + "loss": 0.0248, + "step": 1435 + }, + { + "epoch": 3.8, + "learning_rate": 3.1420911528150135e-05, + "loss": 0.0255, + "step": 1436 + }, + { + "epoch": 3.8, + "learning_rate": 3.140750670241287e-05, + "loss": 0.3363, + "step": 1437 + }, + { + "epoch": 3.8, + "learning_rate": 3.1394101876675605e-05, + "loss": 0.2952, + "step": 1438 + }, + { + "epoch": 3.81, + "learning_rate": 3.138069705093834e-05, + "loss": 0.0337, + "step": 1439 + }, + { + "epoch": 3.81, + "learning_rate": 3.1367292225201075e-05, + "loss": 0.0157, + "step": 1440 + }, + { + "epoch": 3.81, + "learning_rate": 3.135388739946381e-05, + "loss": 0.0204, + "step": 1441 + }, + { + "epoch": 3.81, + "learning_rate": 3.1340482573726545e-05, + "loss": 0.7707, + "step": 1442 + }, + { + "epoch": 3.82, + "learning_rate": 3.1327077747989274e-05, + "loss": 0.4232, + "step": 1443 + }, + { + "epoch": 3.82, + "learning_rate": 3.1313672922252016e-05, + "loss": 0.116, + "step": 1444 + }, + { + "epoch": 3.82, + "learning_rate": 3.1300268096514744e-05, + "loss": 0.421, + "step": 1445 + }, + { + "epoch": 3.83, + "learning_rate": 3.1286863270777486e-05, + "loss": 0.0267, + "step": 1446 + }, + { + "epoch": 3.83, + "learning_rate": 3.1273458445040214e-05, + "loss": 0.0078, + "step": 1447 + }, + { + "epoch": 3.83, + "learning_rate": 3.126005361930295e-05, + "loss": 0.0996, + "step": 1448 + }, + { + "epoch": 3.83, + "learning_rate": 3.1246648793565684e-05, + "loss": 0.0389, + "step": 1449 + }, + { + "epoch": 3.84, + "learning_rate": 3.123324396782842e-05, + "loss": 0.0482, + "step": 1450 + }, + { + "epoch": 3.84, + "learning_rate": 3.1219839142091155e-05, + "loss": 0.0053, + "step": 1451 + }, + { + "epoch": 3.84, + "learning_rate": 3.120643431635389e-05, + "loss": 0.0153, + "step": 1452 + }, + { + "epoch": 3.84, + "learning_rate": 3.119302949061662e-05, + "loss": 0.008, + "step": 1453 + }, + { + "epoch": 3.85, + "learning_rate": 3.117962466487936e-05, + "loss": 0.0166, + "step": 1454 + }, + { + "epoch": 3.85, + "learning_rate": 3.116621983914209e-05, + "loss": 0.0889, + "step": 1455 + }, + { + "epoch": 3.85, + "learning_rate": 3.115281501340483e-05, + "loss": 0.0695, + "step": 1456 + }, + { + "epoch": 3.85, + "learning_rate": 3.113941018766756e-05, + "loss": 0.3353, + "step": 1457 + }, + { + "epoch": 3.86, + "learning_rate": 3.1126005361930294e-05, + "loss": 0.0729, + "step": 1458 + }, + { + "epoch": 3.86, + "learning_rate": 3.111260053619303e-05, + "loss": 0.0187, + "step": 1459 + }, + { + "epoch": 3.86, + "learning_rate": 3.1099195710455764e-05, + "loss": 0.2512, + "step": 1460 + }, + { + "epoch": 3.87, + "learning_rate": 3.10857908847185e-05, + "loss": 0.3837, + "step": 1461 + }, + { + "epoch": 3.87, + "learning_rate": 3.1072386058981234e-05, + "loss": 0.2543, + "step": 1462 + }, + { + "epoch": 3.87, + "learning_rate": 3.105898123324397e-05, + "loss": 0.1797, + "step": 1463 + }, + { + "epoch": 3.87, + "learning_rate": 3.1045576407506705e-05, + "loss": 0.3097, + "step": 1464 + }, + { + "epoch": 3.88, + "learning_rate": 3.103217158176944e-05, + "loss": 0.268, + "step": 1465 + }, + { + "epoch": 3.88, + "learning_rate": 3.1018766756032175e-05, + "loss": 0.1773, + "step": 1466 + }, + { + "epoch": 3.88, + "learning_rate": 3.100536193029491e-05, + "loss": 0.2055, + "step": 1467 + }, + { + "epoch": 3.88, + "learning_rate": 3.099195710455764e-05, + "loss": 0.0279, + "step": 1468 + }, + { + "epoch": 3.89, + "learning_rate": 3.097855227882038e-05, + "loss": 0.1263, + "step": 1469 + }, + { + "epoch": 3.89, + "learning_rate": 3.096514745308311e-05, + "loss": 0.0449, + "step": 1470 + }, + { + "epoch": 3.89, + "learning_rate": 3.095174262734585e-05, + "loss": 0.2429, + "step": 1471 + }, + { + "epoch": 3.89, + "learning_rate": 3.093833780160858e-05, + "loss": 0.1245, + "step": 1472 + }, + { + "epoch": 3.9, + "learning_rate": 3.0924932975871314e-05, + "loss": 0.1303, + "step": 1473 + }, + { + "epoch": 3.9, + "learning_rate": 3.091152815013405e-05, + "loss": 0.0303, + "step": 1474 + }, + { + "epoch": 3.9, + "learning_rate": 3.0898123324396784e-05, + "loss": 0.3279, + "step": 1475 + }, + { + "epoch": 3.9, + "learning_rate": 3.088471849865952e-05, + "loss": 0.134, + "step": 1476 + }, + { + "epoch": 3.91, + "learning_rate": 3.0871313672922255e-05, + "loss": 0.5138, + "step": 1477 + }, + { + "epoch": 3.91, + "learning_rate": 3.085790884718498e-05, + "loss": 0.0476, + "step": 1478 + }, + { + "epoch": 3.91, + "learning_rate": 3.0844504021447725e-05, + "loss": 0.1956, + "step": 1479 + }, + { + "epoch": 3.92, + "learning_rate": 3.083109919571045e-05, + "loss": 0.2061, + "step": 1480 + }, + { + "epoch": 3.92, + "learning_rate": 3.0817694369973195e-05, + "loss": 0.269, + "step": 1481 + }, + { + "epoch": 3.92, + "learning_rate": 3.0804289544235923e-05, + "loss": 0.0708, + "step": 1482 + }, + { + "epoch": 3.92, + "learning_rate": 3.0790884718498665e-05, + "loss": 0.0389, + "step": 1483 + }, + { + "epoch": 3.93, + "learning_rate": 3.0777479892761394e-05, + "loss": 0.2566, + "step": 1484 + }, + { + "epoch": 3.93, + "learning_rate": 3.076407506702413e-05, + "loss": 0.0581, + "step": 1485 + }, + { + "epoch": 3.93, + "learning_rate": 3.0750670241286864e-05, + "loss": 0.1527, + "step": 1486 + }, + { + "epoch": 3.93, + "learning_rate": 3.07372654155496e-05, + "loss": 0.3963, + "step": 1487 + }, + { + "epoch": 3.94, + "learning_rate": 3.0723860589812334e-05, + "loss": 0.2241, + "step": 1488 + }, + { + "epoch": 3.94, + "learning_rate": 3.071045576407507e-05, + "loss": 0.1275, + "step": 1489 + }, + { + "epoch": 3.94, + "learning_rate": 3.0697050938337804e-05, + "loss": 0.3148, + "step": 1490 + }, + { + "epoch": 3.94, + "learning_rate": 3.068364611260054e-05, + "loss": 0.1474, + "step": 1491 + }, + { + "epoch": 3.95, + "learning_rate": 3.0670241286863275e-05, + "loss": 0.0233, + "step": 1492 + }, + { + "epoch": 3.95, + "learning_rate": 3.065683646112601e-05, + "loss": 0.1721, + "step": 1493 + }, + { + "epoch": 3.95, + "learning_rate": 3.0643431635388745e-05, + "loss": 0.6024, + "step": 1494 + }, + { + "epoch": 3.96, + "learning_rate": 3.063002680965147e-05, + "loss": 0.1425, + "step": 1495 + }, + { + "epoch": 3.96, + "learning_rate": 3.0616621983914215e-05, + "loss": 0.0311, + "step": 1496 + }, + { + "epoch": 3.96, + "learning_rate": 3.0603217158176944e-05, + "loss": 0.0197, + "step": 1497 + }, + { + "epoch": 3.96, + "learning_rate": 3.0589812332439686e-05, + "loss": 0.0406, + "step": 1498 + }, + { + "epoch": 3.97, + "learning_rate": 3.0576407506702414e-05, + "loss": 0.054, + "step": 1499 + }, + { + "epoch": 3.97, + "learning_rate": 3.056300268096515e-05, + "loss": 0.161, + "step": 1500 + }, + { + "epoch": 3.97, + "learning_rate": 3.0549597855227884e-05, + "loss": 0.0549, + "step": 1501 + }, + { + "epoch": 3.97, + "learning_rate": 3.053619302949062e-05, + "loss": 0.1667, + "step": 1502 + }, + { + "epoch": 3.98, + "learning_rate": 3.0522788203753354e-05, + "loss": 0.1264, + "step": 1503 + }, + { + "epoch": 3.98, + "learning_rate": 3.0509383378016086e-05, + "loss": 0.0133, + "step": 1504 + }, + { + "epoch": 3.98, + "learning_rate": 3.049597855227882e-05, + "loss": 0.0655, + "step": 1505 + }, + { + "epoch": 3.98, + "learning_rate": 3.0482573726541556e-05, + "loss": 0.1054, + "step": 1506 + }, + { + "epoch": 3.99, + "learning_rate": 3.046916890080429e-05, + "loss": 0.0053, + "step": 1507 + }, + { + "epoch": 3.99, + "learning_rate": 3.0455764075067027e-05, + "loss": 0.0347, + "step": 1508 + }, + { + "epoch": 3.99, + "learning_rate": 3.0442359249329762e-05, + "loss": 0.6095, + "step": 1509 + }, + { + "epoch": 3.99, + "learning_rate": 3.0428954423592494e-05, + "loss": 0.1339, + "step": 1510 + }, + { + "epoch": 4.0, + "learning_rate": 3.0415549597855232e-05, + "loss": 0.0088, + "step": 1511 + }, + { + "epoch": 4.0, + "learning_rate": 3.0402144772117964e-05, + "loss": 0.4356, + "step": 1512 + }, + { + "epoch": 4.0, + "eval_f1": 0.7822580645161291, + "eval_loss": 0.6966613531112671, + "eval_runtime": 1.8703, + "eval_samples_per_second": 808.957, + "eval_steps_per_second": 50.794, + "step": 1512 + }, + { + "epoch": 4.0, + "learning_rate": 3.0388739946380702e-05, + "loss": 0.003, + "step": 1513 + }, + { + "epoch": 4.01, + "learning_rate": 3.0375335120643434e-05, + "loss": 0.0067, + "step": 1514 + }, + { + "epoch": 4.01, + "learning_rate": 3.0361930294906166e-05, + "loss": 0.0488, + "step": 1515 + }, + { + "epoch": 4.01, + "learning_rate": 3.0348525469168904e-05, + "loss": 0.0106, + "step": 1516 + }, + { + "epoch": 4.01, + "learning_rate": 3.0335120643431636e-05, + "loss": 0.0098, + "step": 1517 + }, + { + "epoch": 4.02, + "learning_rate": 3.0321715817694375e-05, + "loss": 0.274, + "step": 1518 + }, + { + "epoch": 4.02, + "learning_rate": 3.0308310991957106e-05, + "loss": 0.2007, + "step": 1519 + }, + { + "epoch": 4.02, + "learning_rate": 3.0294906166219838e-05, + "loss": 0.0121, + "step": 1520 + }, + { + "epoch": 4.02, + "learning_rate": 3.0281501340482577e-05, + "loss": 0.0632, + "step": 1521 + }, + { + "epoch": 4.03, + "learning_rate": 3.026809651474531e-05, + "loss": 0.0062, + "step": 1522 + }, + { + "epoch": 4.03, + "learning_rate": 3.0254691689008047e-05, + "loss": 0.0123, + "step": 1523 + }, + { + "epoch": 4.03, + "learning_rate": 3.024128686327078e-05, + "loss": 0.0063, + "step": 1524 + }, + { + "epoch": 4.03, + "learning_rate": 3.022788203753351e-05, + "loss": 0.0102, + "step": 1525 + }, + { + "epoch": 4.04, + "learning_rate": 3.021447721179625e-05, + "loss": 0.0082, + "step": 1526 + }, + { + "epoch": 4.04, + "learning_rate": 3.020107238605898e-05, + "loss": 0.3369, + "step": 1527 + }, + { + "epoch": 4.04, + "learning_rate": 3.018766756032172e-05, + "loss": 0.2587, + "step": 1528 + }, + { + "epoch": 4.04, + "learning_rate": 3.017426273458445e-05, + "loss": 0.0067, + "step": 1529 + }, + { + "epoch": 4.05, + "learning_rate": 3.0160857908847186e-05, + "loss": 0.0021, + "step": 1530 + }, + { + "epoch": 4.05, + "learning_rate": 3.014745308310992e-05, + "loss": 0.0724, + "step": 1531 + }, + { + "epoch": 4.05, + "learning_rate": 3.0134048257372656e-05, + "loss": 0.0074, + "step": 1532 + }, + { + "epoch": 4.06, + "learning_rate": 3.012064343163539e-05, + "loss": 0.0202, + "step": 1533 + }, + { + "epoch": 4.06, + "learning_rate": 3.0107238605898126e-05, + "loss": 0.1435, + "step": 1534 + }, + { + "epoch": 4.06, + "learning_rate": 3.0093833780160858e-05, + "loss": 0.0074, + "step": 1535 + }, + { + "epoch": 4.06, + "learning_rate": 3.0080428954423597e-05, + "loss": 0.4145, + "step": 1536 + }, + { + "epoch": 4.07, + "learning_rate": 3.006702412868633e-05, + "loss": 0.0186, + "step": 1537 + }, + { + "epoch": 4.07, + "learning_rate": 3.0053619302949067e-05, + "loss": 0.1648, + "step": 1538 + }, + { + "epoch": 4.07, + "learning_rate": 3.00402144772118e-05, + "loss": 0.2545, + "step": 1539 + }, + { + "epoch": 4.07, + "learning_rate": 3.002680965147453e-05, + "loss": 0.0016, + "step": 1540 + }, + { + "epoch": 4.08, + "learning_rate": 3.001340482573727e-05, + "loss": 0.0184, + "step": 1541 + }, + { + "epoch": 4.08, + "learning_rate": 3e-05, + "loss": 0.1208, + "step": 1542 + }, + { + "epoch": 4.08, + "learning_rate": 2.998659517426274e-05, + "loss": 0.0021, + "step": 1543 + }, + { + "epoch": 4.08, + "learning_rate": 2.997319034852547e-05, + "loss": 0.0092, + "step": 1544 + }, + { + "epoch": 4.09, + "learning_rate": 2.9959785522788203e-05, + "loss": 0.1514, + "step": 1545 + }, + { + "epoch": 4.09, + "learning_rate": 2.994638069705094e-05, + "loss": 0.0773, + "step": 1546 + }, + { + "epoch": 4.09, + "learning_rate": 2.9932975871313673e-05, + "loss": 0.0093, + "step": 1547 + }, + { + "epoch": 4.1, + "learning_rate": 2.991957104557641e-05, + "loss": 0.0022, + "step": 1548 + }, + { + "epoch": 4.1, + "learning_rate": 2.9906166219839143e-05, + "loss": 0.1765, + "step": 1549 + }, + { + "epoch": 4.1, + "learning_rate": 2.9892761394101875e-05, + "loss": 0.1766, + "step": 1550 + }, + { + "epoch": 4.1, + "learning_rate": 2.9879356568364614e-05, + "loss": 0.0024, + "step": 1551 + }, + { + "epoch": 4.11, + "learning_rate": 2.9865951742627345e-05, + "loss": 0.012, + "step": 1552 + }, + { + "epoch": 4.11, + "learning_rate": 2.9852546916890084e-05, + "loss": 0.0055, + "step": 1553 + }, + { + "epoch": 4.11, + "learning_rate": 2.9839142091152816e-05, + "loss": 0.0088, + "step": 1554 + }, + { + "epoch": 4.11, + "learning_rate": 2.9825737265415547e-05, + "loss": 0.0019, + "step": 1555 + }, + { + "epoch": 4.12, + "learning_rate": 2.9812332439678286e-05, + "loss": 0.0186, + "step": 1556 + }, + { + "epoch": 4.12, + "learning_rate": 2.9798927613941018e-05, + "loss": 0.25, + "step": 1557 + }, + { + "epoch": 4.12, + "learning_rate": 2.9785522788203756e-05, + "loss": 0.0129, + "step": 1558 + }, + { + "epoch": 4.12, + "learning_rate": 2.9772117962466488e-05, + "loss": 0.0048, + "step": 1559 + }, + { + "epoch": 4.13, + "learning_rate": 2.9758713136729223e-05, + "loss": 0.1153, + "step": 1560 + }, + { + "epoch": 4.13, + "learning_rate": 2.9745308310991958e-05, + "loss": 0.1871, + "step": 1561 + }, + { + "epoch": 4.13, + "learning_rate": 2.9731903485254693e-05, + "loss": 0.0087, + "step": 1562 + }, + { + "epoch": 4.13, + "learning_rate": 2.971849865951743e-05, + "loss": 0.0048, + "step": 1563 + }, + { + "epoch": 4.14, + "learning_rate": 2.9705093833780163e-05, + "loss": 0.026, + "step": 1564 + }, + { + "epoch": 4.14, + "learning_rate": 2.9691689008042895e-05, + "loss": 0.3336, + "step": 1565 + }, + { + "epoch": 4.14, + "learning_rate": 2.9678284182305634e-05, + "loss": 0.0015, + "step": 1566 + }, + { + "epoch": 4.15, + "learning_rate": 2.9664879356568365e-05, + "loss": 0.0044, + "step": 1567 + }, + { + "epoch": 4.15, + "learning_rate": 2.9651474530831104e-05, + "loss": 0.0035, + "step": 1568 + }, + { + "epoch": 4.15, + "learning_rate": 2.9638069705093836e-05, + "loss": 0.1206, + "step": 1569 + }, + { + "epoch": 4.15, + "learning_rate": 2.9624664879356567e-05, + "loss": 0.1247, + "step": 1570 + }, + { + "epoch": 4.16, + "learning_rate": 2.9611260053619306e-05, + "loss": 0.0011, + "step": 1571 + }, + { + "epoch": 4.16, + "learning_rate": 2.9597855227882038e-05, + "loss": 0.0023, + "step": 1572 + }, + { + "epoch": 4.16, + "learning_rate": 2.9584450402144776e-05, + "loss": 0.0014, + "step": 1573 + }, + { + "epoch": 4.16, + "learning_rate": 2.9571045576407508e-05, + "loss": 0.2967, + "step": 1574 + }, + { + "epoch": 4.17, + "learning_rate": 2.955764075067024e-05, + "loss": 0.0373, + "step": 1575 + }, + { + "epoch": 4.17, + "learning_rate": 2.9544235924932978e-05, + "loss": 0.3351, + "step": 1576 + }, + { + "epoch": 4.17, + "learning_rate": 2.953083109919571e-05, + "loss": 0.0025, + "step": 1577 + }, + { + "epoch": 4.17, + "learning_rate": 2.951742627345845e-05, + "loss": 0.0025, + "step": 1578 + }, + { + "epoch": 4.18, + "learning_rate": 2.950402144772118e-05, + "loss": 0.0182, + "step": 1579 + }, + { + "epoch": 4.18, + "learning_rate": 2.9490616621983912e-05, + "loss": 0.001, + "step": 1580 + }, + { + "epoch": 4.18, + "learning_rate": 2.947721179624665e-05, + "loss": 0.003, + "step": 1581 + }, + { + "epoch": 4.19, + "learning_rate": 2.9463806970509382e-05, + "loss": 0.0038, + "step": 1582 + }, + { + "epoch": 4.19, + "learning_rate": 2.945040214477212e-05, + "loss": 0.002, + "step": 1583 + }, + { + "epoch": 4.19, + "learning_rate": 2.9436997319034853e-05, + "loss": 0.1688, + "step": 1584 + }, + { + "epoch": 4.19, + "learning_rate": 2.9423592493297584e-05, + "loss": 0.0014, + "step": 1585 + }, + { + "epoch": 4.2, + "learning_rate": 2.9410187667560323e-05, + "loss": 0.2664, + "step": 1586 + }, + { + "epoch": 4.2, + "learning_rate": 2.9396782841823055e-05, + "loss": 0.0012, + "step": 1587 + }, + { + "epoch": 4.2, + "learning_rate": 2.9383378016085793e-05, + "loss": 0.0022, + "step": 1588 + }, + { + "epoch": 4.2, + "learning_rate": 2.9369973190348525e-05, + "loss": 0.0959, + "step": 1589 + }, + { + "epoch": 4.21, + "learning_rate": 2.935656836461126e-05, + "loss": 0.0839, + "step": 1590 + }, + { + "epoch": 4.21, + "learning_rate": 2.9343163538873995e-05, + "loss": 0.7405, + "step": 1591 + }, + { + "epoch": 4.21, + "learning_rate": 2.932975871313673e-05, + "loss": 0.0351, + "step": 1592 + }, + { + "epoch": 4.21, + "learning_rate": 2.9316353887399465e-05, + "loss": 0.0025, + "step": 1593 + }, + { + "epoch": 4.22, + "learning_rate": 2.93029490616622e-05, + "loss": 0.0054, + "step": 1594 + }, + { + "epoch": 4.22, + "learning_rate": 2.9289544235924932e-05, + "loss": 0.0043, + "step": 1595 + }, + { + "epoch": 4.22, + "learning_rate": 2.927613941018767e-05, + "loss": 0.1828, + "step": 1596 + }, + { + "epoch": 4.22, + "learning_rate": 2.9262734584450402e-05, + "loss": 0.0022, + "step": 1597 + }, + { + "epoch": 4.23, + "learning_rate": 2.924932975871314e-05, + "loss": 0.0051, + "step": 1598 + }, + { + "epoch": 4.23, + "learning_rate": 2.9235924932975873e-05, + "loss": 0.0025, + "step": 1599 + }, + { + "epoch": 4.23, + "learning_rate": 2.9222520107238604e-05, + "loss": 0.0018, + "step": 1600 + }, + { + "epoch": 4.24, + "learning_rate": 2.9209115281501343e-05, + "loss": 0.0348, + "step": 1601 + }, + { + "epoch": 4.24, + "learning_rate": 2.9195710455764075e-05, + "loss": 0.207, + "step": 1602 + }, + { + "epoch": 4.24, + "learning_rate": 2.9182305630026813e-05, + "loss": 0.0249, + "step": 1603 + }, + { + "epoch": 4.24, + "learning_rate": 2.9168900804289545e-05, + "loss": 0.0028, + "step": 1604 + }, + { + "epoch": 4.25, + "learning_rate": 2.9155495978552283e-05, + "loss": 0.2604, + "step": 1605 + }, + { + "epoch": 4.25, + "learning_rate": 2.9142091152815015e-05, + "loss": 0.2808, + "step": 1606 + }, + { + "epoch": 4.25, + "learning_rate": 2.9128686327077747e-05, + "loss": 0.0289, + "step": 1607 + }, + { + "epoch": 4.25, + "learning_rate": 2.9115281501340486e-05, + "loss": 0.005, + "step": 1608 + }, + { + "epoch": 4.26, + "learning_rate": 2.9101876675603217e-05, + "loss": 0.7931, + "step": 1609 + }, + { + "epoch": 4.26, + "learning_rate": 2.9088471849865956e-05, + "loss": 0.335, + "step": 1610 + }, + { + "epoch": 4.26, + "learning_rate": 2.9075067024128688e-05, + "loss": 0.2779, + "step": 1611 + }, + { + "epoch": 4.26, + "learning_rate": 2.906166219839142e-05, + "loss": 0.1649, + "step": 1612 + }, + { + "epoch": 4.27, + "learning_rate": 2.9048257372654158e-05, + "loss": 0.0081, + "step": 1613 + }, + { + "epoch": 4.27, + "learning_rate": 2.903485254691689e-05, + "loss": 0.0638, + "step": 1614 + }, + { + "epoch": 4.27, + "learning_rate": 2.9021447721179628e-05, + "loss": 0.016, + "step": 1615 + }, + { + "epoch": 4.28, + "learning_rate": 2.900804289544236e-05, + "loss": 0.0025, + "step": 1616 + }, + { + "epoch": 4.28, + "learning_rate": 2.8994638069705095e-05, + "loss": 0.0249, + "step": 1617 + }, + { + "epoch": 4.28, + "learning_rate": 2.898123324396783e-05, + "loss": 0.0291, + "step": 1618 + }, + { + "epoch": 4.28, + "learning_rate": 2.8967828418230565e-05, + "loss": 0.1773, + "step": 1619 + }, + { + "epoch": 4.29, + "learning_rate": 2.89544235924933e-05, + "loss": 0.3452, + "step": 1620 + }, + { + "epoch": 4.29, + "learning_rate": 2.8941018766756035e-05, + "loss": 0.006, + "step": 1621 + }, + { + "epoch": 4.29, + "learning_rate": 2.8927613941018767e-05, + "loss": 0.0054, + "step": 1622 + }, + { + "epoch": 4.29, + "learning_rate": 2.8914209115281506e-05, + "loss": 0.1852, + "step": 1623 + }, + { + "epoch": 4.3, + "learning_rate": 2.8900804289544237e-05, + "loss": 0.4424, + "step": 1624 + }, + { + "epoch": 4.3, + "learning_rate": 2.8887399463806976e-05, + "loss": 0.0063, + "step": 1625 + }, + { + "epoch": 4.3, + "learning_rate": 2.8873994638069708e-05, + "loss": 0.43, + "step": 1626 + }, + { + "epoch": 4.3, + "learning_rate": 2.886058981233244e-05, + "loss": 0.2283, + "step": 1627 + }, + { + "epoch": 4.31, + "learning_rate": 2.8847184986595178e-05, + "loss": 0.0519, + "step": 1628 + }, + { + "epoch": 4.31, + "learning_rate": 2.883378016085791e-05, + "loss": 0.1797, + "step": 1629 + }, + { + "epoch": 4.31, + "learning_rate": 2.8820375335120648e-05, + "loss": 0.2569, + "step": 1630 + }, + { + "epoch": 4.31, + "learning_rate": 2.880697050938338e-05, + "loss": 0.0024, + "step": 1631 + }, + { + "epoch": 4.32, + "learning_rate": 2.8793565683646112e-05, + "loss": 0.1727, + "step": 1632 + }, + { + "epoch": 4.32, + "learning_rate": 2.878016085790885e-05, + "loss": 0.0091, + "step": 1633 + }, + { + "epoch": 4.32, + "learning_rate": 2.8766756032171582e-05, + "loss": 0.2002, + "step": 1634 + }, + { + "epoch": 4.33, + "learning_rate": 2.875335120643432e-05, + "loss": 0.0217, + "step": 1635 + }, + { + "epoch": 4.33, + "learning_rate": 2.8739946380697052e-05, + "loss": 0.2163, + "step": 1636 + }, + { + "epoch": 4.33, + "learning_rate": 2.8726541554959784e-05, + "loss": 0.0065, + "step": 1637 + }, + { + "epoch": 4.33, + "learning_rate": 2.8713136729222522e-05, + "loss": 0.1567, + "step": 1638 + }, + { + "epoch": 4.34, + "learning_rate": 2.8699731903485254e-05, + "loss": 0.1775, + "step": 1639 + }, + { + "epoch": 4.34, + "learning_rate": 2.8686327077747993e-05, + "loss": 0.0116, + "step": 1640 + }, + { + "epoch": 4.34, + "learning_rate": 2.8672922252010724e-05, + "loss": 0.0114, + "step": 1641 + }, + { + "epoch": 4.34, + "learning_rate": 2.8659517426273456e-05, + "loss": 0.0264, + "step": 1642 + }, + { + "epoch": 4.35, + "learning_rate": 2.8646112600536195e-05, + "loss": 0.0172, + "step": 1643 + }, + { + "epoch": 4.35, + "learning_rate": 2.8632707774798926e-05, + "loss": 0.187, + "step": 1644 + }, + { + "epoch": 4.35, + "learning_rate": 2.8619302949061665e-05, + "loss": 0.009, + "step": 1645 + }, + { + "epoch": 4.35, + "learning_rate": 2.8605898123324397e-05, + "loss": 0.014, + "step": 1646 + }, + { + "epoch": 4.36, + "learning_rate": 2.8592493297587132e-05, + "loss": 0.1643, + "step": 1647 + }, + { + "epoch": 4.36, + "learning_rate": 2.8579088471849867e-05, + "loss": 0.2763, + "step": 1648 + }, + { + "epoch": 4.36, + "learning_rate": 2.8565683646112602e-05, + "loss": 0.0641, + "step": 1649 + }, + { + "epoch": 4.37, + "learning_rate": 2.8552278820375337e-05, + "loss": 0.6128, + "step": 1650 + }, + { + "epoch": 4.37, + "learning_rate": 2.8538873994638072e-05, + "loss": 0.0229, + "step": 1651 + }, + { + "epoch": 4.37, + "learning_rate": 2.8525469168900804e-05, + "loss": 0.0344, + "step": 1652 + }, + { + "epoch": 4.37, + "learning_rate": 2.8512064343163543e-05, + "loss": 0.018, + "step": 1653 + }, + { + "epoch": 4.38, + "learning_rate": 2.8498659517426274e-05, + "loss": 0.191, + "step": 1654 + }, + { + "epoch": 4.38, + "learning_rate": 2.8485254691689013e-05, + "loss": 0.0397, + "step": 1655 + }, + { + "epoch": 4.38, + "learning_rate": 2.8471849865951745e-05, + "loss": 0.0029, + "step": 1656 + }, + { + "epoch": 4.38, + "learning_rate": 2.8458445040214476e-05, + "loss": 0.0034, + "step": 1657 + }, + { + "epoch": 4.39, + "learning_rate": 2.8445040214477215e-05, + "loss": 0.0031, + "step": 1658 + }, + { + "epoch": 4.39, + "learning_rate": 2.8431635388739947e-05, + "loss": 0.4272, + "step": 1659 + }, + { + "epoch": 4.39, + "learning_rate": 2.8418230563002685e-05, + "loss": 0.0042, + "step": 1660 + }, + { + "epoch": 4.39, + "learning_rate": 2.8404825737265417e-05, + "loss": 0.0224, + "step": 1661 + }, + { + "epoch": 4.4, + "learning_rate": 2.839142091152815e-05, + "loss": 0.1021, + "step": 1662 + }, + { + "epoch": 4.4, + "learning_rate": 2.8378016085790887e-05, + "loss": 0.0076, + "step": 1663 + }, + { + "epoch": 4.4, + "learning_rate": 2.836461126005362e-05, + "loss": 0.084, + "step": 1664 + }, + { + "epoch": 4.4, + "learning_rate": 2.8351206434316357e-05, + "loss": 0.0321, + "step": 1665 + }, + { + "epoch": 4.41, + "learning_rate": 2.833780160857909e-05, + "loss": 0.1369, + "step": 1666 + }, + { + "epoch": 4.41, + "learning_rate": 2.832439678284182e-05, + "loss": 0.018, + "step": 1667 + }, + { + "epoch": 4.41, + "learning_rate": 2.831099195710456e-05, + "loss": 0.1886, + "step": 1668 + }, + { + "epoch": 4.42, + "learning_rate": 2.829758713136729e-05, + "loss": 0.0016, + "step": 1669 + }, + { + "epoch": 4.42, + "learning_rate": 2.828418230563003e-05, + "loss": 0.0031, + "step": 1670 + }, + { + "epoch": 4.42, + "learning_rate": 2.827077747989276e-05, + "loss": 0.0043, + "step": 1671 + }, + { + "epoch": 4.42, + "learning_rate": 2.8257372654155497e-05, + "loss": 0.1202, + "step": 1672 + }, + { + "epoch": 4.43, + "learning_rate": 2.8243967828418232e-05, + "loss": 0.1409, + "step": 1673 + }, + { + "epoch": 4.43, + "learning_rate": 2.8230563002680967e-05, + "loss": 0.0821, + "step": 1674 + }, + { + "epoch": 4.43, + "learning_rate": 2.8217158176943702e-05, + "loss": 0.0468, + "step": 1675 + }, + { + "epoch": 4.43, + "learning_rate": 2.8203753351206437e-05, + "loss": 0.0559, + "step": 1676 + }, + { + "epoch": 4.44, + "learning_rate": 2.819034852546917e-05, + "loss": 0.0192, + "step": 1677 + }, + { + "epoch": 4.44, + "learning_rate": 2.8176943699731907e-05, + "loss": 0.0024, + "step": 1678 + }, + { + "epoch": 4.44, + "learning_rate": 2.816353887399464e-05, + "loss": 0.0021, + "step": 1679 + }, + { + "epoch": 4.44, + "learning_rate": 2.8150134048257378e-05, + "loss": 0.0139, + "step": 1680 + }, + { + "epoch": 4.45, + "learning_rate": 2.813672922252011e-05, + "loss": 0.0042, + "step": 1681 + }, + { + "epoch": 4.45, + "learning_rate": 2.812332439678284e-05, + "loss": 0.1666, + "step": 1682 + }, + { + "epoch": 4.45, + "learning_rate": 2.810991957104558e-05, + "loss": 0.5925, + "step": 1683 + }, + { + "epoch": 4.46, + "learning_rate": 2.809651474530831e-05, + "loss": 0.1689, + "step": 1684 + }, + { + "epoch": 4.46, + "learning_rate": 2.808310991957105e-05, + "loss": 0.0053, + "step": 1685 + }, + { + "epoch": 4.46, + "learning_rate": 2.806970509383378e-05, + "loss": 0.0019, + "step": 1686 + }, + { + "epoch": 4.46, + "learning_rate": 2.8056300268096513e-05, + "loss": 0.0632, + "step": 1687 + }, + { + "epoch": 4.47, + "learning_rate": 2.8042895442359252e-05, + "loss": 0.0115, + "step": 1688 + }, + { + "epoch": 4.47, + "learning_rate": 2.8029490616621984e-05, + "loss": 0.002, + "step": 1689 + }, + { + "epoch": 4.47, + "learning_rate": 2.8016085790884722e-05, + "loss": 0.0021, + "step": 1690 + }, + { + "epoch": 4.47, + "learning_rate": 2.8002680965147454e-05, + "loss": 0.0079, + "step": 1691 + }, + { + "epoch": 4.48, + "learning_rate": 2.7989276139410186e-05, + "loss": 0.0016, + "step": 1692 + }, + { + "epoch": 4.48, + "learning_rate": 2.7975871313672924e-05, + "loss": 0.1824, + "step": 1693 + }, + { + "epoch": 4.48, + "learning_rate": 2.7962466487935656e-05, + "loss": 0.1025, + "step": 1694 + }, + { + "epoch": 4.48, + "learning_rate": 2.7949061662198394e-05, + "loss": 0.4274, + "step": 1695 + }, + { + "epoch": 4.49, + "learning_rate": 2.7935656836461126e-05, + "loss": 0.0834, + "step": 1696 + }, + { + "epoch": 4.49, + "learning_rate": 2.7922252010723858e-05, + "loss": 0.6412, + "step": 1697 + }, + { + "epoch": 4.49, + "learning_rate": 2.7908847184986596e-05, + "loss": 0.3051, + "step": 1698 + }, + { + "epoch": 4.49, + "learning_rate": 2.7895442359249328e-05, + "loss": 0.0909, + "step": 1699 + }, + { + "epoch": 4.5, + "learning_rate": 2.7882037533512067e-05, + "loss": 0.2655, + "step": 1700 + }, + { + "epoch": 4.5, + "learning_rate": 2.78686327077748e-05, + "loss": 0.305, + "step": 1701 + }, + { + "epoch": 4.5, + "learning_rate": 2.7855227882037534e-05, + "loss": 0.2733, + "step": 1702 + }, + { + "epoch": 4.51, + "learning_rate": 2.784182305630027e-05, + "loss": 0.0021, + "step": 1703 + }, + { + "epoch": 4.51, + "learning_rate": 2.7828418230563004e-05, + "loss": 0.0072, + "step": 1704 + }, + { + "epoch": 4.51, + "learning_rate": 2.781501340482574e-05, + "loss": 0.0027, + "step": 1705 + }, + { + "epoch": 4.51, + "learning_rate": 2.7801608579088474e-05, + "loss": 0.184, + "step": 1706 + }, + { + "epoch": 4.52, + "learning_rate": 2.7788203753351206e-05, + "loss": 0.0143, + "step": 1707 + }, + { + "epoch": 4.52, + "learning_rate": 2.7774798927613944e-05, + "loss": 0.0297, + "step": 1708 + }, + { + "epoch": 4.52, + "learning_rate": 2.7761394101876676e-05, + "loss": 0.0739, + "step": 1709 + }, + { + "epoch": 4.52, + "learning_rate": 2.7747989276139415e-05, + "loss": 0.0188, + "step": 1710 + }, + { + "epoch": 4.53, + "learning_rate": 2.7734584450402146e-05, + "loss": 0.2487, + "step": 1711 + }, + { + "epoch": 4.53, + "learning_rate": 2.7721179624664878e-05, + "loss": 0.0222, + "step": 1712 + }, + { + "epoch": 4.53, + "learning_rate": 2.7707774798927617e-05, + "loss": 0.0041, + "step": 1713 + }, + { + "epoch": 4.53, + "learning_rate": 2.769436997319035e-05, + "loss": 0.0164, + "step": 1714 + }, + { + "epoch": 4.54, + "learning_rate": 2.7680965147453087e-05, + "loss": 0.0985, + "step": 1715 + }, + { + "epoch": 4.54, + "learning_rate": 2.766756032171582e-05, + "loss": 0.0067, + "step": 1716 + }, + { + "epoch": 4.54, + "learning_rate": 2.765415549597855e-05, + "loss": 0.3304, + "step": 1717 + }, + { + "epoch": 4.54, + "learning_rate": 2.764075067024129e-05, + "loss": 0.006, + "step": 1718 + }, + { + "epoch": 4.55, + "learning_rate": 2.762734584450402e-05, + "loss": 0.0142, + "step": 1719 + }, + { + "epoch": 4.55, + "learning_rate": 2.761394101876676e-05, + "loss": 0.2205, + "step": 1720 + }, + { + "epoch": 4.55, + "learning_rate": 2.760053619302949e-05, + "loss": 0.298, + "step": 1721 + }, + { + "epoch": 4.56, + "learning_rate": 2.7587131367292223e-05, + "loss": 0.0041, + "step": 1722 + }, + { + "epoch": 4.56, + "learning_rate": 2.757372654155496e-05, + "loss": 0.0018, + "step": 1723 + }, + { + "epoch": 4.56, + "learning_rate": 2.7560321715817693e-05, + "loss": 0.0185, + "step": 1724 + }, + { + "epoch": 4.56, + "learning_rate": 2.754691689008043e-05, + "loss": 0.0042, + "step": 1725 + }, + { + "epoch": 4.57, + "learning_rate": 2.7533512064343163e-05, + "loss": 0.036, + "step": 1726 + }, + { + "epoch": 4.57, + "learning_rate": 2.7520107238605898e-05, + "loss": 0.2593, + "step": 1727 + }, + { + "epoch": 4.57, + "learning_rate": 2.7506702412868633e-05, + "loss": 0.0062, + "step": 1728 + }, + { + "epoch": 4.57, + "learning_rate": 2.749329758713137e-05, + "loss": 0.1759, + "step": 1729 + }, + { + "epoch": 4.58, + "learning_rate": 2.7479892761394104e-05, + "loss": 0.0202, + "step": 1730 + }, + { + "epoch": 4.58, + "learning_rate": 2.746648793565684e-05, + "loss": 0.2156, + "step": 1731 + }, + { + "epoch": 4.58, + "learning_rate": 2.7453083109919574e-05, + "loss": 0.4112, + "step": 1732 + }, + { + "epoch": 4.58, + "learning_rate": 2.743967828418231e-05, + "loss": 0.0037, + "step": 1733 + }, + { + "epoch": 4.59, + "learning_rate": 2.742627345844504e-05, + "loss": 0.0186, + "step": 1734 + }, + { + "epoch": 4.59, + "learning_rate": 2.741286863270778e-05, + "loss": 0.0117, + "step": 1735 + }, + { + "epoch": 4.59, + "learning_rate": 2.739946380697051e-05, + "loss": 0.0039, + "step": 1736 + }, + { + "epoch": 4.6, + "learning_rate": 2.738605898123325e-05, + "loss": 0.1185, + "step": 1737 + }, + { + "epoch": 4.6, + "learning_rate": 2.737265415549598e-05, + "loss": 0.0276, + "step": 1738 + }, + { + "epoch": 4.6, + "learning_rate": 2.7359249329758713e-05, + "loss": 0.0041, + "step": 1739 + }, + { + "epoch": 4.6, + "learning_rate": 2.734584450402145e-05, + "loss": 0.0133, + "step": 1740 + }, + { + "epoch": 4.61, + "learning_rate": 2.7332439678284183e-05, + "loss": 0.1042, + "step": 1741 + }, + { + "epoch": 4.61, + "learning_rate": 2.7319034852546922e-05, + "loss": 0.0023, + "step": 1742 + }, + { + "epoch": 4.61, + "learning_rate": 2.7305630026809654e-05, + "loss": 0.1586, + "step": 1743 + }, + { + "epoch": 4.61, + "learning_rate": 2.7292225201072385e-05, + "loss": 0.0258, + "step": 1744 + }, + { + "epoch": 4.62, + "learning_rate": 2.7278820375335124e-05, + "loss": 0.1119, + "step": 1745 + }, + { + "epoch": 4.62, + "learning_rate": 2.7265415549597856e-05, + "loss": 0.1115, + "step": 1746 + }, + { + "epoch": 4.62, + "learning_rate": 2.7252010723860594e-05, + "loss": 0.4607, + "step": 1747 + }, + { + "epoch": 4.62, + "learning_rate": 2.7238605898123326e-05, + "loss": 0.0296, + "step": 1748 + }, + { + "epoch": 4.63, + "learning_rate": 2.7225201072386058e-05, + "loss": 0.0277, + "step": 1749 + }, + { + "epoch": 4.63, + "learning_rate": 2.7211796246648796e-05, + "loss": 0.0777, + "step": 1750 + }, + { + "epoch": 4.63, + "learning_rate": 2.7198391420911528e-05, + "loss": 0.0031, + "step": 1751 + }, + { + "epoch": 4.63, + "learning_rate": 2.7184986595174266e-05, + "loss": 0.2238, + "step": 1752 + }, + { + "epoch": 4.64, + "learning_rate": 2.7171581769436998e-05, + "loss": 0.0409, + "step": 1753 + }, + { + "epoch": 4.64, + "learning_rate": 2.715817694369973e-05, + "loss": 0.0032, + "step": 1754 + }, + { + "epoch": 4.64, + "learning_rate": 2.714477211796247e-05, + "loss": 0.0113, + "step": 1755 + }, + { + "epoch": 4.65, + "learning_rate": 2.71313672922252e-05, + "loss": 0.0204, + "step": 1756 + }, + { + "epoch": 4.65, + "learning_rate": 2.711796246648794e-05, + "loss": 0.0022, + "step": 1757 + }, + { + "epoch": 4.65, + "learning_rate": 2.710455764075067e-05, + "loss": 0.0018, + "step": 1758 + }, + { + "epoch": 4.65, + "learning_rate": 2.7091152815013406e-05, + "loss": 0.263, + "step": 1759 + }, + { + "epoch": 4.66, + "learning_rate": 2.707774798927614e-05, + "loss": 0.0109, + "step": 1760 + }, + { + "epoch": 4.66, + "learning_rate": 2.7064343163538876e-05, + "loss": 0.0653, + "step": 1761 + }, + { + "epoch": 4.66, + "learning_rate": 2.705093833780161e-05, + "loss": 0.0116, + "step": 1762 + }, + { + "epoch": 4.66, + "learning_rate": 2.7037533512064346e-05, + "loss": 0.0063, + "step": 1763 + }, + { + "epoch": 4.67, + "learning_rate": 2.7024128686327078e-05, + "loss": 0.0034, + "step": 1764 + }, + { + "epoch": 4.67, + "learning_rate": 2.7010723860589816e-05, + "loss": 0.0395, + "step": 1765 + }, + { + "epoch": 4.67, + "learning_rate": 2.6997319034852548e-05, + "loss": 0.0014, + "step": 1766 + }, + { + "epoch": 4.67, + "learning_rate": 2.6983914209115287e-05, + "loss": 0.0057, + "step": 1767 + }, + { + "epoch": 4.68, + "learning_rate": 2.697050938337802e-05, + "loss": 0.0018, + "step": 1768 + }, + { + "epoch": 4.68, + "learning_rate": 2.695710455764075e-05, + "loss": 0.012, + "step": 1769 + }, + { + "epoch": 4.68, + "learning_rate": 2.694369973190349e-05, + "loss": 0.0017, + "step": 1770 + }, + { + "epoch": 4.69, + "learning_rate": 2.693029490616622e-05, + "loss": 0.0654, + "step": 1771 + }, + { + "epoch": 4.69, + "learning_rate": 2.691689008042896e-05, + "loss": 0.8002, + "step": 1772 + }, + { + "epoch": 4.69, + "learning_rate": 2.690348525469169e-05, + "loss": 0.0035, + "step": 1773 + }, + { + "epoch": 4.69, + "learning_rate": 2.6890080428954422e-05, + "loss": 0.0051, + "step": 1774 + }, + { + "epoch": 4.7, + "learning_rate": 2.687667560321716e-05, + "loss": 0.0031, + "step": 1775 + }, + { + "epoch": 4.7, + "learning_rate": 2.6863270777479893e-05, + "loss": 0.0142, + "step": 1776 + }, + { + "epoch": 4.7, + "learning_rate": 2.684986595174263e-05, + "loss": 0.0009, + "step": 1777 + }, + { + "epoch": 4.7, + "learning_rate": 2.6836461126005363e-05, + "loss": 0.0015, + "step": 1778 + }, + { + "epoch": 4.71, + "learning_rate": 2.6823056300268095e-05, + "loss": 0.3481, + "step": 1779 + }, + { + "epoch": 4.71, + "learning_rate": 2.6809651474530833e-05, + "loss": 0.3095, + "step": 1780 + }, + { + "epoch": 4.71, + "learning_rate": 2.6796246648793565e-05, + "loss": 0.2567, + "step": 1781 + }, + { + "epoch": 4.71, + "learning_rate": 2.6782841823056303e-05, + "loss": 0.0037, + "step": 1782 + }, + { + "epoch": 4.72, + "learning_rate": 2.6769436997319035e-05, + "loss": 0.001, + "step": 1783 + }, + { + "epoch": 4.72, + "learning_rate": 2.675603217158177e-05, + "loss": 0.0065, + "step": 1784 + }, + { + "epoch": 4.72, + "learning_rate": 2.6742627345844505e-05, + "loss": 0.0029, + "step": 1785 + }, + { + "epoch": 4.72, + "learning_rate": 2.672922252010724e-05, + "loss": 0.6096, + "step": 1786 + }, + { + "epoch": 4.73, + "learning_rate": 2.6715817694369976e-05, + "loss": 0.0127, + "step": 1787 + }, + { + "epoch": 4.73, + "learning_rate": 2.670241286863271e-05, + "loss": 0.0031, + "step": 1788 + }, + { + "epoch": 4.73, + "learning_rate": 2.6689008042895443e-05, + "loss": 0.2463, + "step": 1789 + }, + { + "epoch": 4.74, + "learning_rate": 2.667560321715818e-05, + "loss": 0.1022, + "step": 1790 + }, + { + "epoch": 4.74, + "learning_rate": 2.6662198391420913e-05, + "loss": 0.002, + "step": 1791 + }, + { + "epoch": 4.74, + "learning_rate": 2.664879356568365e-05, + "loss": 0.1576, + "step": 1792 + }, + { + "epoch": 4.74, + "learning_rate": 2.6635388739946383e-05, + "loss": 0.1099, + "step": 1793 + }, + { + "epoch": 4.75, + "learning_rate": 2.6621983914209115e-05, + "loss": 0.1482, + "step": 1794 + }, + { + "epoch": 4.75, + "learning_rate": 2.6608579088471853e-05, + "loss": 0.0007, + "step": 1795 + }, + { + "epoch": 4.75, + "learning_rate": 2.6595174262734585e-05, + "loss": 0.0009, + "step": 1796 + }, + { + "epoch": 4.75, + "learning_rate": 2.6581769436997324e-05, + "loss": 0.005, + "step": 1797 + }, + { + "epoch": 4.76, + "learning_rate": 2.6568364611260055e-05, + "loss": 0.1808, + "step": 1798 + }, + { + "epoch": 4.76, + "learning_rate": 2.6554959785522787e-05, + "loss": 0.0351, + "step": 1799 + }, + { + "epoch": 4.76, + "learning_rate": 2.6541554959785526e-05, + "loss": 0.2555, + "step": 1800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6528150134048257e-05, + "loss": 0.2236, + "step": 1801 + }, + { + "epoch": 4.77, + "learning_rate": 2.6514745308310996e-05, + "loss": 0.3208, + "step": 1802 + }, + { + "epoch": 4.77, + "learning_rate": 2.6501340482573728e-05, + "loss": 0.0202, + "step": 1803 + }, + { + "epoch": 4.77, + "learning_rate": 2.648793565683646e-05, + "loss": 0.0033, + "step": 1804 + }, + { + "epoch": 4.78, + "learning_rate": 2.6474530831099198e-05, + "loss": 0.001, + "step": 1805 + }, + { + "epoch": 4.78, + "learning_rate": 2.646112600536193e-05, + "loss": 0.0019, + "step": 1806 + }, + { + "epoch": 4.78, + "learning_rate": 2.6447721179624668e-05, + "loss": 0.0027, + "step": 1807 + }, + { + "epoch": 4.78, + "learning_rate": 2.64343163538874e-05, + "loss": 0.0051, + "step": 1808 + }, + { + "epoch": 4.79, + "learning_rate": 2.642091152815013e-05, + "loss": 0.1994, + "step": 1809 + }, + { + "epoch": 4.79, + "learning_rate": 2.640750670241287e-05, + "loss": 0.0372, + "step": 1810 + }, + { + "epoch": 4.79, + "learning_rate": 2.6394101876675602e-05, + "loss": 0.0678, + "step": 1811 + }, + { + "epoch": 4.79, + "learning_rate": 2.638069705093834e-05, + "loss": 0.0252, + "step": 1812 + }, + { + "epoch": 4.8, + "learning_rate": 2.6367292225201072e-05, + "loss": 0.0065, + "step": 1813 + }, + { + "epoch": 4.8, + "learning_rate": 2.6353887399463807e-05, + "loss": 0.0045, + "step": 1814 + }, + { + "epoch": 4.8, + "learning_rate": 2.6340482573726542e-05, + "loss": 0.0037, + "step": 1815 + }, + { + "epoch": 4.8, + "learning_rate": 2.6327077747989277e-05, + "loss": 0.0251, + "step": 1816 + }, + { + "epoch": 4.81, + "learning_rate": 2.6313672922252013e-05, + "loss": 0.4196, + "step": 1817 + }, + { + "epoch": 4.81, + "learning_rate": 2.6300268096514748e-05, + "loss": 0.0071, + "step": 1818 + }, + { + "epoch": 4.81, + "learning_rate": 2.628686327077748e-05, + "loss": 0.0787, + "step": 1819 + }, + { + "epoch": 4.81, + "learning_rate": 2.6273458445040218e-05, + "loss": 0.0145, + "step": 1820 + }, + { + "epoch": 4.82, + "learning_rate": 2.626005361930295e-05, + "loss": 0.009, + "step": 1821 + }, + { + "epoch": 4.82, + "learning_rate": 2.6246648793565688e-05, + "loss": 0.0027, + "step": 1822 + }, + { + "epoch": 4.82, + "learning_rate": 2.623324396782842e-05, + "loss": 0.0017, + "step": 1823 + }, + { + "epoch": 4.83, + "learning_rate": 2.6219839142091152e-05, + "loss": 0.4824, + "step": 1824 + }, + { + "epoch": 4.83, + "learning_rate": 2.620643431635389e-05, + "loss": 0.0022, + "step": 1825 + }, + { + "epoch": 4.83, + "learning_rate": 2.6193029490616622e-05, + "loss": 0.3223, + "step": 1826 + }, + { + "epoch": 4.83, + "learning_rate": 2.617962466487936e-05, + "loss": 0.2195, + "step": 1827 + }, + { + "epoch": 4.84, + "learning_rate": 2.6166219839142092e-05, + "loss": 0.0013, + "step": 1828 + }, + { + "epoch": 4.84, + "learning_rate": 2.6152815013404824e-05, + "loss": 0.0343, + "step": 1829 + }, + { + "epoch": 4.84, + "learning_rate": 2.6139410187667563e-05, + "loss": 0.0022, + "step": 1830 + }, + { + "epoch": 4.84, + "learning_rate": 2.6126005361930294e-05, + "loss": 0.0022, + "step": 1831 + }, + { + "epoch": 4.85, + "learning_rate": 2.6112600536193033e-05, + "loss": 0.4116, + "step": 1832 + }, + { + "epoch": 4.85, + "learning_rate": 2.6099195710455765e-05, + "loss": 0.0048, + "step": 1833 + }, + { + "epoch": 4.85, + "learning_rate": 2.6085790884718496e-05, + "loss": 0.5819, + "step": 1834 + }, + { + "epoch": 4.85, + "learning_rate": 2.6072386058981235e-05, + "loss": 0.1985, + "step": 1835 + }, + { + "epoch": 4.86, + "learning_rate": 2.6058981233243967e-05, + "loss": 0.0989, + "step": 1836 + }, + { + "epoch": 4.86, + "learning_rate": 2.6045576407506705e-05, + "loss": 0.341, + "step": 1837 + }, + { + "epoch": 4.86, + "learning_rate": 2.6032171581769437e-05, + "loss": 0.0044, + "step": 1838 + }, + { + "epoch": 4.87, + "learning_rate": 2.601876675603217e-05, + "loss": 0.004, + "step": 1839 + }, + { + "epoch": 4.87, + "learning_rate": 2.6005361930294907e-05, + "loss": 0.2858, + "step": 1840 + }, + { + "epoch": 4.87, + "learning_rate": 2.599195710455764e-05, + "loss": 0.0009, + "step": 1841 + }, + { + "epoch": 4.87, + "learning_rate": 2.5978552278820377e-05, + "loss": 0.0042, + "step": 1842 + }, + { + "epoch": 4.88, + "learning_rate": 2.596514745308311e-05, + "loss": 0.0045, + "step": 1843 + }, + { + "epoch": 4.88, + "learning_rate": 2.5951742627345844e-05, + "loss": 0.0144, + "step": 1844 + }, + { + "epoch": 4.88, + "learning_rate": 2.593833780160858e-05, + "loss": 0.0084, + "step": 1845 + }, + { + "epoch": 4.88, + "learning_rate": 2.5924932975871314e-05, + "loss": 0.4276, + "step": 1846 + }, + { + "epoch": 4.89, + "learning_rate": 2.591152815013405e-05, + "loss": 0.0122, + "step": 1847 + }, + { + "epoch": 4.89, + "learning_rate": 2.5898123324396785e-05, + "loss": 0.0776, + "step": 1848 + }, + { + "epoch": 4.89, + "learning_rate": 2.5884718498659516e-05, + "loss": 0.0117, + "step": 1849 + }, + { + "epoch": 4.89, + "learning_rate": 2.5871313672922255e-05, + "loss": 0.2809, + "step": 1850 + }, + { + "epoch": 4.9, + "learning_rate": 2.5857908847184987e-05, + "loss": 0.0413, + "step": 1851 + }, + { + "epoch": 4.9, + "learning_rate": 2.5844504021447725e-05, + "loss": 0.0187, + "step": 1852 + }, + { + "epoch": 4.9, + "learning_rate": 2.5831099195710457e-05, + "loss": 0.452, + "step": 1853 + }, + { + "epoch": 4.9, + "learning_rate": 2.5817694369973195e-05, + "loss": 0.0206, + "step": 1854 + }, + { + "epoch": 4.91, + "learning_rate": 2.5804289544235927e-05, + "loss": 0.1639, + "step": 1855 + }, + { + "epoch": 4.91, + "learning_rate": 2.579088471849866e-05, + "loss": 0.1865, + "step": 1856 + }, + { + "epoch": 4.91, + "learning_rate": 2.5777479892761398e-05, + "loss": 0.0022, + "step": 1857 + }, + { + "epoch": 4.92, + "learning_rate": 2.576407506702413e-05, + "loss": 0.1167, + "step": 1858 + }, + { + "epoch": 4.92, + "learning_rate": 2.5750670241286868e-05, + "loss": 0.4013, + "step": 1859 + }, + { + "epoch": 4.92, + "learning_rate": 2.57372654155496e-05, + "loss": 0.2355, + "step": 1860 + }, + { + "epoch": 4.92, + "learning_rate": 2.572386058981233e-05, + "loss": 0.0076, + "step": 1861 + }, + { + "epoch": 4.93, + "learning_rate": 2.571045576407507e-05, + "loss": 0.1612, + "step": 1862 + }, + { + "epoch": 4.93, + "learning_rate": 2.56970509383378e-05, + "loss": 0.0047, + "step": 1863 + }, + { + "epoch": 4.93, + "learning_rate": 2.568364611260054e-05, + "loss": 0.1511, + "step": 1864 + }, + { + "epoch": 4.93, + "learning_rate": 2.5670241286863272e-05, + "loss": 0.011, + "step": 1865 + }, + { + "epoch": 4.94, + "learning_rate": 2.5656836461126004e-05, + "loss": 0.1761, + "step": 1866 + }, + { + "epoch": 4.94, + "learning_rate": 2.5643431635388742e-05, + "loss": 0.004, + "step": 1867 + }, + { + "epoch": 4.94, + "learning_rate": 2.5630026809651474e-05, + "loss": 0.0036, + "step": 1868 + }, + { + "epoch": 4.94, + "learning_rate": 2.5616621983914212e-05, + "loss": 0.4345, + "step": 1869 + }, + { + "epoch": 4.95, + "learning_rate": 2.5603217158176944e-05, + "loss": 0.0034, + "step": 1870 + }, + { + "epoch": 4.95, + "learning_rate": 2.558981233243968e-05, + "loss": 0.1269, + "step": 1871 + }, + { + "epoch": 4.95, + "learning_rate": 2.5576407506702414e-05, + "loss": 0.183, + "step": 1872 + }, + { + "epoch": 4.96, + "learning_rate": 2.556300268096515e-05, + "loss": 0.008, + "step": 1873 + }, + { + "epoch": 4.96, + "learning_rate": 2.5549597855227885e-05, + "loss": 0.0035, + "step": 1874 + }, + { + "epoch": 4.96, + "learning_rate": 2.553619302949062e-05, + "loss": 0.0133, + "step": 1875 + }, + { + "epoch": 4.96, + "learning_rate": 2.552278820375335e-05, + "loss": 0.2156, + "step": 1876 + }, + { + "epoch": 4.97, + "learning_rate": 2.550938337801609e-05, + "loss": 0.0043, + "step": 1877 + }, + { + "epoch": 4.97, + "learning_rate": 2.549597855227882e-05, + "loss": 0.2614, + "step": 1878 + }, + { + "epoch": 4.97, + "learning_rate": 2.548257372654156e-05, + "loss": 0.0208, + "step": 1879 + }, + { + "epoch": 4.97, + "learning_rate": 2.5469168900804292e-05, + "loss": 0.0228, + "step": 1880 + }, + { + "epoch": 4.98, + "learning_rate": 2.5455764075067024e-05, + "loss": 0.0105, + "step": 1881 + }, + { + "epoch": 4.98, + "learning_rate": 2.5442359249329762e-05, + "loss": 0.0108, + "step": 1882 + }, + { + "epoch": 4.98, + "learning_rate": 2.5428954423592494e-05, + "loss": 0.3828, + "step": 1883 + }, + { + "epoch": 4.98, + "learning_rate": 2.5415549597855232e-05, + "loss": 0.0093, + "step": 1884 + }, + { + "epoch": 4.99, + "learning_rate": 2.5402144772117964e-05, + "loss": 0.0231, + "step": 1885 + }, + { + "epoch": 4.99, + "learning_rate": 2.5388739946380696e-05, + "loss": 0.0082, + "step": 1886 + }, + { + "epoch": 4.99, + "learning_rate": 2.5375335120643434e-05, + "loss": 0.1796, + "step": 1887 + }, + { + "epoch": 4.99, + "learning_rate": 2.5361930294906166e-05, + "loss": 0.0753, + "step": 1888 + }, + { + "epoch": 5.0, + "learning_rate": 2.5348525469168905e-05, + "loss": 0.0142, + "step": 1889 + }, + { + "epoch": 5.0, + "learning_rate": 2.5335120643431636e-05, + "loss": 0.0047, + "step": 1890 + }, + { + "epoch": 5.0, + "eval_f1": 0.7775974025974025, + "eval_loss": 0.953689694404602, + "eval_runtime": 1.8696, + "eval_samples_per_second": 809.285, + "eval_steps_per_second": 50.814, + "step": 1890 + }, + { + "epoch": 5.0, + "learning_rate": 2.5321715817694368e-05, + "loss": 0.0014, + "step": 1891 + }, + { + "epoch": 5.01, + "learning_rate": 2.5308310991957107e-05, + "loss": 0.0487, + "step": 1892 + }, + { + "epoch": 5.01, + "learning_rate": 2.529490616621984e-05, + "loss": 0.0037, + "step": 1893 + }, + { + "epoch": 5.01, + "learning_rate": 2.5281501340482577e-05, + "loss": 0.0512, + "step": 1894 + }, + { + "epoch": 5.01, + "learning_rate": 2.526809651474531e-05, + "loss": 0.134, + "step": 1895 + }, + { + "epoch": 5.02, + "learning_rate": 2.525469168900804e-05, + "loss": 0.3762, + "step": 1896 + }, + { + "epoch": 5.02, + "learning_rate": 2.524128686327078e-05, + "loss": 0.0011, + "step": 1897 + }, + { + "epoch": 5.02, + "learning_rate": 2.522788203753351e-05, + "loss": 0.0023, + "step": 1898 + }, + { + "epoch": 5.02, + "learning_rate": 2.521447721179625e-05, + "loss": 0.0526, + "step": 1899 + }, + { + "epoch": 5.03, + "learning_rate": 2.520107238605898e-05, + "loss": 0.0553, + "step": 1900 + }, + { + "epoch": 5.03, + "learning_rate": 2.5187667560321716e-05, + "loss": 0.1773, + "step": 1901 + }, + { + "epoch": 5.03, + "learning_rate": 2.517426273458445e-05, + "loss": 0.451, + "step": 1902 + }, + { + "epoch": 5.03, + "learning_rate": 2.5160857908847186e-05, + "loss": 0.0217, + "step": 1903 + }, + { + "epoch": 5.04, + "learning_rate": 2.514745308310992e-05, + "loss": 0.0728, + "step": 1904 + }, + { + "epoch": 5.04, + "learning_rate": 2.5134048257372657e-05, + "loss": 0.0009, + "step": 1905 + }, + { + "epoch": 5.04, + "learning_rate": 2.512064343163539e-05, + "loss": 0.1018, + "step": 1906 + }, + { + "epoch": 5.04, + "learning_rate": 2.5107238605898127e-05, + "loss": 0.0012, + "step": 1907 + }, + { + "epoch": 5.05, + "learning_rate": 2.509383378016086e-05, + "loss": 0.004, + "step": 1908 + }, + { + "epoch": 5.05, + "learning_rate": 2.5080428954423597e-05, + "loss": 0.0012, + "step": 1909 + }, + { + "epoch": 5.05, + "learning_rate": 2.506702412868633e-05, + "loss": 0.0128, + "step": 1910 + }, + { + "epoch": 5.06, + "learning_rate": 2.505361930294906e-05, + "loss": 0.1116, + "step": 1911 + }, + { + "epoch": 5.06, + "learning_rate": 2.50402144772118e-05, + "loss": 0.0011, + "step": 1912 + }, + { + "epoch": 5.06, + "learning_rate": 2.502680965147453e-05, + "loss": 0.0011, + "step": 1913 + }, + { + "epoch": 5.06, + "learning_rate": 2.501340482573727e-05, + "loss": 0.0897, + "step": 1914 + }, + { + "epoch": 5.07, + "learning_rate": 2.5e-05, + "loss": 0.0014, + "step": 1915 + }, + { + "epoch": 5.07, + "learning_rate": 2.4986595174262736e-05, + "loss": 0.0918, + "step": 1916 + }, + { + "epoch": 5.07, + "learning_rate": 2.497319034852547e-05, + "loss": 0.0026, + "step": 1917 + }, + { + "epoch": 5.07, + "learning_rate": 2.4959785522788203e-05, + "loss": 0.0225, + "step": 1918 + }, + { + "epoch": 5.08, + "learning_rate": 2.494638069705094e-05, + "loss": 0.2655, + "step": 1919 + }, + { + "epoch": 5.08, + "learning_rate": 2.4932975871313673e-05, + "loss": 0.0029, + "step": 1920 + }, + { + "epoch": 5.08, + "learning_rate": 2.491957104557641e-05, + "loss": 0.0006, + "step": 1921 + }, + { + "epoch": 5.08, + "learning_rate": 2.4906166219839144e-05, + "loss": 0.0008, + "step": 1922 + }, + { + "epoch": 5.09, + "learning_rate": 2.4892761394101875e-05, + "loss": 0.0012, + "step": 1923 + }, + { + "epoch": 5.09, + "learning_rate": 2.487935656836461e-05, + "loss": 0.0013, + "step": 1924 + }, + { + "epoch": 5.09, + "learning_rate": 2.4865951742627346e-05, + "loss": 0.0524, + "step": 1925 + }, + { + "epoch": 5.1, + "learning_rate": 2.485254691689008e-05, + "loss": 0.0059, + "step": 1926 + }, + { + "epoch": 5.1, + "learning_rate": 2.4839142091152816e-05, + "loss": 0.0026, + "step": 1927 + }, + { + "epoch": 5.1, + "learning_rate": 2.482573726541555e-05, + "loss": 0.0015, + "step": 1928 + }, + { + "epoch": 5.1, + "learning_rate": 2.4812332439678286e-05, + "loss": 0.0073, + "step": 1929 + }, + { + "epoch": 5.11, + "learning_rate": 2.479892761394102e-05, + "loss": 0.0008, + "step": 1930 + }, + { + "epoch": 5.11, + "learning_rate": 2.4785522788203757e-05, + "loss": 0.1519, + "step": 1931 + }, + { + "epoch": 5.11, + "learning_rate": 2.477211796246649e-05, + "loss": 0.008, + "step": 1932 + }, + { + "epoch": 5.11, + "learning_rate": 2.4758713136729223e-05, + "loss": 0.0009, + "step": 1933 + }, + { + "epoch": 5.12, + "learning_rate": 2.474530831099196e-05, + "loss": 0.0299, + "step": 1934 + }, + { + "epoch": 5.12, + "learning_rate": 2.4731903485254694e-05, + "loss": 0.1637, + "step": 1935 + }, + { + "epoch": 5.12, + "learning_rate": 2.471849865951743e-05, + "loss": 0.0369, + "step": 1936 + }, + { + "epoch": 5.12, + "learning_rate": 2.4705093833780164e-05, + "loss": 0.0057, + "step": 1937 + }, + { + "epoch": 5.13, + "learning_rate": 2.4691689008042896e-05, + "loss": 0.0035, + "step": 1938 + }, + { + "epoch": 5.13, + "learning_rate": 2.467828418230563e-05, + "loss": 0.0011, + "step": 1939 + }, + { + "epoch": 5.13, + "learning_rate": 2.4664879356568366e-05, + "loss": 0.0018, + "step": 1940 + }, + { + "epoch": 5.13, + "learning_rate": 2.46514745308311e-05, + "loss": 0.0013, + "step": 1941 + }, + { + "epoch": 5.14, + "learning_rate": 2.4638069705093836e-05, + "loss": 0.0779, + "step": 1942 + }, + { + "epoch": 5.14, + "learning_rate": 2.4624664879356568e-05, + "loss": 0.1762, + "step": 1943 + }, + { + "epoch": 5.14, + "learning_rate": 2.4611260053619303e-05, + "loss": 0.0006, + "step": 1944 + }, + { + "epoch": 5.15, + "learning_rate": 2.4597855227882038e-05, + "loss": 0.0037, + "step": 1945 + }, + { + "epoch": 5.15, + "learning_rate": 2.4584450402144773e-05, + "loss": 0.0005, + "step": 1946 + }, + { + "epoch": 5.15, + "learning_rate": 2.457104557640751e-05, + "loss": 0.0397, + "step": 1947 + }, + { + "epoch": 5.15, + "learning_rate": 2.4557640750670244e-05, + "loss": 0.0082, + "step": 1948 + }, + { + "epoch": 5.16, + "learning_rate": 2.4544235924932975e-05, + "loss": 0.0008, + "step": 1949 + }, + { + "epoch": 5.16, + "learning_rate": 2.453083109919571e-05, + "loss": 0.0219, + "step": 1950 + }, + { + "epoch": 5.16, + "learning_rate": 2.4517426273458446e-05, + "loss": 0.3966, + "step": 1951 + }, + { + "epoch": 5.16, + "learning_rate": 2.450402144772118e-05, + "loss": 0.0011, + "step": 1952 + }, + { + "epoch": 5.17, + "learning_rate": 2.4490616621983916e-05, + "loss": 0.3447, + "step": 1953 + }, + { + "epoch": 5.17, + "learning_rate": 2.4477211796246648e-05, + "loss": 0.0006, + "step": 1954 + }, + { + "epoch": 5.17, + "learning_rate": 2.4463806970509383e-05, + "loss": 0.0011, + "step": 1955 + }, + { + "epoch": 5.17, + "learning_rate": 2.4450402144772118e-05, + "loss": 0.0013, + "step": 1956 + }, + { + "epoch": 5.18, + "learning_rate": 2.4436997319034853e-05, + "loss": 0.1495, + "step": 1957 + }, + { + "epoch": 5.18, + "learning_rate": 2.4423592493297588e-05, + "loss": 0.0005, + "step": 1958 + }, + { + "epoch": 5.18, + "learning_rate": 2.4410187667560323e-05, + "loss": 0.3345, + "step": 1959 + }, + { + "epoch": 5.19, + "learning_rate": 2.439678284182306e-05, + "loss": 0.0048, + "step": 1960 + }, + { + "epoch": 5.19, + "learning_rate": 2.4383378016085793e-05, + "loss": 0.001, + "step": 1961 + }, + { + "epoch": 5.19, + "learning_rate": 2.436997319034853e-05, + "loss": 0.0025, + "step": 1962 + }, + { + "epoch": 5.19, + "learning_rate": 2.4356568364611264e-05, + "loss": 0.3215, + "step": 1963 + }, + { + "epoch": 5.2, + "learning_rate": 2.4343163538873995e-05, + "loss": 0.0197, + "step": 1964 + }, + { + "epoch": 5.2, + "learning_rate": 2.432975871313673e-05, + "loss": 0.0018, + "step": 1965 + }, + { + "epoch": 5.2, + "learning_rate": 2.4316353887399466e-05, + "loss": 0.1012, + "step": 1966 + }, + { + "epoch": 5.2, + "learning_rate": 2.43029490616622e-05, + "loss": 0.0179, + "step": 1967 + }, + { + "epoch": 5.21, + "learning_rate": 2.4289544235924936e-05, + "loss": 0.0032, + "step": 1968 + }, + { + "epoch": 5.21, + "learning_rate": 2.4276139410187668e-05, + "loss": 0.0011, + "step": 1969 + }, + { + "epoch": 5.21, + "learning_rate": 2.4262734584450403e-05, + "loss": 0.4875, + "step": 1970 + }, + { + "epoch": 5.21, + "learning_rate": 2.4249329758713138e-05, + "loss": 0.2791, + "step": 1971 + }, + { + "epoch": 5.22, + "learning_rate": 2.4235924932975873e-05, + "loss": 0.0011, + "step": 1972 + }, + { + "epoch": 5.22, + "learning_rate": 2.4222520107238608e-05, + "loss": 0.0011, + "step": 1973 + }, + { + "epoch": 5.22, + "learning_rate": 2.420911528150134e-05, + "loss": 0.0976, + "step": 1974 + }, + { + "epoch": 5.22, + "learning_rate": 2.4195710455764075e-05, + "loss": 0.3669, + "step": 1975 + }, + { + "epoch": 5.23, + "learning_rate": 2.418230563002681e-05, + "loss": 0.0022, + "step": 1976 + }, + { + "epoch": 5.23, + "learning_rate": 2.4168900804289545e-05, + "loss": 0.0015, + "step": 1977 + }, + { + "epoch": 5.23, + "learning_rate": 2.415549597855228e-05, + "loss": 0.0014, + "step": 1978 + }, + { + "epoch": 5.24, + "learning_rate": 2.4142091152815012e-05, + "loss": 0.0433, + "step": 1979 + }, + { + "epoch": 5.24, + "learning_rate": 2.4128686327077747e-05, + "loss": 0.0019, + "step": 1980 + }, + { + "epoch": 5.24, + "learning_rate": 2.4115281501340483e-05, + "loss": 0.0007, + "step": 1981 + }, + { + "epoch": 5.24, + "learning_rate": 2.4101876675603218e-05, + "loss": 0.0136, + "step": 1982 + }, + { + "epoch": 5.25, + "learning_rate": 2.4088471849865953e-05, + "loss": 0.1744, + "step": 1983 + }, + { + "epoch": 5.25, + "learning_rate": 2.4075067024128688e-05, + "loss": 0.1557, + "step": 1984 + }, + { + "epoch": 5.25, + "learning_rate": 2.4061662198391423e-05, + "loss": 0.1192, + "step": 1985 + }, + { + "epoch": 5.25, + "learning_rate": 2.4048257372654158e-05, + "loss": 0.0406, + "step": 1986 + }, + { + "epoch": 5.26, + "learning_rate": 2.4034852546916893e-05, + "loss": 0.2243, + "step": 1987 + }, + { + "epoch": 5.26, + "learning_rate": 2.402144772117963e-05, + "loss": 0.0021, + "step": 1988 + }, + { + "epoch": 5.26, + "learning_rate": 2.400804289544236e-05, + "loss": 0.002, + "step": 1989 + }, + { + "epoch": 5.26, + "learning_rate": 2.3994638069705095e-05, + "loss": 0.077, + "step": 1990 + }, + { + "epoch": 5.27, + "learning_rate": 2.398123324396783e-05, + "loss": 0.0378, + "step": 1991 + }, + { + "epoch": 5.27, + "learning_rate": 2.3967828418230566e-05, + "loss": 0.012, + "step": 1992 + }, + { + "epoch": 5.27, + "learning_rate": 2.39544235924933e-05, + "loss": 0.1386, + "step": 1993 + }, + { + "epoch": 5.28, + "learning_rate": 2.3941018766756032e-05, + "loss": 0.002, + "step": 1994 + }, + { + "epoch": 5.28, + "learning_rate": 2.3927613941018768e-05, + "loss": 0.0008, + "step": 1995 + }, + { + "epoch": 5.28, + "learning_rate": 2.3914209115281503e-05, + "loss": 0.0021, + "step": 1996 + }, + { + "epoch": 5.28, + "learning_rate": 2.3900804289544238e-05, + "loss": 0.022, + "step": 1997 + }, + { + "epoch": 5.29, + "learning_rate": 2.3887399463806973e-05, + "loss": 0.0015, + "step": 1998 + }, + { + "epoch": 5.29, + "learning_rate": 2.3873994638069705e-05, + "loss": 0.1486, + "step": 1999 + }, + { + "epoch": 5.29, + "learning_rate": 2.386058981233244e-05, + "loss": 0.2586, + "step": 2000 + }, + { + "epoch": 5.29, + "learning_rate": 2.3847184986595175e-05, + "loss": 0.0088, + "step": 2001 + }, + { + "epoch": 5.3, + "learning_rate": 2.383378016085791e-05, + "loss": 0.0044, + "step": 2002 + }, + { + "epoch": 5.3, + "learning_rate": 2.3820375335120645e-05, + "loss": 0.0015, + "step": 2003 + }, + { + "epoch": 5.3, + "learning_rate": 2.3806970509383377e-05, + "loss": 0.0008, + "step": 2004 + }, + { + "epoch": 5.3, + "learning_rate": 2.3793565683646112e-05, + "loss": 0.212, + "step": 2005 + }, + { + "epoch": 5.31, + "learning_rate": 2.3780160857908847e-05, + "loss": 0.0005, + "step": 2006 + }, + { + "epoch": 5.31, + "learning_rate": 2.3766756032171582e-05, + "loss": 0.1511, + "step": 2007 + }, + { + "epoch": 5.31, + "learning_rate": 2.3753351206434318e-05, + "loss": 0.0023, + "step": 2008 + }, + { + "epoch": 5.31, + "learning_rate": 2.3739946380697053e-05, + "loss": 0.1544, + "step": 2009 + }, + { + "epoch": 5.32, + "learning_rate": 2.3726541554959784e-05, + "loss": 0.0306, + "step": 2010 + }, + { + "epoch": 5.32, + "learning_rate": 2.371313672922252e-05, + "loss": 0.1005, + "step": 2011 + }, + { + "epoch": 5.32, + "learning_rate": 2.3699731903485255e-05, + "loss": 0.0744, + "step": 2012 + }, + { + "epoch": 5.33, + "learning_rate": 2.368632707774799e-05, + "loss": 0.0622, + "step": 2013 + }, + { + "epoch": 5.33, + "learning_rate": 2.3672922252010725e-05, + "loss": 0.0192, + "step": 2014 + }, + { + "epoch": 5.33, + "learning_rate": 2.365951742627346e-05, + "loss": 0.016, + "step": 2015 + }, + { + "epoch": 5.33, + "learning_rate": 2.3646112600536195e-05, + "loss": 0.293, + "step": 2016 + }, + { + "epoch": 5.34, + "learning_rate": 2.363270777479893e-05, + "loss": 0.0114, + "step": 2017 + }, + { + "epoch": 5.34, + "learning_rate": 2.3619302949061665e-05, + "loss": 0.1254, + "step": 2018 + }, + { + "epoch": 5.34, + "learning_rate": 2.36058981233244e-05, + "loss": 0.0638, + "step": 2019 + }, + { + "epoch": 5.34, + "learning_rate": 2.3592493297587132e-05, + "loss": 0.0192, + "step": 2020 + }, + { + "epoch": 5.35, + "learning_rate": 2.3579088471849867e-05, + "loss": 0.0069, + "step": 2021 + }, + { + "epoch": 5.35, + "learning_rate": 2.3565683646112603e-05, + "loss": 0.0573, + "step": 2022 + }, + { + "epoch": 5.35, + "learning_rate": 2.3552278820375338e-05, + "loss": 0.0039, + "step": 2023 + }, + { + "epoch": 5.35, + "learning_rate": 2.3538873994638073e-05, + "loss": 0.0079, + "step": 2024 + }, + { + "epoch": 5.36, + "learning_rate": 2.3525469168900805e-05, + "loss": 0.0063, + "step": 2025 + }, + { + "epoch": 5.36, + "learning_rate": 2.351206434316354e-05, + "loss": 0.0032, + "step": 2026 + }, + { + "epoch": 5.36, + "learning_rate": 2.3498659517426275e-05, + "loss": 0.1018, + "step": 2027 + }, + { + "epoch": 5.37, + "learning_rate": 2.348525469168901e-05, + "loss": 0.0017, + "step": 2028 + }, + { + "epoch": 5.37, + "learning_rate": 2.3471849865951745e-05, + "loss": 0.0203, + "step": 2029 + }, + { + "epoch": 5.37, + "learning_rate": 2.3458445040214477e-05, + "loss": 0.0008, + "step": 2030 + }, + { + "epoch": 5.37, + "learning_rate": 2.3445040214477212e-05, + "loss": 0.3145, + "step": 2031 + }, + { + "epoch": 5.38, + "learning_rate": 2.3431635388739947e-05, + "loss": 0.2476, + "step": 2032 + }, + { + "epoch": 5.38, + "learning_rate": 2.3418230563002682e-05, + "loss": 0.0573, + "step": 2033 + }, + { + "epoch": 5.38, + "learning_rate": 2.3404825737265417e-05, + "loss": 0.0008, + "step": 2034 + }, + { + "epoch": 5.38, + "learning_rate": 2.339142091152815e-05, + "loss": 0.0636, + "step": 2035 + }, + { + "epoch": 5.39, + "learning_rate": 2.3378016085790884e-05, + "loss": 0.001, + "step": 2036 + }, + { + "epoch": 5.39, + "learning_rate": 2.336461126005362e-05, + "loss": 0.0079, + "step": 2037 + }, + { + "epoch": 5.39, + "learning_rate": 2.3351206434316355e-05, + "loss": 0.0752, + "step": 2038 + }, + { + "epoch": 5.39, + "learning_rate": 2.333780160857909e-05, + "loss": 0.0024, + "step": 2039 + }, + { + "epoch": 5.4, + "learning_rate": 2.332439678284182e-05, + "loss": 0.0061, + "step": 2040 + }, + { + "epoch": 5.4, + "learning_rate": 2.3310991957104557e-05, + "loss": 0.2882, + "step": 2041 + }, + { + "epoch": 5.4, + "learning_rate": 2.329758713136729e-05, + "loss": 0.02, + "step": 2042 + }, + { + "epoch": 5.4, + "learning_rate": 2.3284182305630027e-05, + "loss": 0.018, + "step": 2043 + }, + { + "epoch": 5.41, + "learning_rate": 2.3270777479892762e-05, + "loss": 0.0125, + "step": 2044 + }, + { + "epoch": 5.41, + "learning_rate": 2.3257372654155497e-05, + "loss": 0.0007, + "step": 2045 + }, + { + "epoch": 5.41, + "learning_rate": 2.3243967828418232e-05, + "loss": 0.3849, + "step": 2046 + }, + { + "epoch": 5.42, + "learning_rate": 2.3230563002680967e-05, + "loss": 0.0011, + "step": 2047 + }, + { + "epoch": 5.42, + "learning_rate": 2.3217158176943702e-05, + "loss": 0.1235, + "step": 2048 + }, + { + "epoch": 5.42, + "learning_rate": 2.3203753351206438e-05, + "loss": 0.0006, + "step": 2049 + }, + { + "epoch": 5.42, + "learning_rate": 2.319034852546917e-05, + "loss": 0.0006, + "step": 2050 + }, + { + "epoch": 5.43, + "learning_rate": 2.3176943699731904e-05, + "loss": 0.0011, + "step": 2051 + }, + { + "epoch": 5.43, + "learning_rate": 2.316353887399464e-05, + "loss": 0.0011, + "step": 2052 + }, + { + "epoch": 5.43, + "learning_rate": 2.3150134048257375e-05, + "loss": 0.0032, + "step": 2053 + }, + { + "epoch": 5.43, + "learning_rate": 2.313672922252011e-05, + "loss": 0.0718, + "step": 2054 + }, + { + "epoch": 5.44, + "learning_rate": 2.312332439678284e-05, + "loss": 0.0052, + "step": 2055 + }, + { + "epoch": 5.44, + "learning_rate": 2.3109919571045577e-05, + "loss": 0.0026, + "step": 2056 + }, + { + "epoch": 5.44, + "learning_rate": 2.3096514745308312e-05, + "loss": 0.1854, + "step": 2057 + }, + { + "epoch": 5.44, + "learning_rate": 2.3083109919571047e-05, + "loss": 0.0008, + "step": 2058 + }, + { + "epoch": 5.45, + "learning_rate": 2.3069705093833782e-05, + "loss": 0.2671, + "step": 2059 + }, + { + "epoch": 5.45, + "learning_rate": 2.3056300268096514e-05, + "loss": 0.0064, + "step": 2060 + }, + { + "epoch": 5.45, + "learning_rate": 2.304289544235925e-05, + "loss": 0.0012, + "step": 2061 + }, + { + "epoch": 5.46, + "learning_rate": 2.3029490616621984e-05, + "loss": 0.0006, + "step": 2062 + }, + { + "epoch": 5.46, + "learning_rate": 2.301608579088472e-05, + "loss": 0.0008, + "step": 2063 + }, + { + "epoch": 5.46, + "learning_rate": 2.3002680965147454e-05, + "loss": 0.4998, + "step": 2064 + }, + { + "epoch": 5.46, + "learning_rate": 2.2989276139410186e-05, + "loss": 0.0037, + "step": 2065 + }, + { + "epoch": 5.47, + "learning_rate": 2.297587131367292e-05, + "loss": 0.0029, + "step": 2066 + }, + { + "epoch": 5.47, + "learning_rate": 2.2962466487935656e-05, + "loss": 0.043, + "step": 2067 + }, + { + "epoch": 5.47, + "learning_rate": 2.294906166219839e-05, + "loss": 0.0451, + "step": 2068 + }, + { + "epoch": 5.47, + "learning_rate": 2.2935656836461127e-05, + "loss": 0.0009, + "step": 2069 + }, + { + "epoch": 5.48, + "learning_rate": 2.2922252010723862e-05, + "loss": 0.002, + "step": 2070 + }, + { + "epoch": 5.48, + "learning_rate": 2.2908847184986597e-05, + "loss": 0.2744, + "step": 2071 + }, + { + "epoch": 5.48, + "learning_rate": 2.2895442359249332e-05, + "loss": 0.0146, + "step": 2072 + }, + { + "epoch": 5.48, + "learning_rate": 2.2882037533512067e-05, + "loss": 0.0011, + "step": 2073 + }, + { + "epoch": 5.49, + "learning_rate": 2.2868632707774802e-05, + "loss": 0.0421, + "step": 2074 + }, + { + "epoch": 5.49, + "learning_rate": 2.2855227882037537e-05, + "loss": 0.1518, + "step": 2075 + }, + { + "epoch": 5.49, + "learning_rate": 2.284182305630027e-05, + "loss": 0.0072, + "step": 2076 + }, + { + "epoch": 5.49, + "learning_rate": 2.2828418230563004e-05, + "loss": 0.2781, + "step": 2077 + }, + { + "epoch": 5.5, + "learning_rate": 2.281501340482574e-05, + "loss": 0.004, + "step": 2078 + }, + { + "epoch": 5.5, + "learning_rate": 2.2801608579088475e-05, + "loss": 0.1029, + "step": 2079 + }, + { + "epoch": 5.5, + "learning_rate": 2.278820375335121e-05, + "loss": 0.0526, + "step": 2080 + }, + { + "epoch": 5.51, + "learning_rate": 2.277479892761394e-05, + "loss": 0.0011, + "step": 2081 + }, + { + "epoch": 5.51, + "learning_rate": 2.2761394101876677e-05, + "loss": 0.0139, + "step": 2082 + }, + { + "epoch": 5.51, + "learning_rate": 2.274798927613941e-05, + "loss": 0.0509, + "step": 2083 + }, + { + "epoch": 5.51, + "learning_rate": 2.2734584450402147e-05, + "loss": 0.0042, + "step": 2084 + }, + { + "epoch": 5.52, + "learning_rate": 2.2721179624664882e-05, + "loss": 0.1964, + "step": 2085 + }, + { + "epoch": 5.52, + "learning_rate": 2.2707774798927614e-05, + "loss": 0.0083, + "step": 2086 + }, + { + "epoch": 5.52, + "learning_rate": 2.269436997319035e-05, + "loss": 0.101, + "step": 2087 + }, + { + "epoch": 5.52, + "learning_rate": 2.2680965147453084e-05, + "loss": 0.0094, + "step": 2088 + }, + { + "epoch": 5.53, + "learning_rate": 2.266756032171582e-05, + "loss": 0.1433, + "step": 2089 + }, + { + "epoch": 5.53, + "learning_rate": 2.2654155495978554e-05, + "loss": 0.0091, + "step": 2090 + }, + { + "epoch": 5.53, + "learning_rate": 2.2640750670241286e-05, + "loss": 0.0018, + "step": 2091 + }, + { + "epoch": 5.53, + "learning_rate": 2.262734584450402e-05, + "loss": 0.001, + "step": 2092 + }, + { + "epoch": 5.54, + "learning_rate": 2.2613941018766756e-05, + "loss": 0.3507, + "step": 2093 + }, + { + "epoch": 5.54, + "learning_rate": 2.260053619302949e-05, + "loss": 0.0527, + "step": 2094 + }, + { + "epoch": 5.54, + "learning_rate": 2.2587131367292226e-05, + "loss": 0.0015, + "step": 2095 + }, + { + "epoch": 5.54, + "learning_rate": 2.2573726541554958e-05, + "loss": 0.0195, + "step": 2096 + }, + { + "epoch": 5.55, + "learning_rate": 2.2560321715817693e-05, + "loss": 0.0007, + "step": 2097 + }, + { + "epoch": 5.55, + "learning_rate": 2.254691689008043e-05, + "loss": 0.001, + "step": 2098 + }, + { + "epoch": 5.55, + "learning_rate": 2.2533512064343164e-05, + "loss": 0.2777, + "step": 2099 + }, + { + "epoch": 5.56, + "learning_rate": 2.25201072386059e-05, + "loss": 0.0657, + "step": 2100 + }, + { + "epoch": 5.56, + "learning_rate": 2.2506702412868634e-05, + "loss": 0.159, + "step": 2101 + }, + { + "epoch": 5.56, + "learning_rate": 2.249329758713137e-05, + "loss": 0.0342, + "step": 2102 + }, + { + "epoch": 5.56, + "learning_rate": 2.2479892761394104e-05, + "loss": 0.3001, + "step": 2103 + }, + { + "epoch": 5.57, + "learning_rate": 2.246648793565684e-05, + "loss": 0.0028, + "step": 2104 + }, + { + "epoch": 5.57, + "learning_rate": 2.2453083109919574e-05, + "loss": 0.0191, + "step": 2105 + }, + { + "epoch": 5.57, + "learning_rate": 2.2439678284182306e-05, + "loss": 0.0012, + "step": 2106 + }, + { + "epoch": 5.57, + "learning_rate": 2.242627345844504e-05, + "loss": 0.2619, + "step": 2107 + }, + { + "epoch": 5.58, + "learning_rate": 2.2412868632707776e-05, + "loss": 0.001, + "step": 2108 + }, + { + "epoch": 5.58, + "learning_rate": 2.239946380697051e-05, + "loss": 0.094, + "step": 2109 + }, + { + "epoch": 5.58, + "learning_rate": 2.2386058981233247e-05, + "loss": 0.003, + "step": 2110 + }, + { + "epoch": 5.58, + "learning_rate": 2.237265415549598e-05, + "loss": 0.0528, + "step": 2111 + }, + { + "epoch": 5.59, + "learning_rate": 2.2359249329758714e-05, + "loss": 0.1252, + "step": 2112 + }, + { + "epoch": 5.59, + "learning_rate": 2.234584450402145e-05, + "loss": 0.0039, + "step": 2113 + }, + { + "epoch": 5.59, + "learning_rate": 2.2332439678284184e-05, + "loss": 0.0913, + "step": 2114 + }, + { + "epoch": 5.6, + "learning_rate": 2.231903485254692e-05, + "loss": 0.0023, + "step": 2115 + }, + { + "epoch": 5.6, + "learning_rate": 2.230563002680965e-05, + "loss": 0.0047, + "step": 2116 + }, + { + "epoch": 5.6, + "learning_rate": 2.2292225201072386e-05, + "loss": 0.0688, + "step": 2117 + }, + { + "epoch": 5.6, + "learning_rate": 2.227882037533512e-05, + "loss": 0.0013, + "step": 2118 + }, + { + "epoch": 5.61, + "learning_rate": 2.2265415549597856e-05, + "loss": 0.0012, + "step": 2119 + }, + { + "epoch": 5.61, + "learning_rate": 2.225201072386059e-05, + "loss": 0.0048, + "step": 2120 + }, + { + "epoch": 5.61, + "learning_rate": 2.2238605898123323e-05, + "loss": 0.0011, + "step": 2121 + }, + { + "epoch": 5.61, + "learning_rate": 2.2225201072386058e-05, + "loss": 0.0009, + "step": 2122 + }, + { + "epoch": 5.62, + "learning_rate": 2.2211796246648793e-05, + "loss": 0.0015, + "step": 2123 + }, + { + "epoch": 5.62, + "learning_rate": 2.2198391420911528e-05, + "loss": 0.0077, + "step": 2124 + }, + { + "epoch": 5.62, + "learning_rate": 2.2184986595174263e-05, + "loss": 0.0007, + "step": 2125 + }, + { + "epoch": 5.62, + "learning_rate": 2.2171581769437e-05, + "loss": 0.0007, + "step": 2126 + }, + { + "epoch": 5.63, + "learning_rate": 2.2158176943699734e-05, + "loss": 0.0506, + "step": 2127 + }, + { + "epoch": 5.63, + "learning_rate": 2.214477211796247e-05, + "loss": 0.0016, + "step": 2128 + }, + { + "epoch": 5.63, + "learning_rate": 2.2131367292225204e-05, + "loss": 0.0005, + "step": 2129 + }, + { + "epoch": 5.63, + "learning_rate": 2.211796246648794e-05, + "loss": 0.0049, + "step": 2130 + }, + { + "epoch": 5.64, + "learning_rate": 2.210455764075067e-05, + "loss": 0.0305, + "step": 2131 + }, + { + "epoch": 5.64, + "learning_rate": 2.2091152815013406e-05, + "loss": 0.0448, + "step": 2132 + }, + { + "epoch": 5.64, + "learning_rate": 2.207774798927614e-05, + "loss": 0.5391, + "step": 2133 + }, + { + "epoch": 5.65, + "learning_rate": 2.2064343163538876e-05, + "loss": 0.0005, + "step": 2134 + }, + { + "epoch": 5.65, + "learning_rate": 2.205093833780161e-05, + "loss": 0.0141, + "step": 2135 + }, + { + "epoch": 5.65, + "learning_rate": 2.2037533512064346e-05, + "loss": 0.3613, + "step": 2136 + }, + { + "epoch": 5.65, + "learning_rate": 2.2024128686327078e-05, + "loss": 0.0013, + "step": 2137 + }, + { + "epoch": 5.66, + "learning_rate": 2.2010723860589813e-05, + "loss": 0.002, + "step": 2138 + }, + { + "epoch": 5.66, + "learning_rate": 2.199731903485255e-05, + "loss": 0.0207, + "step": 2139 + }, + { + "epoch": 5.66, + "learning_rate": 2.1983914209115284e-05, + "loss": 0.0004, + "step": 2140 + }, + { + "epoch": 5.66, + "learning_rate": 2.197050938337802e-05, + "loss": 0.0022, + "step": 2141 + }, + { + "epoch": 5.67, + "learning_rate": 2.195710455764075e-05, + "loss": 0.5076, + "step": 2142 + }, + { + "epoch": 5.67, + "learning_rate": 2.1943699731903486e-05, + "loss": 0.0016, + "step": 2143 + }, + { + "epoch": 5.67, + "learning_rate": 2.193029490616622e-05, + "loss": 0.0014, + "step": 2144 + }, + { + "epoch": 5.67, + "learning_rate": 2.1916890080428956e-05, + "loss": 0.0101, + "step": 2145 + }, + { + "epoch": 5.68, + "learning_rate": 2.190348525469169e-05, + "loss": 0.0048, + "step": 2146 + }, + { + "epoch": 5.68, + "learning_rate": 2.1890080428954423e-05, + "loss": 0.001, + "step": 2147 + }, + { + "epoch": 5.68, + "learning_rate": 2.1876675603217158e-05, + "loss": 0.0004, + "step": 2148 + }, + { + "epoch": 5.69, + "learning_rate": 2.1863270777479893e-05, + "loss": 0.2627, + "step": 2149 + }, + { + "epoch": 5.69, + "learning_rate": 2.1849865951742628e-05, + "loss": 0.0013, + "step": 2150 + }, + { + "epoch": 5.69, + "learning_rate": 2.1836461126005363e-05, + "loss": 0.0074, + "step": 2151 + }, + { + "epoch": 5.69, + "learning_rate": 2.1823056300268095e-05, + "loss": 0.0238, + "step": 2152 + }, + { + "epoch": 5.7, + "learning_rate": 2.180965147453083e-05, + "loss": 0.0013, + "step": 2153 + }, + { + "epoch": 5.7, + "learning_rate": 2.1796246648793565e-05, + "loss": 0.0005, + "step": 2154 + }, + { + "epoch": 5.7, + "learning_rate": 2.17828418230563e-05, + "loss": 0.0006, + "step": 2155 + }, + { + "epoch": 5.7, + "learning_rate": 2.1769436997319036e-05, + "loss": 0.0442, + "step": 2156 + }, + { + "epoch": 5.71, + "learning_rate": 2.175603217158177e-05, + "loss": 0.0007, + "step": 2157 + }, + { + "epoch": 5.71, + "learning_rate": 2.1742627345844506e-05, + "loss": 0.0032, + "step": 2158 + }, + { + "epoch": 5.71, + "learning_rate": 2.172922252010724e-05, + "loss": 0.005, + "step": 2159 + }, + { + "epoch": 5.71, + "learning_rate": 2.1715817694369976e-05, + "loss": 0.0005, + "step": 2160 + }, + { + "epoch": 5.72, + "learning_rate": 2.170241286863271e-05, + "loss": 0.0005, + "step": 2161 + }, + { + "epoch": 5.72, + "learning_rate": 2.1689008042895443e-05, + "loss": 0.2023, + "step": 2162 + }, + { + "epoch": 5.72, + "learning_rate": 2.1675603217158178e-05, + "loss": 0.3146, + "step": 2163 + }, + { + "epoch": 5.72, + "learning_rate": 2.1662198391420913e-05, + "loss": 0.0035, + "step": 2164 + }, + { + "epoch": 5.73, + "learning_rate": 2.164879356568365e-05, + "loss": 0.141, + "step": 2165 + }, + { + "epoch": 5.73, + "learning_rate": 2.1635388739946383e-05, + "loss": 0.0005, + "step": 2166 + }, + { + "epoch": 5.73, + "learning_rate": 2.1621983914209115e-05, + "loss": 0.0013, + "step": 2167 + }, + { + "epoch": 5.74, + "learning_rate": 2.160857908847185e-05, + "loss": 0.0484, + "step": 2168 + }, + { + "epoch": 5.74, + "learning_rate": 2.1595174262734585e-05, + "loss": 0.0109, + "step": 2169 + }, + { + "epoch": 5.74, + "learning_rate": 2.158176943699732e-05, + "loss": 0.3307, + "step": 2170 + }, + { + "epoch": 5.74, + "learning_rate": 2.1568364611260056e-05, + "loss": 0.0013, + "step": 2171 + }, + { + "epoch": 5.75, + "learning_rate": 2.1554959785522787e-05, + "loss": 0.0005, + "step": 2172 + }, + { + "epoch": 5.75, + "learning_rate": 2.1541554959785523e-05, + "loss": 0.0004, + "step": 2173 + }, + { + "epoch": 5.75, + "learning_rate": 2.1528150134048258e-05, + "loss": 0.0744, + "step": 2174 + }, + { + "epoch": 5.75, + "learning_rate": 2.1514745308310993e-05, + "loss": 0.0008, + "step": 2175 + }, + { + "epoch": 5.76, + "learning_rate": 2.1501340482573728e-05, + "loss": 0.0013, + "step": 2176 + }, + { + "epoch": 5.76, + "learning_rate": 2.148793565683646e-05, + "loss": 0.0006, + "step": 2177 + }, + { + "epoch": 5.76, + "learning_rate": 2.1474530831099195e-05, + "loss": 0.055, + "step": 2178 + }, + { + "epoch": 5.76, + "learning_rate": 2.146112600536193e-05, + "loss": 0.0013, + "step": 2179 + }, + { + "epoch": 5.77, + "learning_rate": 2.1447721179624665e-05, + "loss": 0.0022, + "step": 2180 + }, + { + "epoch": 5.77, + "learning_rate": 2.14343163538874e-05, + "loss": 0.0009, + "step": 2181 + }, + { + "epoch": 5.77, + "learning_rate": 2.1420911528150135e-05, + "loss": 0.0011, + "step": 2182 + }, + { + "epoch": 5.78, + "learning_rate": 2.140750670241287e-05, + "loss": 0.0676, + "step": 2183 + }, + { + "epoch": 5.78, + "learning_rate": 2.1394101876675606e-05, + "loss": 0.4583, + "step": 2184 + }, + { + "epoch": 5.78, + "learning_rate": 2.138069705093834e-05, + "loss": 0.0009, + "step": 2185 + }, + { + "epoch": 5.78, + "learning_rate": 2.1367292225201076e-05, + "loss": 0.002, + "step": 2186 + }, + { + "epoch": 5.79, + "learning_rate": 2.1353887399463808e-05, + "loss": 0.7625, + "step": 2187 + }, + { + "epoch": 5.79, + "learning_rate": 2.1340482573726543e-05, + "loss": 0.4912, + "step": 2188 + }, + { + "epoch": 5.79, + "learning_rate": 2.1327077747989278e-05, + "loss": 0.0283, + "step": 2189 + }, + { + "epoch": 5.79, + "learning_rate": 2.1313672922252013e-05, + "loss": 0.0011, + "step": 2190 + }, + { + "epoch": 5.8, + "learning_rate": 2.1300268096514748e-05, + "loss": 0.0013, + "step": 2191 + }, + { + "epoch": 5.8, + "learning_rate": 2.128686327077748e-05, + "loss": 0.3384, + "step": 2192 + }, + { + "epoch": 5.8, + "learning_rate": 2.1273458445040215e-05, + "loss": 0.4533, + "step": 2193 + }, + { + "epoch": 5.8, + "learning_rate": 2.126005361930295e-05, + "loss": 0.0039, + "step": 2194 + }, + { + "epoch": 5.81, + "learning_rate": 2.1246648793565685e-05, + "loss": 0.0038, + "step": 2195 + }, + { + "epoch": 5.81, + "learning_rate": 2.123324396782842e-05, + "loss": 0.0318, + "step": 2196 + }, + { + "epoch": 5.81, + "learning_rate": 2.1219839142091156e-05, + "loss": 0.0045, + "step": 2197 + }, + { + "epoch": 5.81, + "learning_rate": 2.1206434316353887e-05, + "loss": 0.3134, + "step": 2198 + }, + { + "epoch": 5.82, + "learning_rate": 2.1193029490616622e-05, + "loss": 0.0011, + "step": 2199 + }, + { + "epoch": 5.82, + "learning_rate": 2.1179624664879358e-05, + "loss": 0.0008, + "step": 2200 + }, + { + "epoch": 5.82, + "learning_rate": 2.1166219839142093e-05, + "loss": 0.0009, + "step": 2201 + }, + { + "epoch": 5.83, + "learning_rate": 2.1152815013404828e-05, + "loss": 0.0083, + "step": 2202 + }, + { + "epoch": 5.83, + "learning_rate": 2.113941018766756e-05, + "loss": 0.0196, + "step": 2203 + }, + { + "epoch": 5.83, + "learning_rate": 2.1126005361930295e-05, + "loss": 0.0063, + "step": 2204 + }, + { + "epoch": 5.83, + "learning_rate": 2.111260053619303e-05, + "loss": 0.0064, + "step": 2205 + }, + { + "epoch": 5.84, + "learning_rate": 2.1099195710455765e-05, + "loss": 0.0143, + "step": 2206 + }, + { + "epoch": 5.84, + "learning_rate": 2.10857908847185e-05, + "loss": 0.0012, + "step": 2207 + }, + { + "epoch": 5.84, + "learning_rate": 2.1072386058981232e-05, + "loss": 0.0033, + "step": 2208 + }, + { + "epoch": 5.84, + "learning_rate": 2.1058981233243967e-05, + "loss": 0.0014, + "step": 2209 + }, + { + "epoch": 5.85, + "learning_rate": 2.1045576407506702e-05, + "loss": 0.0219, + "step": 2210 + }, + { + "epoch": 5.85, + "learning_rate": 2.1032171581769437e-05, + "loss": 0.3033, + "step": 2211 + }, + { + "epoch": 5.85, + "learning_rate": 2.1018766756032172e-05, + "loss": 0.0711, + "step": 2212 + }, + { + "epoch": 5.85, + "learning_rate": 2.1005361930294907e-05, + "loss": 0.0051, + "step": 2213 + }, + { + "epoch": 5.86, + "learning_rate": 2.0991957104557643e-05, + "loss": 0.026, + "step": 2214 + }, + { + "epoch": 5.86, + "learning_rate": 2.0978552278820378e-05, + "loss": 0.0024, + "step": 2215 + }, + { + "epoch": 5.86, + "learning_rate": 2.0965147453083113e-05, + "loss": 0.3622, + "step": 2216 + }, + { + "epoch": 5.87, + "learning_rate": 2.0951742627345848e-05, + "loss": 0.0009, + "step": 2217 + }, + { + "epoch": 5.87, + "learning_rate": 2.093833780160858e-05, + "loss": 0.0019, + "step": 2218 + }, + { + "epoch": 5.87, + "learning_rate": 2.0924932975871315e-05, + "loss": 0.3412, + "step": 2219 + }, + { + "epoch": 5.87, + "learning_rate": 2.091152815013405e-05, + "loss": 0.0197, + "step": 2220 + }, + { + "epoch": 5.88, + "learning_rate": 2.0898123324396785e-05, + "loss": 0.4229, + "step": 2221 + }, + { + "epoch": 5.88, + "learning_rate": 2.088471849865952e-05, + "loss": 0.0014, + "step": 2222 + }, + { + "epoch": 5.88, + "learning_rate": 2.0871313672922252e-05, + "loss": 0.0183, + "step": 2223 + }, + { + "epoch": 5.88, + "learning_rate": 2.0857908847184987e-05, + "loss": 0.2005, + "step": 2224 + }, + { + "epoch": 5.89, + "learning_rate": 2.0844504021447722e-05, + "loss": 0.0122, + "step": 2225 + }, + { + "epoch": 5.89, + "learning_rate": 2.0831099195710457e-05, + "loss": 0.1178, + "step": 2226 + }, + { + "epoch": 5.89, + "learning_rate": 2.0817694369973193e-05, + "loss": 0.0105, + "step": 2227 + }, + { + "epoch": 5.89, + "learning_rate": 2.0804289544235924e-05, + "loss": 0.0328, + "step": 2228 + }, + { + "epoch": 5.9, + "learning_rate": 2.079088471849866e-05, + "loss": 0.0087, + "step": 2229 + }, + { + "epoch": 5.9, + "learning_rate": 2.0777479892761395e-05, + "loss": 0.0288, + "step": 2230 + }, + { + "epoch": 5.9, + "learning_rate": 2.076407506702413e-05, + "loss": 0.0017, + "step": 2231 + }, + { + "epoch": 5.9, + "learning_rate": 2.0750670241286865e-05, + "loss": 0.002, + "step": 2232 + }, + { + "epoch": 5.91, + "learning_rate": 2.0737265415549597e-05, + "loss": 0.0024, + "step": 2233 + }, + { + "epoch": 5.91, + "learning_rate": 2.072386058981233e-05, + "loss": 0.0008, + "step": 2234 + }, + { + "epoch": 5.91, + "learning_rate": 2.0710455764075067e-05, + "loss": 0.002, + "step": 2235 + }, + { + "epoch": 5.92, + "learning_rate": 2.0697050938337802e-05, + "loss": 0.0052, + "step": 2236 + }, + { + "epoch": 5.92, + "learning_rate": 2.0683646112600537e-05, + "loss": 0.0058, + "step": 2237 + }, + { + "epoch": 5.92, + "learning_rate": 2.0670241286863272e-05, + "loss": 0.0013, + "step": 2238 + }, + { + "epoch": 5.92, + "learning_rate": 2.0656836461126007e-05, + "loss": 0.0158, + "step": 2239 + }, + { + "epoch": 5.93, + "learning_rate": 2.0643431635388742e-05, + "loss": 0.229, + "step": 2240 + }, + { + "epoch": 5.93, + "learning_rate": 2.0630026809651478e-05, + "loss": 0.1844, + "step": 2241 + }, + { + "epoch": 5.93, + "learning_rate": 2.0616621983914213e-05, + "loss": 0.2905, + "step": 2242 + }, + { + "epoch": 5.93, + "learning_rate": 2.0603217158176944e-05, + "loss": 0.0059, + "step": 2243 + }, + { + "epoch": 5.94, + "learning_rate": 2.058981233243968e-05, + "loss": 0.0007, + "step": 2244 + }, + { + "epoch": 5.94, + "learning_rate": 2.0576407506702415e-05, + "loss": 0.1638, + "step": 2245 + }, + { + "epoch": 5.94, + "learning_rate": 2.056300268096515e-05, + "loss": 0.1195, + "step": 2246 + }, + { + "epoch": 5.94, + "learning_rate": 2.0549597855227885e-05, + "loss": 0.0015, + "step": 2247 + }, + { + "epoch": 5.95, + "learning_rate": 2.0536193029490617e-05, + "loss": 0.0013, + "step": 2248 + }, + { + "epoch": 5.95, + "learning_rate": 2.0522788203753352e-05, + "loss": 0.5152, + "step": 2249 + }, + { + "epoch": 5.95, + "learning_rate": 2.0509383378016087e-05, + "loss": 0.0315, + "step": 2250 + }, + { + "epoch": 5.96, + "learning_rate": 2.0495978552278822e-05, + "loss": 0.1213, + "step": 2251 + }, + { + "epoch": 5.96, + "learning_rate": 2.0482573726541557e-05, + "loss": 0.0006, + "step": 2252 + }, + { + "epoch": 5.96, + "learning_rate": 2.046916890080429e-05, + "loss": 0.0011, + "step": 2253 + }, + { + "epoch": 5.96, + "learning_rate": 2.0455764075067024e-05, + "loss": 0.2546, + "step": 2254 + }, + { + "epoch": 5.97, + "learning_rate": 2.044235924932976e-05, + "loss": 0.1259, + "step": 2255 + }, + { + "epoch": 5.97, + "learning_rate": 2.0428954423592494e-05, + "loss": 0.0179, + "step": 2256 + }, + { + "epoch": 5.97, + "learning_rate": 2.041554959785523e-05, + "loss": 0.0257, + "step": 2257 + }, + { + "epoch": 5.97, + "learning_rate": 2.0402144772117965e-05, + "loss": 0.0092, + "step": 2258 + }, + { + "epoch": 5.98, + "learning_rate": 2.0388739946380696e-05, + "loss": 0.3231, + "step": 2259 + }, + { + "epoch": 5.98, + "learning_rate": 2.037533512064343e-05, + "loss": 0.0084, + "step": 2260 + }, + { + "epoch": 5.98, + "learning_rate": 2.0361930294906167e-05, + "loss": 0.0517, + "step": 2261 + }, + { + "epoch": 5.98, + "learning_rate": 2.0348525469168902e-05, + "loss": 0.0012, + "step": 2262 + }, + { + "epoch": 5.99, + "learning_rate": 2.0335120643431637e-05, + "loss": 0.0045, + "step": 2263 + }, + { + "epoch": 5.99, + "learning_rate": 2.032171581769437e-05, + "loss": 0.0012, + "step": 2264 + }, + { + "epoch": 5.99, + "learning_rate": 2.0308310991957104e-05, + "loss": 0.0032, + "step": 2265 + }, + { + "epoch": 5.99, + "learning_rate": 2.029490616621984e-05, + "loss": 0.0038, + "step": 2266 + }, + { + "epoch": 6.0, + "learning_rate": 2.0281501340482574e-05, + "loss": 0.2731, + "step": 2267 + }, + { + "epoch": 6.0, + "learning_rate": 2.026809651474531e-05, + "loss": 0.3122, + "step": 2268 + }, + { + "epoch": 6.0, + "eval_f1": 0.7762762762762763, + "eval_loss": 1.0977023839950562, + "eval_runtime": 1.8624, + "eval_samples_per_second": 812.383, + "eval_steps_per_second": 51.009, + "step": 2268 + }, + { + "epoch": 6.0, + "learning_rate": 2.0254691689008044e-05, + "loss": 0.0035, + "step": 2269 + }, + { + "epoch": 6.01, + "learning_rate": 2.024128686327078e-05, + "loss": 0.0709, + "step": 2270 + }, + { + "epoch": 6.01, + "learning_rate": 2.0227882037533515e-05, + "loss": 0.1548, + "step": 2271 + }, + { + "epoch": 6.01, + "learning_rate": 2.021447721179625e-05, + "loss": 0.0035, + "step": 2272 + }, + { + "epoch": 6.01, + "learning_rate": 2.0201072386058985e-05, + "loss": 0.0009, + "step": 2273 + }, + { + "epoch": 6.02, + "learning_rate": 2.0187667560321717e-05, + "loss": 0.0061, + "step": 2274 + }, + { + "epoch": 6.02, + "learning_rate": 2.0174262734584452e-05, + "loss": 0.3586, + "step": 2275 + }, + { + "epoch": 6.02, + "learning_rate": 2.0160857908847187e-05, + "loss": 0.0019, + "step": 2276 + }, + { + "epoch": 6.02, + "learning_rate": 2.0147453083109922e-05, + "loss": 0.0077, + "step": 2277 + }, + { + "epoch": 6.03, + "learning_rate": 2.0134048257372657e-05, + "loss": 0.0022, + "step": 2278 + }, + { + "epoch": 6.03, + "learning_rate": 2.012064343163539e-05, + "loss": 0.0021, + "step": 2279 + }, + { + "epoch": 6.03, + "learning_rate": 2.0107238605898124e-05, + "loss": 0.0022, + "step": 2280 + }, + { + "epoch": 6.03, + "learning_rate": 2.009383378016086e-05, + "loss": 0.1387, + "step": 2281 + }, + { + "epoch": 6.04, + "learning_rate": 2.0080428954423594e-05, + "loss": 0.0034, + "step": 2282 + }, + { + "epoch": 6.04, + "learning_rate": 2.006702412868633e-05, + "loss": 0.0128, + "step": 2283 + }, + { + "epoch": 6.04, + "learning_rate": 2.005361930294906e-05, + "loss": 0.0201, + "step": 2284 + }, + { + "epoch": 6.04, + "learning_rate": 2.0040214477211796e-05, + "loss": 0.0072, + "step": 2285 + }, + { + "epoch": 6.05, + "learning_rate": 2.002680965147453e-05, + "loss": 0.0031, + "step": 2286 + }, + { + "epoch": 6.05, + "learning_rate": 2.0013404825737267e-05, + "loss": 0.0273, + "step": 2287 + }, + { + "epoch": 6.05, + "learning_rate": 2e-05, + "loss": 0.0679, + "step": 2288 + }, + { + "epoch": 6.06, + "learning_rate": 1.9986595174262733e-05, + "loss": 0.0012, + "step": 2289 + }, + { + "epoch": 6.06, + "learning_rate": 1.997319034852547e-05, + "loss": 0.0045, + "step": 2290 + }, + { + "epoch": 6.06, + "learning_rate": 1.9959785522788204e-05, + "loss": 0.0289, + "step": 2291 + }, + { + "epoch": 6.06, + "learning_rate": 1.994638069705094e-05, + "loss": 0.1421, + "step": 2292 + }, + { + "epoch": 6.07, + "learning_rate": 1.9932975871313674e-05, + "loss": 0.0007, + "step": 2293 + }, + { + "epoch": 6.07, + "learning_rate": 1.9919571045576406e-05, + "loss": 0.0037, + "step": 2294 + }, + { + "epoch": 6.07, + "learning_rate": 1.990616621983914e-05, + "loss": 0.1566, + "step": 2295 + }, + { + "epoch": 6.07, + "learning_rate": 1.9892761394101876e-05, + "loss": 0.0008, + "step": 2296 + }, + { + "epoch": 6.08, + "learning_rate": 1.987935656836461e-05, + "loss": 0.0008, + "step": 2297 + }, + { + "epoch": 6.08, + "learning_rate": 1.9865951742627346e-05, + "loss": 0.1319, + "step": 2298 + }, + { + "epoch": 6.08, + "learning_rate": 1.985254691689008e-05, + "loss": 0.0007, + "step": 2299 + }, + { + "epoch": 6.08, + "learning_rate": 1.9839142091152816e-05, + "loss": 0.0048, + "step": 2300 + }, + { + "epoch": 6.09, + "learning_rate": 1.982573726541555e-05, + "loss": 0.223, + "step": 2301 + }, + { + "epoch": 6.09, + "learning_rate": 1.9812332439678287e-05, + "loss": 0.2188, + "step": 2302 + }, + { + "epoch": 6.09, + "learning_rate": 1.9798927613941022e-05, + "loss": 0.0209, + "step": 2303 + }, + { + "epoch": 6.1, + "learning_rate": 1.9785522788203754e-05, + "loss": 0.001, + "step": 2304 + }, + { + "epoch": 6.1, + "learning_rate": 1.977211796246649e-05, + "loss": 0.0006, + "step": 2305 + }, + { + "epoch": 6.1, + "learning_rate": 1.9758713136729224e-05, + "loss": 0.0011, + "step": 2306 + }, + { + "epoch": 6.1, + "learning_rate": 1.974530831099196e-05, + "loss": 0.0008, + "step": 2307 + }, + { + "epoch": 6.11, + "learning_rate": 1.9731903485254694e-05, + "loss": 0.0015, + "step": 2308 + }, + { + "epoch": 6.11, + "learning_rate": 1.9718498659517426e-05, + "loss": 0.0024, + "step": 2309 + }, + { + "epoch": 6.11, + "learning_rate": 1.970509383378016e-05, + "loss": 0.0006, + "step": 2310 + }, + { + "epoch": 6.11, + "learning_rate": 1.9691689008042896e-05, + "loss": 0.0007, + "step": 2311 + }, + { + "epoch": 6.12, + "learning_rate": 1.967828418230563e-05, + "loss": 0.0026, + "step": 2312 + }, + { + "epoch": 6.12, + "learning_rate": 1.9664879356568366e-05, + "loss": 0.0019, + "step": 2313 + }, + { + "epoch": 6.12, + "learning_rate": 1.9651474530831098e-05, + "loss": 0.0055, + "step": 2314 + }, + { + "epoch": 6.12, + "learning_rate": 1.9638069705093833e-05, + "loss": 0.0023, + "step": 2315 + }, + { + "epoch": 6.13, + "learning_rate": 1.962466487935657e-05, + "loss": 0.0011, + "step": 2316 + }, + { + "epoch": 6.13, + "learning_rate": 1.9611260053619303e-05, + "loss": 0.0407, + "step": 2317 + }, + { + "epoch": 6.13, + "learning_rate": 1.959785522788204e-05, + "loss": 0.046, + "step": 2318 + }, + { + "epoch": 6.13, + "learning_rate": 1.958445040214477e-05, + "loss": 0.0062, + "step": 2319 + }, + { + "epoch": 6.14, + "learning_rate": 1.9571045576407505e-05, + "loss": 0.0007, + "step": 2320 + }, + { + "epoch": 6.14, + "learning_rate": 1.955764075067024e-05, + "loss": 0.3377, + "step": 2321 + }, + { + "epoch": 6.14, + "learning_rate": 1.9544235924932976e-05, + "loss": 0.4294, + "step": 2322 + }, + { + "epoch": 6.15, + "learning_rate": 1.953083109919571e-05, + "loss": 0.0016, + "step": 2323 + }, + { + "epoch": 6.15, + "learning_rate": 1.9517426273458446e-05, + "loss": 0.0283, + "step": 2324 + }, + { + "epoch": 6.15, + "learning_rate": 1.950402144772118e-05, + "loss": 0.0005, + "step": 2325 + }, + { + "epoch": 6.15, + "learning_rate": 1.9490616621983916e-05, + "loss": 0.0011, + "step": 2326 + }, + { + "epoch": 6.16, + "learning_rate": 1.947721179624665e-05, + "loss": 0.0237, + "step": 2327 + }, + { + "epoch": 6.16, + "learning_rate": 1.9463806970509387e-05, + "loss": 0.0581, + "step": 2328 + }, + { + "epoch": 6.16, + "learning_rate": 1.945040214477212e-05, + "loss": 0.0905, + "step": 2329 + }, + { + "epoch": 6.16, + "learning_rate": 1.9436997319034853e-05, + "loss": 0.0012, + "step": 2330 + }, + { + "epoch": 6.17, + "learning_rate": 1.942359249329759e-05, + "loss": 0.0053, + "step": 2331 + }, + { + "epoch": 6.17, + "learning_rate": 1.9410187667560324e-05, + "loss": 0.0225, + "step": 2332 + }, + { + "epoch": 6.17, + "learning_rate": 1.939678284182306e-05, + "loss": 0.0374, + "step": 2333 + }, + { + "epoch": 6.17, + "learning_rate": 1.9383378016085794e-05, + "loss": 0.0006, + "step": 2334 + }, + { + "epoch": 6.18, + "learning_rate": 1.9369973190348526e-05, + "loss": 0.0008, + "step": 2335 + }, + { + "epoch": 6.18, + "learning_rate": 1.935656836461126e-05, + "loss": 0.0006, + "step": 2336 + }, + { + "epoch": 6.18, + "learning_rate": 1.9343163538873996e-05, + "loss": 0.0011, + "step": 2337 + }, + { + "epoch": 6.19, + "learning_rate": 1.932975871313673e-05, + "loss": 0.0014, + "step": 2338 + }, + { + "epoch": 6.19, + "learning_rate": 1.9316353887399466e-05, + "loss": 0.0006, + "step": 2339 + }, + { + "epoch": 6.19, + "learning_rate": 1.9302949061662198e-05, + "loss": 0.0504, + "step": 2340 + }, + { + "epoch": 6.19, + "learning_rate": 1.9289544235924933e-05, + "loss": 0.005, + "step": 2341 + }, + { + "epoch": 6.2, + "learning_rate": 1.9276139410187668e-05, + "loss": 0.2673, + "step": 2342 + }, + { + "epoch": 6.2, + "learning_rate": 1.9262734584450403e-05, + "loss": 0.2173, + "step": 2343 + }, + { + "epoch": 6.2, + "learning_rate": 1.924932975871314e-05, + "loss": 0.0235, + "step": 2344 + }, + { + "epoch": 6.2, + "learning_rate": 1.923592493297587e-05, + "loss": 0.0092, + "step": 2345 + }, + { + "epoch": 6.21, + "learning_rate": 1.9222520107238605e-05, + "loss": 0.0088, + "step": 2346 + }, + { + "epoch": 6.21, + "learning_rate": 1.920911528150134e-05, + "loss": 0.0506, + "step": 2347 + }, + { + "epoch": 6.21, + "learning_rate": 1.9195710455764076e-05, + "loss": 0.0033, + "step": 2348 + }, + { + "epoch": 6.21, + "learning_rate": 1.918230563002681e-05, + "loss": 0.0006, + "step": 2349 + }, + { + "epoch": 6.22, + "learning_rate": 1.9168900804289542e-05, + "loss": 0.0021, + "step": 2350 + }, + { + "epoch": 6.22, + "learning_rate": 1.9155495978552278e-05, + "loss": 0.0006, + "step": 2351 + }, + { + "epoch": 6.22, + "learning_rate": 1.9142091152815013e-05, + "loss": 0.0041, + "step": 2352 + }, + { + "epoch": 6.22, + "learning_rate": 1.9128686327077748e-05, + "loss": 0.0209, + "step": 2353 + }, + { + "epoch": 6.23, + "learning_rate": 1.9115281501340483e-05, + "loss": 0.0145, + "step": 2354 + }, + { + "epoch": 6.23, + "learning_rate": 1.9101876675603218e-05, + "loss": 0.0303, + "step": 2355 + }, + { + "epoch": 6.23, + "learning_rate": 1.9088471849865953e-05, + "loss": 0.0951, + "step": 2356 + }, + { + "epoch": 6.24, + "learning_rate": 1.907506702412869e-05, + "loss": 0.042, + "step": 2357 + }, + { + "epoch": 6.24, + "learning_rate": 1.9061662198391424e-05, + "loss": 0.0009, + "step": 2358 + }, + { + "epoch": 6.24, + "learning_rate": 1.904825737265416e-05, + "loss": 0.0006, + "step": 2359 + }, + { + "epoch": 6.24, + "learning_rate": 1.903485254691689e-05, + "loss": 0.0057, + "step": 2360 + }, + { + "epoch": 6.25, + "learning_rate": 1.9021447721179626e-05, + "loss": 0.0578, + "step": 2361 + }, + { + "epoch": 6.25, + "learning_rate": 1.900804289544236e-05, + "loss": 0.3295, + "step": 2362 + }, + { + "epoch": 6.25, + "learning_rate": 1.8994638069705096e-05, + "loss": 0.0005, + "step": 2363 + }, + { + "epoch": 6.25, + "learning_rate": 1.898123324396783e-05, + "loss": 0.0009, + "step": 2364 + }, + { + "epoch": 6.26, + "learning_rate": 1.8967828418230563e-05, + "loss": 0.0036, + "step": 2365 + }, + { + "epoch": 6.26, + "learning_rate": 1.8954423592493298e-05, + "loss": 0.0029, + "step": 2366 + }, + { + "epoch": 6.26, + "learning_rate": 1.8941018766756033e-05, + "loss": 0.1468, + "step": 2367 + }, + { + "epoch": 6.26, + "learning_rate": 1.8927613941018768e-05, + "loss": 0.3163, + "step": 2368 + }, + { + "epoch": 6.27, + "learning_rate": 1.8914209115281503e-05, + "loss": 0.169, + "step": 2369 + }, + { + "epoch": 6.27, + "learning_rate": 1.8900804289544235e-05, + "loss": 0.0493, + "step": 2370 + }, + { + "epoch": 6.27, + "learning_rate": 1.888739946380697e-05, + "loss": 0.0003, + "step": 2371 + }, + { + "epoch": 6.28, + "learning_rate": 1.8873994638069705e-05, + "loss": 0.0015, + "step": 2372 + }, + { + "epoch": 6.28, + "learning_rate": 1.886058981233244e-05, + "loss": 0.258, + "step": 2373 + }, + { + "epoch": 6.28, + "learning_rate": 1.8847184986595175e-05, + "loss": 0.002, + "step": 2374 + }, + { + "epoch": 6.28, + "learning_rate": 1.8833780160857907e-05, + "loss": 0.0008, + "step": 2375 + }, + { + "epoch": 6.29, + "learning_rate": 1.8820375335120642e-05, + "loss": 0.0003, + "step": 2376 + }, + { + "epoch": 6.29, + "learning_rate": 1.8806970509383377e-05, + "loss": 0.0014, + "step": 2377 + }, + { + "epoch": 6.29, + "learning_rate": 1.8793565683646113e-05, + "loss": 0.0068, + "step": 2378 + }, + { + "epoch": 6.29, + "learning_rate": 1.8780160857908848e-05, + "loss": 0.39, + "step": 2379 + }, + { + "epoch": 6.3, + "learning_rate": 1.8766756032171583e-05, + "loss": 0.0046, + "step": 2380 + }, + { + "epoch": 6.3, + "learning_rate": 1.8753351206434318e-05, + "loss": 0.0008, + "step": 2381 + }, + { + "epoch": 6.3, + "learning_rate": 1.8739946380697053e-05, + "loss": 0.0007, + "step": 2382 + }, + { + "epoch": 6.3, + "learning_rate": 1.8726541554959788e-05, + "loss": 0.0013, + "step": 2383 + }, + { + "epoch": 6.31, + "learning_rate": 1.8713136729222523e-05, + "loss": 0.0055, + "step": 2384 + }, + { + "epoch": 6.31, + "learning_rate": 1.869973190348526e-05, + "loss": 0.0014, + "step": 2385 + }, + { + "epoch": 6.31, + "learning_rate": 1.868632707774799e-05, + "loss": 0.0039, + "step": 2386 + }, + { + "epoch": 6.31, + "learning_rate": 1.8672922252010725e-05, + "loss": 0.0995, + "step": 2387 + }, + { + "epoch": 6.32, + "learning_rate": 1.865951742627346e-05, + "loss": 0.0015, + "step": 2388 + }, + { + "epoch": 6.32, + "learning_rate": 1.8646112600536196e-05, + "loss": 0.083, + "step": 2389 + }, + { + "epoch": 6.32, + "learning_rate": 1.863270777479893e-05, + "loss": 0.0044, + "step": 2390 + }, + { + "epoch": 6.33, + "learning_rate": 1.8619302949061662e-05, + "loss": 0.0105, + "step": 2391 + }, + { + "epoch": 6.33, + "learning_rate": 1.8605898123324398e-05, + "loss": 0.0005, + "step": 2392 + }, + { + "epoch": 6.33, + "learning_rate": 1.8592493297587133e-05, + "loss": 0.0014, + "step": 2393 + }, + { + "epoch": 6.33, + "learning_rate": 1.8579088471849868e-05, + "loss": 0.0114, + "step": 2394 + }, + { + "epoch": 6.34, + "learning_rate": 1.8565683646112603e-05, + "loss": 0.0334, + "step": 2395 + }, + { + "epoch": 6.34, + "learning_rate": 1.8552278820375335e-05, + "loss": 0.006, + "step": 2396 + }, + { + "epoch": 6.34, + "learning_rate": 1.853887399463807e-05, + "loss": 0.3124, + "step": 2397 + }, + { + "epoch": 6.34, + "learning_rate": 1.8525469168900805e-05, + "loss": 0.2324, + "step": 2398 + }, + { + "epoch": 6.35, + "learning_rate": 1.851206434316354e-05, + "loss": 0.0889, + "step": 2399 + }, + { + "epoch": 6.35, + "learning_rate": 1.8498659517426275e-05, + "loss": 0.0705, + "step": 2400 + }, + { + "epoch": 6.35, + "learning_rate": 1.8485254691689007e-05, + "loss": 0.0012, + "step": 2401 + }, + { + "epoch": 6.35, + "learning_rate": 1.8471849865951742e-05, + "loss": 0.0033, + "step": 2402 + }, + { + "epoch": 6.36, + "learning_rate": 1.8458445040214477e-05, + "loss": 0.0021, + "step": 2403 + }, + { + "epoch": 6.36, + "learning_rate": 1.8445040214477212e-05, + "loss": 0.0005, + "step": 2404 + }, + { + "epoch": 6.36, + "learning_rate": 1.8431635388739948e-05, + "loss": 0.0011, + "step": 2405 + }, + { + "epoch": 6.37, + "learning_rate": 1.841823056300268e-05, + "loss": 0.0006, + "step": 2406 + }, + { + "epoch": 6.37, + "learning_rate": 1.8404825737265414e-05, + "loss": 0.0726, + "step": 2407 + }, + { + "epoch": 6.37, + "learning_rate": 1.839142091152815e-05, + "loss": 0.0009, + "step": 2408 + }, + { + "epoch": 6.37, + "learning_rate": 1.8378016085790885e-05, + "loss": 0.0007, + "step": 2409 + }, + { + "epoch": 6.38, + "learning_rate": 1.836461126005362e-05, + "loss": 0.0859, + "step": 2410 + }, + { + "epoch": 6.38, + "learning_rate": 1.8351206434316355e-05, + "loss": 0.0011, + "step": 2411 + }, + { + "epoch": 6.38, + "learning_rate": 1.833780160857909e-05, + "loss": 0.6542, + "step": 2412 + }, + { + "epoch": 6.38, + "learning_rate": 1.8324396782841825e-05, + "loss": 0.2733, + "step": 2413 + }, + { + "epoch": 6.39, + "learning_rate": 1.831099195710456e-05, + "loss": 0.2825, + "step": 2414 + }, + { + "epoch": 6.39, + "learning_rate": 1.8297587131367295e-05, + "loss": 0.0012, + "step": 2415 + }, + { + "epoch": 6.39, + "learning_rate": 1.8284182305630027e-05, + "loss": 0.1404, + "step": 2416 + }, + { + "epoch": 6.39, + "learning_rate": 1.8270777479892762e-05, + "loss": 0.0006, + "step": 2417 + }, + { + "epoch": 6.4, + "learning_rate": 1.8257372654155497e-05, + "loss": 0.0007, + "step": 2418 + }, + { + "epoch": 6.4, + "learning_rate": 1.8243967828418233e-05, + "loss": 0.1429, + "step": 2419 + }, + { + "epoch": 6.4, + "learning_rate": 1.8230563002680968e-05, + "loss": 0.0008, + "step": 2420 + }, + { + "epoch": 6.4, + "learning_rate": 1.82171581769437e-05, + "loss": 0.0062, + "step": 2421 + }, + { + "epoch": 6.41, + "learning_rate": 1.8203753351206435e-05, + "loss": 0.0071, + "step": 2422 + }, + { + "epoch": 6.41, + "learning_rate": 1.819034852546917e-05, + "loss": 0.0017, + "step": 2423 + }, + { + "epoch": 6.41, + "learning_rate": 1.8176943699731905e-05, + "loss": 0.084, + "step": 2424 + }, + { + "epoch": 6.42, + "learning_rate": 1.816353887399464e-05, + "loss": 0.0011, + "step": 2425 + }, + { + "epoch": 6.42, + "learning_rate": 1.8150134048257372e-05, + "loss": 0.0255, + "step": 2426 + }, + { + "epoch": 6.42, + "learning_rate": 1.8136729222520107e-05, + "loss": 0.0009, + "step": 2427 + }, + { + "epoch": 6.42, + "learning_rate": 1.8123324396782842e-05, + "loss": 0.3105, + "step": 2428 + }, + { + "epoch": 6.43, + "learning_rate": 1.8109919571045577e-05, + "loss": 0.0046, + "step": 2429 + }, + { + "epoch": 6.43, + "learning_rate": 1.8096514745308312e-05, + "loss": 0.0089, + "step": 2430 + }, + { + "epoch": 6.43, + "learning_rate": 1.8083109919571044e-05, + "loss": 0.1176, + "step": 2431 + }, + { + "epoch": 6.43, + "learning_rate": 1.806970509383378e-05, + "loss": 0.0235, + "step": 2432 + }, + { + "epoch": 6.44, + "learning_rate": 1.8056300268096514e-05, + "loss": 0.029, + "step": 2433 + }, + { + "epoch": 6.44, + "learning_rate": 1.804289544235925e-05, + "loss": 0.0013, + "step": 2434 + }, + { + "epoch": 6.44, + "learning_rate": 1.8029490616621985e-05, + "loss": 0.0075, + "step": 2435 + }, + { + "epoch": 6.44, + "learning_rate": 1.801608579088472e-05, + "loss": 0.1744, + "step": 2436 + }, + { + "epoch": 6.45, + "learning_rate": 1.8002680965147455e-05, + "loss": 0.0017, + "step": 2437 + }, + { + "epoch": 6.45, + "learning_rate": 1.798927613941019e-05, + "loss": 0.0188, + "step": 2438 + }, + { + "epoch": 6.45, + "learning_rate": 1.7975871313672925e-05, + "loss": 0.0232, + "step": 2439 + }, + { + "epoch": 6.46, + "learning_rate": 1.796246648793566e-05, + "loss": 0.1459, + "step": 2440 + }, + { + "epoch": 6.46, + "learning_rate": 1.7949061662198392e-05, + "loss": 0.0007, + "step": 2441 + }, + { + "epoch": 6.46, + "learning_rate": 1.7935656836461127e-05, + "loss": 0.0005, + "step": 2442 + }, + { + "epoch": 6.46, + "learning_rate": 1.7922252010723862e-05, + "loss": 0.0012, + "step": 2443 + }, + { + "epoch": 6.47, + "learning_rate": 1.7908847184986597e-05, + "loss": 0.0041, + "step": 2444 + }, + { + "epoch": 6.47, + "learning_rate": 1.7895442359249332e-05, + "loss": 0.4884, + "step": 2445 + }, + { + "epoch": 6.47, + "learning_rate": 1.7882037533512068e-05, + "loss": 0.0017, + "step": 2446 + }, + { + "epoch": 6.47, + "learning_rate": 1.78686327077748e-05, + "loss": 0.0566, + "step": 2447 + }, + { + "epoch": 6.48, + "learning_rate": 1.7855227882037534e-05, + "loss": 0.012, + "step": 2448 + }, + { + "epoch": 6.48, + "learning_rate": 1.784182305630027e-05, + "loss": 0.001, + "step": 2449 + }, + { + "epoch": 6.48, + "learning_rate": 1.7828418230563005e-05, + "loss": 0.0028, + "step": 2450 + }, + { + "epoch": 6.48, + "learning_rate": 1.781501340482574e-05, + "loss": 0.4622, + "step": 2451 + }, + { + "epoch": 6.49, + "learning_rate": 1.780160857908847e-05, + "loss": 0.0042, + "step": 2452 + }, + { + "epoch": 6.49, + "learning_rate": 1.7788203753351207e-05, + "loss": 0.0176, + "step": 2453 + }, + { + "epoch": 6.49, + "learning_rate": 1.7774798927613942e-05, + "loss": 0.0012, + "step": 2454 + }, + { + "epoch": 6.49, + "learning_rate": 1.7761394101876677e-05, + "loss": 0.0344, + "step": 2455 + }, + { + "epoch": 6.5, + "learning_rate": 1.7747989276139412e-05, + "loss": 0.1278, + "step": 2456 + }, + { + "epoch": 6.5, + "learning_rate": 1.7734584450402144e-05, + "loss": 0.0017, + "step": 2457 + }, + { + "epoch": 6.5, + "learning_rate": 1.772117962466488e-05, + "loss": 0.0044, + "step": 2458 + }, + { + "epoch": 6.51, + "learning_rate": 1.7707774798927614e-05, + "loss": 0.0016, + "step": 2459 + }, + { + "epoch": 6.51, + "learning_rate": 1.769436997319035e-05, + "loss": 0.0799, + "step": 2460 + }, + { + "epoch": 6.51, + "learning_rate": 1.7680965147453084e-05, + "loss": 0.0066, + "step": 2461 + }, + { + "epoch": 6.51, + "learning_rate": 1.7667560321715816e-05, + "loss": 0.1607, + "step": 2462 + }, + { + "epoch": 6.52, + "learning_rate": 1.765415549597855e-05, + "loss": 0.0742, + "step": 2463 + }, + { + "epoch": 6.52, + "learning_rate": 1.7640750670241286e-05, + "loss": 0.0005, + "step": 2464 + }, + { + "epoch": 6.52, + "learning_rate": 1.762734584450402e-05, + "loss": 0.0006, + "step": 2465 + }, + { + "epoch": 6.52, + "learning_rate": 1.7613941018766757e-05, + "loss": 0.0019, + "step": 2466 + }, + { + "epoch": 6.53, + "learning_rate": 1.7600536193029492e-05, + "loss": 0.0009, + "step": 2467 + }, + { + "epoch": 6.53, + "learning_rate": 1.7587131367292227e-05, + "loss": 0.0023, + "step": 2468 + }, + { + "epoch": 6.53, + "learning_rate": 1.7573726541554962e-05, + "loss": 0.0202, + "step": 2469 + }, + { + "epoch": 6.53, + "learning_rate": 1.7560321715817697e-05, + "loss": 0.0223, + "step": 2470 + }, + { + "epoch": 6.54, + "learning_rate": 1.7546916890080432e-05, + "loss": 0.0009, + "step": 2471 + }, + { + "epoch": 6.54, + "learning_rate": 1.7533512064343164e-05, + "loss": 0.0014, + "step": 2472 + }, + { + "epoch": 6.54, + "learning_rate": 1.75201072386059e-05, + "loss": 0.0514, + "step": 2473 + }, + { + "epoch": 6.54, + "learning_rate": 1.7506702412868634e-05, + "loss": 0.0013, + "step": 2474 + }, + { + "epoch": 6.55, + "learning_rate": 1.749329758713137e-05, + "loss": 0.0087, + "step": 2475 + }, + { + "epoch": 6.55, + "learning_rate": 1.7479892761394105e-05, + "loss": 0.0035, + "step": 2476 + }, + { + "epoch": 6.55, + "learning_rate": 1.7466487935656836e-05, + "loss": 0.0397, + "step": 2477 + }, + { + "epoch": 6.56, + "learning_rate": 1.745308310991957e-05, + "loss": 0.0021, + "step": 2478 + }, + { + "epoch": 6.56, + "learning_rate": 1.7439678284182307e-05, + "loss": 0.052, + "step": 2479 + }, + { + "epoch": 6.56, + "learning_rate": 1.742627345844504e-05, + "loss": 0.0027, + "step": 2480 + }, + { + "epoch": 6.56, + "learning_rate": 1.7412868632707777e-05, + "loss": 0.001, + "step": 2481 + }, + { + "epoch": 6.57, + "learning_rate": 1.739946380697051e-05, + "loss": 0.2899, + "step": 2482 + }, + { + "epoch": 6.57, + "learning_rate": 1.7386058981233244e-05, + "loss": 0.0007, + "step": 2483 + }, + { + "epoch": 6.57, + "learning_rate": 1.737265415549598e-05, + "loss": 0.0704, + "step": 2484 + }, + { + "epoch": 6.57, + "learning_rate": 1.7359249329758714e-05, + "loss": 0.001, + "step": 2485 + }, + { + "epoch": 6.58, + "learning_rate": 1.734584450402145e-05, + "loss": 0.057, + "step": 2486 + }, + { + "epoch": 6.58, + "learning_rate": 1.733243967828418e-05, + "loss": 0.0002, + "step": 2487 + }, + { + "epoch": 6.58, + "learning_rate": 1.7319034852546916e-05, + "loss": 0.0064, + "step": 2488 + }, + { + "epoch": 6.58, + "learning_rate": 1.730563002680965e-05, + "loss": 0.0638, + "step": 2489 + }, + { + "epoch": 6.59, + "learning_rate": 1.7292225201072386e-05, + "loss": 0.0006, + "step": 2490 + }, + { + "epoch": 6.59, + "learning_rate": 1.727882037533512e-05, + "loss": 0.0142, + "step": 2491 + }, + { + "epoch": 6.59, + "learning_rate": 1.7265415549597856e-05, + "loss": 0.0015, + "step": 2492 + }, + { + "epoch": 6.6, + "learning_rate": 1.725201072386059e-05, + "loss": 0.0949, + "step": 2493 + }, + { + "epoch": 6.6, + "learning_rate": 1.7238605898123327e-05, + "loss": 0.0004, + "step": 2494 + }, + { + "epoch": 6.6, + "learning_rate": 1.7225201072386062e-05, + "loss": 0.0111, + "step": 2495 + }, + { + "epoch": 6.6, + "learning_rate": 1.7211796246648797e-05, + "loss": 0.0215, + "step": 2496 + }, + { + "epoch": 6.61, + "learning_rate": 1.719839142091153e-05, + "loss": 0.266, + "step": 2497 + }, + { + "epoch": 6.61, + "learning_rate": 1.7184986595174264e-05, + "loss": 0.4487, + "step": 2498 + }, + { + "epoch": 6.61, + "learning_rate": 1.7171581769437e-05, + "loss": 0.0021, + "step": 2499 + }, + { + "epoch": 6.61, + "learning_rate": 1.7158176943699734e-05, + "loss": 0.0004, + "step": 2500 + }, + { + "epoch": 6.62, + "learning_rate": 1.714477211796247e-05, + "loss": 0.0004, + "step": 2501 + }, + { + "epoch": 6.62, + "learning_rate": 1.71313672922252e-05, + "loss": 0.0011, + "step": 2502 + }, + { + "epoch": 6.62, + "learning_rate": 1.7117962466487936e-05, + "loss": 0.0006, + "step": 2503 + }, + { + "epoch": 6.62, + "learning_rate": 1.710455764075067e-05, + "loss": 0.1005, + "step": 2504 + }, + { + "epoch": 6.63, + "learning_rate": 1.7091152815013406e-05, + "loss": 0.0472, + "step": 2505 + }, + { + "epoch": 6.63, + "learning_rate": 1.707774798927614e-05, + "loss": 0.0004, + "step": 2506 + }, + { + "epoch": 6.63, + "learning_rate": 1.7064343163538877e-05, + "loss": 0.0162, + "step": 2507 + }, + { + "epoch": 6.63, + "learning_rate": 1.705093833780161e-05, + "loss": 0.004, + "step": 2508 + }, + { + "epoch": 6.64, + "learning_rate": 1.7037533512064344e-05, + "loss": 0.0007, + "step": 2509 + }, + { + "epoch": 6.64, + "learning_rate": 1.702412868632708e-05, + "loss": 0.1447, + "step": 2510 + }, + { + "epoch": 6.64, + "learning_rate": 1.7010723860589814e-05, + "loss": 0.0006, + "step": 2511 + }, + { + "epoch": 6.65, + "learning_rate": 1.699731903485255e-05, + "loss": 0.0002, + "step": 2512 + }, + { + "epoch": 6.65, + "learning_rate": 1.698391420911528e-05, + "loss": 0.0004, + "step": 2513 + }, + { + "epoch": 6.65, + "learning_rate": 1.6970509383378016e-05, + "loss": 0.0017, + "step": 2514 + }, + { + "epoch": 6.65, + "learning_rate": 1.695710455764075e-05, + "loss": 0.4581, + "step": 2515 + }, + { + "epoch": 6.66, + "learning_rate": 1.6943699731903486e-05, + "loss": 0.0005, + "step": 2516 + }, + { + "epoch": 6.66, + "learning_rate": 1.693029490616622e-05, + "loss": 0.0043, + "step": 2517 + }, + { + "epoch": 6.66, + "learning_rate": 1.6916890080428953e-05, + "loss": 0.0005, + "step": 2518 + }, + { + "epoch": 6.66, + "learning_rate": 1.6903485254691688e-05, + "loss": 0.0002, + "step": 2519 + }, + { + "epoch": 6.67, + "learning_rate": 1.6890080428954423e-05, + "loss": 0.0005, + "step": 2520 + }, + { + "epoch": 6.67, + "learning_rate": 1.687667560321716e-05, + "loss": 0.0037, + "step": 2521 + }, + { + "epoch": 6.67, + "learning_rate": 1.6863270777479893e-05, + "loss": 0.0003, + "step": 2522 + }, + { + "epoch": 6.67, + "learning_rate": 1.684986595174263e-05, + "loss": 0.0019, + "step": 2523 + }, + { + "epoch": 6.68, + "learning_rate": 1.6836461126005364e-05, + "loss": 0.0023, + "step": 2524 + }, + { + "epoch": 6.68, + "learning_rate": 1.68230563002681e-05, + "loss": 0.0004, + "step": 2525 + }, + { + "epoch": 6.68, + "learning_rate": 1.6809651474530834e-05, + "loss": 0.3317, + "step": 2526 + }, + { + "epoch": 6.69, + "learning_rate": 1.679624664879357e-05, + "loss": 0.0004, + "step": 2527 + }, + { + "epoch": 6.69, + "learning_rate": 1.67828418230563e-05, + "loss": 0.002, + "step": 2528 + }, + { + "epoch": 6.69, + "learning_rate": 1.6769436997319036e-05, + "loss": 0.0003, + "step": 2529 + }, + { + "epoch": 6.69, + "learning_rate": 1.675603217158177e-05, + "loss": 0.0007, + "step": 2530 + }, + { + "epoch": 6.7, + "learning_rate": 1.6742627345844506e-05, + "loss": 0.0012, + "step": 2531 + }, + { + "epoch": 6.7, + "learning_rate": 1.672922252010724e-05, + "loss": 0.024, + "step": 2532 + }, + { + "epoch": 6.7, + "learning_rate": 1.6715817694369973e-05, + "loss": 0.0041, + "step": 2533 + }, + { + "epoch": 6.7, + "learning_rate": 1.6702412868632708e-05, + "loss": 0.1821, + "step": 2534 + }, + { + "epoch": 6.71, + "learning_rate": 1.6689008042895443e-05, + "loss": 0.0004, + "step": 2535 + }, + { + "epoch": 6.71, + "learning_rate": 1.667560321715818e-05, + "loss": 0.0293, + "step": 2536 + }, + { + "epoch": 6.71, + "learning_rate": 1.6662198391420914e-05, + "loss": 0.0005, + "step": 2537 + }, + { + "epoch": 6.71, + "learning_rate": 1.6648793565683645e-05, + "loss": 0.0043, + "step": 2538 + }, + { + "epoch": 6.72, + "learning_rate": 1.663538873994638e-05, + "loss": 0.0112, + "step": 2539 + }, + { + "epoch": 6.72, + "learning_rate": 1.6621983914209116e-05, + "loss": 0.0003, + "step": 2540 + }, + { + "epoch": 6.72, + "learning_rate": 1.660857908847185e-05, + "loss": 0.0005, + "step": 2541 + }, + { + "epoch": 6.72, + "learning_rate": 1.6595174262734586e-05, + "loss": 0.4617, + "step": 2542 + }, + { + "epoch": 6.73, + "learning_rate": 1.6581769436997318e-05, + "loss": 0.0004, + "step": 2543 + }, + { + "epoch": 6.73, + "learning_rate": 1.6568364611260053e-05, + "loss": 0.1932, + "step": 2544 + }, + { + "epoch": 6.73, + "learning_rate": 1.6554959785522788e-05, + "loss": 0.0004, + "step": 2545 + }, + { + "epoch": 6.74, + "learning_rate": 1.6541554959785523e-05, + "loss": 0.0785, + "step": 2546 + }, + { + "epoch": 6.74, + "learning_rate": 1.6528150134048258e-05, + "loss": 0.0882, + "step": 2547 + }, + { + "epoch": 6.74, + "learning_rate": 1.651474530831099e-05, + "loss": 0.3937, + "step": 2548 + }, + { + "epoch": 6.74, + "learning_rate": 1.6501340482573725e-05, + "loss": 0.3401, + "step": 2549 + }, + { + "epoch": 6.75, + "learning_rate": 1.648793565683646e-05, + "loss": 0.026, + "step": 2550 + }, + { + "epoch": 6.75, + "learning_rate": 1.6474530831099195e-05, + "loss": 0.1959, + "step": 2551 + }, + { + "epoch": 6.75, + "learning_rate": 1.646112600536193e-05, + "loss": 0.0022, + "step": 2552 + }, + { + "epoch": 6.75, + "learning_rate": 1.6447721179624666e-05, + "loss": 0.0012, + "step": 2553 + }, + { + "epoch": 6.76, + "learning_rate": 1.64343163538874e-05, + "loss": 0.0064, + "step": 2554 + }, + { + "epoch": 6.76, + "learning_rate": 1.6420911528150136e-05, + "loss": 0.0105, + "step": 2555 + }, + { + "epoch": 6.76, + "learning_rate": 1.640750670241287e-05, + "loss": 0.0008, + "step": 2556 + }, + { + "epoch": 6.76, + "learning_rate": 1.6394101876675606e-05, + "loss": 0.0339, + "step": 2557 + }, + { + "epoch": 6.77, + "learning_rate": 1.6380697050938338e-05, + "loss": 0.1458, + "step": 2558 + }, + { + "epoch": 6.77, + "learning_rate": 1.6367292225201073e-05, + "loss": 0.2526, + "step": 2559 + }, + { + "epoch": 6.77, + "learning_rate": 1.6353887399463808e-05, + "loss": 0.038, + "step": 2560 + }, + { + "epoch": 6.78, + "learning_rate": 1.6340482573726543e-05, + "loss": 0.174, + "step": 2561 + }, + { + "epoch": 6.78, + "learning_rate": 1.632707774798928e-05, + "loss": 0.1936, + "step": 2562 + }, + { + "epoch": 6.78, + "learning_rate": 1.631367292225201e-05, + "loss": 0.0014, + "step": 2563 + }, + { + "epoch": 6.78, + "learning_rate": 1.6300268096514745e-05, + "loss": 0.0008, + "step": 2564 + }, + { + "epoch": 6.79, + "learning_rate": 1.628686327077748e-05, + "loss": 0.0616, + "step": 2565 + }, + { + "epoch": 6.79, + "learning_rate": 1.6273458445040215e-05, + "loss": 0.0029, + "step": 2566 + }, + { + "epoch": 6.79, + "learning_rate": 1.626005361930295e-05, + "loss": 0.1301, + "step": 2567 + }, + { + "epoch": 6.79, + "learning_rate": 1.6246648793565686e-05, + "loss": 0.0076, + "step": 2568 + }, + { + "epoch": 6.8, + "learning_rate": 1.6233243967828417e-05, + "loss": 0.038, + "step": 2569 + }, + { + "epoch": 6.8, + "learning_rate": 1.6219839142091153e-05, + "loss": 0.0376, + "step": 2570 + }, + { + "epoch": 6.8, + "learning_rate": 1.6206434316353888e-05, + "loss": 0.0007, + "step": 2571 + }, + { + "epoch": 6.8, + "learning_rate": 1.6193029490616623e-05, + "loss": 0.016, + "step": 2572 + }, + { + "epoch": 6.81, + "learning_rate": 1.6179624664879358e-05, + "loss": 0.0005, + "step": 2573 + }, + { + "epoch": 6.81, + "learning_rate": 1.616621983914209e-05, + "loss": 0.0024, + "step": 2574 + }, + { + "epoch": 6.81, + "learning_rate": 1.6152815013404825e-05, + "loss": 0.001, + "step": 2575 + }, + { + "epoch": 6.81, + "learning_rate": 1.613941018766756e-05, + "loss": 0.0009, + "step": 2576 + }, + { + "epoch": 6.82, + "learning_rate": 1.6126005361930295e-05, + "loss": 0.1889, + "step": 2577 + }, + { + "epoch": 6.82, + "learning_rate": 1.611260053619303e-05, + "loss": 0.5094, + "step": 2578 + }, + { + "epoch": 6.82, + "learning_rate": 1.6099195710455765e-05, + "loss": 0.017, + "step": 2579 + }, + { + "epoch": 6.83, + "learning_rate": 1.60857908847185e-05, + "loss": 0.029, + "step": 2580 + }, + { + "epoch": 6.83, + "learning_rate": 1.6072386058981236e-05, + "loss": 0.1249, + "step": 2581 + }, + { + "epoch": 6.83, + "learning_rate": 1.605898123324397e-05, + "loss": 0.2531, + "step": 2582 + }, + { + "epoch": 6.83, + "learning_rate": 1.6045576407506706e-05, + "loss": 0.0006, + "step": 2583 + }, + { + "epoch": 6.84, + "learning_rate": 1.6032171581769438e-05, + "loss": 0.0624, + "step": 2584 + }, + { + "epoch": 6.84, + "learning_rate": 1.6018766756032173e-05, + "loss": 0.0254, + "step": 2585 + }, + { + "epoch": 6.84, + "learning_rate": 1.6005361930294908e-05, + "loss": 0.0034, + "step": 2586 + }, + { + "epoch": 6.84, + "learning_rate": 1.5991957104557643e-05, + "loss": 0.0204, + "step": 2587 + }, + { + "epoch": 6.85, + "learning_rate": 1.5978552278820378e-05, + "loss": 0.002, + "step": 2588 + }, + { + "epoch": 6.85, + "learning_rate": 1.596514745308311e-05, + "loss": 0.0015, + "step": 2589 + }, + { + "epoch": 6.85, + "learning_rate": 1.5951742627345845e-05, + "loss": 0.0465, + "step": 2590 + }, + { + "epoch": 6.85, + "learning_rate": 1.593833780160858e-05, + "loss": 0.1892, + "step": 2591 + }, + { + "epoch": 6.86, + "learning_rate": 1.5924932975871315e-05, + "loss": 0.0932, + "step": 2592 + }, + { + "epoch": 6.86, + "learning_rate": 1.591152815013405e-05, + "loss": 0.0015, + "step": 2593 + }, + { + "epoch": 6.86, + "learning_rate": 1.5898123324396782e-05, + "loss": 0.0062, + "step": 2594 + }, + { + "epoch": 6.87, + "learning_rate": 1.5884718498659517e-05, + "loss": 0.0731, + "step": 2595 + }, + { + "epoch": 6.87, + "learning_rate": 1.5871313672922252e-05, + "loss": 0.002, + "step": 2596 + }, + { + "epoch": 6.87, + "learning_rate": 1.5857908847184988e-05, + "loss": 0.0484, + "step": 2597 + }, + { + "epoch": 6.87, + "learning_rate": 1.5844504021447723e-05, + "loss": 0.0082, + "step": 2598 + }, + { + "epoch": 6.88, + "learning_rate": 1.5831099195710454e-05, + "loss": 0.0213, + "step": 2599 + }, + { + "epoch": 6.88, + "learning_rate": 1.581769436997319e-05, + "loss": 0.1612, + "step": 2600 + }, + { + "epoch": 6.88, + "learning_rate": 1.5804289544235925e-05, + "loss": 0.184, + "step": 2601 + }, + { + "epoch": 6.88, + "learning_rate": 1.579088471849866e-05, + "loss": 0.1413, + "step": 2602 + }, + { + "epoch": 6.89, + "learning_rate": 1.5777479892761395e-05, + "loss": 0.0019, + "step": 2603 + }, + { + "epoch": 6.89, + "learning_rate": 1.5764075067024127e-05, + "loss": 0.0047, + "step": 2604 + }, + { + "epoch": 6.89, + "learning_rate": 1.5750670241286862e-05, + "loss": 0.0409, + "step": 2605 + }, + { + "epoch": 6.89, + "learning_rate": 1.5737265415549597e-05, + "loss": 0.0379, + "step": 2606 + }, + { + "epoch": 6.9, + "learning_rate": 1.5723860589812332e-05, + "loss": 0.0005, + "step": 2607 + }, + { + "epoch": 6.9, + "learning_rate": 1.5710455764075067e-05, + "loss": 0.0332, + "step": 2608 + }, + { + "epoch": 6.9, + "learning_rate": 1.5697050938337802e-05, + "loss": 0.0543, + "step": 2609 + }, + { + "epoch": 6.9, + "learning_rate": 1.5683646112600538e-05, + "loss": 0.0009, + "step": 2610 + }, + { + "epoch": 6.91, + "learning_rate": 1.5670241286863273e-05, + "loss": 0.016, + "step": 2611 + }, + { + "epoch": 6.91, + "learning_rate": 1.5656836461126008e-05, + "loss": 0.0035, + "step": 2612 + }, + { + "epoch": 6.91, + "learning_rate": 1.5643431635388743e-05, + "loss": 0.0713, + "step": 2613 + }, + { + "epoch": 6.92, + "learning_rate": 1.5630026809651475e-05, + "loss": 0.0022, + "step": 2614 + }, + { + "epoch": 6.92, + "learning_rate": 1.561662198391421e-05, + "loss": 0.0005, + "step": 2615 + }, + { + "epoch": 6.92, + "learning_rate": 1.5603217158176945e-05, + "loss": 0.0009, + "step": 2616 + }, + { + "epoch": 6.92, + "learning_rate": 1.558981233243968e-05, + "loss": 0.0016, + "step": 2617 + }, + { + "epoch": 6.93, + "learning_rate": 1.5576407506702415e-05, + "loss": 0.0017, + "step": 2618 + }, + { + "epoch": 6.93, + "learning_rate": 1.5563002680965147e-05, + "loss": 0.0094, + "step": 2619 + }, + { + "epoch": 6.93, + "learning_rate": 1.5549597855227882e-05, + "loss": 0.016, + "step": 2620 + }, + { + "epoch": 6.93, + "learning_rate": 1.5536193029490617e-05, + "loss": 0.0005, + "step": 2621 + }, + { + "epoch": 6.94, + "learning_rate": 1.5522788203753352e-05, + "loss": 0.0549, + "step": 2622 + }, + { + "epoch": 6.94, + "learning_rate": 1.5509383378016087e-05, + "loss": 0.3791, + "step": 2623 + }, + { + "epoch": 6.94, + "learning_rate": 1.549597855227882e-05, + "loss": 0.0003, + "step": 2624 + }, + { + "epoch": 6.94, + "learning_rate": 1.5482573726541554e-05, + "loss": 0.0774, + "step": 2625 + }, + { + "epoch": 6.95, + "learning_rate": 1.546916890080429e-05, + "loss": 0.0879, + "step": 2626 + }, + { + "epoch": 6.95, + "learning_rate": 1.5455764075067025e-05, + "loss": 0.0007, + "step": 2627 + }, + { + "epoch": 6.95, + "learning_rate": 1.544235924932976e-05, + "loss": 0.0047, + "step": 2628 + }, + { + "epoch": 6.96, + "learning_rate": 1.542895442359249e-05, + "loss": 0.0011, + "step": 2629 + }, + { + "epoch": 6.96, + "learning_rate": 1.5415549597855227e-05, + "loss": 0.0004, + "step": 2630 + }, + { + "epoch": 6.96, + "learning_rate": 1.5402144772117962e-05, + "loss": 0.4962, + "step": 2631 + }, + { + "epoch": 6.96, + "learning_rate": 1.5388739946380697e-05, + "loss": 0.1182, + "step": 2632 + }, + { + "epoch": 6.97, + "learning_rate": 1.5375335120643432e-05, + "loss": 0.0269, + "step": 2633 + }, + { + "epoch": 6.97, + "learning_rate": 1.5361930294906167e-05, + "loss": 0.0157, + "step": 2634 + }, + { + "epoch": 6.97, + "learning_rate": 1.5348525469168902e-05, + "loss": 0.0022, + "step": 2635 + }, + { + "epoch": 6.97, + "learning_rate": 1.5335120643431637e-05, + "loss": 0.3299, + "step": 2636 + }, + { + "epoch": 6.98, + "learning_rate": 1.5321715817694372e-05, + "loss": 0.0529, + "step": 2637 + }, + { + "epoch": 6.98, + "learning_rate": 1.5308310991957108e-05, + "loss": 0.1396, + "step": 2638 + }, + { + "epoch": 6.98, + "learning_rate": 1.5294906166219843e-05, + "loss": 0.0008, + "step": 2639 + }, + { + "epoch": 6.98, + "learning_rate": 1.5281501340482574e-05, + "loss": 0.0086, + "step": 2640 + }, + { + "epoch": 6.99, + "learning_rate": 1.526809651474531e-05, + "loss": 0.0036, + "step": 2641 + }, + { + "epoch": 6.99, + "learning_rate": 1.5254691689008043e-05, + "loss": 0.0149, + "step": 2642 + }, + { + "epoch": 6.99, + "learning_rate": 1.5241286863270778e-05, + "loss": 0.0011, + "step": 2643 + }, + { + "epoch": 6.99, + "learning_rate": 1.5227882037533513e-05, + "loss": 0.0003, + "step": 2644 + }, + { + "epoch": 7.0, + "learning_rate": 1.5214477211796247e-05, + "loss": 0.0064, + "step": 2645 + }, + { + "epoch": 7.0, + "learning_rate": 1.5201072386058982e-05, + "loss": 0.0281, + "step": 2646 + }, + { + "epoch": 7.0, + "eval_f1": 0.7856000000000002, + "eval_loss": 1.1071351766586304, + "eval_runtime": 1.8613, + "eval_samples_per_second": 812.89, + "eval_steps_per_second": 51.041, + "step": 2646 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 678005402777280.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2646/training_args.bin b/checkpoint-2646/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-2646/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-3024/config.json b/checkpoint-3024/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-3024/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-3024/optimizer.pt b/checkpoint-3024/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..226ce6c12b976df15271e4f341882b983ee0b3b1 --- /dev/null +++ b/checkpoint-3024/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4adf5995dfff43bbcf0456a8150fb2939f3bf8c44115a1f81d62b513c5962006 +size 526325317 diff --git a/checkpoint-3024/pytorch_model.bin b/checkpoint-3024/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8622c9af84c7c31b4bd544523c2fbc916e3e6b8 --- /dev/null +++ b/checkpoint-3024/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d502e804f64e2acd194387aa5fd310131d70f8506192f89f513436dc2de8916 +size 263167661 diff --git a/checkpoint-3024/rng_state.pth b/checkpoint-3024/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..66ba450a703e32aa2c68dea1a073c9ddf633830b --- /dev/null +++ b/checkpoint-3024/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab62b0bebbe356352a4069309bea70f50837588122439261bc3e8a0e6ce05c23 +size 14575 diff --git a/checkpoint-3024/scheduler.pt b/checkpoint-3024/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bd49da2d7ab4538e5a4a020bd90b2df81b1a947 --- /dev/null +++ b/checkpoint-3024/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9daf155d8896ce2ce99e58ebee9b511a9716e2308a91ca6e9dfd99c08653734 +size 627 diff --git a/checkpoint-3024/trainer_state.json b/checkpoint-3024/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d1236db0402d1891af625d808a09dbe31db0016f --- /dev/null +++ b/checkpoint-3024/trainer_state.json @@ -0,0 +1,18232 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 8.0, + "global_step": 3024, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + }, + { + "epoch": 3.0, + "learning_rate": 3.545576407506703e-05, + "loss": 0.0111, + "step": 1135 + }, + { + "epoch": 3.01, + "learning_rate": 3.544235924932976e-05, + "loss": 0.0166, + "step": 1136 + }, + { + "epoch": 3.01, + "learning_rate": 3.54289544235925e-05, + "loss": 0.2159, + "step": 1137 + }, + { + "epoch": 3.01, + "learning_rate": 3.541554959785523e-05, + "loss": 0.0096, + "step": 1138 + }, + { + "epoch": 3.01, + "learning_rate": 3.540214477211796e-05, + "loss": 0.1352, + "step": 1139 + }, + { + "epoch": 3.02, + "learning_rate": 3.53887399463807e-05, + "loss": 0.0195, + "step": 1140 + }, + { + "epoch": 3.02, + "learning_rate": 3.5375335120643434e-05, + "loss": 0.1579, + "step": 1141 + }, + { + "epoch": 3.02, + "learning_rate": 3.536193029490617e-05, + "loss": 0.0078, + "step": 1142 + }, + { + "epoch": 3.02, + "learning_rate": 3.5348525469168904e-05, + "loss": 0.0111, + "step": 1143 + }, + { + "epoch": 3.03, + "learning_rate": 3.533512064343163e-05, + "loss": 0.2457, + "step": 1144 + }, + { + "epoch": 3.03, + "learning_rate": 3.5321715817694374e-05, + "loss": 0.014, + "step": 1145 + }, + { + "epoch": 3.03, + "learning_rate": 3.53083109919571e-05, + "loss": 0.2021, + "step": 1146 + }, + { + "epoch": 3.03, + "learning_rate": 3.5294906166219844e-05, + "loss": 0.5334, + "step": 1147 + }, + { + "epoch": 3.04, + "learning_rate": 3.528150134048257e-05, + "loss": 0.0116, + "step": 1148 + }, + { + "epoch": 3.04, + "learning_rate": 3.526809651474531e-05, + "loss": 0.0099, + "step": 1149 + }, + { + "epoch": 3.04, + "learning_rate": 3.525469168900804e-05, + "loss": 0.2102, + "step": 1150 + }, + { + "epoch": 3.04, + "learning_rate": 3.524128686327078e-05, + "loss": 0.0093, + "step": 1151 + }, + { + "epoch": 3.05, + "learning_rate": 3.522788203753351e-05, + "loss": 0.0112, + "step": 1152 + }, + { + "epoch": 3.05, + "learning_rate": 3.521447721179625e-05, + "loss": 0.1761, + "step": 1153 + }, + { + "epoch": 3.05, + "learning_rate": 3.5201072386058984e-05, + "loss": 0.1608, + "step": 1154 + }, + { + "epoch": 3.06, + "learning_rate": 3.518766756032172e-05, + "loss": 0.2883, + "step": 1155 + }, + { + "epoch": 3.06, + "learning_rate": 3.5174262734584454e-05, + "loss": 0.0304, + "step": 1156 + }, + { + "epoch": 3.06, + "learning_rate": 3.516085790884719e-05, + "loss": 0.0623, + "step": 1157 + }, + { + "epoch": 3.06, + "learning_rate": 3.5147453083109924e-05, + "loss": 0.1824, + "step": 1158 + }, + { + "epoch": 3.07, + "learning_rate": 3.513404825737265e-05, + "loss": 0.2527, + "step": 1159 + }, + { + "epoch": 3.07, + "learning_rate": 3.5120643431635394e-05, + "loss": 0.0877, + "step": 1160 + }, + { + "epoch": 3.07, + "learning_rate": 3.510723860589812e-05, + "loss": 0.2735, + "step": 1161 + }, + { + "epoch": 3.07, + "learning_rate": 3.5093833780160865e-05, + "loss": 0.1126, + "step": 1162 + }, + { + "epoch": 3.08, + "learning_rate": 3.508042895442359e-05, + "loss": 0.2498, + "step": 1163 + }, + { + "epoch": 3.08, + "learning_rate": 3.506702412868633e-05, + "loss": 0.022, + "step": 1164 + }, + { + "epoch": 3.08, + "learning_rate": 3.505361930294906e-05, + "loss": 0.2768, + "step": 1165 + }, + { + "epoch": 3.08, + "learning_rate": 3.50402144772118e-05, + "loss": 0.0429, + "step": 1166 + }, + { + "epoch": 3.09, + "learning_rate": 3.5026809651474533e-05, + "loss": 0.0198, + "step": 1167 + }, + { + "epoch": 3.09, + "learning_rate": 3.501340482573727e-05, + "loss": 0.0097, + "step": 1168 + }, + { + "epoch": 3.09, + "learning_rate": 3.5e-05, + "loss": 0.0276, + "step": 1169 + }, + { + "epoch": 3.1, + "learning_rate": 3.498659517426274e-05, + "loss": 0.2276, + "step": 1170 + }, + { + "epoch": 3.1, + "learning_rate": 3.497319034852547e-05, + "loss": 0.0461, + "step": 1171 + }, + { + "epoch": 3.1, + "learning_rate": 3.495978552278821e-05, + "loss": 0.0103, + "step": 1172 + }, + { + "epoch": 3.1, + "learning_rate": 3.494638069705094e-05, + "loss": 0.1455, + "step": 1173 + }, + { + "epoch": 3.11, + "learning_rate": 3.493297587131367e-05, + "loss": 0.0865, + "step": 1174 + }, + { + "epoch": 3.11, + "learning_rate": 3.491957104557641e-05, + "loss": 0.3226, + "step": 1175 + }, + { + "epoch": 3.11, + "learning_rate": 3.490616621983914e-05, + "loss": 0.1744, + "step": 1176 + }, + { + "epoch": 3.11, + "learning_rate": 3.489276139410188e-05, + "loss": 0.0148, + "step": 1177 + }, + { + "epoch": 3.12, + "learning_rate": 3.487935656836461e-05, + "loss": 0.2582, + "step": 1178 + }, + { + "epoch": 3.12, + "learning_rate": 3.486595174262735e-05, + "loss": 0.2782, + "step": 1179 + }, + { + "epoch": 3.12, + "learning_rate": 3.485254691689008e-05, + "loss": 0.143, + "step": 1180 + }, + { + "epoch": 3.12, + "learning_rate": 3.483914209115282e-05, + "loss": 0.0853, + "step": 1181 + }, + { + "epoch": 3.13, + "learning_rate": 3.4825737265415554e-05, + "loss": 0.1361, + "step": 1182 + }, + { + "epoch": 3.13, + "learning_rate": 3.481233243967829e-05, + "loss": 0.0883, + "step": 1183 + }, + { + "epoch": 3.13, + "learning_rate": 3.479892761394102e-05, + "loss": 0.0116, + "step": 1184 + }, + { + "epoch": 3.13, + "learning_rate": 3.478552278820376e-05, + "loss": 0.0531, + "step": 1185 + }, + { + "epoch": 3.14, + "learning_rate": 3.477211796246649e-05, + "loss": 0.0184, + "step": 1186 + }, + { + "epoch": 3.14, + "learning_rate": 3.475871313672923e-05, + "loss": 0.1601, + "step": 1187 + }, + { + "epoch": 3.14, + "learning_rate": 3.474530831099196e-05, + "loss": 0.007, + "step": 1188 + }, + { + "epoch": 3.15, + "learning_rate": 3.473190348525469e-05, + "loss": 0.0101, + "step": 1189 + }, + { + "epoch": 3.15, + "learning_rate": 3.471849865951743e-05, + "loss": 0.2385, + "step": 1190 + }, + { + "epoch": 3.15, + "learning_rate": 3.470509383378016e-05, + "loss": 0.0075, + "step": 1191 + }, + { + "epoch": 3.15, + "learning_rate": 3.46916890080429e-05, + "loss": 0.0919, + "step": 1192 + }, + { + "epoch": 3.16, + "learning_rate": 3.467828418230563e-05, + "loss": 0.0162, + "step": 1193 + }, + { + "epoch": 3.16, + "learning_rate": 3.466487935656836e-05, + "loss": 0.2239, + "step": 1194 + }, + { + "epoch": 3.16, + "learning_rate": 3.4651474530831104e-05, + "loss": 0.5757, + "step": 1195 + }, + { + "epoch": 3.16, + "learning_rate": 3.463806970509383e-05, + "loss": 0.0774, + "step": 1196 + }, + { + "epoch": 3.17, + "learning_rate": 3.4624664879356574e-05, + "loss": 0.2124, + "step": 1197 + }, + { + "epoch": 3.17, + "learning_rate": 3.46112600536193e-05, + "loss": 0.0107, + "step": 1198 + }, + { + "epoch": 3.17, + "learning_rate": 3.459785522788204e-05, + "loss": 0.3179, + "step": 1199 + }, + { + "epoch": 3.17, + "learning_rate": 3.458445040214477e-05, + "loss": 0.0138, + "step": 1200 + }, + { + "epoch": 3.18, + "learning_rate": 3.457104557640751e-05, + "loss": 0.0094, + "step": 1201 + }, + { + "epoch": 3.18, + "learning_rate": 3.455764075067024e-05, + "loss": 0.0039, + "step": 1202 + }, + { + "epoch": 3.18, + "learning_rate": 3.454423592493298e-05, + "loss": 0.0745, + "step": 1203 + }, + { + "epoch": 3.19, + "learning_rate": 3.453083109919571e-05, + "loss": 0.0387, + "step": 1204 + }, + { + "epoch": 3.19, + "learning_rate": 3.451742627345845e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 3.19, + "learning_rate": 3.450402144772118e-05, + "loss": 0.1299, + "step": 1206 + }, + { + "epoch": 3.19, + "learning_rate": 3.449061662198392e-05, + "loss": 0.2821, + "step": 1207 + }, + { + "epoch": 3.2, + "learning_rate": 3.4477211796246653e-05, + "loss": 0.2236, + "step": 1208 + }, + { + "epoch": 3.2, + "learning_rate": 3.446380697050938e-05, + "loss": 0.1436, + "step": 1209 + }, + { + "epoch": 3.2, + "learning_rate": 3.4450402144772124e-05, + "loss": 0.1504, + "step": 1210 + }, + { + "epoch": 3.2, + "learning_rate": 3.443699731903485e-05, + "loss": 0.0415, + "step": 1211 + }, + { + "epoch": 3.21, + "learning_rate": 3.4423592493297594e-05, + "loss": 0.023, + "step": 1212 + }, + { + "epoch": 3.21, + "learning_rate": 3.441018766756032e-05, + "loss": 0.2128, + "step": 1213 + }, + { + "epoch": 3.21, + "learning_rate": 3.439678284182306e-05, + "loss": 0.0066, + "step": 1214 + }, + { + "epoch": 3.21, + "learning_rate": 3.438337801608579e-05, + "loss": 0.4345, + "step": 1215 + }, + { + "epoch": 3.22, + "learning_rate": 3.436997319034853e-05, + "loss": 0.0214, + "step": 1216 + }, + { + "epoch": 3.22, + "learning_rate": 3.435656836461126e-05, + "loss": 0.2094, + "step": 1217 + }, + { + "epoch": 3.22, + "learning_rate": 3.4343163538874e-05, + "loss": 0.0822, + "step": 1218 + }, + { + "epoch": 3.22, + "learning_rate": 3.4329758713136726e-05, + "loss": 0.1153, + "step": 1219 + }, + { + "epoch": 3.23, + "learning_rate": 3.431635388739947e-05, + "loss": 0.0059, + "step": 1220 + }, + { + "epoch": 3.23, + "learning_rate": 3.43029490616622e-05, + "loss": 0.0069, + "step": 1221 + }, + { + "epoch": 3.23, + "learning_rate": 3.428954423592494e-05, + "loss": 0.044, + "step": 1222 + }, + { + "epoch": 3.24, + "learning_rate": 3.427613941018767e-05, + "loss": 0.1975, + "step": 1223 + }, + { + "epoch": 3.24, + "learning_rate": 3.42627345844504e-05, + "loss": 0.3294, + "step": 1224 + }, + { + "epoch": 3.24, + "learning_rate": 3.424932975871314e-05, + "loss": 0.026, + "step": 1225 + }, + { + "epoch": 3.24, + "learning_rate": 3.423592493297587e-05, + "loss": 0.2666, + "step": 1226 + }, + { + "epoch": 3.25, + "learning_rate": 3.422252010723861e-05, + "loss": 0.0628, + "step": 1227 + }, + { + "epoch": 3.25, + "learning_rate": 3.420911528150134e-05, + "loss": 0.0068, + "step": 1228 + }, + { + "epoch": 3.25, + "learning_rate": 3.419571045576407e-05, + "loss": 0.0144, + "step": 1229 + }, + { + "epoch": 3.25, + "learning_rate": 3.418230563002681e-05, + "loss": 0.0029, + "step": 1230 + }, + { + "epoch": 3.26, + "learning_rate": 3.416890080428954e-05, + "loss": 0.606, + "step": 1231 + }, + { + "epoch": 3.26, + "learning_rate": 3.415549597855228e-05, + "loss": 0.2162, + "step": 1232 + }, + { + "epoch": 3.26, + "learning_rate": 3.414209115281501e-05, + "loss": 0.146, + "step": 1233 + }, + { + "epoch": 3.26, + "learning_rate": 3.412868632707775e-05, + "loss": 0.3649, + "step": 1234 + }, + { + "epoch": 3.27, + "learning_rate": 3.411528150134048e-05, + "loss": 0.0062, + "step": 1235 + }, + { + "epoch": 3.27, + "learning_rate": 3.410187667560322e-05, + "loss": 0.4097, + "step": 1236 + }, + { + "epoch": 3.27, + "learning_rate": 3.408847184986595e-05, + "loss": 0.5354, + "step": 1237 + }, + { + "epoch": 3.28, + "learning_rate": 3.407506702412869e-05, + "loss": 0.6222, + "step": 1238 + }, + { + "epoch": 3.28, + "learning_rate": 3.406166219839142e-05, + "loss": 0.0023, + "step": 1239 + }, + { + "epoch": 3.28, + "learning_rate": 3.404825737265416e-05, + "loss": 0.0247, + "step": 1240 + }, + { + "epoch": 3.28, + "learning_rate": 3.403485254691689e-05, + "loss": 0.0051, + "step": 1241 + }, + { + "epoch": 3.29, + "learning_rate": 3.402144772117963e-05, + "loss": 0.2504, + "step": 1242 + }, + { + "epoch": 3.29, + "learning_rate": 3.400804289544236e-05, + "loss": 0.0195, + "step": 1243 + }, + { + "epoch": 3.29, + "learning_rate": 3.39946380697051e-05, + "loss": 0.3706, + "step": 1244 + }, + { + "epoch": 3.29, + "learning_rate": 3.398123324396783e-05, + "loss": 0.0174, + "step": 1245 + }, + { + "epoch": 3.3, + "learning_rate": 3.396782841823056e-05, + "loss": 0.0068, + "step": 1246 + }, + { + "epoch": 3.3, + "learning_rate": 3.39544235924933e-05, + "loss": 0.3938, + "step": 1247 + }, + { + "epoch": 3.3, + "learning_rate": 3.394101876675603e-05, + "loss": 0.0114, + "step": 1248 + }, + { + "epoch": 3.3, + "learning_rate": 3.3927613941018774e-05, + "loss": 0.0088, + "step": 1249 + }, + { + "epoch": 3.31, + "learning_rate": 3.39142091152815e-05, + "loss": 0.0126, + "step": 1250 + }, + { + "epoch": 3.31, + "learning_rate": 3.390080428954424e-05, + "loss": 0.0091, + "step": 1251 + }, + { + "epoch": 3.31, + "learning_rate": 3.388739946380697e-05, + "loss": 0.0232, + "step": 1252 + }, + { + "epoch": 3.31, + "learning_rate": 3.387399463806971e-05, + "loss": 0.3704, + "step": 1253 + }, + { + "epoch": 3.32, + "learning_rate": 3.386058981233244e-05, + "loss": 0.0112, + "step": 1254 + }, + { + "epoch": 3.32, + "learning_rate": 3.384718498659518e-05, + "loss": 0.1709, + "step": 1255 + }, + { + "epoch": 3.32, + "learning_rate": 3.3833780160857906e-05, + "loss": 0.0109, + "step": 1256 + }, + { + "epoch": 3.33, + "learning_rate": 3.382037533512065e-05, + "loss": 0.2874, + "step": 1257 + }, + { + "epoch": 3.33, + "learning_rate": 3.3806970509383376e-05, + "loss": 0.024, + "step": 1258 + }, + { + "epoch": 3.33, + "learning_rate": 3.379356568364612e-05, + "loss": 0.0131, + "step": 1259 + }, + { + "epoch": 3.33, + "learning_rate": 3.3780160857908846e-05, + "loss": 0.2076, + "step": 1260 + }, + { + "epoch": 3.34, + "learning_rate": 3.376675603217158e-05, + "loss": 0.0083, + "step": 1261 + }, + { + "epoch": 3.34, + "learning_rate": 3.375335120643432e-05, + "loss": 0.0234, + "step": 1262 + }, + { + "epoch": 3.34, + "learning_rate": 3.373994638069705e-05, + "loss": 0.0066, + "step": 1263 + }, + { + "epoch": 3.34, + "learning_rate": 3.372654155495979e-05, + "loss": 0.3983, + "step": 1264 + }, + { + "epoch": 3.35, + "learning_rate": 3.371313672922252e-05, + "loss": 0.0648, + "step": 1265 + }, + { + "epoch": 3.35, + "learning_rate": 3.369973190348526e-05, + "loss": 0.006, + "step": 1266 + }, + { + "epoch": 3.35, + "learning_rate": 3.368632707774799e-05, + "loss": 0.0807, + "step": 1267 + }, + { + "epoch": 3.35, + "learning_rate": 3.367292225201073e-05, + "loss": 0.0975, + "step": 1268 + }, + { + "epoch": 3.36, + "learning_rate": 3.365951742627346e-05, + "loss": 0.2934, + "step": 1269 + }, + { + "epoch": 3.36, + "learning_rate": 3.36461126005362e-05, + "loss": 0.0869, + "step": 1270 + }, + { + "epoch": 3.36, + "learning_rate": 3.3632707774798926e-05, + "loss": 0.1374, + "step": 1271 + }, + { + "epoch": 3.37, + "learning_rate": 3.361930294906167e-05, + "loss": 0.3314, + "step": 1272 + }, + { + "epoch": 3.37, + "learning_rate": 3.3605898123324396e-05, + "loss": 0.0045, + "step": 1273 + }, + { + "epoch": 3.37, + "learning_rate": 3.359249329758714e-05, + "loss": 0.0536, + "step": 1274 + }, + { + "epoch": 3.37, + "learning_rate": 3.3579088471849867e-05, + "loss": 0.0564, + "step": 1275 + }, + { + "epoch": 3.38, + "learning_rate": 3.35656836461126e-05, + "loss": 0.0689, + "step": 1276 + }, + { + "epoch": 3.38, + "learning_rate": 3.355227882037534e-05, + "loss": 0.5177, + "step": 1277 + }, + { + "epoch": 3.38, + "learning_rate": 3.353887399463807e-05, + "loss": 0.0689, + "step": 1278 + }, + { + "epoch": 3.38, + "learning_rate": 3.352546916890081e-05, + "loss": 0.0664, + "step": 1279 + }, + { + "epoch": 3.39, + "learning_rate": 3.351206434316354e-05, + "loss": 0.0614, + "step": 1280 + }, + { + "epoch": 3.39, + "learning_rate": 3.349865951742627e-05, + "loss": 0.1994, + "step": 1281 + }, + { + "epoch": 3.39, + "learning_rate": 3.348525469168901e-05, + "loss": 0.4769, + "step": 1282 + }, + { + "epoch": 3.39, + "learning_rate": 3.347184986595174e-05, + "loss": 0.1851, + "step": 1283 + }, + { + "epoch": 3.4, + "learning_rate": 3.345844504021448e-05, + "loss": 0.0092, + "step": 1284 + }, + { + "epoch": 3.4, + "learning_rate": 3.344504021447721e-05, + "loss": 0.0052, + "step": 1285 + }, + { + "epoch": 3.4, + "learning_rate": 3.3431635388739946e-05, + "loss": 0.0095, + "step": 1286 + }, + { + "epoch": 3.4, + "learning_rate": 3.341823056300268e-05, + "loss": 0.0242, + "step": 1287 + }, + { + "epoch": 3.41, + "learning_rate": 3.3404825737265416e-05, + "loss": 0.0565, + "step": 1288 + }, + { + "epoch": 3.41, + "learning_rate": 3.339142091152815e-05, + "loss": 0.2645, + "step": 1289 + }, + { + "epoch": 3.41, + "learning_rate": 3.337801608579089e-05, + "loss": 0.0049, + "step": 1290 + }, + { + "epoch": 3.42, + "learning_rate": 3.336461126005362e-05, + "loss": 0.0929, + "step": 1291 + }, + { + "epoch": 3.42, + "learning_rate": 3.335120643431636e-05, + "loss": 0.3968, + "step": 1292 + }, + { + "epoch": 3.42, + "learning_rate": 3.333780160857909e-05, + "loss": 0.033, + "step": 1293 + }, + { + "epoch": 3.42, + "learning_rate": 3.332439678284183e-05, + "loss": 0.007, + "step": 1294 + }, + { + "epoch": 3.43, + "learning_rate": 3.331099195710456e-05, + "loss": 0.2552, + "step": 1295 + }, + { + "epoch": 3.43, + "learning_rate": 3.329758713136729e-05, + "loss": 0.004, + "step": 1296 + }, + { + "epoch": 3.43, + "learning_rate": 3.328418230563003e-05, + "loss": 0.136, + "step": 1297 + }, + { + "epoch": 3.43, + "learning_rate": 3.327077747989276e-05, + "loss": 0.1407, + "step": 1298 + }, + { + "epoch": 3.44, + "learning_rate": 3.32573726541555e-05, + "loss": 0.0354, + "step": 1299 + }, + { + "epoch": 3.44, + "learning_rate": 3.324396782841823e-05, + "loss": 0.6141, + "step": 1300 + }, + { + "epoch": 3.44, + "learning_rate": 3.3230563002680966e-05, + "loss": 0.2544, + "step": 1301 + }, + { + "epoch": 3.44, + "learning_rate": 3.32171581769437e-05, + "loss": 0.0046, + "step": 1302 + }, + { + "epoch": 3.45, + "learning_rate": 3.320375335120644e-05, + "loss": 0.0126, + "step": 1303 + }, + { + "epoch": 3.45, + "learning_rate": 3.319034852546917e-05, + "loss": 0.3506, + "step": 1304 + }, + { + "epoch": 3.45, + "learning_rate": 3.317694369973191e-05, + "loss": 0.3512, + "step": 1305 + }, + { + "epoch": 3.46, + "learning_rate": 3.3163538873994635e-05, + "loss": 0.3675, + "step": 1306 + }, + { + "epoch": 3.46, + "learning_rate": 3.315013404825738e-05, + "loss": 0.1676, + "step": 1307 + }, + { + "epoch": 3.46, + "learning_rate": 3.3136729222520106e-05, + "loss": 0.0307, + "step": 1308 + }, + { + "epoch": 3.46, + "learning_rate": 3.312332439678285e-05, + "loss": 0.0084, + "step": 1309 + }, + { + "epoch": 3.47, + "learning_rate": 3.3109919571045576e-05, + "loss": 0.1977, + "step": 1310 + }, + { + "epoch": 3.47, + "learning_rate": 3.309651474530831e-05, + "loss": 0.1645, + "step": 1311 + }, + { + "epoch": 3.47, + "learning_rate": 3.3083109919571046e-05, + "loss": 0.2579, + "step": 1312 + }, + { + "epoch": 3.47, + "learning_rate": 3.306970509383378e-05, + "loss": 0.1656, + "step": 1313 + }, + { + "epoch": 3.48, + "learning_rate": 3.3056300268096516e-05, + "loss": 0.0168, + "step": 1314 + }, + { + "epoch": 3.48, + "learning_rate": 3.304289544235925e-05, + "loss": 0.0291, + "step": 1315 + }, + { + "epoch": 3.48, + "learning_rate": 3.302949061662198e-05, + "loss": 0.0146, + "step": 1316 + }, + { + "epoch": 3.48, + "learning_rate": 3.301608579088472e-05, + "loss": 0.0037, + "step": 1317 + }, + { + "epoch": 3.49, + "learning_rate": 3.300268096514745e-05, + "loss": 0.0113, + "step": 1318 + }, + { + "epoch": 3.49, + "learning_rate": 3.298927613941019e-05, + "loss": 0.0734, + "step": 1319 + }, + { + "epoch": 3.49, + "learning_rate": 3.297587131367292e-05, + "loss": 0.0292, + "step": 1320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2962466487935655e-05, + "loss": 0.3875, + "step": 1321 + }, + { + "epoch": 3.5, + "learning_rate": 3.294906166219839e-05, + "loss": 0.0138, + "step": 1322 + }, + { + "epoch": 3.5, + "learning_rate": 3.2935656836461126e-05, + "loss": 0.4653, + "step": 1323 + }, + { + "epoch": 3.5, + "learning_rate": 3.292225201072386e-05, + "loss": 0.1864, + "step": 1324 + }, + { + "epoch": 3.51, + "learning_rate": 3.2908847184986596e-05, + "loss": 0.0116, + "step": 1325 + }, + { + "epoch": 3.51, + "learning_rate": 3.289544235924933e-05, + "loss": 0.014, + "step": 1326 + }, + { + "epoch": 3.51, + "learning_rate": 3.2882037533512066e-05, + "loss": 0.3344, + "step": 1327 + }, + { + "epoch": 3.51, + "learning_rate": 3.28686327077748e-05, + "loss": 0.1544, + "step": 1328 + }, + { + "epoch": 3.52, + "learning_rate": 3.2855227882037537e-05, + "loss": 0.0065, + "step": 1329 + }, + { + "epoch": 3.52, + "learning_rate": 3.284182305630027e-05, + "loss": 0.0041, + "step": 1330 + }, + { + "epoch": 3.52, + "learning_rate": 3.2828418230563e-05, + "loss": 0.0044, + "step": 1331 + }, + { + "epoch": 3.52, + "learning_rate": 3.281501340482574e-05, + "loss": 0.1808, + "step": 1332 + }, + { + "epoch": 3.53, + "learning_rate": 3.280160857908847e-05, + "loss": 0.0521, + "step": 1333 + }, + { + "epoch": 3.53, + "learning_rate": 3.278820375335121e-05, + "loss": 0.3505, + "step": 1334 + }, + { + "epoch": 3.53, + "learning_rate": 3.277479892761394e-05, + "loss": 0.2032, + "step": 1335 + }, + { + "epoch": 3.53, + "learning_rate": 3.2761394101876676e-05, + "loss": 0.004, + "step": 1336 + }, + { + "epoch": 3.54, + "learning_rate": 3.274798927613941e-05, + "loss": 0.0343, + "step": 1337 + }, + { + "epoch": 3.54, + "learning_rate": 3.2734584450402146e-05, + "loss": 0.278, + "step": 1338 + }, + { + "epoch": 3.54, + "learning_rate": 3.272117962466488e-05, + "loss": 0.0056, + "step": 1339 + }, + { + "epoch": 3.54, + "learning_rate": 3.2707774798927616e-05, + "loss": 0.1673, + "step": 1340 + }, + { + "epoch": 3.55, + "learning_rate": 3.2694369973190345e-05, + "loss": 0.0092, + "step": 1341 + }, + { + "epoch": 3.55, + "learning_rate": 3.2680965147453086e-05, + "loss": 0.0058, + "step": 1342 + }, + { + "epoch": 3.55, + "learning_rate": 3.2667560321715815e-05, + "loss": 0.097, + "step": 1343 + }, + { + "epoch": 3.56, + "learning_rate": 3.265415549597856e-05, + "loss": 0.2138, + "step": 1344 + }, + { + "epoch": 3.56, + "learning_rate": 3.2640750670241285e-05, + "loss": 0.0077, + "step": 1345 + }, + { + "epoch": 3.56, + "learning_rate": 3.262734584450402e-05, + "loss": 0.2294, + "step": 1346 + }, + { + "epoch": 3.56, + "learning_rate": 3.2613941018766755e-05, + "loss": 0.3282, + "step": 1347 + }, + { + "epoch": 3.57, + "learning_rate": 3.260053619302949e-05, + "loss": 0.233, + "step": 1348 + }, + { + "epoch": 3.57, + "learning_rate": 3.2587131367292226e-05, + "loss": 0.0379, + "step": 1349 + }, + { + "epoch": 3.57, + "learning_rate": 3.257372654155496e-05, + "loss": 0.2168, + "step": 1350 + }, + { + "epoch": 3.57, + "learning_rate": 3.2560321715817696e-05, + "loss": 0.0443, + "step": 1351 + }, + { + "epoch": 3.58, + "learning_rate": 3.254691689008043e-05, + "loss": 0.2665, + "step": 1352 + }, + { + "epoch": 3.58, + "learning_rate": 3.2533512064343166e-05, + "loss": 0.0136, + "step": 1353 + }, + { + "epoch": 3.58, + "learning_rate": 3.25201072386059e-05, + "loss": 0.0035, + "step": 1354 + }, + { + "epoch": 3.58, + "learning_rate": 3.2506702412868636e-05, + "loss": 0.2153, + "step": 1355 + }, + { + "epoch": 3.59, + "learning_rate": 3.249329758713137e-05, + "loss": 0.088, + "step": 1356 + }, + { + "epoch": 3.59, + "learning_rate": 3.247989276139411e-05, + "loss": 0.0074, + "step": 1357 + }, + { + "epoch": 3.59, + "learning_rate": 3.2466487935656835e-05, + "loss": 0.0924, + "step": 1358 + }, + { + "epoch": 3.6, + "learning_rate": 3.245308310991958e-05, + "loss": 0.0171, + "step": 1359 + }, + { + "epoch": 3.6, + "learning_rate": 3.2439678284182305e-05, + "loss": 0.0132, + "step": 1360 + }, + { + "epoch": 3.6, + "learning_rate": 3.242627345844505e-05, + "loss": 0.0583, + "step": 1361 + }, + { + "epoch": 3.6, + "learning_rate": 3.2412868632707776e-05, + "loss": 0.0038, + "step": 1362 + }, + { + "epoch": 3.61, + "learning_rate": 3.239946380697051e-05, + "loss": 0.0846, + "step": 1363 + }, + { + "epoch": 3.61, + "learning_rate": 3.2386058981233246e-05, + "loss": 0.0058, + "step": 1364 + }, + { + "epoch": 3.61, + "learning_rate": 3.237265415549598e-05, + "loss": 0.4456, + "step": 1365 + }, + { + "epoch": 3.61, + "learning_rate": 3.2359249329758716e-05, + "loss": 0.0029, + "step": 1366 + }, + { + "epoch": 3.62, + "learning_rate": 3.234584450402145e-05, + "loss": 0.2553, + "step": 1367 + }, + { + "epoch": 3.62, + "learning_rate": 3.233243967828418e-05, + "loss": 0.0936, + "step": 1368 + }, + { + "epoch": 3.62, + "learning_rate": 3.231903485254692e-05, + "loss": 0.1017, + "step": 1369 + }, + { + "epoch": 3.62, + "learning_rate": 3.230563002680965e-05, + "loss": 0.0379, + "step": 1370 + }, + { + "epoch": 3.63, + "learning_rate": 3.229222520107239e-05, + "loss": 0.0069, + "step": 1371 + }, + { + "epoch": 3.63, + "learning_rate": 3.227882037533512e-05, + "loss": 0.3235, + "step": 1372 + }, + { + "epoch": 3.63, + "learning_rate": 3.2265415549597855e-05, + "loss": 0.3796, + "step": 1373 + }, + { + "epoch": 3.63, + "learning_rate": 3.225201072386059e-05, + "loss": 0.3246, + "step": 1374 + }, + { + "epoch": 3.64, + "learning_rate": 3.2238605898123325e-05, + "loss": 0.0059, + "step": 1375 + }, + { + "epoch": 3.64, + "learning_rate": 3.222520107238606e-05, + "loss": 0.0405, + "step": 1376 + }, + { + "epoch": 3.64, + "learning_rate": 3.2211796246648796e-05, + "loss": 0.0142, + "step": 1377 + }, + { + "epoch": 3.65, + "learning_rate": 3.219839142091153e-05, + "loss": 0.4426, + "step": 1378 + }, + { + "epoch": 3.65, + "learning_rate": 3.2184986595174266e-05, + "loss": 0.0249, + "step": 1379 + }, + { + "epoch": 3.65, + "learning_rate": 3.2171581769437e-05, + "loss": 0.1053, + "step": 1380 + }, + { + "epoch": 3.65, + "learning_rate": 3.2158176943699736e-05, + "loss": 0.0179, + "step": 1381 + }, + { + "epoch": 3.66, + "learning_rate": 3.214477211796247e-05, + "loss": 0.0718, + "step": 1382 + }, + { + "epoch": 3.66, + "learning_rate": 3.21313672922252e-05, + "loss": 0.1431, + "step": 1383 + }, + { + "epoch": 3.66, + "learning_rate": 3.211796246648794e-05, + "loss": 0.2391, + "step": 1384 + }, + { + "epoch": 3.66, + "learning_rate": 3.210455764075067e-05, + "loss": 0.0053, + "step": 1385 + }, + { + "epoch": 3.67, + "learning_rate": 3.209115281501341e-05, + "loss": 0.2935, + "step": 1386 + }, + { + "epoch": 3.67, + "learning_rate": 3.207774798927614e-05, + "loss": 0.0071, + "step": 1387 + }, + { + "epoch": 3.67, + "learning_rate": 3.2064343163538875e-05, + "loss": 0.031, + "step": 1388 + }, + { + "epoch": 3.67, + "learning_rate": 3.205093833780161e-05, + "loss": 0.1989, + "step": 1389 + }, + { + "epoch": 3.68, + "learning_rate": 3.2037533512064346e-05, + "loss": 0.0533, + "step": 1390 + }, + { + "epoch": 3.68, + "learning_rate": 3.202412868632708e-05, + "loss": 0.2408, + "step": 1391 + }, + { + "epoch": 3.68, + "learning_rate": 3.2010723860589816e-05, + "loss": 0.3158, + "step": 1392 + }, + { + "epoch": 3.69, + "learning_rate": 3.1997319034852544e-05, + "loss": 0.3629, + "step": 1393 + }, + { + "epoch": 3.69, + "learning_rate": 3.1983914209115286e-05, + "loss": 0.0122, + "step": 1394 + }, + { + "epoch": 3.69, + "learning_rate": 3.1970509383378014e-05, + "loss": 0.0449, + "step": 1395 + }, + { + "epoch": 3.69, + "learning_rate": 3.1957104557640756e-05, + "loss": 0.1273, + "step": 1396 + }, + { + "epoch": 3.7, + "learning_rate": 3.1943699731903485e-05, + "loss": 0.3401, + "step": 1397 + }, + { + "epoch": 3.7, + "learning_rate": 3.193029490616622e-05, + "loss": 0.0183, + "step": 1398 + }, + { + "epoch": 3.7, + "learning_rate": 3.1916890080428955e-05, + "loss": 0.0526, + "step": 1399 + }, + { + "epoch": 3.7, + "learning_rate": 3.190348525469169e-05, + "loss": 0.5037, + "step": 1400 + }, + { + "epoch": 3.71, + "learning_rate": 3.1890080428954425e-05, + "loss": 0.0059, + "step": 1401 + }, + { + "epoch": 3.71, + "learning_rate": 3.187667560321716e-05, + "loss": 0.0266, + "step": 1402 + }, + { + "epoch": 3.71, + "learning_rate": 3.1863270777479896e-05, + "loss": 0.4095, + "step": 1403 + }, + { + "epoch": 3.71, + "learning_rate": 3.184986595174263e-05, + "loss": 0.1802, + "step": 1404 + }, + { + "epoch": 3.72, + "learning_rate": 3.1836461126005366e-05, + "loss": 0.3586, + "step": 1405 + }, + { + "epoch": 3.72, + "learning_rate": 3.18230563002681e-05, + "loss": 0.2058, + "step": 1406 + }, + { + "epoch": 3.72, + "learning_rate": 3.1809651474530836e-05, + "loss": 0.008, + "step": 1407 + }, + { + "epoch": 3.72, + "learning_rate": 3.1796246648793564e-05, + "loss": 0.0282, + "step": 1408 + }, + { + "epoch": 3.73, + "learning_rate": 3.1782841823056306e-05, + "loss": 0.0077, + "step": 1409 + }, + { + "epoch": 3.73, + "learning_rate": 3.1769436997319035e-05, + "loss": 0.3461, + "step": 1410 + }, + { + "epoch": 3.73, + "learning_rate": 3.1756032171581777e-05, + "loss": 0.0038, + "step": 1411 + }, + { + "epoch": 3.74, + "learning_rate": 3.1742627345844505e-05, + "loss": 0.0087, + "step": 1412 + }, + { + "epoch": 3.74, + "learning_rate": 3.172922252010724e-05, + "loss": 0.8254, + "step": 1413 + }, + { + "epoch": 3.74, + "learning_rate": 3.1715817694369975e-05, + "loss": 0.017, + "step": 1414 + }, + { + "epoch": 3.74, + "learning_rate": 3.170241286863271e-05, + "loss": 0.2954, + "step": 1415 + }, + { + "epoch": 3.75, + "learning_rate": 3.1689008042895445e-05, + "loss": 0.0286, + "step": 1416 + }, + { + "epoch": 3.75, + "learning_rate": 3.167560321715818e-05, + "loss": 0.0454, + "step": 1417 + }, + { + "epoch": 3.75, + "learning_rate": 3.166219839142091e-05, + "loss": 0.222, + "step": 1418 + }, + { + "epoch": 3.75, + "learning_rate": 3.164879356568365e-05, + "loss": 0.0225, + "step": 1419 + }, + { + "epoch": 3.76, + "learning_rate": 3.163538873994638e-05, + "loss": 0.2599, + "step": 1420 + }, + { + "epoch": 3.76, + "learning_rate": 3.162198391420912e-05, + "loss": 0.2343, + "step": 1421 + }, + { + "epoch": 3.76, + "learning_rate": 3.160857908847185e-05, + "loss": 0.0274, + "step": 1422 + }, + { + "epoch": 3.76, + "learning_rate": 3.1595174262734585e-05, + "loss": 0.0109, + "step": 1423 + }, + { + "epoch": 3.77, + "learning_rate": 3.158176943699732e-05, + "loss": 0.012, + "step": 1424 + }, + { + "epoch": 3.77, + "learning_rate": 3.1568364611260055e-05, + "loss": 0.0267, + "step": 1425 + }, + { + "epoch": 3.77, + "learning_rate": 3.155495978552279e-05, + "loss": 0.0116, + "step": 1426 + }, + { + "epoch": 3.78, + "learning_rate": 3.1541554959785525e-05, + "loss": 0.2563, + "step": 1427 + }, + { + "epoch": 3.78, + "learning_rate": 3.1528150134048253e-05, + "loss": 0.2149, + "step": 1428 + }, + { + "epoch": 3.78, + "learning_rate": 3.1514745308310995e-05, + "loss": 0.2099, + "step": 1429 + }, + { + "epoch": 3.78, + "learning_rate": 3.1501340482573724e-05, + "loss": 0.1445, + "step": 1430 + }, + { + "epoch": 3.79, + "learning_rate": 3.1487935656836466e-05, + "loss": 0.0069, + "step": 1431 + }, + { + "epoch": 3.79, + "learning_rate": 3.1474530831099194e-05, + "loss": 0.3583, + "step": 1432 + }, + { + "epoch": 3.79, + "learning_rate": 3.146112600536193e-05, + "loss": 0.1112, + "step": 1433 + }, + { + "epoch": 3.79, + "learning_rate": 3.1447721179624664e-05, + "loss": 0.5379, + "step": 1434 + }, + { + "epoch": 3.8, + "learning_rate": 3.14343163538874e-05, + "loss": 0.0248, + "step": 1435 + }, + { + "epoch": 3.8, + "learning_rate": 3.1420911528150135e-05, + "loss": 0.0255, + "step": 1436 + }, + { + "epoch": 3.8, + "learning_rate": 3.140750670241287e-05, + "loss": 0.3363, + "step": 1437 + }, + { + "epoch": 3.8, + "learning_rate": 3.1394101876675605e-05, + "loss": 0.2952, + "step": 1438 + }, + { + "epoch": 3.81, + "learning_rate": 3.138069705093834e-05, + "loss": 0.0337, + "step": 1439 + }, + { + "epoch": 3.81, + "learning_rate": 3.1367292225201075e-05, + "loss": 0.0157, + "step": 1440 + }, + { + "epoch": 3.81, + "learning_rate": 3.135388739946381e-05, + "loss": 0.0204, + "step": 1441 + }, + { + "epoch": 3.81, + "learning_rate": 3.1340482573726545e-05, + "loss": 0.7707, + "step": 1442 + }, + { + "epoch": 3.82, + "learning_rate": 3.1327077747989274e-05, + "loss": 0.4232, + "step": 1443 + }, + { + "epoch": 3.82, + "learning_rate": 3.1313672922252016e-05, + "loss": 0.116, + "step": 1444 + }, + { + "epoch": 3.82, + "learning_rate": 3.1300268096514744e-05, + "loss": 0.421, + "step": 1445 + }, + { + "epoch": 3.83, + "learning_rate": 3.1286863270777486e-05, + "loss": 0.0267, + "step": 1446 + }, + { + "epoch": 3.83, + "learning_rate": 3.1273458445040214e-05, + "loss": 0.0078, + "step": 1447 + }, + { + "epoch": 3.83, + "learning_rate": 3.126005361930295e-05, + "loss": 0.0996, + "step": 1448 + }, + { + "epoch": 3.83, + "learning_rate": 3.1246648793565684e-05, + "loss": 0.0389, + "step": 1449 + }, + { + "epoch": 3.84, + "learning_rate": 3.123324396782842e-05, + "loss": 0.0482, + "step": 1450 + }, + { + "epoch": 3.84, + "learning_rate": 3.1219839142091155e-05, + "loss": 0.0053, + "step": 1451 + }, + { + "epoch": 3.84, + "learning_rate": 3.120643431635389e-05, + "loss": 0.0153, + "step": 1452 + }, + { + "epoch": 3.84, + "learning_rate": 3.119302949061662e-05, + "loss": 0.008, + "step": 1453 + }, + { + "epoch": 3.85, + "learning_rate": 3.117962466487936e-05, + "loss": 0.0166, + "step": 1454 + }, + { + "epoch": 3.85, + "learning_rate": 3.116621983914209e-05, + "loss": 0.0889, + "step": 1455 + }, + { + "epoch": 3.85, + "learning_rate": 3.115281501340483e-05, + "loss": 0.0695, + "step": 1456 + }, + { + "epoch": 3.85, + "learning_rate": 3.113941018766756e-05, + "loss": 0.3353, + "step": 1457 + }, + { + "epoch": 3.86, + "learning_rate": 3.1126005361930294e-05, + "loss": 0.0729, + "step": 1458 + }, + { + "epoch": 3.86, + "learning_rate": 3.111260053619303e-05, + "loss": 0.0187, + "step": 1459 + }, + { + "epoch": 3.86, + "learning_rate": 3.1099195710455764e-05, + "loss": 0.2512, + "step": 1460 + }, + { + "epoch": 3.87, + "learning_rate": 3.10857908847185e-05, + "loss": 0.3837, + "step": 1461 + }, + { + "epoch": 3.87, + "learning_rate": 3.1072386058981234e-05, + "loss": 0.2543, + "step": 1462 + }, + { + "epoch": 3.87, + "learning_rate": 3.105898123324397e-05, + "loss": 0.1797, + "step": 1463 + }, + { + "epoch": 3.87, + "learning_rate": 3.1045576407506705e-05, + "loss": 0.3097, + "step": 1464 + }, + { + "epoch": 3.88, + "learning_rate": 3.103217158176944e-05, + "loss": 0.268, + "step": 1465 + }, + { + "epoch": 3.88, + "learning_rate": 3.1018766756032175e-05, + "loss": 0.1773, + "step": 1466 + }, + { + "epoch": 3.88, + "learning_rate": 3.100536193029491e-05, + "loss": 0.2055, + "step": 1467 + }, + { + "epoch": 3.88, + "learning_rate": 3.099195710455764e-05, + "loss": 0.0279, + "step": 1468 + }, + { + "epoch": 3.89, + "learning_rate": 3.097855227882038e-05, + "loss": 0.1263, + "step": 1469 + }, + { + "epoch": 3.89, + "learning_rate": 3.096514745308311e-05, + "loss": 0.0449, + "step": 1470 + }, + { + "epoch": 3.89, + "learning_rate": 3.095174262734585e-05, + "loss": 0.2429, + "step": 1471 + }, + { + "epoch": 3.89, + "learning_rate": 3.093833780160858e-05, + "loss": 0.1245, + "step": 1472 + }, + { + "epoch": 3.9, + "learning_rate": 3.0924932975871314e-05, + "loss": 0.1303, + "step": 1473 + }, + { + "epoch": 3.9, + "learning_rate": 3.091152815013405e-05, + "loss": 0.0303, + "step": 1474 + }, + { + "epoch": 3.9, + "learning_rate": 3.0898123324396784e-05, + "loss": 0.3279, + "step": 1475 + }, + { + "epoch": 3.9, + "learning_rate": 3.088471849865952e-05, + "loss": 0.134, + "step": 1476 + }, + { + "epoch": 3.91, + "learning_rate": 3.0871313672922255e-05, + "loss": 0.5138, + "step": 1477 + }, + { + "epoch": 3.91, + "learning_rate": 3.085790884718498e-05, + "loss": 0.0476, + "step": 1478 + }, + { + "epoch": 3.91, + "learning_rate": 3.0844504021447725e-05, + "loss": 0.1956, + "step": 1479 + }, + { + "epoch": 3.92, + "learning_rate": 3.083109919571045e-05, + "loss": 0.2061, + "step": 1480 + }, + { + "epoch": 3.92, + "learning_rate": 3.0817694369973195e-05, + "loss": 0.269, + "step": 1481 + }, + { + "epoch": 3.92, + "learning_rate": 3.0804289544235923e-05, + "loss": 0.0708, + "step": 1482 + }, + { + "epoch": 3.92, + "learning_rate": 3.0790884718498665e-05, + "loss": 0.0389, + "step": 1483 + }, + { + "epoch": 3.93, + "learning_rate": 3.0777479892761394e-05, + "loss": 0.2566, + "step": 1484 + }, + { + "epoch": 3.93, + "learning_rate": 3.076407506702413e-05, + "loss": 0.0581, + "step": 1485 + }, + { + "epoch": 3.93, + "learning_rate": 3.0750670241286864e-05, + "loss": 0.1527, + "step": 1486 + }, + { + "epoch": 3.93, + "learning_rate": 3.07372654155496e-05, + "loss": 0.3963, + "step": 1487 + }, + { + "epoch": 3.94, + "learning_rate": 3.0723860589812334e-05, + "loss": 0.2241, + "step": 1488 + }, + { + "epoch": 3.94, + "learning_rate": 3.071045576407507e-05, + "loss": 0.1275, + "step": 1489 + }, + { + "epoch": 3.94, + "learning_rate": 3.0697050938337804e-05, + "loss": 0.3148, + "step": 1490 + }, + { + "epoch": 3.94, + "learning_rate": 3.068364611260054e-05, + "loss": 0.1474, + "step": 1491 + }, + { + "epoch": 3.95, + "learning_rate": 3.0670241286863275e-05, + "loss": 0.0233, + "step": 1492 + }, + { + "epoch": 3.95, + "learning_rate": 3.065683646112601e-05, + "loss": 0.1721, + "step": 1493 + }, + { + "epoch": 3.95, + "learning_rate": 3.0643431635388745e-05, + "loss": 0.6024, + "step": 1494 + }, + { + "epoch": 3.96, + "learning_rate": 3.063002680965147e-05, + "loss": 0.1425, + "step": 1495 + }, + { + "epoch": 3.96, + "learning_rate": 3.0616621983914215e-05, + "loss": 0.0311, + "step": 1496 + }, + { + "epoch": 3.96, + "learning_rate": 3.0603217158176944e-05, + "loss": 0.0197, + "step": 1497 + }, + { + "epoch": 3.96, + "learning_rate": 3.0589812332439686e-05, + "loss": 0.0406, + "step": 1498 + }, + { + "epoch": 3.97, + "learning_rate": 3.0576407506702414e-05, + "loss": 0.054, + "step": 1499 + }, + { + "epoch": 3.97, + "learning_rate": 3.056300268096515e-05, + "loss": 0.161, + "step": 1500 + }, + { + "epoch": 3.97, + "learning_rate": 3.0549597855227884e-05, + "loss": 0.0549, + "step": 1501 + }, + { + "epoch": 3.97, + "learning_rate": 3.053619302949062e-05, + "loss": 0.1667, + "step": 1502 + }, + { + "epoch": 3.98, + "learning_rate": 3.0522788203753354e-05, + "loss": 0.1264, + "step": 1503 + }, + { + "epoch": 3.98, + "learning_rate": 3.0509383378016086e-05, + "loss": 0.0133, + "step": 1504 + }, + { + "epoch": 3.98, + "learning_rate": 3.049597855227882e-05, + "loss": 0.0655, + "step": 1505 + }, + { + "epoch": 3.98, + "learning_rate": 3.0482573726541556e-05, + "loss": 0.1054, + "step": 1506 + }, + { + "epoch": 3.99, + "learning_rate": 3.046916890080429e-05, + "loss": 0.0053, + "step": 1507 + }, + { + "epoch": 3.99, + "learning_rate": 3.0455764075067027e-05, + "loss": 0.0347, + "step": 1508 + }, + { + "epoch": 3.99, + "learning_rate": 3.0442359249329762e-05, + "loss": 0.6095, + "step": 1509 + }, + { + "epoch": 3.99, + "learning_rate": 3.0428954423592494e-05, + "loss": 0.1339, + "step": 1510 + }, + { + "epoch": 4.0, + "learning_rate": 3.0415549597855232e-05, + "loss": 0.0088, + "step": 1511 + }, + { + "epoch": 4.0, + "learning_rate": 3.0402144772117964e-05, + "loss": 0.4356, + "step": 1512 + }, + { + "epoch": 4.0, + "eval_f1": 0.7822580645161291, + "eval_loss": 0.6966613531112671, + "eval_runtime": 1.8703, + "eval_samples_per_second": 808.957, + "eval_steps_per_second": 50.794, + "step": 1512 + }, + { + "epoch": 4.0, + "learning_rate": 3.0388739946380702e-05, + "loss": 0.003, + "step": 1513 + }, + { + "epoch": 4.01, + "learning_rate": 3.0375335120643434e-05, + "loss": 0.0067, + "step": 1514 + }, + { + "epoch": 4.01, + "learning_rate": 3.0361930294906166e-05, + "loss": 0.0488, + "step": 1515 + }, + { + "epoch": 4.01, + "learning_rate": 3.0348525469168904e-05, + "loss": 0.0106, + "step": 1516 + }, + { + "epoch": 4.01, + "learning_rate": 3.0335120643431636e-05, + "loss": 0.0098, + "step": 1517 + }, + { + "epoch": 4.02, + "learning_rate": 3.0321715817694375e-05, + "loss": 0.274, + "step": 1518 + }, + { + "epoch": 4.02, + "learning_rate": 3.0308310991957106e-05, + "loss": 0.2007, + "step": 1519 + }, + { + "epoch": 4.02, + "learning_rate": 3.0294906166219838e-05, + "loss": 0.0121, + "step": 1520 + }, + { + "epoch": 4.02, + "learning_rate": 3.0281501340482577e-05, + "loss": 0.0632, + "step": 1521 + }, + { + "epoch": 4.03, + "learning_rate": 3.026809651474531e-05, + "loss": 0.0062, + "step": 1522 + }, + { + "epoch": 4.03, + "learning_rate": 3.0254691689008047e-05, + "loss": 0.0123, + "step": 1523 + }, + { + "epoch": 4.03, + "learning_rate": 3.024128686327078e-05, + "loss": 0.0063, + "step": 1524 + }, + { + "epoch": 4.03, + "learning_rate": 3.022788203753351e-05, + "loss": 0.0102, + "step": 1525 + }, + { + "epoch": 4.04, + "learning_rate": 3.021447721179625e-05, + "loss": 0.0082, + "step": 1526 + }, + { + "epoch": 4.04, + "learning_rate": 3.020107238605898e-05, + "loss": 0.3369, + "step": 1527 + }, + { + "epoch": 4.04, + "learning_rate": 3.018766756032172e-05, + "loss": 0.2587, + "step": 1528 + }, + { + "epoch": 4.04, + "learning_rate": 3.017426273458445e-05, + "loss": 0.0067, + "step": 1529 + }, + { + "epoch": 4.05, + "learning_rate": 3.0160857908847186e-05, + "loss": 0.0021, + "step": 1530 + }, + { + "epoch": 4.05, + "learning_rate": 3.014745308310992e-05, + "loss": 0.0724, + "step": 1531 + }, + { + "epoch": 4.05, + "learning_rate": 3.0134048257372656e-05, + "loss": 0.0074, + "step": 1532 + }, + { + "epoch": 4.06, + "learning_rate": 3.012064343163539e-05, + "loss": 0.0202, + "step": 1533 + }, + { + "epoch": 4.06, + "learning_rate": 3.0107238605898126e-05, + "loss": 0.1435, + "step": 1534 + }, + { + "epoch": 4.06, + "learning_rate": 3.0093833780160858e-05, + "loss": 0.0074, + "step": 1535 + }, + { + "epoch": 4.06, + "learning_rate": 3.0080428954423597e-05, + "loss": 0.4145, + "step": 1536 + }, + { + "epoch": 4.07, + "learning_rate": 3.006702412868633e-05, + "loss": 0.0186, + "step": 1537 + }, + { + "epoch": 4.07, + "learning_rate": 3.0053619302949067e-05, + "loss": 0.1648, + "step": 1538 + }, + { + "epoch": 4.07, + "learning_rate": 3.00402144772118e-05, + "loss": 0.2545, + "step": 1539 + }, + { + "epoch": 4.07, + "learning_rate": 3.002680965147453e-05, + "loss": 0.0016, + "step": 1540 + }, + { + "epoch": 4.08, + "learning_rate": 3.001340482573727e-05, + "loss": 0.0184, + "step": 1541 + }, + { + "epoch": 4.08, + "learning_rate": 3e-05, + "loss": 0.1208, + "step": 1542 + }, + { + "epoch": 4.08, + "learning_rate": 2.998659517426274e-05, + "loss": 0.0021, + "step": 1543 + }, + { + "epoch": 4.08, + "learning_rate": 2.997319034852547e-05, + "loss": 0.0092, + "step": 1544 + }, + { + "epoch": 4.09, + "learning_rate": 2.9959785522788203e-05, + "loss": 0.1514, + "step": 1545 + }, + { + "epoch": 4.09, + "learning_rate": 2.994638069705094e-05, + "loss": 0.0773, + "step": 1546 + }, + { + "epoch": 4.09, + "learning_rate": 2.9932975871313673e-05, + "loss": 0.0093, + "step": 1547 + }, + { + "epoch": 4.1, + "learning_rate": 2.991957104557641e-05, + "loss": 0.0022, + "step": 1548 + }, + { + "epoch": 4.1, + "learning_rate": 2.9906166219839143e-05, + "loss": 0.1765, + "step": 1549 + }, + { + "epoch": 4.1, + "learning_rate": 2.9892761394101875e-05, + "loss": 0.1766, + "step": 1550 + }, + { + "epoch": 4.1, + "learning_rate": 2.9879356568364614e-05, + "loss": 0.0024, + "step": 1551 + }, + { + "epoch": 4.11, + "learning_rate": 2.9865951742627345e-05, + "loss": 0.012, + "step": 1552 + }, + { + "epoch": 4.11, + "learning_rate": 2.9852546916890084e-05, + "loss": 0.0055, + "step": 1553 + }, + { + "epoch": 4.11, + "learning_rate": 2.9839142091152816e-05, + "loss": 0.0088, + "step": 1554 + }, + { + "epoch": 4.11, + "learning_rate": 2.9825737265415547e-05, + "loss": 0.0019, + "step": 1555 + }, + { + "epoch": 4.12, + "learning_rate": 2.9812332439678286e-05, + "loss": 0.0186, + "step": 1556 + }, + { + "epoch": 4.12, + "learning_rate": 2.9798927613941018e-05, + "loss": 0.25, + "step": 1557 + }, + { + "epoch": 4.12, + "learning_rate": 2.9785522788203756e-05, + "loss": 0.0129, + "step": 1558 + }, + { + "epoch": 4.12, + "learning_rate": 2.9772117962466488e-05, + "loss": 0.0048, + "step": 1559 + }, + { + "epoch": 4.13, + "learning_rate": 2.9758713136729223e-05, + "loss": 0.1153, + "step": 1560 + }, + { + "epoch": 4.13, + "learning_rate": 2.9745308310991958e-05, + "loss": 0.1871, + "step": 1561 + }, + { + "epoch": 4.13, + "learning_rate": 2.9731903485254693e-05, + "loss": 0.0087, + "step": 1562 + }, + { + "epoch": 4.13, + "learning_rate": 2.971849865951743e-05, + "loss": 0.0048, + "step": 1563 + }, + { + "epoch": 4.14, + "learning_rate": 2.9705093833780163e-05, + "loss": 0.026, + "step": 1564 + }, + { + "epoch": 4.14, + "learning_rate": 2.9691689008042895e-05, + "loss": 0.3336, + "step": 1565 + }, + { + "epoch": 4.14, + "learning_rate": 2.9678284182305634e-05, + "loss": 0.0015, + "step": 1566 + }, + { + "epoch": 4.15, + "learning_rate": 2.9664879356568365e-05, + "loss": 0.0044, + "step": 1567 + }, + { + "epoch": 4.15, + "learning_rate": 2.9651474530831104e-05, + "loss": 0.0035, + "step": 1568 + }, + { + "epoch": 4.15, + "learning_rate": 2.9638069705093836e-05, + "loss": 0.1206, + "step": 1569 + }, + { + "epoch": 4.15, + "learning_rate": 2.9624664879356567e-05, + "loss": 0.1247, + "step": 1570 + }, + { + "epoch": 4.16, + "learning_rate": 2.9611260053619306e-05, + "loss": 0.0011, + "step": 1571 + }, + { + "epoch": 4.16, + "learning_rate": 2.9597855227882038e-05, + "loss": 0.0023, + "step": 1572 + }, + { + "epoch": 4.16, + "learning_rate": 2.9584450402144776e-05, + "loss": 0.0014, + "step": 1573 + }, + { + "epoch": 4.16, + "learning_rate": 2.9571045576407508e-05, + "loss": 0.2967, + "step": 1574 + }, + { + "epoch": 4.17, + "learning_rate": 2.955764075067024e-05, + "loss": 0.0373, + "step": 1575 + }, + { + "epoch": 4.17, + "learning_rate": 2.9544235924932978e-05, + "loss": 0.3351, + "step": 1576 + }, + { + "epoch": 4.17, + "learning_rate": 2.953083109919571e-05, + "loss": 0.0025, + "step": 1577 + }, + { + "epoch": 4.17, + "learning_rate": 2.951742627345845e-05, + "loss": 0.0025, + "step": 1578 + }, + { + "epoch": 4.18, + "learning_rate": 2.950402144772118e-05, + "loss": 0.0182, + "step": 1579 + }, + { + "epoch": 4.18, + "learning_rate": 2.9490616621983912e-05, + "loss": 0.001, + "step": 1580 + }, + { + "epoch": 4.18, + "learning_rate": 2.947721179624665e-05, + "loss": 0.003, + "step": 1581 + }, + { + "epoch": 4.19, + "learning_rate": 2.9463806970509382e-05, + "loss": 0.0038, + "step": 1582 + }, + { + "epoch": 4.19, + "learning_rate": 2.945040214477212e-05, + "loss": 0.002, + "step": 1583 + }, + { + "epoch": 4.19, + "learning_rate": 2.9436997319034853e-05, + "loss": 0.1688, + "step": 1584 + }, + { + "epoch": 4.19, + "learning_rate": 2.9423592493297584e-05, + "loss": 0.0014, + "step": 1585 + }, + { + "epoch": 4.2, + "learning_rate": 2.9410187667560323e-05, + "loss": 0.2664, + "step": 1586 + }, + { + "epoch": 4.2, + "learning_rate": 2.9396782841823055e-05, + "loss": 0.0012, + "step": 1587 + }, + { + "epoch": 4.2, + "learning_rate": 2.9383378016085793e-05, + "loss": 0.0022, + "step": 1588 + }, + { + "epoch": 4.2, + "learning_rate": 2.9369973190348525e-05, + "loss": 0.0959, + "step": 1589 + }, + { + "epoch": 4.21, + "learning_rate": 2.935656836461126e-05, + "loss": 0.0839, + "step": 1590 + }, + { + "epoch": 4.21, + "learning_rate": 2.9343163538873995e-05, + "loss": 0.7405, + "step": 1591 + }, + { + "epoch": 4.21, + "learning_rate": 2.932975871313673e-05, + "loss": 0.0351, + "step": 1592 + }, + { + "epoch": 4.21, + "learning_rate": 2.9316353887399465e-05, + "loss": 0.0025, + "step": 1593 + }, + { + "epoch": 4.22, + "learning_rate": 2.93029490616622e-05, + "loss": 0.0054, + "step": 1594 + }, + { + "epoch": 4.22, + "learning_rate": 2.9289544235924932e-05, + "loss": 0.0043, + "step": 1595 + }, + { + "epoch": 4.22, + "learning_rate": 2.927613941018767e-05, + "loss": 0.1828, + "step": 1596 + }, + { + "epoch": 4.22, + "learning_rate": 2.9262734584450402e-05, + "loss": 0.0022, + "step": 1597 + }, + { + "epoch": 4.23, + "learning_rate": 2.924932975871314e-05, + "loss": 0.0051, + "step": 1598 + }, + { + "epoch": 4.23, + "learning_rate": 2.9235924932975873e-05, + "loss": 0.0025, + "step": 1599 + }, + { + "epoch": 4.23, + "learning_rate": 2.9222520107238604e-05, + "loss": 0.0018, + "step": 1600 + }, + { + "epoch": 4.24, + "learning_rate": 2.9209115281501343e-05, + "loss": 0.0348, + "step": 1601 + }, + { + "epoch": 4.24, + "learning_rate": 2.9195710455764075e-05, + "loss": 0.207, + "step": 1602 + }, + { + "epoch": 4.24, + "learning_rate": 2.9182305630026813e-05, + "loss": 0.0249, + "step": 1603 + }, + { + "epoch": 4.24, + "learning_rate": 2.9168900804289545e-05, + "loss": 0.0028, + "step": 1604 + }, + { + "epoch": 4.25, + "learning_rate": 2.9155495978552283e-05, + "loss": 0.2604, + "step": 1605 + }, + { + "epoch": 4.25, + "learning_rate": 2.9142091152815015e-05, + "loss": 0.2808, + "step": 1606 + }, + { + "epoch": 4.25, + "learning_rate": 2.9128686327077747e-05, + "loss": 0.0289, + "step": 1607 + }, + { + "epoch": 4.25, + "learning_rate": 2.9115281501340486e-05, + "loss": 0.005, + "step": 1608 + }, + { + "epoch": 4.26, + "learning_rate": 2.9101876675603217e-05, + "loss": 0.7931, + "step": 1609 + }, + { + "epoch": 4.26, + "learning_rate": 2.9088471849865956e-05, + "loss": 0.335, + "step": 1610 + }, + { + "epoch": 4.26, + "learning_rate": 2.9075067024128688e-05, + "loss": 0.2779, + "step": 1611 + }, + { + "epoch": 4.26, + "learning_rate": 2.906166219839142e-05, + "loss": 0.1649, + "step": 1612 + }, + { + "epoch": 4.27, + "learning_rate": 2.9048257372654158e-05, + "loss": 0.0081, + "step": 1613 + }, + { + "epoch": 4.27, + "learning_rate": 2.903485254691689e-05, + "loss": 0.0638, + "step": 1614 + }, + { + "epoch": 4.27, + "learning_rate": 2.9021447721179628e-05, + "loss": 0.016, + "step": 1615 + }, + { + "epoch": 4.28, + "learning_rate": 2.900804289544236e-05, + "loss": 0.0025, + "step": 1616 + }, + { + "epoch": 4.28, + "learning_rate": 2.8994638069705095e-05, + "loss": 0.0249, + "step": 1617 + }, + { + "epoch": 4.28, + "learning_rate": 2.898123324396783e-05, + "loss": 0.0291, + "step": 1618 + }, + { + "epoch": 4.28, + "learning_rate": 2.8967828418230565e-05, + "loss": 0.1773, + "step": 1619 + }, + { + "epoch": 4.29, + "learning_rate": 2.89544235924933e-05, + "loss": 0.3452, + "step": 1620 + }, + { + "epoch": 4.29, + "learning_rate": 2.8941018766756035e-05, + "loss": 0.006, + "step": 1621 + }, + { + "epoch": 4.29, + "learning_rate": 2.8927613941018767e-05, + "loss": 0.0054, + "step": 1622 + }, + { + "epoch": 4.29, + "learning_rate": 2.8914209115281506e-05, + "loss": 0.1852, + "step": 1623 + }, + { + "epoch": 4.3, + "learning_rate": 2.8900804289544237e-05, + "loss": 0.4424, + "step": 1624 + }, + { + "epoch": 4.3, + "learning_rate": 2.8887399463806976e-05, + "loss": 0.0063, + "step": 1625 + }, + { + "epoch": 4.3, + "learning_rate": 2.8873994638069708e-05, + "loss": 0.43, + "step": 1626 + }, + { + "epoch": 4.3, + "learning_rate": 2.886058981233244e-05, + "loss": 0.2283, + "step": 1627 + }, + { + "epoch": 4.31, + "learning_rate": 2.8847184986595178e-05, + "loss": 0.0519, + "step": 1628 + }, + { + "epoch": 4.31, + "learning_rate": 2.883378016085791e-05, + "loss": 0.1797, + "step": 1629 + }, + { + "epoch": 4.31, + "learning_rate": 2.8820375335120648e-05, + "loss": 0.2569, + "step": 1630 + }, + { + "epoch": 4.31, + "learning_rate": 2.880697050938338e-05, + "loss": 0.0024, + "step": 1631 + }, + { + "epoch": 4.32, + "learning_rate": 2.8793565683646112e-05, + "loss": 0.1727, + "step": 1632 + }, + { + "epoch": 4.32, + "learning_rate": 2.878016085790885e-05, + "loss": 0.0091, + "step": 1633 + }, + { + "epoch": 4.32, + "learning_rate": 2.8766756032171582e-05, + "loss": 0.2002, + "step": 1634 + }, + { + "epoch": 4.33, + "learning_rate": 2.875335120643432e-05, + "loss": 0.0217, + "step": 1635 + }, + { + "epoch": 4.33, + "learning_rate": 2.8739946380697052e-05, + "loss": 0.2163, + "step": 1636 + }, + { + "epoch": 4.33, + "learning_rate": 2.8726541554959784e-05, + "loss": 0.0065, + "step": 1637 + }, + { + "epoch": 4.33, + "learning_rate": 2.8713136729222522e-05, + "loss": 0.1567, + "step": 1638 + }, + { + "epoch": 4.34, + "learning_rate": 2.8699731903485254e-05, + "loss": 0.1775, + "step": 1639 + }, + { + "epoch": 4.34, + "learning_rate": 2.8686327077747993e-05, + "loss": 0.0116, + "step": 1640 + }, + { + "epoch": 4.34, + "learning_rate": 2.8672922252010724e-05, + "loss": 0.0114, + "step": 1641 + }, + { + "epoch": 4.34, + "learning_rate": 2.8659517426273456e-05, + "loss": 0.0264, + "step": 1642 + }, + { + "epoch": 4.35, + "learning_rate": 2.8646112600536195e-05, + "loss": 0.0172, + "step": 1643 + }, + { + "epoch": 4.35, + "learning_rate": 2.8632707774798926e-05, + "loss": 0.187, + "step": 1644 + }, + { + "epoch": 4.35, + "learning_rate": 2.8619302949061665e-05, + "loss": 0.009, + "step": 1645 + }, + { + "epoch": 4.35, + "learning_rate": 2.8605898123324397e-05, + "loss": 0.014, + "step": 1646 + }, + { + "epoch": 4.36, + "learning_rate": 2.8592493297587132e-05, + "loss": 0.1643, + "step": 1647 + }, + { + "epoch": 4.36, + "learning_rate": 2.8579088471849867e-05, + "loss": 0.2763, + "step": 1648 + }, + { + "epoch": 4.36, + "learning_rate": 2.8565683646112602e-05, + "loss": 0.0641, + "step": 1649 + }, + { + "epoch": 4.37, + "learning_rate": 2.8552278820375337e-05, + "loss": 0.6128, + "step": 1650 + }, + { + "epoch": 4.37, + "learning_rate": 2.8538873994638072e-05, + "loss": 0.0229, + "step": 1651 + }, + { + "epoch": 4.37, + "learning_rate": 2.8525469168900804e-05, + "loss": 0.0344, + "step": 1652 + }, + { + "epoch": 4.37, + "learning_rate": 2.8512064343163543e-05, + "loss": 0.018, + "step": 1653 + }, + { + "epoch": 4.38, + "learning_rate": 2.8498659517426274e-05, + "loss": 0.191, + "step": 1654 + }, + { + "epoch": 4.38, + "learning_rate": 2.8485254691689013e-05, + "loss": 0.0397, + "step": 1655 + }, + { + "epoch": 4.38, + "learning_rate": 2.8471849865951745e-05, + "loss": 0.0029, + "step": 1656 + }, + { + "epoch": 4.38, + "learning_rate": 2.8458445040214476e-05, + "loss": 0.0034, + "step": 1657 + }, + { + "epoch": 4.39, + "learning_rate": 2.8445040214477215e-05, + "loss": 0.0031, + "step": 1658 + }, + { + "epoch": 4.39, + "learning_rate": 2.8431635388739947e-05, + "loss": 0.4272, + "step": 1659 + }, + { + "epoch": 4.39, + "learning_rate": 2.8418230563002685e-05, + "loss": 0.0042, + "step": 1660 + }, + { + "epoch": 4.39, + "learning_rate": 2.8404825737265417e-05, + "loss": 0.0224, + "step": 1661 + }, + { + "epoch": 4.4, + "learning_rate": 2.839142091152815e-05, + "loss": 0.1021, + "step": 1662 + }, + { + "epoch": 4.4, + "learning_rate": 2.8378016085790887e-05, + "loss": 0.0076, + "step": 1663 + }, + { + "epoch": 4.4, + "learning_rate": 2.836461126005362e-05, + "loss": 0.084, + "step": 1664 + }, + { + "epoch": 4.4, + "learning_rate": 2.8351206434316357e-05, + "loss": 0.0321, + "step": 1665 + }, + { + "epoch": 4.41, + "learning_rate": 2.833780160857909e-05, + "loss": 0.1369, + "step": 1666 + }, + { + "epoch": 4.41, + "learning_rate": 2.832439678284182e-05, + "loss": 0.018, + "step": 1667 + }, + { + "epoch": 4.41, + "learning_rate": 2.831099195710456e-05, + "loss": 0.1886, + "step": 1668 + }, + { + "epoch": 4.42, + "learning_rate": 2.829758713136729e-05, + "loss": 0.0016, + "step": 1669 + }, + { + "epoch": 4.42, + "learning_rate": 2.828418230563003e-05, + "loss": 0.0031, + "step": 1670 + }, + { + "epoch": 4.42, + "learning_rate": 2.827077747989276e-05, + "loss": 0.0043, + "step": 1671 + }, + { + "epoch": 4.42, + "learning_rate": 2.8257372654155497e-05, + "loss": 0.1202, + "step": 1672 + }, + { + "epoch": 4.43, + "learning_rate": 2.8243967828418232e-05, + "loss": 0.1409, + "step": 1673 + }, + { + "epoch": 4.43, + "learning_rate": 2.8230563002680967e-05, + "loss": 0.0821, + "step": 1674 + }, + { + "epoch": 4.43, + "learning_rate": 2.8217158176943702e-05, + "loss": 0.0468, + "step": 1675 + }, + { + "epoch": 4.43, + "learning_rate": 2.8203753351206437e-05, + "loss": 0.0559, + "step": 1676 + }, + { + "epoch": 4.44, + "learning_rate": 2.819034852546917e-05, + "loss": 0.0192, + "step": 1677 + }, + { + "epoch": 4.44, + "learning_rate": 2.8176943699731907e-05, + "loss": 0.0024, + "step": 1678 + }, + { + "epoch": 4.44, + "learning_rate": 2.816353887399464e-05, + "loss": 0.0021, + "step": 1679 + }, + { + "epoch": 4.44, + "learning_rate": 2.8150134048257378e-05, + "loss": 0.0139, + "step": 1680 + }, + { + "epoch": 4.45, + "learning_rate": 2.813672922252011e-05, + "loss": 0.0042, + "step": 1681 + }, + { + "epoch": 4.45, + "learning_rate": 2.812332439678284e-05, + "loss": 0.1666, + "step": 1682 + }, + { + "epoch": 4.45, + "learning_rate": 2.810991957104558e-05, + "loss": 0.5925, + "step": 1683 + }, + { + "epoch": 4.46, + "learning_rate": 2.809651474530831e-05, + "loss": 0.1689, + "step": 1684 + }, + { + "epoch": 4.46, + "learning_rate": 2.808310991957105e-05, + "loss": 0.0053, + "step": 1685 + }, + { + "epoch": 4.46, + "learning_rate": 2.806970509383378e-05, + "loss": 0.0019, + "step": 1686 + }, + { + "epoch": 4.46, + "learning_rate": 2.8056300268096513e-05, + "loss": 0.0632, + "step": 1687 + }, + { + "epoch": 4.47, + "learning_rate": 2.8042895442359252e-05, + "loss": 0.0115, + "step": 1688 + }, + { + "epoch": 4.47, + "learning_rate": 2.8029490616621984e-05, + "loss": 0.002, + "step": 1689 + }, + { + "epoch": 4.47, + "learning_rate": 2.8016085790884722e-05, + "loss": 0.0021, + "step": 1690 + }, + { + "epoch": 4.47, + "learning_rate": 2.8002680965147454e-05, + "loss": 0.0079, + "step": 1691 + }, + { + "epoch": 4.48, + "learning_rate": 2.7989276139410186e-05, + "loss": 0.0016, + "step": 1692 + }, + { + "epoch": 4.48, + "learning_rate": 2.7975871313672924e-05, + "loss": 0.1824, + "step": 1693 + }, + { + "epoch": 4.48, + "learning_rate": 2.7962466487935656e-05, + "loss": 0.1025, + "step": 1694 + }, + { + "epoch": 4.48, + "learning_rate": 2.7949061662198394e-05, + "loss": 0.4274, + "step": 1695 + }, + { + "epoch": 4.49, + "learning_rate": 2.7935656836461126e-05, + "loss": 0.0834, + "step": 1696 + }, + { + "epoch": 4.49, + "learning_rate": 2.7922252010723858e-05, + "loss": 0.6412, + "step": 1697 + }, + { + "epoch": 4.49, + "learning_rate": 2.7908847184986596e-05, + "loss": 0.3051, + "step": 1698 + }, + { + "epoch": 4.49, + "learning_rate": 2.7895442359249328e-05, + "loss": 0.0909, + "step": 1699 + }, + { + "epoch": 4.5, + "learning_rate": 2.7882037533512067e-05, + "loss": 0.2655, + "step": 1700 + }, + { + "epoch": 4.5, + "learning_rate": 2.78686327077748e-05, + "loss": 0.305, + "step": 1701 + }, + { + "epoch": 4.5, + "learning_rate": 2.7855227882037534e-05, + "loss": 0.2733, + "step": 1702 + }, + { + "epoch": 4.51, + "learning_rate": 2.784182305630027e-05, + "loss": 0.0021, + "step": 1703 + }, + { + "epoch": 4.51, + "learning_rate": 2.7828418230563004e-05, + "loss": 0.0072, + "step": 1704 + }, + { + "epoch": 4.51, + "learning_rate": 2.781501340482574e-05, + "loss": 0.0027, + "step": 1705 + }, + { + "epoch": 4.51, + "learning_rate": 2.7801608579088474e-05, + "loss": 0.184, + "step": 1706 + }, + { + "epoch": 4.52, + "learning_rate": 2.7788203753351206e-05, + "loss": 0.0143, + "step": 1707 + }, + { + "epoch": 4.52, + "learning_rate": 2.7774798927613944e-05, + "loss": 0.0297, + "step": 1708 + }, + { + "epoch": 4.52, + "learning_rate": 2.7761394101876676e-05, + "loss": 0.0739, + "step": 1709 + }, + { + "epoch": 4.52, + "learning_rate": 2.7747989276139415e-05, + "loss": 0.0188, + "step": 1710 + }, + { + "epoch": 4.53, + "learning_rate": 2.7734584450402146e-05, + "loss": 0.2487, + "step": 1711 + }, + { + "epoch": 4.53, + "learning_rate": 2.7721179624664878e-05, + "loss": 0.0222, + "step": 1712 + }, + { + "epoch": 4.53, + "learning_rate": 2.7707774798927617e-05, + "loss": 0.0041, + "step": 1713 + }, + { + "epoch": 4.53, + "learning_rate": 2.769436997319035e-05, + "loss": 0.0164, + "step": 1714 + }, + { + "epoch": 4.54, + "learning_rate": 2.7680965147453087e-05, + "loss": 0.0985, + "step": 1715 + }, + { + "epoch": 4.54, + "learning_rate": 2.766756032171582e-05, + "loss": 0.0067, + "step": 1716 + }, + { + "epoch": 4.54, + "learning_rate": 2.765415549597855e-05, + "loss": 0.3304, + "step": 1717 + }, + { + "epoch": 4.54, + "learning_rate": 2.764075067024129e-05, + "loss": 0.006, + "step": 1718 + }, + { + "epoch": 4.55, + "learning_rate": 2.762734584450402e-05, + "loss": 0.0142, + "step": 1719 + }, + { + "epoch": 4.55, + "learning_rate": 2.761394101876676e-05, + "loss": 0.2205, + "step": 1720 + }, + { + "epoch": 4.55, + "learning_rate": 2.760053619302949e-05, + "loss": 0.298, + "step": 1721 + }, + { + "epoch": 4.56, + "learning_rate": 2.7587131367292223e-05, + "loss": 0.0041, + "step": 1722 + }, + { + "epoch": 4.56, + "learning_rate": 2.757372654155496e-05, + "loss": 0.0018, + "step": 1723 + }, + { + "epoch": 4.56, + "learning_rate": 2.7560321715817693e-05, + "loss": 0.0185, + "step": 1724 + }, + { + "epoch": 4.56, + "learning_rate": 2.754691689008043e-05, + "loss": 0.0042, + "step": 1725 + }, + { + "epoch": 4.57, + "learning_rate": 2.7533512064343163e-05, + "loss": 0.036, + "step": 1726 + }, + { + "epoch": 4.57, + "learning_rate": 2.7520107238605898e-05, + "loss": 0.2593, + "step": 1727 + }, + { + "epoch": 4.57, + "learning_rate": 2.7506702412868633e-05, + "loss": 0.0062, + "step": 1728 + }, + { + "epoch": 4.57, + "learning_rate": 2.749329758713137e-05, + "loss": 0.1759, + "step": 1729 + }, + { + "epoch": 4.58, + "learning_rate": 2.7479892761394104e-05, + "loss": 0.0202, + "step": 1730 + }, + { + "epoch": 4.58, + "learning_rate": 2.746648793565684e-05, + "loss": 0.2156, + "step": 1731 + }, + { + "epoch": 4.58, + "learning_rate": 2.7453083109919574e-05, + "loss": 0.4112, + "step": 1732 + }, + { + "epoch": 4.58, + "learning_rate": 2.743967828418231e-05, + "loss": 0.0037, + "step": 1733 + }, + { + "epoch": 4.59, + "learning_rate": 2.742627345844504e-05, + "loss": 0.0186, + "step": 1734 + }, + { + "epoch": 4.59, + "learning_rate": 2.741286863270778e-05, + "loss": 0.0117, + "step": 1735 + }, + { + "epoch": 4.59, + "learning_rate": 2.739946380697051e-05, + "loss": 0.0039, + "step": 1736 + }, + { + "epoch": 4.6, + "learning_rate": 2.738605898123325e-05, + "loss": 0.1185, + "step": 1737 + }, + { + "epoch": 4.6, + "learning_rate": 2.737265415549598e-05, + "loss": 0.0276, + "step": 1738 + }, + { + "epoch": 4.6, + "learning_rate": 2.7359249329758713e-05, + "loss": 0.0041, + "step": 1739 + }, + { + "epoch": 4.6, + "learning_rate": 2.734584450402145e-05, + "loss": 0.0133, + "step": 1740 + }, + { + "epoch": 4.61, + "learning_rate": 2.7332439678284183e-05, + "loss": 0.1042, + "step": 1741 + }, + { + "epoch": 4.61, + "learning_rate": 2.7319034852546922e-05, + "loss": 0.0023, + "step": 1742 + }, + { + "epoch": 4.61, + "learning_rate": 2.7305630026809654e-05, + "loss": 0.1586, + "step": 1743 + }, + { + "epoch": 4.61, + "learning_rate": 2.7292225201072385e-05, + "loss": 0.0258, + "step": 1744 + }, + { + "epoch": 4.62, + "learning_rate": 2.7278820375335124e-05, + "loss": 0.1119, + "step": 1745 + }, + { + "epoch": 4.62, + "learning_rate": 2.7265415549597856e-05, + "loss": 0.1115, + "step": 1746 + }, + { + "epoch": 4.62, + "learning_rate": 2.7252010723860594e-05, + "loss": 0.4607, + "step": 1747 + }, + { + "epoch": 4.62, + "learning_rate": 2.7238605898123326e-05, + "loss": 0.0296, + "step": 1748 + }, + { + "epoch": 4.63, + "learning_rate": 2.7225201072386058e-05, + "loss": 0.0277, + "step": 1749 + }, + { + "epoch": 4.63, + "learning_rate": 2.7211796246648796e-05, + "loss": 0.0777, + "step": 1750 + }, + { + "epoch": 4.63, + "learning_rate": 2.7198391420911528e-05, + "loss": 0.0031, + "step": 1751 + }, + { + "epoch": 4.63, + "learning_rate": 2.7184986595174266e-05, + "loss": 0.2238, + "step": 1752 + }, + { + "epoch": 4.64, + "learning_rate": 2.7171581769436998e-05, + "loss": 0.0409, + "step": 1753 + }, + { + "epoch": 4.64, + "learning_rate": 2.715817694369973e-05, + "loss": 0.0032, + "step": 1754 + }, + { + "epoch": 4.64, + "learning_rate": 2.714477211796247e-05, + "loss": 0.0113, + "step": 1755 + }, + { + "epoch": 4.65, + "learning_rate": 2.71313672922252e-05, + "loss": 0.0204, + "step": 1756 + }, + { + "epoch": 4.65, + "learning_rate": 2.711796246648794e-05, + "loss": 0.0022, + "step": 1757 + }, + { + "epoch": 4.65, + "learning_rate": 2.710455764075067e-05, + "loss": 0.0018, + "step": 1758 + }, + { + "epoch": 4.65, + "learning_rate": 2.7091152815013406e-05, + "loss": 0.263, + "step": 1759 + }, + { + "epoch": 4.66, + "learning_rate": 2.707774798927614e-05, + "loss": 0.0109, + "step": 1760 + }, + { + "epoch": 4.66, + "learning_rate": 2.7064343163538876e-05, + "loss": 0.0653, + "step": 1761 + }, + { + "epoch": 4.66, + "learning_rate": 2.705093833780161e-05, + "loss": 0.0116, + "step": 1762 + }, + { + "epoch": 4.66, + "learning_rate": 2.7037533512064346e-05, + "loss": 0.0063, + "step": 1763 + }, + { + "epoch": 4.67, + "learning_rate": 2.7024128686327078e-05, + "loss": 0.0034, + "step": 1764 + }, + { + "epoch": 4.67, + "learning_rate": 2.7010723860589816e-05, + "loss": 0.0395, + "step": 1765 + }, + { + "epoch": 4.67, + "learning_rate": 2.6997319034852548e-05, + "loss": 0.0014, + "step": 1766 + }, + { + "epoch": 4.67, + "learning_rate": 2.6983914209115287e-05, + "loss": 0.0057, + "step": 1767 + }, + { + "epoch": 4.68, + "learning_rate": 2.697050938337802e-05, + "loss": 0.0018, + "step": 1768 + }, + { + "epoch": 4.68, + "learning_rate": 2.695710455764075e-05, + "loss": 0.012, + "step": 1769 + }, + { + "epoch": 4.68, + "learning_rate": 2.694369973190349e-05, + "loss": 0.0017, + "step": 1770 + }, + { + "epoch": 4.69, + "learning_rate": 2.693029490616622e-05, + "loss": 0.0654, + "step": 1771 + }, + { + "epoch": 4.69, + "learning_rate": 2.691689008042896e-05, + "loss": 0.8002, + "step": 1772 + }, + { + "epoch": 4.69, + "learning_rate": 2.690348525469169e-05, + "loss": 0.0035, + "step": 1773 + }, + { + "epoch": 4.69, + "learning_rate": 2.6890080428954422e-05, + "loss": 0.0051, + "step": 1774 + }, + { + "epoch": 4.7, + "learning_rate": 2.687667560321716e-05, + "loss": 0.0031, + "step": 1775 + }, + { + "epoch": 4.7, + "learning_rate": 2.6863270777479893e-05, + "loss": 0.0142, + "step": 1776 + }, + { + "epoch": 4.7, + "learning_rate": 2.684986595174263e-05, + "loss": 0.0009, + "step": 1777 + }, + { + "epoch": 4.7, + "learning_rate": 2.6836461126005363e-05, + "loss": 0.0015, + "step": 1778 + }, + { + "epoch": 4.71, + "learning_rate": 2.6823056300268095e-05, + "loss": 0.3481, + "step": 1779 + }, + { + "epoch": 4.71, + "learning_rate": 2.6809651474530833e-05, + "loss": 0.3095, + "step": 1780 + }, + { + "epoch": 4.71, + "learning_rate": 2.6796246648793565e-05, + "loss": 0.2567, + "step": 1781 + }, + { + "epoch": 4.71, + "learning_rate": 2.6782841823056303e-05, + "loss": 0.0037, + "step": 1782 + }, + { + "epoch": 4.72, + "learning_rate": 2.6769436997319035e-05, + "loss": 0.001, + "step": 1783 + }, + { + "epoch": 4.72, + "learning_rate": 2.675603217158177e-05, + "loss": 0.0065, + "step": 1784 + }, + { + "epoch": 4.72, + "learning_rate": 2.6742627345844505e-05, + "loss": 0.0029, + "step": 1785 + }, + { + "epoch": 4.72, + "learning_rate": 2.672922252010724e-05, + "loss": 0.6096, + "step": 1786 + }, + { + "epoch": 4.73, + "learning_rate": 2.6715817694369976e-05, + "loss": 0.0127, + "step": 1787 + }, + { + "epoch": 4.73, + "learning_rate": 2.670241286863271e-05, + "loss": 0.0031, + "step": 1788 + }, + { + "epoch": 4.73, + "learning_rate": 2.6689008042895443e-05, + "loss": 0.2463, + "step": 1789 + }, + { + "epoch": 4.74, + "learning_rate": 2.667560321715818e-05, + "loss": 0.1022, + "step": 1790 + }, + { + "epoch": 4.74, + "learning_rate": 2.6662198391420913e-05, + "loss": 0.002, + "step": 1791 + }, + { + "epoch": 4.74, + "learning_rate": 2.664879356568365e-05, + "loss": 0.1576, + "step": 1792 + }, + { + "epoch": 4.74, + "learning_rate": 2.6635388739946383e-05, + "loss": 0.1099, + "step": 1793 + }, + { + "epoch": 4.75, + "learning_rate": 2.6621983914209115e-05, + "loss": 0.1482, + "step": 1794 + }, + { + "epoch": 4.75, + "learning_rate": 2.6608579088471853e-05, + "loss": 0.0007, + "step": 1795 + }, + { + "epoch": 4.75, + "learning_rate": 2.6595174262734585e-05, + "loss": 0.0009, + "step": 1796 + }, + { + "epoch": 4.75, + "learning_rate": 2.6581769436997324e-05, + "loss": 0.005, + "step": 1797 + }, + { + "epoch": 4.76, + "learning_rate": 2.6568364611260055e-05, + "loss": 0.1808, + "step": 1798 + }, + { + "epoch": 4.76, + "learning_rate": 2.6554959785522787e-05, + "loss": 0.0351, + "step": 1799 + }, + { + "epoch": 4.76, + "learning_rate": 2.6541554959785526e-05, + "loss": 0.2555, + "step": 1800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6528150134048257e-05, + "loss": 0.2236, + "step": 1801 + }, + { + "epoch": 4.77, + "learning_rate": 2.6514745308310996e-05, + "loss": 0.3208, + "step": 1802 + }, + { + "epoch": 4.77, + "learning_rate": 2.6501340482573728e-05, + "loss": 0.0202, + "step": 1803 + }, + { + "epoch": 4.77, + "learning_rate": 2.648793565683646e-05, + "loss": 0.0033, + "step": 1804 + }, + { + "epoch": 4.78, + "learning_rate": 2.6474530831099198e-05, + "loss": 0.001, + "step": 1805 + }, + { + "epoch": 4.78, + "learning_rate": 2.646112600536193e-05, + "loss": 0.0019, + "step": 1806 + }, + { + "epoch": 4.78, + "learning_rate": 2.6447721179624668e-05, + "loss": 0.0027, + "step": 1807 + }, + { + "epoch": 4.78, + "learning_rate": 2.64343163538874e-05, + "loss": 0.0051, + "step": 1808 + }, + { + "epoch": 4.79, + "learning_rate": 2.642091152815013e-05, + "loss": 0.1994, + "step": 1809 + }, + { + "epoch": 4.79, + "learning_rate": 2.640750670241287e-05, + "loss": 0.0372, + "step": 1810 + }, + { + "epoch": 4.79, + "learning_rate": 2.6394101876675602e-05, + "loss": 0.0678, + "step": 1811 + }, + { + "epoch": 4.79, + "learning_rate": 2.638069705093834e-05, + "loss": 0.0252, + "step": 1812 + }, + { + "epoch": 4.8, + "learning_rate": 2.6367292225201072e-05, + "loss": 0.0065, + "step": 1813 + }, + { + "epoch": 4.8, + "learning_rate": 2.6353887399463807e-05, + "loss": 0.0045, + "step": 1814 + }, + { + "epoch": 4.8, + "learning_rate": 2.6340482573726542e-05, + "loss": 0.0037, + "step": 1815 + }, + { + "epoch": 4.8, + "learning_rate": 2.6327077747989277e-05, + "loss": 0.0251, + "step": 1816 + }, + { + "epoch": 4.81, + "learning_rate": 2.6313672922252013e-05, + "loss": 0.4196, + "step": 1817 + }, + { + "epoch": 4.81, + "learning_rate": 2.6300268096514748e-05, + "loss": 0.0071, + "step": 1818 + }, + { + "epoch": 4.81, + "learning_rate": 2.628686327077748e-05, + "loss": 0.0787, + "step": 1819 + }, + { + "epoch": 4.81, + "learning_rate": 2.6273458445040218e-05, + "loss": 0.0145, + "step": 1820 + }, + { + "epoch": 4.82, + "learning_rate": 2.626005361930295e-05, + "loss": 0.009, + "step": 1821 + }, + { + "epoch": 4.82, + "learning_rate": 2.6246648793565688e-05, + "loss": 0.0027, + "step": 1822 + }, + { + "epoch": 4.82, + "learning_rate": 2.623324396782842e-05, + "loss": 0.0017, + "step": 1823 + }, + { + "epoch": 4.83, + "learning_rate": 2.6219839142091152e-05, + "loss": 0.4824, + "step": 1824 + }, + { + "epoch": 4.83, + "learning_rate": 2.620643431635389e-05, + "loss": 0.0022, + "step": 1825 + }, + { + "epoch": 4.83, + "learning_rate": 2.6193029490616622e-05, + "loss": 0.3223, + "step": 1826 + }, + { + "epoch": 4.83, + "learning_rate": 2.617962466487936e-05, + "loss": 0.2195, + "step": 1827 + }, + { + "epoch": 4.84, + "learning_rate": 2.6166219839142092e-05, + "loss": 0.0013, + "step": 1828 + }, + { + "epoch": 4.84, + "learning_rate": 2.6152815013404824e-05, + "loss": 0.0343, + "step": 1829 + }, + { + "epoch": 4.84, + "learning_rate": 2.6139410187667563e-05, + "loss": 0.0022, + "step": 1830 + }, + { + "epoch": 4.84, + "learning_rate": 2.6126005361930294e-05, + "loss": 0.0022, + "step": 1831 + }, + { + "epoch": 4.85, + "learning_rate": 2.6112600536193033e-05, + "loss": 0.4116, + "step": 1832 + }, + { + "epoch": 4.85, + "learning_rate": 2.6099195710455765e-05, + "loss": 0.0048, + "step": 1833 + }, + { + "epoch": 4.85, + "learning_rate": 2.6085790884718496e-05, + "loss": 0.5819, + "step": 1834 + }, + { + "epoch": 4.85, + "learning_rate": 2.6072386058981235e-05, + "loss": 0.1985, + "step": 1835 + }, + { + "epoch": 4.86, + "learning_rate": 2.6058981233243967e-05, + "loss": 0.0989, + "step": 1836 + }, + { + "epoch": 4.86, + "learning_rate": 2.6045576407506705e-05, + "loss": 0.341, + "step": 1837 + }, + { + "epoch": 4.86, + "learning_rate": 2.6032171581769437e-05, + "loss": 0.0044, + "step": 1838 + }, + { + "epoch": 4.87, + "learning_rate": 2.601876675603217e-05, + "loss": 0.004, + "step": 1839 + }, + { + "epoch": 4.87, + "learning_rate": 2.6005361930294907e-05, + "loss": 0.2858, + "step": 1840 + }, + { + "epoch": 4.87, + "learning_rate": 2.599195710455764e-05, + "loss": 0.0009, + "step": 1841 + }, + { + "epoch": 4.87, + "learning_rate": 2.5978552278820377e-05, + "loss": 0.0042, + "step": 1842 + }, + { + "epoch": 4.88, + "learning_rate": 2.596514745308311e-05, + "loss": 0.0045, + "step": 1843 + }, + { + "epoch": 4.88, + "learning_rate": 2.5951742627345844e-05, + "loss": 0.0144, + "step": 1844 + }, + { + "epoch": 4.88, + "learning_rate": 2.593833780160858e-05, + "loss": 0.0084, + "step": 1845 + }, + { + "epoch": 4.88, + "learning_rate": 2.5924932975871314e-05, + "loss": 0.4276, + "step": 1846 + }, + { + "epoch": 4.89, + "learning_rate": 2.591152815013405e-05, + "loss": 0.0122, + "step": 1847 + }, + { + "epoch": 4.89, + "learning_rate": 2.5898123324396785e-05, + "loss": 0.0776, + "step": 1848 + }, + { + "epoch": 4.89, + "learning_rate": 2.5884718498659516e-05, + "loss": 0.0117, + "step": 1849 + }, + { + "epoch": 4.89, + "learning_rate": 2.5871313672922255e-05, + "loss": 0.2809, + "step": 1850 + }, + { + "epoch": 4.9, + "learning_rate": 2.5857908847184987e-05, + "loss": 0.0413, + "step": 1851 + }, + { + "epoch": 4.9, + "learning_rate": 2.5844504021447725e-05, + "loss": 0.0187, + "step": 1852 + }, + { + "epoch": 4.9, + "learning_rate": 2.5831099195710457e-05, + "loss": 0.452, + "step": 1853 + }, + { + "epoch": 4.9, + "learning_rate": 2.5817694369973195e-05, + "loss": 0.0206, + "step": 1854 + }, + { + "epoch": 4.91, + "learning_rate": 2.5804289544235927e-05, + "loss": 0.1639, + "step": 1855 + }, + { + "epoch": 4.91, + "learning_rate": 2.579088471849866e-05, + "loss": 0.1865, + "step": 1856 + }, + { + "epoch": 4.91, + "learning_rate": 2.5777479892761398e-05, + "loss": 0.0022, + "step": 1857 + }, + { + "epoch": 4.92, + "learning_rate": 2.576407506702413e-05, + "loss": 0.1167, + "step": 1858 + }, + { + "epoch": 4.92, + "learning_rate": 2.5750670241286868e-05, + "loss": 0.4013, + "step": 1859 + }, + { + "epoch": 4.92, + "learning_rate": 2.57372654155496e-05, + "loss": 0.2355, + "step": 1860 + }, + { + "epoch": 4.92, + "learning_rate": 2.572386058981233e-05, + "loss": 0.0076, + "step": 1861 + }, + { + "epoch": 4.93, + "learning_rate": 2.571045576407507e-05, + "loss": 0.1612, + "step": 1862 + }, + { + "epoch": 4.93, + "learning_rate": 2.56970509383378e-05, + "loss": 0.0047, + "step": 1863 + }, + { + "epoch": 4.93, + "learning_rate": 2.568364611260054e-05, + "loss": 0.1511, + "step": 1864 + }, + { + "epoch": 4.93, + "learning_rate": 2.5670241286863272e-05, + "loss": 0.011, + "step": 1865 + }, + { + "epoch": 4.94, + "learning_rate": 2.5656836461126004e-05, + "loss": 0.1761, + "step": 1866 + }, + { + "epoch": 4.94, + "learning_rate": 2.5643431635388742e-05, + "loss": 0.004, + "step": 1867 + }, + { + "epoch": 4.94, + "learning_rate": 2.5630026809651474e-05, + "loss": 0.0036, + "step": 1868 + }, + { + "epoch": 4.94, + "learning_rate": 2.5616621983914212e-05, + "loss": 0.4345, + "step": 1869 + }, + { + "epoch": 4.95, + "learning_rate": 2.5603217158176944e-05, + "loss": 0.0034, + "step": 1870 + }, + { + "epoch": 4.95, + "learning_rate": 2.558981233243968e-05, + "loss": 0.1269, + "step": 1871 + }, + { + "epoch": 4.95, + "learning_rate": 2.5576407506702414e-05, + "loss": 0.183, + "step": 1872 + }, + { + "epoch": 4.96, + "learning_rate": 2.556300268096515e-05, + "loss": 0.008, + "step": 1873 + }, + { + "epoch": 4.96, + "learning_rate": 2.5549597855227885e-05, + "loss": 0.0035, + "step": 1874 + }, + { + "epoch": 4.96, + "learning_rate": 2.553619302949062e-05, + "loss": 0.0133, + "step": 1875 + }, + { + "epoch": 4.96, + "learning_rate": 2.552278820375335e-05, + "loss": 0.2156, + "step": 1876 + }, + { + "epoch": 4.97, + "learning_rate": 2.550938337801609e-05, + "loss": 0.0043, + "step": 1877 + }, + { + "epoch": 4.97, + "learning_rate": 2.549597855227882e-05, + "loss": 0.2614, + "step": 1878 + }, + { + "epoch": 4.97, + "learning_rate": 2.548257372654156e-05, + "loss": 0.0208, + "step": 1879 + }, + { + "epoch": 4.97, + "learning_rate": 2.5469168900804292e-05, + "loss": 0.0228, + "step": 1880 + }, + { + "epoch": 4.98, + "learning_rate": 2.5455764075067024e-05, + "loss": 0.0105, + "step": 1881 + }, + { + "epoch": 4.98, + "learning_rate": 2.5442359249329762e-05, + "loss": 0.0108, + "step": 1882 + }, + { + "epoch": 4.98, + "learning_rate": 2.5428954423592494e-05, + "loss": 0.3828, + "step": 1883 + }, + { + "epoch": 4.98, + "learning_rate": 2.5415549597855232e-05, + "loss": 0.0093, + "step": 1884 + }, + { + "epoch": 4.99, + "learning_rate": 2.5402144772117964e-05, + "loss": 0.0231, + "step": 1885 + }, + { + "epoch": 4.99, + "learning_rate": 2.5388739946380696e-05, + "loss": 0.0082, + "step": 1886 + }, + { + "epoch": 4.99, + "learning_rate": 2.5375335120643434e-05, + "loss": 0.1796, + "step": 1887 + }, + { + "epoch": 4.99, + "learning_rate": 2.5361930294906166e-05, + "loss": 0.0753, + "step": 1888 + }, + { + "epoch": 5.0, + "learning_rate": 2.5348525469168905e-05, + "loss": 0.0142, + "step": 1889 + }, + { + "epoch": 5.0, + "learning_rate": 2.5335120643431636e-05, + "loss": 0.0047, + "step": 1890 + }, + { + "epoch": 5.0, + "eval_f1": 0.7775974025974025, + "eval_loss": 0.953689694404602, + "eval_runtime": 1.8696, + "eval_samples_per_second": 809.285, + "eval_steps_per_second": 50.814, + "step": 1890 + }, + { + "epoch": 5.0, + "learning_rate": 2.5321715817694368e-05, + "loss": 0.0014, + "step": 1891 + }, + { + "epoch": 5.01, + "learning_rate": 2.5308310991957107e-05, + "loss": 0.0487, + "step": 1892 + }, + { + "epoch": 5.01, + "learning_rate": 2.529490616621984e-05, + "loss": 0.0037, + "step": 1893 + }, + { + "epoch": 5.01, + "learning_rate": 2.5281501340482577e-05, + "loss": 0.0512, + "step": 1894 + }, + { + "epoch": 5.01, + "learning_rate": 2.526809651474531e-05, + "loss": 0.134, + "step": 1895 + }, + { + "epoch": 5.02, + "learning_rate": 2.525469168900804e-05, + "loss": 0.3762, + "step": 1896 + }, + { + "epoch": 5.02, + "learning_rate": 2.524128686327078e-05, + "loss": 0.0011, + "step": 1897 + }, + { + "epoch": 5.02, + "learning_rate": 2.522788203753351e-05, + "loss": 0.0023, + "step": 1898 + }, + { + "epoch": 5.02, + "learning_rate": 2.521447721179625e-05, + "loss": 0.0526, + "step": 1899 + }, + { + "epoch": 5.03, + "learning_rate": 2.520107238605898e-05, + "loss": 0.0553, + "step": 1900 + }, + { + "epoch": 5.03, + "learning_rate": 2.5187667560321716e-05, + "loss": 0.1773, + "step": 1901 + }, + { + "epoch": 5.03, + "learning_rate": 2.517426273458445e-05, + "loss": 0.451, + "step": 1902 + }, + { + "epoch": 5.03, + "learning_rate": 2.5160857908847186e-05, + "loss": 0.0217, + "step": 1903 + }, + { + "epoch": 5.04, + "learning_rate": 2.514745308310992e-05, + "loss": 0.0728, + "step": 1904 + }, + { + "epoch": 5.04, + "learning_rate": 2.5134048257372657e-05, + "loss": 0.0009, + "step": 1905 + }, + { + "epoch": 5.04, + "learning_rate": 2.512064343163539e-05, + "loss": 0.1018, + "step": 1906 + }, + { + "epoch": 5.04, + "learning_rate": 2.5107238605898127e-05, + "loss": 0.0012, + "step": 1907 + }, + { + "epoch": 5.05, + "learning_rate": 2.509383378016086e-05, + "loss": 0.004, + "step": 1908 + }, + { + "epoch": 5.05, + "learning_rate": 2.5080428954423597e-05, + "loss": 0.0012, + "step": 1909 + }, + { + "epoch": 5.05, + "learning_rate": 2.506702412868633e-05, + "loss": 0.0128, + "step": 1910 + }, + { + "epoch": 5.06, + "learning_rate": 2.505361930294906e-05, + "loss": 0.1116, + "step": 1911 + }, + { + "epoch": 5.06, + "learning_rate": 2.50402144772118e-05, + "loss": 0.0011, + "step": 1912 + }, + { + "epoch": 5.06, + "learning_rate": 2.502680965147453e-05, + "loss": 0.0011, + "step": 1913 + }, + { + "epoch": 5.06, + "learning_rate": 2.501340482573727e-05, + "loss": 0.0897, + "step": 1914 + }, + { + "epoch": 5.07, + "learning_rate": 2.5e-05, + "loss": 0.0014, + "step": 1915 + }, + { + "epoch": 5.07, + "learning_rate": 2.4986595174262736e-05, + "loss": 0.0918, + "step": 1916 + }, + { + "epoch": 5.07, + "learning_rate": 2.497319034852547e-05, + "loss": 0.0026, + "step": 1917 + }, + { + "epoch": 5.07, + "learning_rate": 2.4959785522788203e-05, + "loss": 0.0225, + "step": 1918 + }, + { + "epoch": 5.08, + "learning_rate": 2.494638069705094e-05, + "loss": 0.2655, + "step": 1919 + }, + { + "epoch": 5.08, + "learning_rate": 2.4932975871313673e-05, + "loss": 0.0029, + "step": 1920 + }, + { + "epoch": 5.08, + "learning_rate": 2.491957104557641e-05, + "loss": 0.0006, + "step": 1921 + }, + { + "epoch": 5.08, + "learning_rate": 2.4906166219839144e-05, + "loss": 0.0008, + "step": 1922 + }, + { + "epoch": 5.09, + "learning_rate": 2.4892761394101875e-05, + "loss": 0.0012, + "step": 1923 + }, + { + "epoch": 5.09, + "learning_rate": 2.487935656836461e-05, + "loss": 0.0013, + "step": 1924 + }, + { + "epoch": 5.09, + "learning_rate": 2.4865951742627346e-05, + "loss": 0.0524, + "step": 1925 + }, + { + "epoch": 5.1, + "learning_rate": 2.485254691689008e-05, + "loss": 0.0059, + "step": 1926 + }, + { + "epoch": 5.1, + "learning_rate": 2.4839142091152816e-05, + "loss": 0.0026, + "step": 1927 + }, + { + "epoch": 5.1, + "learning_rate": 2.482573726541555e-05, + "loss": 0.0015, + "step": 1928 + }, + { + "epoch": 5.1, + "learning_rate": 2.4812332439678286e-05, + "loss": 0.0073, + "step": 1929 + }, + { + "epoch": 5.11, + "learning_rate": 2.479892761394102e-05, + "loss": 0.0008, + "step": 1930 + }, + { + "epoch": 5.11, + "learning_rate": 2.4785522788203757e-05, + "loss": 0.1519, + "step": 1931 + }, + { + "epoch": 5.11, + "learning_rate": 2.477211796246649e-05, + "loss": 0.008, + "step": 1932 + }, + { + "epoch": 5.11, + "learning_rate": 2.4758713136729223e-05, + "loss": 0.0009, + "step": 1933 + }, + { + "epoch": 5.12, + "learning_rate": 2.474530831099196e-05, + "loss": 0.0299, + "step": 1934 + }, + { + "epoch": 5.12, + "learning_rate": 2.4731903485254694e-05, + "loss": 0.1637, + "step": 1935 + }, + { + "epoch": 5.12, + "learning_rate": 2.471849865951743e-05, + "loss": 0.0369, + "step": 1936 + }, + { + "epoch": 5.12, + "learning_rate": 2.4705093833780164e-05, + "loss": 0.0057, + "step": 1937 + }, + { + "epoch": 5.13, + "learning_rate": 2.4691689008042896e-05, + "loss": 0.0035, + "step": 1938 + }, + { + "epoch": 5.13, + "learning_rate": 2.467828418230563e-05, + "loss": 0.0011, + "step": 1939 + }, + { + "epoch": 5.13, + "learning_rate": 2.4664879356568366e-05, + "loss": 0.0018, + "step": 1940 + }, + { + "epoch": 5.13, + "learning_rate": 2.46514745308311e-05, + "loss": 0.0013, + "step": 1941 + }, + { + "epoch": 5.14, + "learning_rate": 2.4638069705093836e-05, + "loss": 0.0779, + "step": 1942 + }, + { + "epoch": 5.14, + "learning_rate": 2.4624664879356568e-05, + "loss": 0.1762, + "step": 1943 + }, + { + "epoch": 5.14, + "learning_rate": 2.4611260053619303e-05, + "loss": 0.0006, + "step": 1944 + }, + { + "epoch": 5.15, + "learning_rate": 2.4597855227882038e-05, + "loss": 0.0037, + "step": 1945 + }, + { + "epoch": 5.15, + "learning_rate": 2.4584450402144773e-05, + "loss": 0.0005, + "step": 1946 + }, + { + "epoch": 5.15, + "learning_rate": 2.457104557640751e-05, + "loss": 0.0397, + "step": 1947 + }, + { + "epoch": 5.15, + "learning_rate": 2.4557640750670244e-05, + "loss": 0.0082, + "step": 1948 + }, + { + "epoch": 5.16, + "learning_rate": 2.4544235924932975e-05, + "loss": 0.0008, + "step": 1949 + }, + { + "epoch": 5.16, + "learning_rate": 2.453083109919571e-05, + "loss": 0.0219, + "step": 1950 + }, + { + "epoch": 5.16, + "learning_rate": 2.4517426273458446e-05, + "loss": 0.3966, + "step": 1951 + }, + { + "epoch": 5.16, + "learning_rate": 2.450402144772118e-05, + "loss": 0.0011, + "step": 1952 + }, + { + "epoch": 5.17, + "learning_rate": 2.4490616621983916e-05, + "loss": 0.3447, + "step": 1953 + }, + { + "epoch": 5.17, + "learning_rate": 2.4477211796246648e-05, + "loss": 0.0006, + "step": 1954 + }, + { + "epoch": 5.17, + "learning_rate": 2.4463806970509383e-05, + "loss": 0.0011, + "step": 1955 + }, + { + "epoch": 5.17, + "learning_rate": 2.4450402144772118e-05, + "loss": 0.0013, + "step": 1956 + }, + { + "epoch": 5.18, + "learning_rate": 2.4436997319034853e-05, + "loss": 0.1495, + "step": 1957 + }, + { + "epoch": 5.18, + "learning_rate": 2.4423592493297588e-05, + "loss": 0.0005, + "step": 1958 + }, + { + "epoch": 5.18, + "learning_rate": 2.4410187667560323e-05, + "loss": 0.3345, + "step": 1959 + }, + { + "epoch": 5.19, + "learning_rate": 2.439678284182306e-05, + "loss": 0.0048, + "step": 1960 + }, + { + "epoch": 5.19, + "learning_rate": 2.4383378016085793e-05, + "loss": 0.001, + "step": 1961 + }, + { + "epoch": 5.19, + "learning_rate": 2.436997319034853e-05, + "loss": 0.0025, + "step": 1962 + }, + { + "epoch": 5.19, + "learning_rate": 2.4356568364611264e-05, + "loss": 0.3215, + "step": 1963 + }, + { + "epoch": 5.2, + "learning_rate": 2.4343163538873995e-05, + "loss": 0.0197, + "step": 1964 + }, + { + "epoch": 5.2, + "learning_rate": 2.432975871313673e-05, + "loss": 0.0018, + "step": 1965 + }, + { + "epoch": 5.2, + "learning_rate": 2.4316353887399466e-05, + "loss": 0.1012, + "step": 1966 + }, + { + "epoch": 5.2, + "learning_rate": 2.43029490616622e-05, + "loss": 0.0179, + "step": 1967 + }, + { + "epoch": 5.21, + "learning_rate": 2.4289544235924936e-05, + "loss": 0.0032, + "step": 1968 + }, + { + "epoch": 5.21, + "learning_rate": 2.4276139410187668e-05, + "loss": 0.0011, + "step": 1969 + }, + { + "epoch": 5.21, + "learning_rate": 2.4262734584450403e-05, + "loss": 0.4875, + "step": 1970 + }, + { + "epoch": 5.21, + "learning_rate": 2.4249329758713138e-05, + "loss": 0.2791, + "step": 1971 + }, + { + "epoch": 5.22, + "learning_rate": 2.4235924932975873e-05, + "loss": 0.0011, + "step": 1972 + }, + { + "epoch": 5.22, + "learning_rate": 2.4222520107238608e-05, + "loss": 0.0011, + "step": 1973 + }, + { + "epoch": 5.22, + "learning_rate": 2.420911528150134e-05, + "loss": 0.0976, + "step": 1974 + }, + { + "epoch": 5.22, + "learning_rate": 2.4195710455764075e-05, + "loss": 0.3669, + "step": 1975 + }, + { + "epoch": 5.23, + "learning_rate": 2.418230563002681e-05, + "loss": 0.0022, + "step": 1976 + }, + { + "epoch": 5.23, + "learning_rate": 2.4168900804289545e-05, + "loss": 0.0015, + "step": 1977 + }, + { + "epoch": 5.23, + "learning_rate": 2.415549597855228e-05, + "loss": 0.0014, + "step": 1978 + }, + { + "epoch": 5.24, + "learning_rate": 2.4142091152815012e-05, + "loss": 0.0433, + "step": 1979 + }, + { + "epoch": 5.24, + "learning_rate": 2.4128686327077747e-05, + "loss": 0.0019, + "step": 1980 + }, + { + "epoch": 5.24, + "learning_rate": 2.4115281501340483e-05, + "loss": 0.0007, + "step": 1981 + }, + { + "epoch": 5.24, + "learning_rate": 2.4101876675603218e-05, + "loss": 0.0136, + "step": 1982 + }, + { + "epoch": 5.25, + "learning_rate": 2.4088471849865953e-05, + "loss": 0.1744, + "step": 1983 + }, + { + "epoch": 5.25, + "learning_rate": 2.4075067024128688e-05, + "loss": 0.1557, + "step": 1984 + }, + { + "epoch": 5.25, + "learning_rate": 2.4061662198391423e-05, + "loss": 0.1192, + "step": 1985 + }, + { + "epoch": 5.25, + "learning_rate": 2.4048257372654158e-05, + "loss": 0.0406, + "step": 1986 + }, + { + "epoch": 5.26, + "learning_rate": 2.4034852546916893e-05, + "loss": 0.2243, + "step": 1987 + }, + { + "epoch": 5.26, + "learning_rate": 2.402144772117963e-05, + "loss": 0.0021, + "step": 1988 + }, + { + "epoch": 5.26, + "learning_rate": 2.400804289544236e-05, + "loss": 0.002, + "step": 1989 + }, + { + "epoch": 5.26, + "learning_rate": 2.3994638069705095e-05, + "loss": 0.077, + "step": 1990 + }, + { + "epoch": 5.27, + "learning_rate": 2.398123324396783e-05, + "loss": 0.0378, + "step": 1991 + }, + { + "epoch": 5.27, + "learning_rate": 2.3967828418230566e-05, + "loss": 0.012, + "step": 1992 + }, + { + "epoch": 5.27, + "learning_rate": 2.39544235924933e-05, + "loss": 0.1386, + "step": 1993 + }, + { + "epoch": 5.28, + "learning_rate": 2.3941018766756032e-05, + "loss": 0.002, + "step": 1994 + }, + { + "epoch": 5.28, + "learning_rate": 2.3927613941018768e-05, + "loss": 0.0008, + "step": 1995 + }, + { + "epoch": 5.28, + "learning_rate": 2.3914209115281503e-05, + "loss": 0.0021, + "step": 1996 + }, + { + "epoch": 5.28, + "learning_rate": 2.3900804289544238e-05, + "loss": 0.022, + "step": 1997 + }, + { + "epoch": 5.29, + "learning_rate": 2.3887399463806973e-05, + "loss": 0.0015, + "step": 1998 + }, + { + "epoch": 5.29, + "learning_rate": 2.3873994638069705e-05, + "loss": 0.1486, + "step": 1999 + }, + { + "epoch": 5.29, + "learning_rate": 2.386058981233244e-05, + "loss": 0.2586, + "step": 2000 + }, + { + "epoch": 5.29, + "learning_rate": 2.3847184986595175e-05, + "loss": 0.0088, + "step": 2001 + }, + { + "epoch": 5.3, + "learning_rate": 2.383378016085791e-05, + "loss": 0.0044, + "step": 2002 + }, + { + "epoch": 5.3, + "learning_rate": 2.3820375335120645e-05, + "loss": 0.0015, + "step": 2003 + }, + { + "epoch": 5.3, + "learning_rate": 2.3806970509383377e-05, + "loss": 0.0008, + "step": 2004 + }, + { + "epoch": 5.3, + "learning_rate": 2.3793565683646112e-05, + "loss": 0.212, + "step": 2005 + }, + { + "epoch": 5.31, + "learning_rate": 2.3780160857908847e-05, + "loss": 0.0005, + "step": 2006 + }, + { + "epoch": 5.31, + "learning_rate": 2.3766756032171582e-05, + "loss": 0.1511, + "step": 2007 + }, + { + "epoch": 5.31, + "learning_rate": 2.3753351206434318e-05, + "loss": 0.0023, + "step": 2008 + }, + { + "epoch": 5.31, + "learning_rate": 2.3739946380697053e-05, + "loss": 0.1544, + "step": 2009 + }, + { + "epoch": 5.32, + "learning_rate": 2.3726541554959784e-05, + "loss": 0.0306, + "step": 2010 + }, + { + "epoch": 5.32, + "learning_rate": 2.371313672922252e-05, + "loss": 0.1005, + "step": 2011 + }, + { + "epoch": 5.32, + "learning_rate": 2.3699731903485255e-05, + "loss": 0.0744, + "step": 2012 + }, + { + "epoch": 5.33, + "learning_rate": 2.368632707774799e-05, + "loss": 0.0622, + "step": 2013 + }, + { + "epoch": 5.33, + "learning_rate": 2.3672922252010725e-05, + "loss": 0.0192, + "step": 2014 + }, + { + "epoch": 5.33, + "learning_rate": 2.365951742627346e-05, + "loss": 0.016, + "step": 2015 + }, + { + "epoch": 5.33, + "learning_rate": 2.3646112600536195e-05, + "loss": 0.293, + "step": 2016 + }, + { + "epoch": 5.34, + "learning_rate": 2.363270777479893e-05, + "loss": 0.0114, + "step": 2017 + }, + { + "epoch": 5.34, + "learning_rate": 2.3619302949061665e-05, + "loss": 0.1254, + "step": 2018 + }, + { + "epoch": 5.34, + "learning_rate": 2.36058981233244e-05, + "loss": 0.0638, + "step": 2019 + }, + { + "epoch": 5.34, + "learning_rate": 2.3592493297587132e-05, + "loss": 0.0192, + "step": 2020 + }, + { + "epoch": 5.35, + "learning_rate": 2.3579088471849867e-05, + "loss": 0.0069, + "step": 2021 + }, + { + "epoch": 5.35, + "learning_rate": 2.3565683646112603e-05, + "loss": 0.0573, + "step": 2022 + }, + { + "epoch": 5.35, + "learning_rate": 2.3552278820375338e-05, + "loss": 0.0039, + "step": 2023 + }, + { + "epoch": 5.35, + "learning_rate": 2.3538873994638073e-05, + "loss": 0.0079, + "step": 2024 + }, + { + "epoch": 5.36, + "learning_rate": 2.3525469168900805e-05, + "loss": 0.0063, + "step": 2025 + }, + { + "epoch": 5.36, + "learning_rate": 2.351206434316354e-05, + "loss": 0.0032, + "step": 2026 + }, + { + "epoch": 5.36, + "learning_rate": 2.3498659517426275e-05, + "loss": 0.1018, + "step": 2027 + }, + { + "epoch": 5.37, + "learning_rate": 2.348525469168901e-05, + "loss": 0.0017, + "step": 2028 + }, + { + "epoch": 5.37, + "learning_rate": 2.3471849865951745e-05, + "loss": 0.0203, + "step": 2029 + }, + { + "epoch": 5.37, + "learning_rate": 2.3458445040214477e-05, + "loss": 0.0008, + "step": 2030 + }, + { + "epoch": 5.37, + "learning_rate": 2.3445040214477212e-05, + "loss": 0.3145, + "step": 2031 + }, + { + "epoch": 5.38, + "learning_rate": 2.3431635388739947e-05, + "loss": 0.2476, + "step": 2032 + }, + { + "epoch": 5.38, + "learning_rate": 2.3418230563002682e-05, + "loss": 0.0573, + "step": 2033 + }, + { + "epoch": 5.38, + "learning_rate": 2.3404825737265417e-05, + "loss": 0.0008, + "step": 2034 + }, + { + "epoch": 5.38, + "learning_rate": 2.339142091152815e-05, + "loss": 0.0636, + "step": 2035 + }, + { + "epoch": 5.39, + "learning_rate": 2.3378016085790884e-05, + "loss": 0.001, + "step": 2036 + }, + { + "epoch": 5.39, + "learning_rate": 2.336461126005362e-05, + "loss": 0.0079, + "step": 2037 + }, + { + "epoch": 5.39, + "learning_rate": 2.3351206434316355e-05, + "loss": 0.0752, + "step": 2038 + }, + { + "epoch": 5.39, + "learning_rate": 2.333780160857909e-05, + "loss": 0.0024, + "step": 2039 + }, + { + "epoch": 5.4, + "learning_rate": 2.332439678284182e-05, + "loss": 0.0061, + "step": 2040 + }, + { + "epoch": 5.4, + "learning_rate": 2.3310991957104557e-05, + "loss": 0.2882, + "step": 2041 + }, + { + "epoch": 5.4, + "learning_rate": 2.329758713136729e-05, + "loss": 0.02, + "step": 2042 + }, + { + "epoch": 5.4, + "learning_rate": 2.3284182305630027e-05, + "loss": 0.018, + "step": 2043 + }, + { + "epoch": 5.41, + "learning_rate": 2.3270777479892762e-05, + "loss": 0.0125, + "step": 2044 + }, + { + "epoch": 5.41, + "learning_rate": 2.3257372654155497e-05, + "loss": 0.0007, + "step": 2045 + }, + { + "epoch": 5.41, + "learning_rate": 2.3243967828418232e-05, + "loss": 0.3849, + "step": 2046 + }, + { + "epoch": 5.42, + "learning_rate": 2.3230563002680967e-05, + "loss": 0.0011, + "step": 2047 + }, + { + "epoch": 5.42, + "learning_rate": 2.3217158176943702e-05, + "loss": 0.1235, + "step": 2048 + }, + { + "epoch": 5.42, + "learning_rate": 2.3203753351206438e-05, + "loss": 0.0006, + "step": 2049 + }, + { + "epoch": 5.42, + "learning_rate": 2.319034852546917e-05, + "loss": 0.0006, + "step": 2050 + }, + { + "epoch": 5.43, + "learning_rate": 2.3176943699731904e-05, + "loss": 0.0011, + "step": 2051 + }, + { + "epoch": 5.43, + "learning_rate": 2.316353887399464e-05, + "loss": 0.0011, + "step": 2052 + }, + { + "epoch": 5.43, + "learning_rate": 2.3150134048257375e-05, + "loss": 0.0032, + "step": 2053 + }, + { + "epoch": 5.43, + "learning_rate": 2.313672922252011e-05, + "loss": 0.0718, + "step": 2054 + }, + { + "epoch": 5.44, + "learning_rate": 2.312332439678284e-05, + "loss": 0.0052, + "step": 2055 + }, + { + "epoch": 5.44, + "learning_rate": 2.3109919571045577e-05, + "loss": 0.0026, + "step": 2056 + }, + { + "epoch": 5.44, + "learning_rate": 2.3096514745308312e-05, + "loss": 0.1854, + "step": 2057 + }, + { + "epoch": 5.44, + "learning_rate": 2.3083109919571047e-05, + "loss": 0.0008, + "step": 2058 + }, + { + "epoch": 5.45, + "learning_rate": 2.3069705093833782e-05, + "loss": 0.2671, + "step": 2059 + }, + { + "epoch": 5.45, + "learning_rate": 2.3056300268096514e-05, + "loss": 0.0064, + "step": 2060 + }, + { + "epoch": 5.45, + "learning_rate": 2.304289544235925e-05, + "loss": 0.0012, + "step": 2061 + }, + { + "epoch": 5.46, + "learning_rate": 2.3029490616621984e-05, + "loss": 0.0006, + "step": 2062 + }, + { + "epoch": 5.46, + "learning_rate": 2.301608579088472e-05, + "loss": 0.0008, + "step": 2063 + }, + { + "epoch": 5.46, + "learning_rate": 2.3002680965147454e-05, + "loss": 0.4998, + "step": 2064 + }, + { + "epoch": 5.46, + "learning_rate": 2.2989276139410186e-05, + "loss": 0.0037, + "step": 2065 + }, + { + "epoch": 5.47, + "learning_rate": 2.297587131367292e-05, + "loss": 0.0029, + "step": 2066 + }, + { + "epoch": 5.47, + "learning_rate": 2.2962466487935656e-05, + "loss": 0.043, + "step": 2067 + }, + { + "epoch": 5.47, + "learning_rate": 2.294906166219839e-05, + "loss": 0.0451, + "step": 2068 + }, + { + "epoch": 5.47, + "learning_rate": 2.2935656836461127e-05, + "loss": 0.0009, + "step": 2069 + }, + { + "epoch": 5.48, + "learning_rate": 2.2922252010723862e-05, + "loss": 0.002, + "step": 2070 + }, + { + "epoch": 5.48, + "learning_rate": 2.2908847184986597e-05, + "loss": 0.2744, + "step": 2071 + }, + { + "epoch": 5.48, + "learning_rate": 2.2895442359249332e-05, + "loss": 0.0146, + "step": 2072 + }, + { + "epoch": 5.48, + "learning_rate": 2.2882037533512067e-05, + "loss": 0.0011, + "step": 2073 + }, + { + "epoch": 5.49, + "learning_rate": 2.2868632707774802e-05, + "loss": 0.0421, + "step": 2074 + }, + { + "epoch": 5.49, + "learning_rate": 2.2855227882037537e-05, + "loss": 0.1518, + "step": 2075 + }, + { + "epoch": 5.49, + "learning_rate": 2.284182305630027e-05, + "loss": 0.0072, + "step": 2076 + }, + { + "epoch": 5.49, + "learning_rate": 2.2828418230563004e-05, + "loss": 0.2781, + "step": 2077 + }, + { + "epoch": 5.5, + "learning_rate": 2.281501340482574e-05, + "loss": 0.004, + "step": 2078 + }, + { + "epoch": 5.5, + "learning_rate": 2.2801608579088475e-05, + "loss": 0.1029, + "step": 2079 + }, + { + "epoch": 5.5, + "learning_rate": 2.278820375335121e-05, + "loss": 0.0526, + "step": 2080 + }, + { + "epoch": 5.51, + "learning_rate": 2.277479892761394e-05, + "loss": 0.0011, + "step": 2081 + }, + { + "epoch": 5.51, + "learning_rate": 2.2761394101876677e-05, + "loss": 0.0139, + "step": 2082 + }, + { + "epoch": 5.51, + "learning_rate": 2.274798927613941e-05, + "loss": 0.0509, + "step": 2083 + }, + { + "epoch": 5.51, + "learning_rate": 2.2734584450402147e-05, + "loss": 0.0042, + "step": 2084 + }, + { + "epoch": 5.52, + "learning_rate": 2.2721179624664882e-05, + "loss": 0.1964, + "step": 2085 + }, + { + "epoch": 5.52, + "learning_rate": 2.2707774798927614e-05, + "loss": 0.0083, + "step": 2086 + }, + { + "epoch": 5.52, + "learning_rate": 2.269436997319035e-05, + "loss": 0.101, + "step": 2087 + }, + { + "epoch": 5.52, + "learning_rate": 2.2680965147453084e-05, + "loss": 0.0094, + "step": 2088 + }, + { + "epoch": 5.53, + "learning_rate": 2.266756032171582e-05, + "loss": 0.1433, + "step": 2089 + }, + { + "epoch": 5.53, + "learning_rate": 2.2654155495978554e-05, + "loss": 0.0091, + "step": 2090 + }, + { + "epoch": 5.53, + "learning_rate": 2.2640750670241286e-05, + "loss": 0.0018, + "step": 2091 + }, + { + "epoch": 5.53, + "learning_rate": 2.262734584450402e-05, + "loss": 0.001, + "step": 2092 + }, + { + "epoch": 5.54, + "learning_rate": 2.2613941018766756e-05, + "loss": 0.3507, + "step": 2093 + }, + { + "epoch": 5.54, + "learning_rate": 2.260053619302949e-05, + "loss": 0.0527, + "step": 2094 + }, + { + "epoch": 5.54, + "learning_rate": 2.2587131367292226e-05, + "loss": 0.0015, + "step": 2095 + }, + { + "epoch": 5.54, + "learning_rate": 2.2573726541554958e-05, + "loss": 0.0195, + "step": 2096 + }, + { + "epoch": 5.55, + "learning_rate": 2.2560321715817693e-05, + "loss": 0.0007, + "step": 2097 + }, + { + "epoch": 5.55, + "learning_rate": 2.254691689008043e-05, + "loss": 0.001, + "step": 2098 + }, + { + "epoch": 5.55, + "learning_rate": 2.2533512064343164e-05, + "loss": 0.2777, + "step": 2099 + }, + { + "epoch": 5.56, + "learning_rate": 2.25201072386059e-05, + "loss": 0.0657, + "step": 2100 + }, + { + "epoch": 5.56, + "learning_rate": 2.2506702412868634e-05, + "loss": 0.159, + "step": 2101 + }, + { + "epoch": 5.56, + "learning_rate": 2.249329758713137e-05, + "loss": 0.0342, + "step": 2102 + }, + { + "epoch": 5.56, + "learning_rate": 2.2479892761394104e-05, + "loss": 0.3001, + "step": 2103 + }, + { + "epoch": 5.57, + "learning_rate": 2.246648793565684e-05, + "loss": 0.0028, + "step": 2104 + }, + { + "epoch": 5.57, + "learning_rate": 2.2453083109919574e-05, + "loss": 0.0191, + "step": 2105 + }, + { + "epoch": 5.57, + "learning_rate": 2.2439678284182306e-05, + "loss": 0.0012, + "step": 2106 + }, + { + "epoch": 5.57, + "learning_rate": 2.242627345844504e-05, + "loss": 0.2619, + "step": 2107 + }, + { + "epoch": 5.58, + "learning_rate": 2.2412868632707776e-05, + "loss": 0.001, + "step": 2108 + }, + { + "epoch": 5.58, + "learning_rate": 2.239946380697051e-05, + "loss": 0.094, + "step": 2109 + }, + { + "epoch": 5.58, + "learning_rate": 2.2386058981233247e-05, + "loss": 0.003, + "step": 2110 + }, + { + "epoch": 5.58, + "learning_rate": 2.237265415549598e-05, + "loss": 0.0528, + "step": 2111 + }, + { + "epoch": 5.59, + "learning_rate": 2.2359249329758714e-05, + "loss": 0.1252, + "step": 2112 + }, + { + "epoch": 5.59, + "learning_rate": 2.234584450402145e-05, + "loss": 0.0039, + "step": 2113 + }, + { + "epoch": 5.59, + "learning_rate": 2.2332439678284184e-05, + "loss": 0.0913, + "step": 2114 + }, + { + "epoch": 5.6, + "learning_rate": 2.231903485254692e-05, + "loss": 0.0023, + "step": 2115 + }, + { + "epoch": 5.6, + "learning_rate": 2.230563002680965e-05, + "loss": 0.0047, + "step": 2116 + }, + { + "epoch": 5.6, + "learning_rate": 2.2292225201072386e-05, + "loss": 0.0688, + "step": 2117 + }, + { + "epoch": 5.6, + "learning_rate": 2.227882037533512e-05, + "loss": 0.0013, + "step": 2118 + }, + { + "epoch": 5.61, + "learning_rate": 2.2265415549597856e-05, + "loss": 0.0012, + "step": 2119 + }, + { + "epoch": 5.61, + "learning_rate": 2.225201072386059e-05, + "loss": 0.0048, + "step": 2120 + }, + { + "epoch": 5.61, + "learning_rate": 2.2238605898123323e-05, + "loss": 0.0011, + "step": 2121 + }, + { + "epoch": 5.61, + "learning_rate": 2.2225201072386058e-05, + "loss": 0.0009, + "step": 2122 + }, + { + "epoch": 5.62, + "learning_rate": 2.2211796246648793e-05, + "loss": 0.0015, + "step": 2123 + }, + { + "epoch": 5.62, + "learning_rate": 2.2198391420911528e-05, + "loss": 0.0077, + "step": 2124 + }, + { + "epoch": 5.62, + "learning_rate": 2.2184986595174263e-05, + "loss": 0.0007, + "step": 2125 + }, + { + "epoch": 5.62, + "learning_rate": 2.2171581769437e-05, + "loss": 0.0007, + "step": 2126 + }, + { + "epoch": 5.63, + "learning_rate": 2.2158176943699734e-05, + "loss": 0.0506, + "step": 2127 + }, + { + "epoch": 5.63, + "learning_rate": 2.214477211796247e-05, + "loss": 0.0016, + "step": 2128 + }, + { + "epoch": 5.63, + "learning_rate": 2.2131367292225204e-05, + "loss": 0.0005, + "step": 2129 + }, + { + "epoch": 5.63, + "learning_rate": 2.211796246648794e-05, + "loss": 0.0049, + "step": 2130 + }, + { + "epoch": 5.64, + "learning_rate": 2.210455764075067e-05, + "loss": 0.0305, + "step": 2131 + }, + { + "epoch": 5.64, + "learning_rate": 2.2091152815013406e-05, + "loss": 0.0448, + "step": 2132 + }, + { + "epoch": 5.64, + "learning_rate": 2.207774798927614e-05, + "loss": 0.5391, + "step": 2133 + }, + { + "epoch": 5.65, + "learning_rate": 2.2064343163538876e-05, + "loss": 0.0005, + "step": 2134 + }, + { + "epoch": 5.65, + "learning_rate": 2.205093833780161e-05, + "loss": 0.0141, + "step": 2135 + }, + { + "epoch": 5.65, + "learning_rate": 2.2037533512064346e-05, + "loss": 0.3613, + "step": 2136 + }, + { + "epoch": 5.65, + "learning_rate": 2.2024128686327078e-05, + "loss": 0.0013, + "step": 2137 + }, + { + "epoch": 5.66, + "learning_rate": 2.2010723860589813e-05, + "loss": 0.002, + "step": 2138 + }, + { + "epoch": 5.66, + "learning_rate": 2.199731903485255e-05, + "loss": 0.0207, + "step": 2139 + }, + { + "epoch": 5.66, + "learning_rate": 2.1983914209115284e-05, + "loss": 0.0004, + "step": 2140 + }, + { + "epoch": 5.66, + "learning_rate": 2.197050938337802e-05, + "loss": 0.0022, + "step": 2141 + }, + { + "epoch": 5.67, + "learning_rate": 2.195710455764075e-05, + "loss": 0.5076, + "step": 2142 + }, + { + "epoch": 5.67, + "learning_rate": 2.1943699731903486e-05, + "loss": 0.0016, + "step": 2143 + }, + { + "epoch": 5.67, + "learning_rate": 2.193029490616622e-05, + "loss": 0.0014, + "step": 2144 + }, + { + "epoch": 5.67, + "learning_rate": 2.1916890080428956e-05, + "loss": 0.0101, + "step": 2145 + }, + { + "epoch": 5.68, + "learning_rate": 2.190348525469169e-05, + "loss": 0.0048, + "step": 2146 + }, + { + "epoch": 5.68, + "learning_rate": 2.1890080428954423e-05, + "loss": 0.001, + "step": 2147 + }, + { + "epoch": 5.68, + "learning_rate": 2.1876675603217158e-05, + "loss": 0.0004, + "step": 2148 + }, + { + "epoch": 5.69, + "learning_rate": 2.1863270777479893e-05, + "loss": 0.2627, + "step": 2149 + }, + { + "epoch": 5.69, + "learning_rate": 2.1849865951742628e-05, + "loss": 0.0013, + "step": 2150 + }, + { + "epoch": 5.69, + "learning_rate": 2.1836461126005363e-05, + "loss": 0.0074, + "step": 2151 + }, + { + "epoch": 5.69, + "learning_rate": 2.1823056300268095e-05, + "loss": 0.0238, + "step": 2152 + }, + { + "epoch": 5.7, + "learning_rate": 2.180965147453083e-05, + "loss": 0.0013, + "step": 2153 + }, + { + "epoch": 5.7, + "learning_rate": 2.1796246648793565e-05, + "loss": 0.0005, + "step": 2154 + }, + { + "epoch": 5.7, + "learning_rate": 2.17828418230563e-05, + "loss": 0.0006, + "step": 2155 + }, + { + "epoch": 5.7, + "learning_rate": 2.1769436997319036e-05, + "loss": 0.0442, + "step": 2156 + }, + { + "epoch": 5.71, + "learning_rate": 2.175603217158177e-05, + "loss": 0.0007, + "step": 2157 + }, + { + "epoch": 5.71, + "learning_rate": 2.1742627345844506e-05, + "loss": 0.0032, + "step": 2158 + }, + { + "epoch": 5.71, + "learning_rate": 2.172922252010724e-05, + "loss": 0.005, + "step": 2159 + }, + { + "epoch": 5.71, + "learning_rate": 2.1715817694369976e-05, + "loss": 0.0005, + "step": 2160 + }, + { + "epoch": 5.72, + "learning_rate": 2.170241286863271e-05, + "loss": 0.0005, + "step": 2161 + }, + { + "epoch": 5.72, + "learning_rate": 2.1689008042895443e-05, + "loss": 0.2023, + "step": 2162 + }, + { + "epoch": 5.72, + "learning_rate": 2.1675603217158178e-05, + "loss": 0.3146, + "step": 2163 + }, + { + "epoch": 5.72, + "learning_rate": 2.1662198391420913e-05, + "loss": 0.0035, + "step": 2164 + }, + { + "epoch": 5.73, + "learning_rate": 2.164879356568365e-05, + "loss": 0.141, + "step": 2165 + }, + { + "epoch": 5.73, + "learning_rate": 2.1635388739946383e-05, + "loss": 0.0005, + "step": 2166 + }, + { + "epoch": 5.73, + "learning_rate": 2.1621983914209115e-05, + "loss": 0.0013, + "step": 2167 + }, + { + "epoch": 5.74, + "learning_rate": 2.160857908847185e-05, + "loss": 0.0484, + "step": 2168 + }, + { + "epoch": 5.74, + "learning_rate": 2.1595174262734585e-05, + "loss": 0.0109, + "step": 2169 + }, + { + "epoch": 5.74, + "learning_rate": 2.158176943699732e-05, + "loss": 0.3307, + "step": 2170 + }, + { + "epoch": 5.74, + "learning_rate": 2.1568364611260056e-05, + "loss": 0.0013, + "step": 2171 + }, + { + "epoch": 5.75, + "learning_rate": 2.1554959785522787e-05, + "loss": 0.0005, + "step": 2172 + }, + { + "epoch": 5.75, + "learning_rate": 2.1541554959785523e-05, + "loss": 0.0004, + "step": 2173 + }, + { + "epoch": 5.75, + "learning_rate": 2.1528150134048258e-05, + "loss": 0.0744, + "step": 2174 + }, + { + "epoch": 5.75, + "learning_rate": 2.1514745308310993e-05, + "loss": 0.0008, + "step": 2175 + }, + { + "epoch": 5.76, + "learning_rate": 2.1501340482573728e-05, + "loss": 0.0013, + "step": 2176 + }, + { + "epoch": 5.76, + "learning_rate": 2.148793565683646e-05, + "loss": 0.0006, + "step": 2177 + }, + { + "epoch": 5.76, + "learning_rate": 2.1474530831099195e-05, + "loss": 0.055, + "step": 2178 + }, + { + "epoch": 5.76, + "learning_rate": 2.146112600536193e-05, + "loss": 0.0013, + "step": 2179 + }, + { + "epoch": 5.77, + "learning_rate": 2.1447721179624665e-05, + "loss": 0.0022, + "step": 2180 + }, + { + "epoch": 5.77, + "learning_rate": 2.14343163538874e-05, + "loss": 0.0009, + "step": 2181 + }, + { + "epoch": 5.77, + "learning_rate": 2.1420911528150135e-05, + "loss": 0.0011, + "step": 2182 + }, + { + "epoch": 5.78, + "learning_rate": 2.140750670241287e-05, + "loss": 0.0676, + "step": 2183 + }, + { + "epoch": 5.78, + "learning_rate": 2.1394101876675606e-05, + "loss": 0.4583, + "step": 2184 + }, + { + "epoch": 5.78, + "learning_rate": 2.138069705093834e-05, + "loss": 0.0009, + "step": 2185 + }, + { + "epoch": 5.78, + "learning_rate": 2.1367292225201076e-05, + "loss": 0.002, + "step": 2186 + }, + { + "epoch": 5.79, + "learning_rate": 2.1353887399463808e-05, + "loss": 0.7625, + "step": 2187 + }, + { + "epoch": 5.79, + "learning_rate": 2.1340482573726543e-05, + "loss": 0.4912, + "step": 2188 + }, + { + "epoch": 5.79, + "learning_rate": 2.1327077747989278e-05, + "loss": 0.0283, + "step": 2189 + }, + { + "epoch": 5.79, + "learning_rate": 2.1313672922252013e-05, + "loss": 0.0011, + "step": 2190 + }, + { + "epoch": 5.8, + "learning_rate": 2.1300268096514748e-05, + "loss": 0.0013, + "step": 2191 + }, + { + "epoch": 5.8, + "learning_rate": 2.128686327077748e-05, + "loss": 0.3384, + "step": 2192 + }, + { + "epoch": 5.8, + "learning_rate": 2.1273458445040215e-05, + "loss": 0.4533, + "step": 2193 + }, + { + "epoch": 5.8, + "learning_rate": 2.126005361930295e-05, + "loss": 0.0039, + "step": 2194 + }, + { + "epoch": 5.81, + "learning_rate": 2.1246648793565685e-05, + "loss": 0.0038, + "step": 2195 + }, + { + "epoch": 5.81, + "learning_rate": 2.123324396782842e-05, + "loss": 0.0318, + "step": 2196 + }, + { + "epoch": 5.81, + "learning_rate": 2.1219839142091156e-05, + "loss": 0.0045, + "step": 2197 + }, + { + "epoch": 5.81, + "learning_rate": 2.1206434316353887e-05, + "loss": 0.3134, + "step": 2198 + }, + { + "epoch": 5.82, + "learning_rate": 2.1193029490616622e-05, + "loss": 0.0011, + "step": 2199 + }, + { + "epoch": 5.82, + "learning_rate": 2.1179624664879358e-05, + "loss": 0.0008, + "step": 2200 + }, + { + "epoch": 5.82, + "learning_rate": 2.1166219839142093e-05, + "loss": 0.0009, + "step": 2201 + }, + { + "epoch": 5.83, + "learning_rate": 2.1152815013404828e-05, + "loss": 0.0083, + "step": 2202 + }, + { + "epoch": 5.83, + "learning_rate": 2.113941018766756e-05, + "loss": 0.0196, + "step": 2203 + }, + { + "epoch": 5.83, + "learning_rate": 2.1126005361930295e-05, + "loss": 0.0063, + "step": 2204 + }, + { + "epoch": 5.83, + "learning_rate": 2.111260053619303e-05, + "loss": 0.0064, + "step": 2205 + }, + { + "epoch": 5.84, + "learning_rate": 2.1099195710455765e-05, + "loss": 0.0143, + "step": 2206 + }, + { + "epoch": 5.84, + "learning_rate": 2.10857908847185e-05, + "loss": 0.0012, + "step": 2207 + }, + { + "epoch": 5.84, + "learning_rate": 2.1072386058981232e-05, + "loss": 0.0033, + "step": 2208 + }, + { + "epoch": 5.84, + "learning_rate": 2.1058981233243967e-05, + "loss": 0.0014, + "step": 2209 + }, + { + "epoch": 5.85, + "learning_rate": 2.1045576407506702e-05, + "loss": 0.0219, + "step": 2210 + }, + { + "epoch": 5.85, + "learning_rate": 2.1032171581769437e-05, + "loss": 0.3033, + "step": 2211 + }, + { + "epoch": 5.85, + "learning_rate": 2.1018766756032172e-05, + "loss": 0.0711, + "step": 2212 + }, + { + "epoch": 5.85, + "learning_rate": 2.1005361930294907e-05, + "loss": 0.0051, + "step": 2213 + }, + { + "epoch": 5.86, + "learning_rate": 2.0991957104557643e-05, + "loss": 0.026, + "step": 2214 + }, + { + "epoch": 5.86, + "learning_rate": 2.0978552278820378e-05, + "loss": 0.0024, + "step": 2215 + }, + { + "epoch": 5.86, + "learning_rate": 2.0965147453083113e-05, + "loss": 0.3622, + "step": 2216 + }, + { + "epoch": 5.87, + "learning_rate": 2.0951742627345848e-05, + "loss": 0.0009, + "step": 2217 + }, + { + "epoch": 5.87, + "learning_rate": 2.093833780160858e-05, + "loss": 0.0019, + "step": 2218 + }, + { + "epoch": 5.87, + "learning_rate": 2.0924932975871315e-05, + "loss": 0.3412, + "step": 2219 + }, + { + "epoch": 5.87, + "learning_rate": 2.091152815013405e-05, + "loss": 0.0197, + "step": 2220 + }, + { + "epoch": 5.88, + "learning_rate": 2.0898123324396785e-05, + "loss": 0.4229, + "step": 2221 + }, + { + "epoch": 5.88, + "learning_rate": 2.088471849865952e-05, + "loss": 0.0014, + "step": 2222 + }, + { + "epoch": 5.88, + "learning_rate": 2.0871313672922252e-05, + "loss": 0.0183, + "step": 2223 + }, + { + "epoch": 5.88, + "learning_rate": 2.0857908847184987e-05, + "loss": 0.2005, + "step": 2224 + }, + { + "epoch": 5.89, + "learning_rate": 2.0844504021447722e-05, + "loss": 0.0122, + "step": 2225 + }, + { + "epoch": 5.89, + "learning_rate": 2.0831099195710457e-05, + "loss": 0.1178, + "step": 2226 + }, + { + "epoch": 5.89, + "learning_rate": 2.0817694369973193e-05, + "loss": 0.0105, + "step": 2227 + }, + { + "epoch": 5.89, + "learning_rate": 2.0804289544235924e-05, + "loss": 0.0328, + "step": 2228 + }, + { + "epoch": 5.9, + "learning_rate": 2.079088471849866e-05, + "loss": 0.0087, + "step": 2229 + }, + { + "epoch": 5.9, + "learning_rate": 2.0777479892761395e-05, + "loss": 0.0288, + "step": 2230 + }, + { + "epoch": 5.9, + "learning_rate": 2.076407506702413e-05, + "loss": 0.0017, + "step": 2231 + }, + { + "epoch": 5.9, + "learning_rate": 2.0750670241286865e-05, + "loss": 0.002, + "step": 2232 + }, + { + "epoch": 5.91, + "learning_rate": 2.0737265415549597e-05, + "loss": 0.0024, + "step": 2233 + }, + { + "epoch": 5.91, + "learning_rate": 2.072386058981233e-05, + "loss": 0.0008, + "step": 2234 + }, + { + "epoch": 5.91, + "learning_rate": 2.0710455764075067e-05, + "loss": 0.002, + "step": 2235 + }, + { + "epoch": 5.92, + "learning_rate": 2.0697050938337802e-05, + "loss": 0.0052, + "step": 2236 + }, + { + "epoch": 5.92, + "learning_rate": 2.0683646112600537e-05, + "loss": 0.0058, + "step": 2237 + }, + { + "epoch": 5.92, + "learning_rate": 2.0670241286863272e-05, + "loss": 0.0013, + "step": 2238 + }, + { + "epoch": 5.92, + "learning_rate": 2.0656836461126007e-05, + "loss": 0.0158, + "step": 2239 + }, + { + "epoch": 5.93, + "learning_rate": 2.0643431635388742e-05, + "loss": 0.229, + "step": 2240 + }, + { + "epoch": 5.93, + "learning_rate": 2.0630026809651478e-05, + "loss": 0.1844, + "step": 2241 + }, + { + "epoch": 5.93, + "learning_rate": 2.0616621983914213e-05, + "loss": 0.2905, + "step": 2242 + }, + { + "epoch": 5.93, + "learning_rate": 2.0603217158176944e-05, + "loss": 0.0059, + "step": 2243 + }, + { + "epoch": 5.94, + "learning_rate": 2.058981233243968e-05, + "loss": 0.0007, + "step": 2244 + }, + { + "epoch": 5.94, + "learning_rate": 2.0576407506702415e-05, + "loss": 0.1638, + "step": 2245 + }, + { + "epoch": 5.94, + "learning_rate": 2.056300268096515e-05, + "loss": 0.1195, + "step": 2246 + }, + { + "epoch": 5.94, + "learning_rate": 2.0549597855227885e-05, + "loss": 0.0015, + "step": 2247 + }, + { + "epoch": 5.95, + "learning_rate": 2.0536193029490617e-05, + "loss": 0.0013, + "step": 2248 + }, + { + "epoch": 5.95, + "learning_rate": 2.0522788203753352e-05, + "loss": 0.5152, + "step": 2249 + }, + { + "epoch": 5.95, + "learning_rate": 2.0509383378016087e-05, + "loss": 0.0315, + "step": 2250 + }, + { + "epoch": 5.96, + "learning_rate": 2.0495978552278822e-05, + "loss": 0.1213, + "step": 2251 + }, + { + "epoch": 5.96, + "learning_rate": 2.0482573726541557e-05, + "loss": 0.0006, + "step": 2252 + }, + { + "epoch": 5.96, + "learning_rate": 2.046916890080429e-05, + "loss": 0.0011, + "step": 2253 + }, + { + "epoch": 5.96, + "learning_rate": 2.0455764075067024e-05, + "loss": 0.2546, + "step": 2254 + }, + { + "epoch": 5.97, + "learning_rate": 2.044235924932976e-05, + "loss": 0.1259, + "step": 2255 + }, + { + "epoch": 5.97, + "learning_rate": 2.0428954423592494e-05, + "loss": 0.0179, + "step": 2256 + }, + { + "epoch": 5.97, + "learning_rate": 2.041554959785523e-05, + "loss": 0.0257, + "step": 2257 + }, + { + "epoch": 5.97, + "learning_rate": 2.0402144772117965e-05, + "loss": 0.0092, + "step": 2258 + }, + { + "epoch": 5.98, + "learning_rate": 2.0388739946380696e-05, + "loss": 0.3231, + "step": 2259 + }, + { + "epoch": 5.98, + "learning_rate": 2.037533512064343e-05, + "loss": 0.0084, + "step": 2260 + }, + { + "epoch": 5.98, + "learning_rate": 2.0361930294906167e-05, + "loss": 0.0517, + "step": 2261 + }, + { + "epoch": 5.98, + "learning_rate": 2.0348525469168902e-05, + "loss": 0.0012, + "step": 2262 + }, + { + "epoch": 5.99, + "learning_rate": 2.0335120643431637e-05, + "loss": 0.0045, + "step": 2263 + }, + { + "epoch": 5.99, + "learning_rate": 2.032171581769437e-05, + "loss": 0.0012, + "step": 2264 + }, + { + "epoch": 5.99, + "learning_rate": 2.0308310991957104e-05, + "loss": 0.0032, + "step": 2265 + }, + { + "epoch": 5.99, + "learning_rate": 2.029490616621984e-05, + "loss": 0.0038, + "step": 2266 + }, + { + "epoch": 6.0, + "learning_rate": 2.0281501340482574e-05, + "loss": 0.2731, + "step": 2267 + }, + { + "epoch": 6.0, + "learning_rate": 2.026809651474531e-05, + "loss": 0.3122, + "step": 2268 + }, + { + "epoch": 6.0, + "eval_f1": 0.7762762762762763, + "eval_loss": 1.0977023839950562, + "eval_runtime": 1.8624, + "eval_samples_per_second": 812.383, + "eval_steps_per_second": 51.009, + "step": 2268 + }, + { + "epoch": 6.0, + "learning_rate": 2.0254691689008044e-05, + "loss": 0.0035, + "step": 2269 + }, + { + "epoch": 6.01, + "learning_rate": 2.024128686327078e-05, + "loss": 0.0709, + "step": 2270 + }, + { + "epoch": 6.01, + "learning_rate": 2.0227882037533515e-05, + "loss": 0.1548, + "step": 2271 + }, + { + "epoch": 6.01, + "learning_rate": 2.021447721179625e-05, + "loss": 0.0035, + "step": 2272 + }, + { + "epoch": 6.01, + "learning_rate": 2.0201072386058985e-05, + "loss": 0.0009, + "step": 2273 + }, + { + "epoch": 6.02, + "learning_rate": 2.0187667560321717e-05, + "loss": 0.0061, + "step": 2274 + }, + { + "epoch": 6.02, + "learning_rate": 2.0174262734584452e-05, + "loss": 0.3586, + "step": 2275 + }, + { + "epoch": 6.02, + "learning_rate": 2.0160857908847187e-05, + "loss": 0.0019, + "step": 2276 + }, + { + "epoch": 6.02, + "learning_rate": 2.0147453083109922e-05, + "loss": 0.0077, + "step": 2277 + }, + { + "epoch": 6.03, + "learning_rate": 2.0134048257372657e-05, + "loss": 0.0022, + "step": 2278 + }, + { + "epoch": 6.03, + "learning_rate": 2.012064343163539e-05, + "loss": 0.0021, + "step": 2279 + }, + { + "epoch": 6.03, + "learning_rate": 2.0107238605898124e-05, + "loss": 0.0022, + "step": 2280 + }, + { + "epoch": 6.03, + "learning_rate": 2.009383378016086e-05, + "loss": 0.1387, + "step": 2281 + }, + { + "epoch": 6.04, + "learning_rate": 2.0080428954423594e-05, + "loss": 0.0034, + "step": 2282 + }, + { + "epoch": 6.04, + "learning_rate": 2.006702412868633e-05, + "loss": 0.0128, + "step": 2283 + }, + { + "epoch": 6.04, + "learning_rate": 2.005361930294906e-05, + "loss": 0.0201, + "step": 2284 + }, + { + "epoch": 6.04, + "learning_rate": 2.0040214477211796e-05, + "loss": 0.0072, + "step": 2285 + }, + { + "epoch": 6.05, + "learning_rate": 2.002680965147453e-05, + "loss": 0.0031, + "step": 2286 + }, + { + "epoch": 6.05, + "learning_rate": 2.0013404825737267e-05, + "loss": 0.0273, + "step": 2287 + }, + { + "epoch": 6.05, + "learning_rate": 2e-05, + "loss": 0.0679, + "step": 2288 + }, + { + "epoch": 6.06, + "learning_rate": 1.9986595174262733e-05, + "loss": 0.0012, + "step": 2289 + }, + { + "epoch": 6.06, + "learning_rate": 1.997319034852547e-05, + "loss": 0.0045, + "step": 2290 + }, + { + "epoch": 6.06, + "learning_rate": 1.9959785522788204e-05, + "loss": 0.0289, + "step": 2291 + }, + { + "epoch": 6.06, + "learning_rate": 1.994638069705094e-05, + "loss": 0.1421, + "step": 2292 + }, + { + "epoch": 6.07, + "learning_rate": 1.9932975871313674e-05, + "loss": 0.0007, + "step": 2293 + }, + { + "epoch": 6.07, + "learning_rate": 1.9919571045576406e-05, + "loss": 0.0037, + "step": 2294 + }, + { + "epoch": 6.07, + "learning_rate": 1.990616621983914e-05, + "loss": 0.1566, + "step": 2295 + }, + { + "epoch": 6.07, + "learning_rate": 1.9892761394101876e-05, + "loss": 0.0008, + "step": 2296 + }, + { + "epoch": 6.08, + "learning_rate": 1.987935656836461e-05, + "loss": 0.0008, + "step": 2297 + }, + { + "epoch": 6.08, + "learning_rate": 1.9865951742627346e-05, + "loss": 0.1319, + "step": 2298 + }, + { + "epoch": 6.08, + "learning_rate": 1.985254691689008e-05, + "loss": 0.0007, + "step": 2299 + }, + { + "epoch": 6.08, + "learning_rate": 1.9839142091152816e-05, + "loss": 0.0048, + "step": 2300 + }, + { + "epoch": 6.09, + "learning_rate": 1.982573726541555e-05, + "loss": 0.223, + "step": 2301 + }, + { + "epoch": 6.09, + "learning_rate": 1.9812332439678287e-05, + "loss": 0.2188, + "step": 2302 + }, + { + "epoch": 6.09, + "learning_rate": 1.9798927613941022e-05, + "loss": 0.0209, + "step": 2303 + }, + { + "epoch": 6.1, + "learning_rate": 1.9785522788203754e-05, + "loss": 0.001, + "step": 2304 + }, + { + "epoch": 6.1, + "learning_rate": 1.977211796246649e-05, + "loss": 0.0006, + "step": 2305 + }, + { + "epoch": 6.1, + "learning_rate": 1.9758713136729224e-05, + "loss": 0.0011, + "step": 2306 + }, + { + "epoch": 6.1, + "learning_rate": 1.974530831099196e-05, + "loss": 0.0008, + "step": 2307 + }, + { + "epoch": 6.11, + "learning_rate": 1.9731903485254694e-05, + "loss": 0.0015, + "step": 2308 + }, + { + "epoch": 6.11, + "learning_rate": 1.9718498659517426e-05, + "loss": 0.0024, + "step": 2309 + }, + { + "epoch": 6.11, + "learning_rate": 1.970509383378016e-05, + "loss": 0.0006, + "step": 2310 + }, + { + "epoch": 6.11, + "learning_rate": 1.9691689008042896e-05, + "loss": 0.0007, + "step": 2311 + }, + { + "epoch": 6.12, + "learning_rate": 1.967828418230563e-05, + "loss": 0.0026, + "step": 2312 + }, + { + "epoch": 6.12, + "learning_rate": 1.9664879356568366e-05, + "loss": 0.0019, + "step": 2313 + }, + { + "epoch": 6.12, + "learning_rate": 1.9651474530831098e-05, + "loss": 0.0055, + "step": 2314 + }, + { + "epoch": 6.12, + "learning_rate": 1.9638069705093833e-05, + "loss": 0.0023, + "step": 2315 + }, + { + "epoch": 6.13, + "learning_rate": 1.962466487935657e-05, + "loss": 0.0011, + "step": 2316 + }, + { + "epoch": 6.13, + "learning_rate": 1.9611260053619303e-05, + "loss": 0.0407, + "step": 2317 + }, + { + "epoch": 6.13, + "learning_rate": 1.959785522788204e-05, + "loss": 0.046, + "step": 2318 + }, + { + "epoch": 6.13, + "learning_rate": 1.958445040214477e-05, + "loss": 0.0062, + "step": 2319 + }, + { + "epoch": 6.14, + "learning_rate": 1.9571045576407505e-05, + "loss": 0.0007, + "step": 2320 + }, + { + "epoch": 6.14, + "learning_rate": 1.955764075067024e-05, + "loss": 0.3377, + "step": 2321 + }, + { + "epoch": 6.14, + "learning_rate": 1.9544235924932976e-05, + "loss": 0.4294, + "step": 2322 + }, + { + "epoch": 6.15, + "learning_rate": 1.953083109919571e-05, + "loss": 0.0016, + "step": 2323 + }, + { + "epoch": 6.15, + "learning_rate": 1.9517426273458446e-05, + "loss": 0.0283, + "step": 2324 + }, + { + "epoch": 6.15, + "learning_rate": 1.950402144772118e-05, + "loss": 0.0005, + "step": 2325 + }, + { + "epoch": 6.15, + "learning_rate": 1.9490616621983916e-05, + "loss": 0.0011, + "step": 2326 + }, + { + "epoch": 6.16, + "learning_rate": 1.947721179624665e-05, + "loss": 0.0237, + "step": 2327 + }, + { + "epoch": 6.16, + "learning_rate": 1.9463806970509387e-05, + "loss": 0.0581, + "step": 2328 + }, + { + "epoch": 6.16, + "learning_rate": 1.945040214477212e-05, + "loss": 0.0905, + "step": 2329 + }, + { + "epoch": 6.16, + "learning_rate": 1.9436997319034853e-05, + "loss": 0.0012, + "step": 2330 + }, + { + "epoch": 6.17, + "learning_rate": 1.942359249329759e-05, + "loss": 0.0053, + "step": 2331 + }, + { + "epoch": 6.17, + "learning_rate": 1.9410187667560324e-05, + "loss": 0.0225, + "step": 2332 + }, + { + "epoch": 6.17, + "learning_rate": 1.939678284182306e-05, + "loss": 0.0374, + "step": 2333 + }, + { + "epoch": 6.17, + "learning_rate": 1.9383378016085794e-05, + "loss": 0.0006, + "step": 2334 + }, + { + "epoch": 6.18, + "learning_rate": 1.9369973190348526e-05, + "loss": 0.0008, + "step": 2335 + }, + { + "epoch": 6.18, + "learning_rate": 1.935656836461126e-05, + "loss": 0.0006, + "step": 2336 + }, + { + "epoch": 6.18, + "learning_rate": 1.9343163538873996e-05, + "loss": 0.0011, + "step": 2337 + }, + { + "epoch": 6.19, + "learning_rate": 1.932975871313673e-05, + "loss": 0.0014, + "step": 2338 + }, + { + "epoch": 6.19, + "learning_rate": 1.9316353887399466e-05, + "loss": 0.0006, + "step": 2339 + }, + { + "epoch": 6.19, + "learning_rate": 1.9302949061662198e-05, + "loss": 0.0504, + "step": 2340 + }, + { + "epoch": 6.19, + "learning_rate": 1.9289544235924933e-05, + "loss": 0.005, + "step": 2341 + }, + { + "epoch": 6.2, + "learning_rate": 1.9276139410187668e-05, + "loss": 0.2673, + "step": 2342 + }, + { + "epoch": 6.2, + "learning_rate": 1.9262734584450403e-05, + "loss": 0.2173, + "step": 2343 + }, + { + "epoch": 6.2, + "learning_rate": 1.924932975871314e-05, + "loss": 0.0235, + "step": 2344 + }, + { + "epoch": 6.2, + "learning_rate": 1.923592493297587e-05, + "loss": 0.0092, + "step": 2345 + }, + { + "epoch": 6.21, + "learning_rate": 1.9222520107238605e-05, + "loss": 0.0088, + "step": 2346 + }, + { + "epoch": 6.21, + "learning_rate": 1.920911528150134e-05, + "loss": 0.0506, + "step": 2347 + }, + { + "epoch": 6.21, + "learning_rate": 1.9195710455764076e-05, + "loss": 0.0033, + "step": 2348 + }, + { + "epoch": 6.21, + "learning_rate": 1.918230563002681e-05, + "loss": 0.0006, + "step": 2349 + }, + { + "epoch": 6.22, + "learning_rate": 1.9168900804289542e-05, + "loss": 0.0021, + "step": 2350 + }, + { + "epoch": 6.22, + "learning_rate": 1.9155495978552278e-05, + "loss": 0.0006, + "step": 2351 + }, + { + "epoch": 6.22, + "learning_rate": 1.9142091152815013e-05, + "loss": 0.0041, + "step": 2352 + }, + { + "epoch": 6.22, + "learning_rate": 1.9128686327077748e-05, + "loss": 0.0209, + "step": 2353 + }, + { + "epoch": 6.23, + "learning_rate": 1.9115281501340483e-05, + "loss": 0.0145, + "step": 2354 + }, + { + "epoch": 6.23, + "learning_rate": 1.9101876675603218e-05, + "loss": 0.0303, + "step": 2355 + }, + { + "epoch": 6.23, + "learning_rate": 1.9088471849865953e-05, + "loss": 0.0951, + "step": 2356 + }, + { + "epoch": 6.24, + "learning_rate": 1.907506702412869e-05, + "loss": 0.042, + "step": 2357 + }, + { + "epoch": 6.24, + "learning_rate": 1.9061662198391424e-05, + "loss": 0.0009, + "step": 2358 + }, + { + "epoch": 6.24, + "learning_rate": 1.904825737265416e-05, + "loss": 0.0006, + "step": 2359 + }, + { + "epoch": 6.24, + "learning_rate": 1.903485254691689e-05, + "loss": 0.0057, + "step": 2360 + }, + { + "epoch": 6.25, + "learning_rate": 1.9021447721179626e-05, + "loss": 0.0578, + "step": 2361 + }, + { + "epoch": 6.25, + "learning_rate": 1.900804289544236e-05, + "loss": 0.3295, + "step": 2362 + }, + { + "epoch": 6.25, + "learning_rate": 1.8994638069705096e-05, + "loss": 0.0005, + "step": 2363 + }, + { + "epoch": 6.25, + "learning_rate": 1.898123324396783e-05, + "loss": 0.0009, + "step": 2364 + }, + { + "epoch": 6.26, + "learning_rate": 1.8967828418230563e-05, + "loss": 0.0036, + "step": 2365 + }, + { + "epoch": 6.26, + "learning_rate": 1.8954423592493298e-05, + "loss": 0.0029, + "step": 2366 + }, + { + "epoch": 6.26, + "learning_rate": 1.8941018766756033e-05, + "loss": 0.1468, + "step": 2367 + }, + { + "epoch": 6.26, + "learning_rate": 1.8927613941018768e-05, + "loss": 0.3163, + "step": 2368 + }, + { + "epoch": 6.27, + "learning_rate": 1.8914209115281503e-05, + "loss": 0.169, + "step": 2369 + }, + { + "epoch": 6.27, + "learning_rate": 1.8900804289544235e-05, + "loss": 0.0493, + "step": 2370 + }, + { + "epoch": 6.27, + "learning_rate": 1.888739946380697e-05, + "loss": 0.0003, + "step": 2371 + }, + { + "epoch": 6.28, + "learning_rate": 1.8873994638069705e-05, + "loss": 0.0015, + "step": 2372 + }, + { + "epoch": 6.28, + "learning_rate": 1.886058981233244e-05, + "loss": 0.258, + "step": 2373 + }, + { + "epoch": 6.28, + "learning_rate": 1.8847184986595175e-05, + "loss": 0.002, + "step": 2374 + }, + { + "epoch": 6.28, + "learning_rate": 1.8833780160857907e-05, + "loss": 0.0008, + "step": 2375 + }, + { + "epoch": 6.29, + "learning_rate": 1.8820375335120642e-05, + "loss": 0.0003, + "step": 2376 + }, + { + "epoch": 6.29, + "learning_rate": 1.8806970509383377e-05, + "loss": 0.0014, + "step": 2377 + }, + { + "epoch": 6.29, + "learning_rate": 1.8793565683646113e-05, + "loss": 0.0068, + "step": 2378 + }, + { + "epoch": 6.29, + "learning_rate": 1.8780160857908848e-05, + "loss": 0.39, + "step": 2379 + }, + { + "epoch": 6.3, + "learning_rate": 1.8766756032171583e-05, + "loss": 0.0046, + "step": 2380 + }, + { + "epoch": 6.3, + "learning_rate": 1.8753351206434318e-05, + "loss": 0.0008, + "step": 2381 + }, + { + "epoch": 6.3, + "learning_rate": 1.8739946380697053e-05, + "loss": 0.0007, + "step": 2382 + }, + { + "epoch": 6.3, + "learning_rate": 1.8726541554959788e-05, + "loss": 0.0013, + "step": 2383 + }, + { + "epoch": 6.31, + "learning_rate": 1.8713136729222523e-05, + "loss": 0.0055, + "step": 2384 + }, + { + "epoch": 6.31, + "learning_rate": 1.869973190348526e-05, + "loss": 0.0014, + "step": 2385 + }, + { + "epoch": 6.31, + "learning_rate": 1.868632707774799e-05, + "loss": 0.0039, + "step": 2386 + }, + { + "epoch": 6.31, + "learning_rate": 1.8672922252010725e-05, + "loss": 0.0995, + "step": 2387 + }, + { + "epoch": 6.32, + "learning_rate": 1.865951742627346e-05, + "loss": 0.0015, + "step": 2388 + }, + { + "epoch": 6.32, + "learning_rate": 1.8646112600536196e-05, + "loss": 0.083, + "step": 2389 + }, + { + "epoch": 6.32, + "learning_rate": 1.863270777479893e-05, + "loss": 0.0044, + "step": 2390 + }, + { + "epoch": 6.33, + "learning_rate": 1.8619302949061662e-05, + "loss": 0.0105, + "step": 2391 + }, + { + "epoch": 6.33, + "learning_rate": 1.8605898123324398e-05, + "loss": 0.0005, + "step": 2392 + }, + { + "epoch": 6.33, + "learning_rate": 1.8592493297587133e-05, + "loss": 0.0014, + "step": 2393 + }, + { + "epoch": 6.33, + "learning_rate": 1.8579088471849868e-05, + "loss": 0.0114, + "step": 2394 + }, + { + "epoch": 6.34, + "learning_rate": 1.8565683646112603e-05, + "loss": 0.0334, + "step": 2395 + }, + { + "epoch": 6.34, + "learning_rate": 1.8552278820375335e-05, + "loss": 0.006, + "step": 2396 + }, + { + "epoch": 6.34, + "learning_rate": 1.853887399463807e-05, + "loss": 0.3124, + "step": 2397 + }, + { + "epoch": 6.34, + "learning_rate": 1.8525469168900805e-05, + "loss": 0.2324, + "step": 2398 + }, + { + "epoch": 6.35, + "learning_rate": 1.851206434316354e-05, + "loss": 0.0889, + "step": 2399 + }, + { + "epoch": 6.35, + "learning_rate": 1.8498659517426275e-05, + "loss": 0.0705, + "step": 2400 + }, + { + "epoch": 6.35, + "learning_rate": 1.8485254691689007e-05, + "loss": 0.0012, + "step": 2401 + }, + { + "epoch": 6.35, + "learning_rate": 1.8471849865951742e-05, + "loss": 0.0033, + "step": 2402 + }, + { + "epoch": 6.36, + "learning_rate": 1.8458445040214477e-05, + "loss": 0.0021, + "step": 2403 + }, + { + "epoch": 6.36, + "learning_rate": 1.8445040214477212e-05, + "loss": 0.0005, + "step": 2404 + }, + { + "epoch": 6.36, + "learning_rate": 1.8431635388739948e-05, + "loss": 0.0011, + "step": 2405 + }, + { + "epoch": 6.37, + "learning_rate": 1.841823056300268e-05, + "loss": 0.0006, + "step": 2406 + }, + { + "epoch": 6.37, + "learning_rate": 1.8404825737265414e-05, + "loss": 0.0726, + "step": 2407 + }, + { + "epoch": 6.37, + "learning_rate": 1.839142091152815e-05, + "loss": 0.0009, + "step": 2408 + }, + { + "epoch": 6.37, + "learning_rate": 1.8378016085790885e-05, + "loss": 0.0007, + "step": 2409 + }, + { + "epoch": 6.38, + "learning_rate": 1.836461126005362e-05, + "loss": 0.0859, + "step": 2410 + }, + { + "epoch": 6.38, + "learning_rate": 1.8351206434316355e-05, + "loss": 0.0011, + "step": 2411 + }, + { + "epoch": 6.38, + "learning_rate": 1.833780160857909e-05, + "loss": 0.6542, + "step": 2412 + }, + { + "epoch": 6.38, + "learning_rate": 1.8324396782841825e-05, + "loss": 0.2733, + "step": 2413 + }, + { + "epoch": 6.39, + "learning_rate": 1.831099195710456e-05, + "loss": 0.2825, + "step": 2414 + }, + { + "epoch": 6.39, + "learning_rate": 1.8297587131367295e-05, + "loss": 0.0012, + "step": 2415 + }, + { + "epoch": 6.39, + "learning_rate": 1.8284182305630027e-05, + "loss": 0.1404, + "step": 2416 + }, + { + "epoch": 6.39, + "learning_rate": 1.8270777479892762e-05, + "loss": 0.0006, + "step": 2417 + }, + { + "epoch": 6.4, + "learning_rate": 1.8257372654155497e-05, + "loss": 0.0007, + "step": 2418 + }, + { + "epoch": 6.4, + "learning_rate": 1.8243967828418233e-05, + "loss": 0.1429, + "step": 2419 + }, + { + "epoch": 6.4, + "learning_rate": 1.8230563002680968e-05, + "loss": 0.0008, + "step": 2420 + }, + { + "epoch": 6.4, + "learning_rate": 1.82171581769437e-05, + "loss": 0.0062, + "step": 2421 + }, + { + "epoch": 6.41, + "learning_rate": 1.8203753351206435e-05, + "loss": 0.0071, + "step": 2422 + }, + { + "epoch": 6.41, + "learning_rate": 1.819034852546917e-05, + "loss": 0.0017, + "step": 2423 + }, + { + "epoch": 6.41, + "learning_rate": 1.8176943699731905e-05, + "loss": 0.084, + "step": 2424 + }, + { + "epoch": 6.42, + "learning_rate": 1.816353887399464e-05, + "loss": 0.0011, + "step": 2425 + }, + { + "epoch": 6.42, + "learning_rate": 1.8150134048257372e-05, + "loss": 0.0255, + "step": 2426 + }, + { + "epoch": 6.42, + "learning_rate": 1.8136729222520107e-05, + "loss": 0.0009, + "step": 2427 + }, + { + "epoch": 6.42, + "learning_rate": 1.8123324396782842e-05, + "loss": 0.3105, + "step": 2428 + }, + { + "epoch": 6.43, + "learning_rate": 1.8109919571045577e-05, + "loss": 0.0046, + "step": 2429 + }, + { + "epoch": 6.43, + "learning_rate": 1.8096514745308312e-05, + "loss": 0.0089, + "step": 2430 + }, + { + "epoch": 6.43, + "learning_rate": 1.8083109919571044e-05, + "loss": 0.1176, + "step": 2431 + }, + { + "epoch": 6.43, + "learning_rate": 1.806970509383378e-05, + "loss": 0.0235, + "step": 2432 + }, + { + "epoch": 6.44, + "learning_rate": 1.8056300268096514e-05, + "loss": 0.029, + "step": 2433 + }, + { + "epoch": 6.44, + "learning_rate": 1.804289544235925e-05, + "loss": 0.0013, + "step": 2434 + }, + { + "epoch": 6.44, + "learning_rate": 1.8029490616621985e-05, + "loss": 0.0075, + "step": 2435 + }, + { + "epoch": 6.44, + "learning_rate": 1.801608579088472e-05, + "loss": 0.1744, + "step": 2436 + }, + { + "epoch": 6.45, + "learning_rate": 1.8002680965147455e-05, + "loss": 0.0017, + "step": 2437 + }, + { + "epoch": 6.45, + "learning_rate": 1.798927613941019e-05, + "loss": 0.0188, + "step": 2438 + }, + { + "epoch": 6.45, + "learning_rate": 1.7975871313672925e-05, + "loss": 0.0232, + "step": 2439 + }, + { + "epoch": 6.46, + "learning_rate": 1.796246648793566e-05, + "loss": 0.1459, + "step": 2440 + }, + { + "epoch": 6.46, + "learning_rate": 1.7949061662198392e-05, + "loss": 0.0007, + "step": 2441 + }, + { + "epoch": 6.46, + "learning_rate": 1.7935656836461127e-05, + "loss": 0.0005, + "step": 2442 + }, + { + "epoch": 6.46, + "learning_rate": 1.7922252010723862e-05, + "loss": 0.0012, + "step": 2443 + }, + { + "epoch": 6.47, + "learning_rate": 1.7908847184986597e-05, + "loss": 0.0041, + "step": 2444 + }, + { + "epoch": 6.47, + "learning_rate": 1.7895442359249332e-05, + "loss": 0.4884, + "step": 2445 + }, + { + "epoch": 6.47, + "learning_rate": 1.7882037533512068e-05, + "loss": 0.0017, + "step": 2446 + }, + { + "epoch": 6.47, + "learning_rate": 1.78686327077748e-05, + "loss": 0.0566, + "step": 2447 + }, + { + "epoch": 6.48, + "learning_rate": 1.7855227882037534e-05, + "loss": 0.012, + "step": 2448 + }, + { + "epoch": 6.48, + "learning_rate": 1.784182305630027e-05, + "loss": 0.001, + "step": 2449 + }, + { + "epoch": 6.48, + "learning_rate": 1.7828418230563005e-05, + "loss": 0.0028, + "step": 2450 + }, + { + "epoch": 6.48, + "learning_rate": 1.781501340482574e-05, + "loss": 0.4622, + "step": 2451 + }, + { + "epoch": 6.49, + "learning_rate": 1.780160857908847e-05, + "loss": 0.0042, + "step": 2452 + }, + { + "epoch": 6.49, + "learning_rate": 1.7788203753351207e-05, + "loss": 0.0176, + "step": 2453 + }, + { + "epoch": 6.49, + "learning_rate": 1.7774798927613942e-05, + "loss": 0.0012, + "step": 2454 + }, + { + "epoch": 6.49, + "learning_rate": 1.7761394101876677e-05, + "loss": 0.0344, + "step": 2455 + }, + { + "epoch": 6.5, + "learning_rate": 1.7747989276139412e-05, + "loss": 0.1278, + "step": 2456 + }, + { + "epoch": 6.5, + "learning_rate": 1.7734584450402144e-05, + "loss": 0.0017, + "step": 2457 + }, + { + "epoch": 6.5, + "learning_rate": 1.772117962466488e-05, + "loss": 0.0044, + "step": 2458 + }, + { + "epoch": 6.51, + "learning_rate": 1.7707774798927614e-05, + "loss": 0.0016, + "step": 2459 + }, + { + "epoch": 6.51, + "learning_rate": 1.769436997319035e-05, + "loss": 0.0799, + "step": 2460 + }, + { + "epoch": 6.51, + "learning_rate": 1.7680965147453084e-05, + "loss": 0.0066, + "step": 2461 + }, + { + "epoch": 6.51, + "learning_rate": 1.7667560321715816e-05, + "loss": 0.1607, + "step": 2462 + }, + { + "epoch": 6.52, + "learning_rate": 1.765415549597855e-05, + "loss": 0.0742, + "step": 2463 + }, + { + "epoch": 6.52, + "learning_rate": 1.7640750670241286e-05, + "loss": 0.0005, + "step": 2464 + }, + { + "epoch": 6.52, + "learning_rate": 1.762734584450402e-05, + "loss": 0.0006, + "step": 2465 + }, + { + "epoch": 6.52, + "learning_rate": 1.7613941018766757e-05, + "loss": 0.0019, + "step": 2466 + }, + { + "epoch": 6.53, + "learning_rate": 1.7600536193029492e-05, + "loss": 0.0009, + "step": 2467 + }, + { + "epoch": 6.53, + "learning_rate": 1.7587131367292227e-05, + "loss": 0.0023, + "step": 2468 + }, + { + "epoch": 6.53, + "learning_rate": 1.7573726541554962e-05, + "loss": 0.0202, + "step": 2469 + }, + { + "epoch": 6.53, + "learning_rate": 1.7560321715817697e-05, + "loss": 0.0223, + "step": 2470 + }, + { + "epoch": 6.54, + "learning_rate": 1.7546916890080432e-05, + "loss": 0.0009, + "step": 2471 + }, + { + "epoch": 6.54, + "learning_rate": 1.7533512064343164e-05, + "loss": 0.0014, + "step": 2472 + }, + { + "epoch": 6.54, + "learning_rate": 1.75201072386059e-05, + "loss": 0.0514, + "step": 2473 + }, + { + "epoch": 6.54, + "learning_rate": 1.7506702412868634e-05, + "loss": 0.0013, + "step": 2474 + }, + { + "epoch": 6.55, + "learning_rate": 1.749329758713137e-05, + "loss": 0.0087, + "step": 2475 + }, + { + "epoch": 6.55, + "learning_rate": 1.7479892761394105e-05, + "loss": 0.0035, + "step": 2476 + }, + { + "epoch": 6.55, + "learning_rate": 1.7466487935656836e-05, + "loss": 0.0397, + "step": 2477 + }, + { + "epoch": 6.56, + "learning_rate": 1.745308310991957e-05, + "loss": 0.0021, + "step": 2478 + }, + { + "epoch": 6.56, + "learning_rate": 1.7439678284182307e-05, + "loss": 0.052, + "step": 2479 + }, + { + "epoch": 6.56, + "learning_rate": 1.742627345844504e-05, + "loss": 0.0027, + "step": 2480 + }, + { + "epoch": 6.56, + "learning_rate": 1.7412868632707777e-05, + "loss": 0.001, + "step": 2481 + }, + { + "epoch": 6.57, + "learning_rate": 1.739946380697051e-05, + "loss": 0.2899, + "step": 2482 + }, + { + "epoch": 6.57, + "learning_rate": 1.7386058981233244e-05, + "loss": 0.0007, + "step": 2483 + }, + { + "epoch": 6.57, + "learning_rate": 1.737265415549598e-05, + "loss": 0.0704, + "step": 2484 + }, + { + "epoch": 6.57, + "learning_rate": 1.7359249329758714e-05, + "loss": 0.001, + "step": 2485 + }, + { + "epoch": 6.58, + "learning_rate": 1.734584450402145e-05, + "loss": 0.057, + "step": 2486 + }, + { + "epoch": 6.58, + "learning_rate": 1.733243967828418e-05, + "loss": 0.0002, + "step": 2487 + }, + { + "epoch": 6.58, + "learning_rate": 1.7319034852546916e-05, + "loss": 0.0064, + "step": 2488 + }, + { + "epoch": 6.58, + "learning_rate": 1.730563002680965e-05, + "loss": 0.0638, + "step": 2489 + }, + { + "epoch": 6.59, + "learning_rate": 1.7292225201072386e-05, + "loss": 0.0006, + "step": 2490 + }, + { + "epoch": 6.59, + "learning_rate": 1.727882037533512e-05, + "loss": 0.0142, + "step": 2491 + }, + { + "epoch": 6.59, + "learning_rate": 1.7265415549597856e-05, + "loss": 0.0015, + "step": 2492 + }, + { + "epoch": 6.6, + "learning_rate": 1.725201072386059e-05, + "loss": 0.0949, + "step": 2493 + }, + { + "epoch": 6.6, + "learning_rate": 1.7238605898123327e-05, + "loss": 0.0004, + "step": 2494 + }, + { + "epoch": 6.6, + "learning_rate": 1.7225201072386062e-05, + "loss": 0.0111, + "step": 2495 + }, + { + "epoch": 6.6, + "learning_rate": 1.7211796246648797e-05, + "loss": 0.0215, + "step": 2496 + }, + { + "epoch": 6.61, + "learning_rate": 1.719839142091153e-05, + "loss": 0.266, + "step": 2497 + }, + { + "epoch": 6.61, + "learning_rate": 1.7184986595174264e-05, + "loss": 0.4487, + "step": 2498 + }, + { + "epoch": 6.61, + "learning_rate": 1.7171581769437e-05, + "loss": 0.0021, + "step": 2499 + }, + { + "epoch": 6.61, + "learning_rate": 1.7158176943699734e-05, + "loss": 0.0004, + "step": 2500 + }, + { + "epoch": 6.62, + "learning_rate": 1.714477211796247e-05, + "loss": 0.0004, + "step": 2501 + }, + { + "epoch": 6.62, + "learning_rate": 1.71313672922252e-05, + "loss": 0.0011, + "step": 2502 + }, + { + "epoch": 6.62, + "learning_rate": 1.7117962466487936e-05, + "loss": 0.0006, + "step": 2503 + }, + { + "epoch": 6.62, + "learning_rate": 1.710455764075067e-05, + "loss": 0.1005, + "step": 2504 + }, + { + "epoch": 6.63, + "learning_rate": 1.7091152815013406e-05, + "loss": 0.0472, + "step": 2505 + }, + { + "epoch": 6.63, + "learning_rate": 1.707774798927614e-05, + "loss": 0.0004, + "step": 2506 + }, + { + "epoch": 6.63, + "learning_rate": 1.7064343163538877e-05, + "loss": 0.0162, + "step": 2507 + }, + { + "epoch": 6.63, + "learning_rate": 1.705093833780161e-05, + "loss": 0.004, + "step": 2508 + }, + { + "epoch": 6.64, + "learning_rate": 1.7037533512064344e-05, + "loss": 0.0007, + "step": 2509 + }, + { + "epoch": 6.64, + "learning_rate": 1.702412868632708e-05, + "loss": 0.1447, + "step": 2510 + }, + { + "epoch": 6.64, + "learning_rate": 1.7010723860589814e-05, + "loss": 0.0006, + "step": 2511 + }, + { + "epoch": 6.65, + "learning_rate": 1.699731903485255e-05, + "loss": 0.0002, + "step": 2512 + }, + { + "epoch": 6.65, + "learning_rate": 1.698391420911528e-05, + "loss": 0.0004, + "step": 2513 + }, + { + "epoch": 6.65, + "learning_rate": 1.6970509383378016e-05, + "loss": 0.0017, + "step": 2514 + }, + { + "epoch": 6.65, + "learning_rate": 1.695710455764075e-05, + "loss": 0.4581, + "step": 2515 + }, + { + "epoch": 6.66, + "learning_rate": 1.6943699731903486e-05, + "loss": 0.0005, + "step": 2516 + }, + { + "epoch": 6.66, + "learning_rate": 1.693029490616622e-05, + "loss": 0.0043, + "step": 2517 + }, + { + "epoch": 6.66, + "learning_rate": 1.6916890080428953e-05, + "loss": 0.0005, + "step": 2518 + }, + { + "epoch": 6.66, + "learning_rate": 1.6903485254691688e-05, + "loss": 0.0002, + "step": 2519 + }, + { + "epoch": 6.67, + "learning_rate": 1.6890080428954423e-05, + "loss": 0.0005, + "step": 2520 + }, + { + "epoch": 6.67, + "learning_rate": 1.687667560321716e-05, + "loss": 0.0037, + "step": 2521 + }, + { + "epoch": 6.67, + "learning_rate": 1.6863270777479893e-05, + "loss": 0.0003, + "step": 2522 + }, + { + "epoch": 6.67, + "learning_rate": 1.684986595174263e-05, + "loss": 0.0019, + "step": 2523 + }, + { + "epoch": 6.68, + "learning_rate": 1.6836461126005364e-05, + "loss": 0.0023, + "step": 2524 + }, + { + "epoch": 6.68, + "learning_rate": 1.68230563002681e-05, + "loss": 0.0004, + "step": 2525 + }, + { + "epoch": 6.68, + "learning_rate": 1.6809651474530834e-05, + "loss": 0.3317, + "step": 2526 + }, + { + "epoch": 6.69, + "learning_rate": 1.679624664879357e-05, + "loss": 0.0004, + "step": 2527 + }, + { + "epoch": 6.69, + "learning_rate": 1.67828418230563e-05, + "loss": 0.002, + "step": 2528 + }, + { + "epoch": 6.69, + "learning_rate": 1.6769436997319036e-05, + "loss": 0.0003, + "step": 2529 + }, + { + "epoch": 6.69, + "learning_rate": 1.675603217158177e-05, + "loss": 0.0007, + "step": 2530 + }, + { + "epoch": 6.7, + "learning_rate": 1.6742627345844506e-05, + "loss": 0.0012, + "step": 2531 + }, + { + "epoch": 6.7, + "learning_rate": 1.672922252010724e-05, + "loss": 0.024, + "step": 2532 + }, + { + "epoch": 6.7, + "learning_rate": 1.6715817694369973e-05, + "loss": 0.0041, + "step": 2533 + }, + { + "epoch": 6.7, + "learning_rate": 1.6702412868632708e-05, + "loss": 0.1821, + "step": 2534 + }, + { + "epoch": 6.71, + "learning_rate": 1.6689008042895443e-05, + "loss": 0.0004, + "step": 2535 + }, + { + "epoch": 6.71, + "learning_rate": 1.667560321715818e-05, + "loss": 0.0293, + "step": 2536 + }, + { + "epoch": 6.71, + "learning_rate": 1.6662198391420914e-05, + "loss": 0.0005, + "step": 2537 + }, + { + "epoch": 6.71, + "learning_rate": 1.6648793565683645e-05, + "loss": 0.0043, + "step": 2538 + }, + { + "epoch": 6.72, + "learning_rate": 1.663538873994638e-05, + "loss": 0.0112, + "step": 2539 + }, + { + "epoch": 6.72, + "learning_rate": 1.6621983914209116e-05, + "loss": 0.0003, + "step": 2540 + }, + { + "epoch": 6.72, + "learning_rate": 1.660857908847185e-05, + "loss": 0.0005, + "step": 2541 + }, + { + "epoch": 6.72, + "learning_rate": 1.6595174262734586e-05, + "loss": 0.4617, + "step": 2542 + }, + { + "epoch": 6.73, + "learning_rate": 1.6581769436997318e-05, + "loss": 0.0004, + "step": 2543 + }, + { + "epoch": 6.73, + "learning_rate": 1.6568364611260053e-05, + "loss": 0.1932, + "step": 2544 + }, + { + "epoch": 6.73, + "learning_rate": 1.6554959785522788e-05, + "loss": 0.0004, + "step": 2545 + }, + { + "epoch": 6.74, + "learning_rate": 1.6541554959785523e-05, + "loss": 0.0785, + "step": 2546 + }, + { + "epoch": 6.74, + "learning_rate": 1.6528150134048258e-05, + "loss": 0.0882, + "step": 2547 + }, + { + "epoch": 6.74, + "learning_rate": 1.651474530831099e-05, + "loss": 0.3937, + "step": 2548 + }, + { + "epoch": 6.74, + "learning_rate": 1.6501340482573725e-05, + "loss": 0.3401, + "step": 2549 + }, + { + "epoch": 6.75, + "learning_rate": 1.648793565683646e-05, + "loss": 0.026, + "step": 2550 + }, + { + "epoch": 6.75, + "learning_rate": 1.6474530831099195e-05, + "loss": 0.1959, + "step": 2551 + }, + { + "epoch": 6.75, + "learning_rate": 1.646112600536193e-05, + "loss": 0.0022, + "step": 2552 + }, + { + "epoch": 6.75, + "learning_rate": 1.6447721179624666e-05, + "loss": 0.0012, + "step": 2553 + }, + { + "epoch": 6.76, + "learning_rate": 1.64343163538874e-05, + "loss": 0.0064, + "step": 2554 + }, + { + "epoch": 6.76, + "learning_rate": 1.6420911528150136e-05, + "loss": 0.0105, + "step": 2555 + }, + { + "epoch": 6.76, + "learning_rate": 1.640750670241287e-05, + "loss": 0.0008, + "step": 2556 + }, + { + "epoch": 6.76, + "learning_rate": 1.6394101876675606e-05, + "loss": 0.0339, + "step": 2557 + }, + { + "epoch": 6.77, + "learning_rate": 1.6380697050938338e-05, + "loss": 0.1458, + "step": 2558 + }, + { + "epoch": 6.77, + "learning_rate": 1.6367292225201073e-05, + "loss": 0.2526, + "step": 2559 + }, + { + "epoch": 6.77, + "learning_rate": 1.6353887399463808e-05, + "loss": 0.038, + "step": 2560 + }, + { + "epoch": 6.78, + "learning_rate": 1.6340482573726543e-05, + "loss": 0.174, + "step": 2561 + }, + { + "epoch": 6.78, + "learning_rate": 1.632707774798928e-05, + "loss": 0.1936, + "step": 2562 + }, + { + "epoch": 6.78, + "learning_rate": 1.631367292225201e-05, + "loss": 0.0014, + "step": 2563 + }, + { + "epoch": 6.78, + "learning_rate": 1.6300268096514745e-05, + "loss": 0.0008, + "step": 2564 + }, + { + "epoch": 6.79, + "learning_rate": 1.628686327077748e-05, + "loss": 0.0616, + "step": 2565 + }, + { + "epoch": 6.79, + "learning_rate": 1.6273458445040215e-05, + "loss": 0.0029, + "step": 2566 + }, + { + "epoch": 6.79, + "learning_rate": 1.626005361930295e-05, + "loss": 0.1301, + "step": 2567 + }, + { + "epoch": 6.79, + "learning_rate": 1.6246648793565686e-05, + "loss": 0.0076, + "step": 2568 + }, + { + "epoch": 6.8, + "learning_rate": 1.6233243967828417e-05, + "loss": 0.038, + "step": 2569 + }, + { + "epoch": 6.8, + "learning_rate": 1.6219839142091153e-05, + "loss": 0.0376, + "step": 2570 + }, + { + "epoch": 6.8, + "learning_rate": 1.6206434316353888e-05, + "loss": 0.0007, + "step": 2571 + }, + { + "epoch": 6.8, + "learning_rate": 1.6193029490616623e-05, + "loss": 0.016, + "step": 2572 + }, + { + "epoch": 6.81, + "learning_rate": 1.6179624664879358e-05, + "loss": 0.0005, + "step": 2573 + }, + { + "epoch": 6.81, + "learning_rate": 1.616621983914209e-05, + "loss": 0.0024, + "step": 2574 + }, + { + "epoch": 6.81, + "learning_rate": 1.6152815013404825e-05, + "loss": 0.001, + "step": 2575 + }, + { + "epoch": 6.81, + "learning_rate": 1.613941018766756e-05, + "loss": 0.0009, + "step": 2576 + }, + { + "epoch": 6.82, + "learning_rate": 1.6126005361930295e-05, + "loss": 0.1889, + "step": 2577 + }, + { + "epoch": 6.82, + "learning_rate": 1.611260053619303e-05, + "loss": 0.5094, + "step": 2578 + }, + { + "epoch": 6.82, + "learning_rate": 1.6099195710455765e-05, + "loss": 0.017, + "step": 2579 + }, + { + "epoch": 6.83, + "learning_rate": 1.60857908847185e-05, + "loss": 0.029, + "step": 2580 + }, + { + "epoch": 6.83, + "learning_rate": 1.6072386058981236e-05, + "loss": 0.1249, + "step": 2581 + }, + { + "epoch": 6.83, + "learning_rate": 1.605898123324397e-05, + "loss": 0.2531, + "step": 2582 + }, + { + "epoch": 6.83, + "learning_rate": 1.6045576407506706e-05, + "loss": 0.0006, + "step": 2583 + }, + { + "epoch": 6.84, + "learning_rate": 1.6032171581769438e-05, + "loss": 0.0624, + "step": 2584 + }, + { + "epoch": 6.84, + "learning_rate": 1.6018766756032173e-05, + "loss": 0.0254, + "step": 2585 + }, + { + "epoch": 6.84, + "learning_rate": 1.6005361930294908e-05, + "loss": 0.0034, + "step": 2586 + }, + { + "epoch": 6.84, + "learning_rate": 1.5991957104557643e-05, + "loss": 0.0204, + "step": 2587 + }, + { + "epoch": 6.85, + "learning_rate": 1.5978552278820378e-05, + "loss": 0.002, + "step": 2588 + }, + { + "epoch": 6.85, + "learning_rate": 1.596514745308311e-05, + "loss": 0.0015, + "step": 2589 + }, + { + "epoch": 6.85, + "learning_rate": 1.5951742627345845e-05, + "loss": 0.0465, + "step": 2590 + }, + { + "epoch": 6.85, + "learning_rate": 1.593833780160858e-05, + "loss": 0.1892, + "step": 2591 + }, + { + "epoch": 6.86, + "learning_rate": 1.5924932975871315e-05, + "loss": 0.0932, + "step": 2592 + }, + { + "epoch": 6.86, + "learning_rate": 1.591152815013405e-05, + "loss": 0.0015, + "step": 2593 + }, + { + "epoch": 6.86, + "learning_rate": 1.5898123324396782e-05, + "loss": 0.0062, + "step": 2594 + }, + { + "epoch": 6.87, + "learning_rate": 1.5884718498659517e-05, + "loss": 0.0731, + "step": 2595 + }, + { + "epoch": 6.87, + "learning_rate": 1.5871313672922252e-05, + "loss": 0.002, + "step": 2596 + }, + { + "epoch": 6.87, + "learning_rate": 1.5857908847184988e-05, + "loss": 0.0484, + "step": 2597 + }, + { + "epoch": 6.87, + "learning_rate": 1.5844504021447723e-05, + "loss": 0.0082, + "step": 2598 + }, + { + "epoch": 6.88, + "learning_rate": 1.5831099195710454e-05, + "loss": 0.0213, + "step": 2599 + }, + { + "epoch": 6.88, + "learning_rate": 1.581769436997319e-05, + "loss": 0.1612, + "step": 2600 + }, + { + "epoch": 6.88, + "learning_rate": 1.5804289544235925e-05, + "loss": 0.184, + "step": 2601 + }, + { + "epoch": 6.88, + "learning_rate": 1.579088471849866e-05, + "loss": 0.1413, + "step": 2602 + }, + { + "epoch": 6.89, + "learning_rate": 1.5777479892761395e-05, + "loss": 0.0019, + "step": 2603 + }, + { + "epoch": 6.89, + "learning_rate": 1.5764075067024127e-05, + "loss": 0.0047, + "step": 2604 + }, + { + "epoch": 6.89, + "learning_rate": 1.5750670241286862e-05, + "loss": 0.0409, + "step": 2605 + }, + { + "epoch": 6.89, + "learning_rate": 1.5737265415549597e-05, + "loss": 0.0379, + "step": 2606 + }, + { + "epoch": 6.9, + "learning_rate": 1.5723860589812332e-05, + "loss": 0.0005, + "step": 2607 + }, + { + "epoch": 6.9, + "learning_rate": 1.5710455764075067e-05, + "loss": 0.0332, + "step": 2608 + }, + { + "epoch": 6.9, + "learning_rate": 1.5697050938337802e-05, + "loss": 0.0543, + "step": 2609 + }, + { + "epoch": 6.9, + "learning_rate": 1.5683646112600538e-05, + "loss": 0.0009, + "step": 2610 + }, + { + "epoch": 6.91, + "learning_rate": 1.5670241286863273e-05, + "loss": 0.016, + "step": 2611 + }, + { + "epoch": 6.91, + "learning_rate": 1.5656836461126008e-05, + "loss": 0.0035, + "step": 2612 + }, + { + "epoch": 6.91, + "learning_rate": 1.5643431635388743e-05, + "loss": 0.0713, + "step": 2613 + }, + { + "epoch": 6.92, + "learning_rate": 1.5630026809651475e-05, + "loss": 0.0022, + "step": 2614 + }, + { + "epoch": 6.92, + "learning_rate": 1.561662198391421e-05, + "loss": 0.0005, + "step": 2615 + }, + { + "epoch": 6.92, + "learning_rate": 1.5603217158176945e-05, + "loss": 0.0009, + "step": 2616 + }, + { + "epoch": 6.92, + "learning_rate": 1.558981233243968e-05, + "loss": 0.0016, + "step": 2617 + }, + { + "epoch": 6.93, + "learning_rate": 1.5576407506702415e-05, + "loss": 0.0017, + "step": 2618 + }, + { + "epoch": 6.93, + "learning_rate": 1.5563002680965147e-05, + "loss": 0.0094, + "step": 2619 + }, + { + "epoch": 6.93, + "learning_rate": 1.5549597855227882e-05, + "loss": 0.016, + "step": 2620 + }, + { + "epoch": 6.93, + "learning_rate": 1.5536193029490617e-05, + "loss": 0.0005, + "step": 2621 + }, + { + "epoch": 6.94, + "learning_rate": 1.5522788203753352e-05, + "loss": 0.0549, + "step": 2622 + }, + { + "epoch": 6.94, + "learning_rate": 1.5509383378016087e-05, + "loss": 0.3791, + "step": 2623 + }, + { + "epoch": 6.94, + "learning_rate": 1.549597855227882e-05, + "loss": 0.0003, + "step": 2624 + }, + { + "epoch": 6.94, + "learning_rate": 1.5482573726541554e-05, + "loss": 0.0774, + "step": 2625 + }, + { + "epoch": 6.95, + "learning_rate": 1.546916890080429e-05, + "loss": 0.0879, + "step": 2626 + }, + { + "epoch": 6.95, + "learning_rate": 1.5455764075067025e-05, + "loss": 0.0007, + "step": 2627 + }, + { + "epoch": 6.95, + "learning_rate": 1.544235924932976e-05, + "loss": 0.0047, + "step": 2628 + }, + { + "epoch": 6.96, + "learning_rate": 1.542895442359249e-05, + "loss": 0.0011, + "step": 2629 + }, + { + "epoch": 6.96, + "learning_rate": 1.5415549597855227e-05, + "loss": 0.0004, + "step": 2630 + }, + { + "epoch": 6.96, + "learning_rate": 1.5402144772117962e-05, + "loss": 0.4962, + "step": 2631 + }, + { + "epoch": 6.96, + "learning_rate": 1.5388739946380697e-05, + "loss": 0.1182, + "step": 2632 + }, + { + "epoch": 6.97, + "learning_rate": 1.5375335120643432e-05, + "loss": 0.0269, + "step": 2633 + }, + { + "epoch": 6.97, + "learning_rate": 1.5361930294906167e-05, + "loss": 0.0157, + "step": 2634 + }, + { + "epoch": 6.97, + "learning_rate": 1.5348525469168902e-05, + "loss": 0.0022, + "step": 2635 + }, + { + "epoch": 6.97, + "learning_rate": 1.5335120643431637e-05, + "loss": 0.3299, + "step": 2636 + }, + { + "epoch": 6.98, + "learning_rate": 1.5321715817694372e-05, + "loss": 0.0529, + "step": 2637 + }, + { + "epoch": 6.98, + "learning_rate": 1.5308310991957108e-05, + "loss": 0.1396, + "step": 2638 + }, + { + "epoch": 6.98, + "learning_rate": 1.5294906166219843e-05, + "loss": 0.0008, + "step": 2639 + }, + { + "epoch": 6.98, + "learning_rate": 1.5281501340482574e-05, + "loss": 0.0086, + "step": 2640 + }, + { + "epoch": 6.99, + "learning_rate": 1.526809651474531e-05, + "loss": 0.0036, + "step": 2641 + }, + { + "epoch": 6.99, + "learning_rate": 1.5254691689008043e-05, + "loss": 0.0149, + "step": 2642 + }, + { + "epoch": 6.99, + "learning_rate": 1.5241286863270778e-05, + "loss": 0.0011, + "step": 2643 + }, + { + "epoch": 6.99, + "learning_rate": 1.5227882037533513e-05, + "loss": 0.0003, + "step": 2644 + }, + { + "epoch": 7.0, + "learning_rate": 1.5214477211796247e-05, + "loss": 0.0064, + "step": 2645 + }, + { + "epoch": 7.0, + "learning_rate": 1.5201072386058982e-05, + "loss": 0.0281, + "step": 2646 + }, + { + "epoch": 7.0, + "eval_f1": 0.7856000000000002, + "eval_loss": 1.1071351766586304, + "eval_runtime": 1.8613, + "eval_samples_per_second": 812.89, + "eval_steps_per_second": 51.041, + "step": 2646 + }, + { + "epoch": 7.0, + "learning_rate": 1.5187667560321717e-05, + "loss": 0.0049, + "step": 2647 + }, + { + "epoch": 7.01, + "learning_rate": 1.5174262734584452e-05, + "loss": 0.001, + "step": 2648 + }, + { + "epoch": 7.01, + "learning_rate": 1.5160857908847187e-05, + "loss": 0.0004, + "step": 2649 + }, + { + "epoch": 7.01, + "learning_rate": 1.5147453083109919e-05, + "loss": 0.022, + "step": 2650 + }, + { + "epoch": 7.01, + "learning_rate": 1.5134048257372654e-05, + "loss": 0.0126, + "step": 2651 + }, + { + "epoch": 7.02, + "learning_rate": 1.512064343163539e-05, + "loss": 0.0006, + "step": 2652 + }, + { + "epoch": 7.02, + "learning_rate": 1.5107238605898124e-05, + "loss": 0.0182, + "step": 2653 + }, + { + "epoch": 7.02, + "learning_rate": 1.509383378016086e-05, + "loss": 0.0004, + "step": 2654 + }, + { + "epoch": 7.02, + "learning_rate": 1.5080428954423593e-05, + "loss": 0.2763, + "step": 2655 + }, + { + "epoch": 7.03, + "learning_rate": 1.5067024128686328e-05, + "loss": 0.0025, + "step": 2656 + }, + { + "epoch": 7.03, + "learning_rate": 1.5053619302949063e-05, + "loss": 0.0102, + "step": 2657 + }, + { + "epoch": 7.03, + "learning_rate": 1.5040214477211798e-05, + "loss": 0.008, + "step": 2658 + }, + { + "epoch": 7.03, + "learning_rate": 1.5026809651474534e-05, + "loss": 0.0005, + "step": 2659 + }, + { + "epoch": 7.04, + "learning_rate": 1.5013404825737265e-05, + "loss": 0.0006, + "step": 2660 + }, + { + "epoch": 7.04, + "learning_rate": 1.5e-05, + "loss": 0.0003, + "step": 2661 + }, + { + "epoch": 7.04, + "learning_rate": 1.4986595174262736e-05, + "loss": 0.0013, + "step": 2662 + }, + { + "epoch": 7.04, + "learning_rate": 1.497319034852547e-05, + "loss": 0.0033, + "step": 2663 + }, + { + "epoch": 7.05, + "learning_rate": 1.4959785522788206e-05, + "loss": 0.0004, + "step": 2664 + }, + { + "epoch": 7.05, + "learning_rate": 1.4946380697050938e-05, + "loss": 0.0347, + "step": 2665 + }, + { + "epoch": 7.05, + "learning_rate": 1.4932975871313673e-05, + "loss": 0.0208, + "step": 2666 + }, + { + "epoch": 7.06, + "learning_rate": 1.4919571045576408e-05, + "loss": 0.3783, + "step": 2667 + }, + { + "epoch": 7.06, + "learning_rate": 1.4906166219839143e-05, + "loss": 0.0005, + "step": 2668 + }, + { + "epoch": 7.06, + "learning_rate": 1.4892761394101878e-05, + "loss": 0.2023, + "step": 2669 + }, + { + "epoch": 7.06, + "learning_rate": 1.4879356568364611e-05, + "loss": 0.0007, + "step": 2670 + }, + { + "epoch": 7.07, + "learning_rate": 1.4865951742627347e-05, + "loss": 0.0014, + "step": 2671 + }, + { + "epoch": 7.07, + "learning_rate": 1.4852546916890082e-05, + "loss": 0.0018, + "step": 2672 + }, + { + "epoch": 7.07, + "learning_rate": 1.4839142091152817e-05, + "loss": 0.0004, + "step": 2673 + }, + { + "epoch": 7.07, + "learning_rate": 1.4825737265415552e-05, + "loss": 0.0005, + "step": 2674 + }, + { + "epoch": 7.08, + "learning_rate": 1.4812332439678284e-05, + "loss": 0.0235, + "step": 2675 + }, + { + "epoch": 7.08, + "learning_rate": 1.4798927613941019e-05, + "loss": 0.0116, + "step": 2676 + }, + { + "epoch": 7.08, + "learning_rate": 1.4785522788203754e-05, + "loss": 0.0005, + "step": 2677 + }, + { + "epoch": 7.08, + "learning_rate": 1.4772117962466489e-05, + "loss": 0.0011, + "step": 2678 + }, + { + "epoch": 7.09, + "learning_rate": 1.4758713136729224e-05, + "loss": 0.2959, + "step": 2679 + }, + { + "epoch": 7.09, + "learning_rate": 1.4745308310991956e-05, + "loss": 0.2646, + "step": 2680 + }, + { + "epoch": 7.09, + "learning_rate": 1.4731903485254691e-05, + "loss": 0.1224, + "step": 2681 + }, + { + "epoch": 7.1, + "learning_rate": 1.4718498659517426e-05, + "loss": 0.0419, + "step": 2682 + }, + { + "epoch": 7.1, + "learning_rate": 1.4705093833780161e-05, + "loss": 0.0218, + "step": 2683 + }, + { + "epoch": 7.1, + "learning_rate": 1.4691689008042897e-05, + "loss": 0.0003, + "step": 2684 + }, + { + "epoch": 7.1, + "learning_rate": 1.467828418230563e-05, + "loss": 0.0005, + "step": 2685 + }, + { + "epoch": 7.11, + "learning_rate": 1.4664879356568365e-05, + "loss": 0.2352, + "step": 2686 + }, + { + "epoch": 7.11, + "learning_rate": 1.46514745308311e-05, + "loss": 0.0004, + "step": 2687 + }, + { + "epoch": 7.11, + "learning_rate": 1.4638069705093835e-05, + "loss": 0.0004, + "step": 2688 + }, + { + "epoch": 7.11, + "learning_rate": 1.462466487935657e-05, + "loss": 0.1069, + "step": 2689 + }, + { + "epoch": 7.12, + "learning_rate": 1.4611260053619302e-05, + "loss": 0.008, + "step": 2690 + }, + { + "epoch": 7.12, + "learning_rate": 1.4597855227882037e-05, + "loss": 0.0007, + "step": 2691 + }, + { + "epoch": 7.12, + "learning_rate": 1.4584450402144772e-05, + "loss": 0.002, + "step": 2692 + }, + { + "epoch": 7.12, + "learning_rate": 1.4571045576407508e-05, + "loss": 0.0029, + "step": 2693 + }, + { + "epoch": 7.13, + "learning_rate": 1.4557640750670243e-05, + "loss": 0.0221, + "step": 2694 + }, + { + "epoch": 7.13, + "learning_rate": 1.4544235924932978e-05, + "loss": 0.0085, + "step": 2695 + }, + { + "epoch": 7.13, + "learning_rate": 1.453083109919571e-05, + "loss": 0.0009, + "step": 2696 + }, + { + "epoch": 7.13, + "learning_rate": 1.4517426273458445e-05, + "loss": 0.0018, + "step": 2697 + }, + { + "epoch": 7.14, + "learning_rate": 1.450402144772118e-05, + "loss": 0.001, + "step": 2698 + }, + { + "epoch": 7.14, + "learning_rate": 1.4490616621983915e-05, + "loss": 0.0021, + "step": 2699 + }, + { + "epoch": 7.14, + "learning_rate": 1.447721179624665e-05, + "loss": 0.0225, + "step": 2700 + }, + { + "epoch": 7.15, + "learning_rate": 1.4463806970509384e-05, + "loss": 0.0005, + "step": 2701 + }, + { + "epoch": 7.15, + "learning_rate": 1.4450402144772119e-05, + "loss": 0.0057, + "step": 2702 + }, + { + "epoch": 7.15, + "learning_rate": 1.4436997319034854e-05, + "loss": 0.0422, + "step": 2703 + }, + { + "epoch": 7.15, + "learning_rate": 1.4423592493297589e-05, + "loss": 0.0028, + "step": 2704 + }, + { + "epoch": 7.16, + "learning_rate": 1.4410187667560324e-05, + "loss": 0.0231, + "step": 2705 + }, + { + "epoch": 7.16, + "learning_rate": 1.4396782841823056e-05, + "loss": 0.1236, + "step": 2706 + }, + { + "epoch": 7.16, + "learning_rate": 1.4383378016085791e-05, + "loss": 0.0004, + "step": 2707 + }, + { + "epoch": 7.16, + "learning_rate": 1.4369973190348526e-05, + "loss": 0.0019, + "step": 2708 + }, + { + "epoch": 7.17, + "learning_rate": 1.4356568364611261e-05, + "loss": 0.0029, + "step": 2709 + }, + { + "epoch": 7.17, + "learning_rate": 1.4343163538873996e-05, + "loss": 0.0005, + "step": 2710 + }, + { + "epoch": 7.17, + "learning_rate": 1.4329758713136728e-05, + "loss": 0.0665, + "step": 2711 + }, + { + "epoch": 7.17, + "learning_rate": 1.4316353887399463e-05, + "loss": 0.0005, + "step": 2712 + }, + { + "epoch": 7.18, + "learning_rate": 1.4302949061662198e-05, + "loss": 0.0107, + "step": 2713 + }, + { + "epoch": 7.18, + "learning_rate": 1.4289544235924934e-05, + "loss": 0.0005, + "step": 2714 + }, + { + "epoch": 7.18, + "learning_rate": 1.4276139410187669e-05, + "loss": 0.1983, + "step": 2715 + }, + { + "epoch": 7.19, + "learning_rate": 1.4262734584450402e-05, + "loss": 0.0016, + "step": 2716 + }, + { + "epoch": 7.19, + "learning_rate": 1.4249329758713137e-05, + "loss": 0.0003, + "step": 2717 + }, + { + "epoch": 7.19, + "learning_rate": 1.4235924932975872e-05, + "loss": 0.0247, + "step": 2718 + }, + { + "epoch": 7.19, + "learning_rate": 1.4222520107238607e-05, + "loss": 0.0079, + "step": 2719 + }, + { + "epoch": 7.2, + "learning_rate": 1.4209115281501343e-05, + "loss": 0.0012, + "step": 2720 + }, + { + "epoch": 7.2, + "learning_rate": 1.4195710455764074e-05, + "loss": 0.0004, + "step": 2721 + }, + { + "epoch": 7.2, + "learning_rate": 1.418230563002681e-05, + "loss": 0.0004, + "step": 2722 + }, + { + "epoch": 7.2, + "learning_rate": 1.4168900804289545e-05, + "loss": 0.0051, + "step": 2723 + }, + { + "epoch": 7.21, + "learning_rate": 1.415549597855228e-05, + "loss": 0.0006, + "step": 2724 + }, + { + "epoch": 7.21, + "learning_rate": 1.4142091152815015e-05, + "loss": 0.0043, + "step": 2725 + }, + { + "epoch": 7.21, + "learning_rate": 1.4128686327077748e-05, + "loss": 0.004, + "step": 2726 + }, + { + "epoch": 7.21, + "learning_rate": 1.4115281501340483e-05, + "loss": 0.2211, + "step": 2727 + }, + { + "epoch": 7.22, + "learning_rate": 1.4101876675603219e-05, + "loss": 0.0003, + "step": 2728 + }, + { + "epoch": 7.22, + "learning_rate": 1.4088471849865954e-05, + "loss": 0.0004, + "step": 2729 + }, + { + "epoch": 7.22, + "learning_rate": 1.4075067024128689e-05, + "loss": 0.2051, + "step": 2730 + }, + { + "epoch": 7.22, + "learning_rate": 1.406166219839142e-05, + "loss": 0.0003, + "step": 2731 + }, + { + "epoch": 7.23, + "learning_rate": 1.4048257372654156e-05, + "loss": 0.0014, + "step": 2732 + }, + { + "epoch": 7.23, + "learning_rate": 1.403485254691689e-05, + "loss": 0.0007, + "step": 2733 + }, + { + "epoch": 7.23, + "learning_rate": 1.4021447721179626e-05, + "loss": 0.0068, + "step": 2734 + }, + { + "epoch": 7.24, + "learning_rate": 1.4008042895442361e-05, + "loss": 0.137, + "step": 2735 + }, + { + "epoch": 7.24, + "learning_rate": 1.3994638069705093e-05, + "loss": 0.0005, + "step": 2736 + }, + { + "epoch": 7.24, + "learning_rate": 1.3981233243967828e-05, + "loss": 0.0006, + "step": 2737 + }, + { + "epoch": 7.24, + "learning_rate": 1.3967828418230563e-05, + "loss": 0.0206, + "step": 2738 + }, + { + "epoch": 7.25, + "learning_rate": 1.3954423592493298e-05, + "loss": 0.1488, + "step": 2739 + }, + { + "epoch": 7.25, + "learning_rate": 1.3941018766756033e-05, + "loss": 0.0054, + "step": 2740 + }, + { + "epoch": 7.25, + "learning_rate": 1.3927613941018767e-05, + "loss": 0.0269, + "step": 2741 + }, + { + "epoch": 7.25, + "learning_rate": 1.3914209115281502e-05, + "loss": 0.0006, + "step": 2742 + }, + { + "epoch": 7.26, + "learning_rate": 1.3900804289544237e-05, + "loss": 0.0003, + "step": 2743 + }, + { + "epoch": 7.26, + "learning_rate": 1.3887399463806972e-05, + "loss": 0.0004, + "step": 2744 + }, + { + "epoch": 7.26, + "learning_rate": 1.3873994638069707e-05, + "loss": 0.0003, + "step": 2745 + }, + { + "epoch": 7.26, + "learning_rate": 1.3860589812332439e-05, + "loss": 0.0027, + "step": 2746 + }, + { + "epoch": 7.27, + "learning_rate": 1.3847184986595174e-05, + "loss": 0.0006, + "step": 2747 + }, + { + "epoch": 7.27, + "learning_rate": 1.383378016085791e-05, + "loss": 0.0012, + "step": 2748 + }, + { + "epoch": 7.27, + "learning_rate": 1.3820375335120644e-05, + "loss": 0.0522, + "step": 2749 + }, + { + "epoch": 7.28, + "learning_rate": 1.380697050938338e-05, + "loss": 0.0126, + "step": 2750 + }, + { + "epoch": 7.28, + "learning_rate": 1.3793565683646111e-05, + "loss": 0.0083, + "step": 2751 + }, + { + "epoch": 7.28, + "learning_rate": 1.3780160857908846e-05, + "loss": 0.074, + "step": 2752 + }, + { + "epoch": 7.28, + "learning_rate": 1.3766756032171582e-05, + "loss": 0.0002, + "step": 2753 + }, + { + "epoch": 7.29, + "learning_rate": 1.3753351206434317e-05, + "loss": 0.1009, + "step": 2754 + }, + { + "epoch": 7.29, + "learning_rate": 1.3739946380697052e-05, + "loss": 0.0021, + "step": 2755 + }, + { + "epoch": 7.29, + "learning_rate": 1.3726541554959787e-05, + "loss": 0.0082, + "step": 2756 + }, + { + "epoch": 7.29, + "learning_rate": 1.371313672922252e-05, + "loss": 0.0004, + "step": 2757 + }, + { + "epoch": 7.3, + "learning_rate": 1.3699731903485256e-05, + "loss": 0.0006, + "step": 2758 + }, + { + "epoch": 7.3, + "learning_rate": 1.368632707774799e-05, + "loss": 0.0173, + "step": 2759 + }, + { + "epoch": 7.3, + "learning_rate": 1.3672922252010726e-05, + "loss": 0.0147, + "step": 2760 + }, + { + "epoch": 7.3, + "learning_rate": 1.3659517426273461e-05, + "loss": 0.1293, + "step": 2761 + }, + { + "epoch": 7.31, + "learning_rate": 1.3646112600536193e-05, + "loss": 0.2566, + "step": 2762 + }, + { + "epoch": 7.31, + "learning_rate": 1.3632707774798928e-05, + "loss": 0.0026, + "step": 2763 + }, + { + "epoch": 7.31, + "learning_rate": 1.3619302949061663e-05, + "loss": 0.0031, + "step": 2764 + }, + { + "epoch": 7.31, + "learning_rate": 1.3605898123324398e-05, + "loss": 0.0029, + "step": 2765 + }, + { + "epoch": 7.32, + "learning_rate": 1.3592493297587133e-05, + "loss": 0.0005, + "step": 2766 + }, + { + "epoch": 7.32, + "learning_rate": 1.3579088471849865e-05, + "loss": 0.0004, + "step": 2767 + }, + { + "epoch": 7.32, + "learning_rate": 1.35656836461126e-05, + "loss": 0.0294, + "step": 2768 + }, + { + "epoch": 7.33, + "learning_rate": 1.3552278820375335e-05, + "loss": 0.0011, + "step": 2769 + }, + { + "epoch": 7.33, + "learning_rate": 1.353887399463807e-05, + "loss": 0.009, + "step": 2770 + }, + { + "epoch": 7.33, + "learning_rate": 1.3525469168900805e-05, + "loss": 0.0003, + "step": 2771 + }, + { + "epoch": 7.33, + "learning_rate": 1.3512064343163539e-05, + "loss": 0.0003, + "step": 2772 + }, + { + "epoch": 7.34, + "learning_rate": 1.3498659517426274e-05, + "loss": 0.0002, + "step": 2773 + }, + { + "epoch": 7.34, + "learning_rate": 1.348525469168901e-05, + "loss": 0.0002, + "step": 2774 + }, + { + "epoch": 7.34, + "learning_rate": 1.3471849865951744e-05, + "loss": 0.1261, + "step": 2775 + }, + { + "epoch": 7.34, + "learning_rate": 1.345844504021448e-05, + "loss": 0.0006, + "step": 2776 + }, + { + "epoch": 7.35, + "learning_rate": 1.3445040214477211e-05, + "loss": 0.0006, + "step": 2777 + }, + { + "epoch": 7.35, + "learning_rate": 1.3431635388739946e-05, + "loss": 0.0003, + "step": 2778 + }, + { + "epoch": 7.35, + "learning_rate": 1.3418230563002681e-05, + "loss": 0.0754, + "step": 2779 + }, + { + "epoch": 7.35, + "learning_rate": 1.3404825737265417e-05, + "loss": 0.0002, + "step": 2780 + }, + { + "epoch": 7.36, + "learning_rate": 1.3391420911528152e-05, + "loss": 0.0007, + "step": 2781 + }, + { + "epoch": 7.36, + "learning_rate": 1.3378016085790885e-05, + "loss": 0.0004, + "step": 2782 + }, + { + "epoch": 7.36, + "learning_rate": 1.336461126005362e-05, + "loss": 0.001, + "step": 2783 + }, + { + "epoch": 7.37, + "learning_rate": 1.3351206434316355e-05, + "loss": 0.0006, + "step": 2784 + }, + { + "epoch": 7.37, + "learning_rate": 1.333780160857909e-05, + "loss": 0.0227, + "step": 2785 + }, + { + "epoch": 7.37, + "learning_rate": 1.3324396782841826e-05, + "loss": 0.0002, + "step": 2786 + }, + { + "epoch": 7.37, + "learning_rate": 1.3310991957104557e-05, + "loss": 0.0002, + "step": 2787 + }, + { + "epoch": 7.38, + "learning_rate": 1.3297587131367293e-05, + "loss": 0.1036, + "step": 2788 + }, + { + "epoch": 7.38, + "learning_rate": 1.3284182305630028e-05, + "loss": 0.0014, + "step": 2789 + }, + { + "epoch": 7.38, + "learning_rate": 1.3270777479892763e-05, + "loss": 0.35, + "step": 2790 + }, + { + "epoch": 7.38, + "learning_rate": 1.3257372654155498e-05, + "loss": 0.0003, + "step": 2791 + }, + { + "epoch": 7.39, + "learning_rate": 1.324396782841823e-05, + "loss": 0.0182, + "step": 2792 + }, + { + "epoch": 7.39, + "learning_rate": 1.3230563002680965e-05, + "loss": 0.0038, + "step": 2793 + }, + { + "epoch": 7.39, + "learning_rate": 1.32171581769437e-05, + "loss": 0.0003, + "step": 2794 + }, + { + "epoch": 7.39, + "learning_rate": 1.3203753351206435e-05, + "loss": 0.0003, + "step": 2795 + }, + { + "epoch": 7.4, + "learning_rate": 1.319034852546917e-05, + "loss": 0.0008, + "step": 2796 + }, + { + "epoch": 7.4, + "learning_rate": 1.3176943699731904e-05, + "loss": 0.0003, + "step": 2797 + }, + { + "epoch": 7.4, + "learning_rate": 1.3163538873994639e-05, + "loss": 0.0005, + "step": 2798 + }, + { + "epoch": 7.4, + "learning_rate": 1.3150134048257374e-05, + "loss": 0.2165, + "step": 2799 + }, + { + "epoch": 7.41, + "learning_rate": 1.3136729222520109e-05, + "loss": 0.023, + "step": 2800 + }, + { + "epoch": 7.41, + "learning_rate": 1.3123324396782844e-05, + "loss": 0.0047, + "step": 2801 + }, + { + "epoch": 7.41, + "learning_rate": 1.3109919571045576e-05, + "loss": 0.1507, + "step": 2802 + }, + { + "epoch": 7.42, + "learning_rate": 1.3096514745308311e-05, + "loss": 0.2509, + "step": 2803 + }, + { + "epoch": 7.42, + "learning_rate": 1.3083109919571046e-05, + "loss": 0.0085, + "step": 2804 + }, + { + "epoch": 7.42, + "learning_rate": 1.3069705093833781e-05, + "loss": 0.2183, + "step": 2805 + }, + { + "epoch": 7.42, + "learning_rate": 1.3056300268096516e-05, + "loss": 0.0007, + "step": 2806 + }, + { + "epoch": 7.43, + "learning_rate": 1.3042895442359248e-05, + "loss": 0.0005, + "step": 2807 + }, + { + "epoch": 7.43, + "learning_rate": 1.3029490616621983e-05, + "loss": 0.1291, + "step": 2808 + }, + { + "epoch": 7.43, + "learning_rate": 1.3016085790884718e-05, + "loss": 0.1037, + "step": 2809 + }, + { + "epoch": 7.43, + "learning_rate": 1.3002680965147454e-05, + "loss": 0.0147, + "step": 2810 + }, + { + "epoch": 7.44, + "learning_rate": 1.2989276139410189e-05, + "loss": 0.0006, + "step": 2811 + }, + { + "epoch": 7.44, + "learning_rate": 1.2975871313672922e-05, + "loss": 0.0148, + "step": 2812 + }, + { + "epoch": 7.44, + "learning_rate": 1.2962466487935657e-05, + "loss": 0.0129, + "step": 2813 + }, + { + "epoch": 7.44, + "learning_rate": 1.2949061662198392e-05, + "loss": 0.0276, + "step": 2814 + }, + { + "epoch": 7.45, + "learning_rate": 1.2935656836461127e-05, + "loss": 0.0007, + "step": 2815 + }, + { + "epoch": 7.45, + "learning_rate": 1.2922252010723863e-05, + "loss": 0.0006, + "step": 2816 + }, + { + "epoch": 7.45, + "learning_rate": 1.2908847184986598e-05, + "loss": 0.0002, + "step": 2817 + }, + { + "epoch": 7.46, + "learning_rate": 1.289544235924933e-05, + "loss": 0.1274, + "step": 2818 + }, + { + "epoch": 7.46, + "learning_rate": 1.2882037533512065e-05, + "loss": 0.0009, + "step": 2819 + }, + { + "epoch": 7.46, + "learning_rate": 1.28686327077748e-05, + "loss": 0.0007, + "step": 2820 + }, + { + "epoch": 7.46, + "learning_rate": 1.2855227882037535e-05, + "loss": 0.002, + "step": 2821 + }, + { + "epoch": 7.47, + "learning_rate": 1.284182305630027e-05, + "loss": 0.0004, + "step": 2822 + }, + { + "epoch": 7.47, + "learning_rate": 1.2828418230563002e-05, + "loss": 0.0017, + "step": 2823 + }, + { + "epoch": 7.47, + "learning_rate": 1.2815013404825737e-05, + "loss": 0.001, + "step": 2824 + }, + { + "epoch": 7.47, + "learning_rate": 1.2801608579088472e-05, + "loss": 0.0106, + "step": 2825 + }, + { + "epoch": 7.48, + "learning_rate": 1.2788203753351207e-05, + "loss": 0.1158, + "step": 2826 + }, + { + "epoch": 7.48, + "learning_rate": 1.2774798927613942e-05, + "loss": 0.0004, + "step": 2827 + }, + { + "epoch": 7.48, + "learning_rate": 1.2761394101876676e-05, + "loss": 0.3214, + "step": 2828 + }, + { + "epoch": 7.48, + "learning_rate": 1.274798927613941e-05, + "loss": 0.0003, + "step": 2829 + }, + { + "epoch": 7.49, + "learning_rate": 1.2734584450402146e-05, + "loss": 0.0417, + "step": 2830 + }, + { + "epoch": 7.49, + "learning_rate": 1.2721179624664881e-05, + "loss": 0.0002, + "step": 2831 + }, + { + "epoch": 7.49, + "learning_rate": 1.2707774798927616e-05, + "loss": 0.0004, + "step": 2832 + }, + { + "epoch": 7.49, + "learning_rate": 1.2694369973190348e-05, + "loss": 0.1166, + "step": 2833 + }, + { + "epoch": 7.5, + "learning_rate": 1.2680965147453083e-05, + "loss": 0.0008, + "step": 2834 + }, + { + "epoch": 7.5, + "learning_rate": 1.2667560321715818e-05, + "loss": 0.0005, + "step": 2835 + }, + { + "epoch": 7.5, + "learning_rate": 1.2654155495978553e-05, + "loss": 0.0191, + "step": 2836 + }, + { + "epoch": 7.51, + "learning_rate": 1.2640750670241289e-05, + "loss": 0.0642, + "step": 2837 + }, + { + "epoch": 7.51, + "learning_rate": 1.262734584450402e-05, + "loss": 0.0256, + "step": 2838 + }, + { + "epoch": 7.51, + "learning_rate": 1.2613941018766755e-05, + "loss": 0.0007, + "step": 2839 + }, + { + "epoch": 7.51, + "learning_rate": 1.260053619302949e-05, + "loss": 0.049, + "step": 2840 + }, + { + "epoch": 7.52, + "learning_rate": 1.2587131367292226e-05, + "loss": 0.0012, + "step": 2841 + }, + { + "epoch": 7.52, + "learning_rate": 1.257372654155496e-05, + "loss": 0.0006, + "step": 2842 + }, + { + "epoch": 7.52, + "learning_rate": 1.2560321715817694e-05, + "loss": 0.2299, + "step": 2843 + }, + { + "epoch": 7.52, + "learning_rate": 1.254691689008043e-05, + "loss": 0.0006, + "step": 2844 + }, + { + "epoch": 7.53, + "learning_rate": 1.2533512064343164e-05, + "loss": 0.0346, + "step": 2845 + }, + { + "epoch": 7.53, + "learning_rate": 1.25201072386059e-05, + "loss": 0.0021, + "step": 2846 + }, + { + "epoch": 7.53, + "learning_rate": 1.2506702412868635e-05, + "loss": 0.0003, + "step": 2847 + }, + { + "epoch": 7.53, + "learning_rate": 1.2493297587131368e-05, + "loss": 0.135, + "step": 2848 + }, + { + "epoch": 7.54, + "learning_rate": 1.2479892761394102e-05, + "loss": 0.0003, + "step": 2849 + }, + { + "epoch": 7.54, + "learning_rate": 1.2466487935656837e-05, + "loss": 0.0005, + "step": 2850 + }, + { + "epoch": 7.54, + "learning_rate": 1.2453083109919572e-05, + "loss": 0.0005, + "step": 2851 + }, + { + "epoch": 7.54, + "learning_rate": 1.2439678284182305e-05, + "loss": 0.0007, + "step": 2852 + }, + { + "epoch": 7.55, + "learning_rate": 1.242627345844504e-05, + "loss": 0.0004, + "step": 2853 + }, + { + "epoch": 7.55, + "learning_rate": 1.2412868632707776e-05, + "loss": 0.0003, + "step": 2854 + }, + { + "epoch": 7.55, + "learning_rate": 1.239946380697051e-05, + "loss": 0.0003, + "step": 2855 + }, + { + "epoch": 7.56, + "learning_rate": 1.2386058981233246e-05, + "loss": 0.0006, + "step": 2856 + }, + { + "epoch": 7.56, + "learning_rate": 1.237265415549598e-05, + "loss": 0.0171, + "step": 2857 + }, + { + "epoch": 7.56, + "learning_rate": 1.2359249329758714e-05, + "loss": 0.1066, + "step": 2858 + }, + { + "epoch": 7.56, + "learning_rate": 1.2345844504021448e-05, + "loss": 0.0003, + "step": 2859 + }, + { + "epoch": 7.57, + "learning_rate": 1.2332439678284183e-05, + "loss": 0.1106, + "step": 2860 + }, + { + "epoch": 7.57, + "learning_rate": 1.2319034852546918e-05, + "loss": 0.0004, + "step": 2861 + }, + { + "epoch": 7.57, + "learning_rate": 1.2305630026809652e-05, + "loss": 0.0012, + "step": 2862 + }, + { + "epoch": 7.57, + "learning_rate": 1.2292225201072387e-05, + "loss": 0.0004, + "step": 2863 + }, + { + "epoch": 7.58, + "learning_rate": 1.2278820375335122e-05, + "loss": 0.0007, + "step": 2864 + }, + { + "epoch": 7.58, + "learning_rate": 1.2265415549597855e-05, + "loss": 0.0104, + "step": 2865 + }, + { + "epoch": 7.58, + "learning_rate": 1.225201072386059e-05, + "loss": 0.0003, + "step": 2866 + }, + { + "epoch": 7.58, + "learning_rate": 1.2238605898123324e-05, + "loss": 0.3976, + "step": 2867 + }, + { + "epoch": 7.59, + "learning_rate": 1.2225201072386059e-05, + "loss": 0.0003, + "step": 2868 + }, + { + "epoch": 7.59, + "learning_rate": 1.2211796246648794e-05, + "loss": 0.4433, + "step": 2869 + }, + { + "epoch": 7.59, + "learning_rate": 1.219839142091153e-05, + "loss": 0.0005, + "step": 2870 + }, + { + "epoch": 7.6, + "learning_rate": 1.2184986595174264e-05, + "loss": 0.0733, + "step": 2871 + }, + { + "epoch": 7.6, + "learning_rate": 1.2171581769436998e-05, + "loss": 0.0008, + "step": 2872 + }, + { + "epoch": 7.6, + "learning_rate": 1.2158176943699733e-05, + "loss": 0.0003, + "step": 2873 + }, + { + "epoch": 7.6, + "learning_rate": 1.2144772117962468e-05, + "loss": 0.0253, + "step": 2874 + }, + { + "epoch": 7.61, + "learning_rate": 1.2131367292225201e-05, + "loss": 0.09, + "step": 2875 + }, + { + "epoch": 7.61, + "learning_rate": 1.2117962466487937e-05, + "loss": 0.1283, + "step": 2876 + }, + { + "epoch": 7.61, + "learning_rate": 1.210455764075067e-05, + "loss": 0.0866, + "step": 2877 + }, + { + "epoch": 7.61, + "learning_rate": 1.2091152815013405e-05, + "loss": 0.0005, + "step": 2878 + }, + { + "epoch": 7.62, + "learning_rate": 1.207774798927614e-05, + "loss": 0.051, + "step": 2879 + }, + { + "epoch": 7.62, + "learning_rate": 1.2064343163538874e-05, + "loss": 0.0055, + "step": 2880 + }, + { + "epoch": 7.62, + "learning_rate": 1.2050938337801609e-05, + "loss": 0.001, + "step": 2881 + }, + { + "epoch": 7.62, + "learning_rate": 1.2037533512064344e-05, + "loss": 0.0765, + "step": 2882 + }, + { + "epoch": 7.63, + "learning_rate": 1.2024128686327079e-05, + "loss": 0.0239, + "step": 2883 + }, + { + "epoch": 7.63, + "learning_rate": 1.2010723860589814e-05, + "loss": 0.0616, + "step": 2884 + }, + { + "epoch": 7.63, + "learning_rate": 1.1997319034852548e-05, + "loss": 0.0342, + "step": 2885 + }, + { + "epoch": 7.63, + "learning_rate": 1.1983914209115283e-05, + "loss": 0.0006, + "step": 2886 + }, + { + "epoch": 7.64, + "learning_rate": 1.1970509383378016e-05, + "loss": 0.091, + "step": 2887 + }, + { + "epoch": 7.64, + "learning_rate": 1.1957104557640751e-05, + "loss": 0.0004, + "step": 2888 + }, + { + "epoch": 7.64, + "learning_rate": 1.1943699731903486e-05, + "loss": 0.0257, + "step": 2889 + }, + { + "epoch": 7.65, + "learning_rate": 1.193029490616622e-05, + "loss": 0.0422, + "step": 2890 + }, + { + "epoch": 7.65, + "learning_rate": 1.1916890080428955e-05, + "loss": 0.1861, + "step": 2891 + }, + { + "epoch": 7.65, + "learning_rate": 1.1903485254691689e-05, + "loss": 0.0003, + "step": 2892 + }, + { + "epoch": 7.65, + "learning_rate": 1.1890080428954424e-05, + "loss": 0.0678, + "step": 2893 + }, + { + "epoch": 7.66, + "learning_rate": 1.1876675603217159e-05, + "loss": 0.0005, + "step": 2894 + }, + { + "epoch": 7.66, + "learning_rate": 1.1863270777479892e-05, + "loss": 0.0234, + "step": 2895 + }, + { + "epoch": 7.66, + "learning_rate": 1.1849865951742627e-05, + "loss": 0.0007, + "step": 2896 + }, + { + "epoch": 7.66, + "learning_rate": 1.1836461126005362e-05, + "loss": 0.0963, + "step": 2897 + }, + { + "epoch": 7.67, + "learning_rate": 1.1823056300268098e-05, + "loss": 0.0132, + "step": 2898 + }, + { + "epoch": 7.67, + "learning_rate": 1.1809651474530833e-05, + "loss": 0.0019, + "step": 2899 + }, + { + "epoch": 7.67, + "learning_rate": 1.1796246648793566e-05, + "loss": 0.0219, + "step": 2900 + }, + { + "epoch": 7.67, + "learning_rate": 1.1782841823056301e-05, + "loss": 0.0062, + "step": 2901 + }, + { + "epoch": 7.68, + "learning_rate": 1.1769436997319036e-05, + "loss": 0.0003, + "step": 2902 + }, + { + "epoch": 7.68, + "learning_rate": 1.175603217158177e-05, + "loss": 0.0009, + "step": 2903 + }, + { + "epoch": 7.68, + "learning_rate": 1.1742627345844505e-05, + "loss": 0.1446, + "step": 2904 + }, + { + "epoch": 7.69, + "learning_rate": 1.1729222520107238e-05, + "loss": 0.0103, + "step": 2905 + }, + { + "epoch": 7.69, + "learning_rate": 1.1715817694369974e-05, + "loss": 0.0004, + "step": 2906 + }, + { + "epoch": 7.69, + "learning_rate": 1.1702412868632709e-05, + "loss": 0.2502, + "step": 2907 + }, + { + "epoch": 7.69, + "learning_rate": 1.1689008042895442e-05, + "loss": 0.0005, + "step": 2908 + }, + { + "epoch": 7.7, + "learning_rate": 1.1675603217158177e-05, + "loss": 0.0001, + "step": 2909 + }, + { + "epoch": 7.7, + "learning_rate": 1.166219839142091e-05, + "loss": 0.0928, + "step": 2910 + }, + { + "epoch": 7.7, + "learning_rate": 1.1648793565683646e-05, + "loss": 0.0195, + "step": 2911 + }, + { + "epoch": 7.7, + "learning_rate": 1.1635388739946381e-05, + "loss": 0.0727, + "step": 2912 + }, + { + "epoch": 7.71, + "learning_rate": 1.1621983914209116e-05, + "loss": 0.0778, + "step": 2913 + }, + { + "epoch": 7.71, + "learning_rate": 1.1608579088471851e-05, + "loss": 0.1304, + "step": 2914 + }, + { + "epoch": 7.71, + "learning_rate": 1.1595174262734585e-05, + "loss": 0.0002, + "step": 2915 + }, + { + "epoch": 7.71, + "learning_rate": 1.158176943699732e-05, + "loss": 0.0003, + "step": 2916 + }, + { + "epoch": 7.72, + "learning_rate": 1.1568364611260055e-05, + "loss": 0.0137, + "step": 2917 + }, + { + "epoch": 7.72, + "learning_rate": 1.1554959785522788e-05, + "loss": 0.0003, + "step": 2918 + }, + { + "epoch": 7.72, + "learning_rate": 1.1541554959785523e-05, + "loss": 0.0018, + "step": 2919 + }, + { + "epoch": 7.72, + "learning_rate": 1.1528150134048257e-05, + "loss": 0.0057, + "step": 2920 + }, + { + "epoch": 7.73, + "learning_rate": 1.1514745308310992e-05, + "loss": 0.0003, + "step": 2921 + }, + { + "epoch": 7.73, + "learning_rate": 1.1501340482573727e-05, + "loss": 0.0015, + "step": 2922 + }, + { + "epoch": 7.73, + "learning_rate": 1.148793565683646e-05, + "loss": 0.0004, + "step": 2923 + }, + { + "epoch": 7.74, + "learning_rate": 1.1474530831099196e-05, + "loss": 0.0005, + "step": 2924 + }, + { + "epoch": 7.74, + "learning_rate": 1.1461126005361931e-05, + "loss": 0.0345, + "step": 2925 + }, + { + "epoch": 7.74, + "learning_rate": 1.1447721179624666e-05, + "loss": 0.0878, + "step": 2926 + }, + { + "epoch": 7.74, + "learning_rate": 1.1434316353887401e-05, + "loss": 0.0003, + "step": 2927 + }, + { + "epoch": 7.75, + "learning_rate": 1.1420911528150135e-05, + "loss": 0.0732, + "step": 2928 + }, + { + "epoch": 7.75, + "learning_rate": 1.140750670241287e-05, + "loss": 0.0005, + "step": 2929 + }, + { + "epoch": 7.75, + "learning_rate": 1.1394101876675605e-05, + "loss": 0.001, + "step": 2930 + }, + { + "epoch": 7.75, + "learning_rate": 1.1380697050938338e-05, + "loss": 0.0038, + "step": 2931 + }, + { + "epoch": 7.76, + "learning_rate": 1.1367292225201073e-05, + "loss": 0.0056, + "step": 2932 + }, + { + "epoch": 7.76, + "learning_rate": 1.1353887399463807e-05, + "loss": 0.1057, + "step": 2933 + }, + { + "epoch": 7.76, + "learning_rate": 1.1340482573726542e-05, + "loss": 0.0005, + "step": 2934 + }, + { + "epoch": 7.76, + "learning_rate": 1.1327077747989277e-05, + "loss": 0.0419, + "step": 2935 + }, + { + "epoch": 7.77, + "learning_rate": 1.131367292225201e-05, + "loss": 0.0304, + "step": 2936 + }, + { + "epoch": 7.77, + "learning_rate": 1.1300268096514746e-05, + "loss": 0.0002, + "step": 2937 + }, + { + "epoch": 7.77, + "learning_rate": 1.1286863270777479e-05, + "loss": 0.0332, + "step": 2938 + }, + { + "epoch": 7.78, + "learning_rate": 1.1273458445040214e-05, + "loss": 0.1015, + "step": 2939 + }, + { + "epoch": 7.78, + "learning_rate": 1.126005361930295e-05, + "loss": 0.0008, + "step": 2940 + }, + { + "epoch": 7.78, + "learning_rate": 1.1246648793565684e-05, + "loss": 0.0273, + "step": 2941 + }, + { + "epoch": 7.78, + "learning_rate": 1.123324396782842e-05, + "loss": 0.0022, + "step": 2942 + }, + { + "epoch": 7.79, + "learning_rate": 1.1219839142091153e-05, + "loss": 0.0009, + "step": 2943 + }, + { + "epoch": 7.79, + "learning_rate": 1.1206434316353888e-05, + "loss": 0.0003, + "step": 2944 + }, + { + "epoch": 7.79, + "learning_rate": 1.1193029490616623e-05, + "loss": 0.0057, + "step": 2945 + }, + { + "epoch": 7.79, + "learning_rate": 1.1179624664879357e-05, + "loss": 0.0014, + "step": 2946 + }, + { + "epoch": 7.8, + "learning_rate": 1.1166219839142092e-05, + "loss": 0.0009, + "step": 2947 + }, + { + "epoch": 7.8, + "learning_rate": 1.1152815013404825e-05, + "loss": 0.0019, + "step": 2948 + }, + { + "epoch": 7.8, + "learning_rate": 1.113941018766756e-05, + "loss": 0.0005, + "step": 2949 + }, + { + "epoch": 7.8, + "learning_rate": 1.1126005361930296e-05, + "loss": 0.0181, + "step": 2950 + }, + { + "epoch": 7.81, + "learning_rate": 1.1112600536193029e-05, + "loss": 0.001, + "step": 2951 + }, + { + "epoch": 7.81, + "learning_rate": 1.1099195710455764e-05, + "loss": 0.0331, + "step": 2952 + }, + { + "epoch": 7.81, + "learning_rate": 1.10857908847185e-05, + "loss": 0.0003, + "step": 2953 + }, + { + "epoch": 7.81, + "learning_rate": 1.1072386058981234e-05, + "loss": 0.0002, + "step": 2954 + }, + { + "epoch": 7.82, + "learning_rate": 1.105898123324397e-05, + "loss": 0.0305, + "step": 2955 + }, + { + "epoch": 7.82, + "learning_rate": 1.1045576407506703e-05, + "loss": 0.0023, + "step": 2956 + }, + { + "epoch": 7.82, + "learning_rate": 1.1032171581769438e-05, + "loss": 0.0359, + "step": 2957 + }, + { + "epoch": 7.83, + "learning_rate": 1.1018766756032173e-05, + "loss": 0.1075, + "step": 2958 + }, + { + "epoch": 7.83, + "learning_rate": 1.1005361930294907e-05, + "loss": 0.023, + "step": 2959 + }, + { + "epoch": 7.83, + "learning_rate": 1.0991957104557642e-05, + "loss": 0.1425, + "step": 2960 + }, + { + "epoch": 7.83, + "learning_rate": 1.0978552278820375e-05, + "loss": 0.4114, + "step": 2961 + }, + { + "epoch": 7.84, + "learning_rate": 1.096514745308311e-05, + "loss": 0.0003, + "step": 2962 + }, + { + "epoch": 7.84, + "learning_rate": 1.0951742627345846e-05, + "loss": 0.2824, + "step": 2963 + }, + { + "epoch": 7.84, + "learning_rate": 1.0938337801608579e-05, + "loss": 0.0002, + "step": 2964 + }, + { + "epoch": 7.84, + "learning_rate": 1.0924932975871314e-05, + "loss": 0.0004, + "step": 2965 + }, + { + "epoch": 7.85, + "learning_rate": 1.0911528150134048e-05, + "loss": 0.0002, + "step": 2966 + }, + { + "epoch": 7.85, + "learning_rate": 1.0898123324396783e-05, + "loss": 0.0003, + "step": 2967 + }, + { + "epoch": 7.85, + "learning_rate": 1.0884718498659518e-05, + "loss": 0.0003, + "step": 2968 + }, + { + "epoch": 7.85, + "learning_rate": 1.0871313672922253e-05, + "loss": 0.2122, + "step": 2969 + }, + { + "epoch": 7.86, + "learning_rate": 1.0857908847184988e-05, + "loss": 0.0002, + "step": 2970 + }, + { + "epoch": 7.86, + "learning_rate": 1.0844504021447721e-05, + "loss": 0.0003, + "step": 2971 + }, + { + "epoch": 7.86, + "learning_rate": 1.0831099195710457e-05, + "loss": 0.0002, + "step": 2972 + }, + { + "epoch": 7.87, + "learning_rate": 1.0817694369973192e-05, + "loss": 0.0002, + "step": 2973 + }, + { + "epoch": 7.87, + "learning_rate": 1.0804289544235925e-05, + "loss": 0.001, + "step": 2974 + }, + { + "epoch": 7.87, + "learning_rate": 1.079088471849866e-05, + "loss": 0.0002, + "step": 2975 + }, + { + "epoch": 7.87, + "learning_rate": 1.0777479892761394e-05, + "loss": 0.0004, + "step": 2976 + }, + { + "epoch": 7.88, + "learning_rate": 1.0764075067024129e-05, + "loss": 0.0003, + "step": 2977 + }, + { + "epoch": 7.88, + "learning_rate": 1.0750670241286864e-05, + "loss": 0.0003, + "step": 2978 + }, + { + "epoch": 7.88, + "learning_rate": 1.0737265415549597e-05, + "loss": 0.336, + "step": 2979 + }, + { + "epoch": 7.88, + "learning_rate": 1.0723860589812333e-05, + "loss": 0.0003, + "step": 2980 + }, + { + "epoch": 7.89, + "learning_rate": 1.0710455764075068e-05, + "loss": 0.0017, + "step": 2981 + }, + { + "epoch": 7.89, + "learning_rate": 1.0697050938337803e-05, + "loss": 0.1716, + "step": 2982 + }, + { + "epoch": 7.89, + "learning_rate": 1.0683646112600538e-05, + "loss": 0.0004, + "step": 2983 + }, + { + "epoch": 7.89, + "learning_rate": 1.0670241286863271e-05, + "loss": 0.0003, + "step": 2984 + }, + { + "epoch": 7.9, + "learning_rate": 1.0656836461126007e-05, + "loss": 0.1927, + "step": 2985 + }, + { + "epoch": 7.9, + "learning_rate": 1.064343163538874e-05, + "loss": 0.0003, + "step": 2986 + }, + { + "epoch": 7.9, + "learning_rate": 1.0630026809651475e-05, + "loss": 0.0002, + "step": 2987 + }, + { + "epoch": 7.9, + "learning_rate": 1.061662198391421e-05, + "loss": 0.2357, + "step": 2988 + }, + { + "epoch": 7.91, + "learning_rate": 1.0603217158176944e-05, + "loss": 0.464, + "step": 2989 + }, + { + "epoch": 7.91, + "learning_rate": 1.0589812332439679e-05, + "loss": 0.0015, + "step": 2990 + }, + { + "epoch": 7.91, + "learning_rate": 1.0576407506702414e-05, + "loss": 0.0792, + "step": 2991 + }, + { + "epoch": 7.92, + "learning_rate": 1.0563002680965147e-05, + "loss": 0.101, + "step": 2992 + }, + { + "epoch": 7.92, + "learning_rate": 1.0549597855227882e-05, + "loss": 0.0093, + "step": 2993 + }, + { + "epoch": 7.92, + "learning_rate": 1.0536193029490616e-05, + "loss": 0.0007, + "step": 2994 + }, + { + "epoch": 7.92, + "learning_rate": 1.0522788203753351e-05, + "loss": 0.0016, + "step": 2995 + }, + { + "epoch": 7.93, + "learning_rate": 1.0509383378016086e-05, + "loss": 0.0008, + "step": 2996 + }, + { + "epoch": 7.93, + "learning_rate": 1.0495978552278821e-05, + "loss": 0.0047, + "step": 2997 + }, + { + "epoch": 7.93, + "learning_rate": 1.0482573726541556e-05, + "loss": 0.0171, + "step": 2998 + }, + { + "epoch": 7.93, + "learning_rate": 1.046916890080429e-05, + "loss": 0.3023, + "step": 2999 + }, + { + "epoch": 7.94, + "learning_rate": 1.0455764075067025e-05, + "loss": 0.0011, + "step": 3000 + }, + { + "epoch": 7.94, + "learning_rate": 1.044235924932976e-05, + "loss": 0.0816, + "step": 3001 + }, + { + "epoch": 7.94, + "learning_rate": 1.0428954423592494e-05, + "loss": 0.0025, + "step": 3002 + }, + { + "epoch": 7.94, + "learning_rate": 1.0415549597855229e-05, + "loss": 0.0094, + "step": 3003 + }, + { + "epoch": 7.95, + "learning_rate": 1.0402144772117962e-05, + "loss": 0.0644, + "step": 3004 + }, + { + "epoch": 7.95, + "learning_rate": 1.0388739946380697e-05, + "loss": 0.3261, + "step": 3005 + }, + { + "epoch": 7.95, + "learning_rate": 1.0375335120643432e-05, + "loss": 0.1332, + "step": 3006 + }, + { + "epoch": 7.96, + "learning_rate": 1.0361930294906166e-05, + "loss": 0.0067, + "step": 3007 + }, + { + "epoch": 7.96, + "learning_rate": 1.0348525469168901e-05, + "loss": 0.0008, + "step": 3008 + }, + { + "epoch": 7.96, + "learning_rate": 1.0335120643431636e-05, + "loss": 0.174, + "step": 3009 + }, + { + "epoch": 7.96, + "learning_rate": 1.0321715817694371e-05, + "loss": 0.0005, + "step": 3010 + }, + { + "epoch": 7.97, + "learning_rate": 1.0308310991957106e-05, + "loss": 0.0505, + "step": 3011 + }, + { + "epoch": 7.97, + "learning_rate": 1.029490616621984e-05, + "loss": 0.0016, + "step": 3012 + }, + { + "epoch": 7.97, + "learning_rate": 1.0281501340482575e-05, + "loss": 0.1172, + "step": 3013 + }, + { + "epoch": 7.97, + "learning_rate": 1.0268096514745308e-05, + "loss": 0.0268, + "step": 3014 + }, + { + "epoch": 7.98, + "learning_rate": 1.0254691689008044e-05, + "loss": 0.0269, + "step": 3015 + }, + { + "epoch": 7.98, + "learning_rate": 1.0241286863270779e-05, + "loss": 0.0867, + "step": 3016 + }, + { + "epoch": 7.98, + "learning_rate": 1.0227882037533512e-05, + "loss": 0.1145, + "step": 3017 + }, + { + "epoch": 7.98, + "learning_rate": 1.0214477211796247e-05, + "loss": 0.0035, + "step": 3018 + }, + { + "epoch": 7.99, + "learning_rate": 1.0201072386058982e-05, + "loss": 0.0035, + "step": 3019 + }, + { + "epoch": 7.99, + "learning_rate": 1.0187667560321716e-05, + "loss": 0.0003, + "step": 3020 + }, + { + "epoch": 7.99, + "learning_rate": 1.0174262734584451e-05, + "loss": 0.14, + "step": 3021 + }, + { + "epoch": 7.99, + "learning_rate": 1.0160857908847184e-05, + "loss": 0.1619, + "step": 3022 + }, + { + "epoch": 8.0, + "learning_rate": 1.014745308310992e-05, + "loss": 0.0006, + "step": 3023 + }, + { + "epoch": 8.0, + "learning_rate": 1.0134048257372655e-05, + "loss": 0.0004, + "step": 3024 + }, + { + "epoch": 8.0, + "eval_f1": 0.7734138972809668, + "eval_loss": 1.2510614395141602, + "eval_runtime": 1.9043, + "eval_samples_per_second": 794.53, + "eval_steps_per_second": 49.888, + "step": 3024 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 774747999858240.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3024/training_args.bin b/checkpoint-3024/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-3024/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-3402/config.json b/checkpoint-3402/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-3402/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-3402/optimizer.pt b/checkpoint-3402/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..45d88c18290040c6466f6f5928affa9e3390f390 --- /dev/null +++ b/checkpoint-3402/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e85edc83ec5ffe29136a805b6f367bf7eb43dff879939b6df7aa705d88753f94 +size 526325317 diff --git a/checkpoint-3402/pytorch_model.bin b/checkpoint-3402/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c73055f185fdf213b33895e183e68495b2a7610 --- /dev/null +++ b/checkpoint-3402/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:212663d3b081d1c5839dc0e4fb48b43cc45e577bc88f440512f19584946b9db6 +size 263167661 diff --git a/checkpoint-3402/rng_state.pth b/checkpoint-3402/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca3f4905e5e5bf06cff704a3135514dea8f940af --- /dev/null +++ b/checkpoint-3402/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e0373cf75568e9b513cbf91702bd9f246da33da5b59e14eb78b0282c54ab90 +size 14575 diff --git a/checkpoint-3402/scheduler.pt b/checkpoint-3402/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aee6c41e1a40f49c02e1a410e6d895d562f3adfc --- /dev/null +++ b/checkpoint-3402/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329ec617799a517751e41c3900a932ef850318bdb418eea4fee34fa1016db88a +size 627 diff --git a/checkpoint-3402/trainer_state.json b/checkpoint-3402/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..55bd85773ecfd251340700cecd6c5b8625c0930c --- /dev/null +++ b/checkpoint-3402/trainer_state.json @@ -0,0 +1,20509 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 9.0, + "global_step": 3402, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + }, + { + "epoch": 3.0, + "learning_rate": 3.545576407506703e-05, + "loss": 0.0111, + "step": 1135 + }, + { + "epoch": 3.01, + "learning_rate": 3.544235924932976e-05, + "loss": 0.0166, + "step": 1136 + }, + { + "epoch": 3.01, + "learning_rate": 3.54289544235925e-05, + "loss": 0.2159, + "step": 1137 + }, + { + "epoch": 3.01, + "learning_rate": 3.541554959785523e-05, + "loss": 0.0096, + "step": 1138 + }, + { + "epoch": 3.01, + "learning_rate": 3.540214477211796e-05, + "loss": 0.1352, + "step": 1139 + }, + { + "epoch": 3.02, + "learning_rate": 3.53887399463807e-05, + "loss": 0.0195, + "step": 1140 + }, + { + "epoch": 3.02, + "learning_rate": 3.5375335120643434e-05, + "loss": 0.1579, + "step": 1141 + }, + { + "epoch": 3.02, + "learning_rate": 3.536193029490617e-05, + "loss": 0.0078, + "step": 1142 + }, + { + "epoch": 3.02, + "learning_rate": 3.5348525469168904e-05, + "loss": 0.0111, + "step": 1143 + }, + { + "epoch": 3.03, + "learning_rate": 3.533512064343163e-05, + "loss": 0.2457, + "step": 1144 + }, + { + "epoch": 3.03, + "learning_rate": 3.5321715817694374e-05, + "loss": 0.014, + "step": 1145 + }, + { + "epoch": 3.03, + "learning_rate": 3.53083109919571e-05, + "loss": 0.2021, + "step": 1146 + }, + { + "epoch": 3.03, + "learning_rate": 3.5294906166219844e-05, + "loss": 0.5334, + "step": 1147 + }, + { + "epoch": 3.04, + "learning_rate": 3.528150134048257e-05, + "loss": 0.0116, + "step": 1148 + }, + { + "epoch": 3.04, + "learning_rate": 3.526809651474531e-05, + "loss": 0.0099, + "step": 1149 + }, + { + "epoch": 3.04, + "learning_rate": 3.525469168900804e-05, + "loss": 0.2102, + "step": 1150 + }, + { + "epoch": 3.04, + "learning_rate": 3.524128686327078e-05, + "loss": 0.0093, + "step": 1151 + }, + { + "epoch": 3.05, + "learning_rate": 3.522788203753351e-05, + "loss": 0.0112, + "step": 1152 + }, + { + "epoch": 3.05, + "learning_rate": 3.521447721179625e-05, + "loss": 0.1761, + "step": 1153 + }, + { + "epoch": 3.05, + "learning_rate": 3.5201072386058984e-05, + "loss": 0.1608, + "step": 1154 + }, + { + "epoch": 3.06, + "learning_rate": 3.518766756032172e-05, + "loss": 0.2883, + "step": 1155 + }, + { + "epoch": 3.06, + "learning_rate": 3.5174262734584454e-05, + "loss": 0.0304, + "step": 1156 + }, + { + "epoch": 3.06, + "learning_rate": 3.516085790884719e-05, + "loss": 0.0623, + "step": 1157 + }, + { + "epoch": 3.06, + "learning_rate": 3.5147453083109924e-05, + "loss": 0.1824, + "step": 1158 + }, + { + "epoch": 3.07, + "learning_rate": 3.513404825737265e-05, + "loss": 0.2527, + "step": 1159 + }, + { + "epoch": 3.07, + "learning_rate": 3.5120643431635394e-05, + "loss": 0.0877, + "step": 1160 + }, + { + "epoch": 3.07, + "learning_rate": 3.510723860589812e-05, + "loss": 0.2735, + "step": 1161 + }, + { + "epoch": 3.07, + "learning_rate": 3.5093833780160865e-05, + "loss": 0.1126, + "step": 1162 + }, + { + "epoch": 3.08, + "learning_rate": 3.508042895442359e-05, + "loss": 0.2498, + "step": 1163 + }, + { + "epoch": 3.08, + "learning_rate": 3.506702412868633e-05, + "loss": 0.022, + "step": 1164 + }, + { + "epoch": 3.08, + "learning_rate": 3.505361930294906e-05, + "loss": 0.2768, + "step": 1165 + }, + { + "epoch": 3.08, + "learning_rate": 3.50402144772118e-05, + "loss": 0.0429, + "step": 1166 + }, + { + "epoch": 3.09, + "learning_rate": 3.5026809651474533e-05, + "loss": 0.0198, + "step": 1167 + }, + { + "epoch": 3.09, + "learning_rate": 3.501340482573727e-05, + "loss": 0.0097, + "step": 1168 + }, + { + "epoch": 3.09, + "learning_rate": 3.5e-05, + "loss": 0.0276, + "step": 1169 + }, + { + "epoch": 3.1, + "learning_rate": 3.498659517426274e-05, + "loss": 0.2276, + "step": 1170 + }, + { + "epoch": 3.1, + "learning_rate": 3.497319034852547e-05, + "loss": 0.0461, + "step": 1171 + }, + { + "epoch": 3.1, + "learning_rate": 3.495978552278821e-05, + "loss": 0.0103, + "step": 1172 + }, + { + "epoch": 3.1, + "learning_rate": 3.494638069705094e-05, + "loss": 0.1455, + "step": 1173 + }, + { + "epoch": 3.11, + "learning_rate": 3.493297587131367e-05, + "loss": 0.0865, + "step": 1174 + }, + { + "epoch": 3.11, + "learning_rate": 3.491957104557641e-05, + "loss": 0.3226, + "step": 1175 + }, + { + "epoch": 3.11, + "learning_rate": 3.490616621983914e-05, + "loss": 0.1744, + "step": 1176 + }, + { + "epoch": 3.11, + "learning_rate": 3.489276139410188e-05, + "loss": 0.0148, + "step": 1177 + }, + { + "epoch": 3.12, + "learning_rate": 3.487935656836461e-05, + "loss": 0.2582, + "step": 1178 + }, + { + "epoch": 3.12, + "learning_rate": 3.486595174262735e-05, + "loss": 0.2782, + "step": 1179 + }, + { + "epoch": 3.12, + "learning_rate": 3.485254691689008e-05, + "loss": 0.143, + "step": 1180 + }, + { + "epoch": 3.12, + "learning_rate": 3.483914209115282e-05, + "loss": 0.0853, + "step": 1181 + }, + { + "epoch": 3.13, + "learning_rate": 3.4825737265415554e-05, + "loss": 0.1361, + "step": 1182 + }, + { + "epoch": 3.13, + "learning_rate": 3.481233243967829e-05, + "loss": 0.0883, + "step": 1183 + }, + { + "epoch": 3.13, + "learning_rate": 3.479892761394102e-05, + "loss": 0.0116, + "step": 1184 + }, + { + "epoch": 3.13, + "learning_rate": 3.478552278820376e-05, + "loss": 0.0531, + "step": 1185 + }, + { + "epoch": 3.14, + "learning_rate": 3.477211796246649e-05, + "loss": 0.0184, + "step": 1186 + }, + { + "epoch": 3.14, + "learning_rate": 3.475871313672923e-05, + "loss": 0.1601, + "step": 1187 + }, + { + "epoch": 3.14, + "learning_rate": 3.474530831099196e-05, + "loss": 0.007, + "step": 1188 + }, + { + "epoch": 3.15, + "learning_rate": 3.473190348525469e-05, + "loss": 0.0101, + "step": 1189 + }, + { + "epoch": 3.15, + "learning_rate": 3.471849865951743e-05, + "loss": 0.2385, + "step": 1190 + }, + { + "epoch": 3.15, + "learning_rate": 3.470509383378016e-05, + "loss": 0.0075, + "step": 1191 + }, + { + "epoch": 3.15, + "learning_rate": 3.46916890080429e-05, + "loss": 0.0919, + "step": 1192 + }, + { + "epoch": 3.16, + "learning_rate": 3.467828418230563e-05, + "loss": 0.0162, + "step": 1193 + }, + { + "epoch": 3.16, + "learning_rate": 3.466487935656836e-05, + "loss": 0.2239, + "step": 1194 + }, + { + "epoch": 3.16, + "learning_rate": 3.4651474530831104e-05, + "loss": 0.5757, + "step": 1195 + }, + { + "epoch": 3.16, + "learning_rate": 3.463806970509383e-05, + "loss": 0.0774, + "step": 1196 + }, + { + "epoch": 3.17, + "learning_rate": 3.4624664879356574e-05, + "loss": 0.2124, + "step": 1197 + }, + { + "epoch": 3.17, + "learning_rate": 3.46112600536193e-05, + "loss": 0.0107, + "step": 1198 + }, + { + "epoch": 3.17, + "learning_rate": 3.459785522788204e-05, + "loss": 0.3179, + "step": 1199 + }, + { + "epoch": 3.17, + "learning_rate": 3.458445040214477e-05, + "loss": 0.0138, + "step": 1200 + }, + { + "epoch": 3.18, + "learning_rate": 3.457104557640751e-05, + "loss": 0.0094, + "step": 1201 + }, + { + "epoch": 3.18, + "learning_rate": 3.455764075067024e-05, + "loss": 0.0039, + "step": 1202 + }, + { + "epoch": 3.18, + "learning_rate": 3.454423592493298e-05, + "loss": 0.0745, + "step": 1203 + }, + { + "epoch": 3.19, + "learning_rate": 3.453083109919571e-05, + "loss": 0.0387, + "step": 1204 + }, + { + "epoch": 3.19, + "learning_rate": 3.451742627345845e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 3.19, + "learning_rate": 3.450402144772118e-05, + "loss": 0.1299, + "step": 1206 + }, + { + "epoch": 3.19, + "learning_rate": 3.449061662198392e-05, + "loss": 0.2821, + "step": 1207 + }, + { + "epoch": 3.2, + "learning_rate": 3.4477211796246653e-05, + "loss": 0.2236, + "step": 1208 + }, + { + "epoch": 3.2, + "learning_rate": 3.446380697050938e-05, + "loss": 0.1436, + "step": 1209 + }, + { + "epoch": 3.2, + "learning_rate": 3.4450402144772124e-05, + "loss": 0.1504, + "step": 1210 + }, + { + "epoch": 3.2, + "learning_rate": 3.443699731903485e-05, + "loss": 0.0415, + "step": 1211 + }, + { + "epoch": 3.21, + "learning_rate": 3.4423592493297594e-05, + "loss": 0.023, + "step": 1212 + }, + { + "epoch": 3.21, + "learning_rate": 3.441018766756032e-05, + "loss": 0.2128, + "step": 1213 + }, + { + "epoch": 3.21, + "learning_rate": 3.439678284182306e-05, + "loss": 0.0066, + "step": 1214 + }, + { + "epoch": 3.21, + "learning_rate": 3.438337801608579e-05, + "loss": 0.4345, + "step": 1215 + }, + { + "epoch": 3.22, + "learning_rate": 3.436997319034853e-05, + "loss": 0.0214, + "step": 1216 + }, + { + "epoch": 3.22, + "learning_rate": 3.435656836461126e-05, + "loss": 0.2094, + "step": 1217 + }, + { + "epoch": 3.22, + "learning_rate": 3.4343163538874e-05, + "loss": 0.0822, + "step": 1218 + }, + { + "epoch": 3.22, + "learning_rate": 3.4329758713136726e-05, + "loss": 0.1153, + "step": 1219 + }, + { + "epoch": 3.23, + "learning_rate": 3.431635388739947e-05, + "loss": 0.0059, + "step": 1220 + }, + { + "epoch": 3.23, + "learning_rate": 3.43029490616622e-05, + "loss": 0.0069, + "step": 1221 + }, + { + "epoch": 3.23, + "learning_rate": 3.428954423592494e-05, + "loss": 0.044, + "step": 1222 + }, + { + "epoch": 3.24, + "learning_rate": 3.427613941018767e-05, + "loss": 0.1975, + "step": 1223 + }, + { + "epoch": 3.24, + "learning_rate": 3.42627345844504e-05, + "loss": 0.3294, + "step": 1224 + }, + { + "epoch": 3.24, + "learning_rate": 3.424932975871314e-05, + "loss": 0.026, + "step": 1225 + }, + { + "epoch": 3.24, + "learning_rate": 3.423592493297587e-05, + "loss": 0.2666, + "step": 1226 + }, + { + "epoch": 3.25, + "learning_rate": 3.422252010723861e-05, + "loss": 0.0628, + "step": 1227 + }, + { + "epoch": 3.25, + "learning_rate": 3.420911528150134e-05, + "loss": 0.0068, + "step": 1228 + }, + { + "epoch": 3.25, + "learning_rate": 3.419571045576407e-05, + "loss": 0.0144, + "step": 1229 + }, + { + "epoch": 3.25, + "learning_rate": 3.418230563002681e-05, + "loss": 0.0029, + "step": 1230 + }, + { + "epoch": 3.26, + "learning_rate": 3.416890080428954e-05, + "loss": 0.606, + "step": 1231 + }, + { + "epoch": 3.26, + "learning_rate": 3.415549597855228e-05, + "loss": 0.2162, + "step": 1232 + }, + { + "epoch": 3.26, + "learning_rate": 3.414209115281501e-05, + "loss": 0.146, + "step": 1233 + }, + { + "epoch": 3.26, + "learning_rate": 3.412868632707775e-05, + "loss": 0.3649, + "step": 1234 + }, + { + "epoch": 3.27, + "learning_rate": 3.411528150134048e-05, + "loss": 0.0062, + "step": 1235 + }, + { + "epoch": 3.27, + "learning_rate": 3.410187667560322e-05, + "loss": 0.4097, + "step": 1236 + }, + { + "epoch": 3.27, + "learning_rate": 3.408847184986595e-05, + "loss": 0.5354, + "step": 1237 + }, + { + "epoch": 3.28, + "learning_rate": 3.407506702412869e-05, + "loss": 0.6222, + "step": 1238 + }, + { + "epoch": 3.28, + "learning_rate": 3.406166219839142e-05, + "loss": 0.0023, + "step": 1239 + }, + { + "epoch": 3.28, + "learning_rate": 3.404825737265416e-05, + "loss": 0.0247, + "step": 1240 + }, + { + "epoch": 3.28, + "learning_rate": 3.403485254691689e-05, + "loss": 0.0051, + "step": 1241 + }, + { + "epoch": 3.29, + "learning_rate": 3.402144772117963e-05, + "loss": 0.2504, + "step": 1242 + }, + { + "epoch": 3.29, + "learning_rate": 3.400804289544236e-05, + "loss": 0.0195, + "step": 1243 + }, + { + "epoch": 3.29, + "learning_rate": 3.39946380697051e-05, + "loss": 0.3706, + "step": 1244 + }, + { + "epoch": 3.29, + "learning_rate": 3.398123324396783e-05, + "loss": 0.0174, + "step": 1245 + }, + { + "epoch": 3.3, + "learning_rate": 3.396782841823056e-05, + "loss": 0.0068, + "step": 1246 + }, + { + "epoch": 3.3, + "learning_rate": 3.39544235924933e-05, + "loss": 0.3938, + "step": 1247 + }, + { + "epoch": 3.3, + "learning_rate": 3.394101876675603e-05, + "loss": 0.0114, + "step": 1248 + }, + { + "epoch": 3.3, + "learning_rate": 3.3927613941018774e-05, + "loss": 0.0088, + "step": 1249 + }, + { + "epoch": 3.31, + "learning_rate": 3.39142091152815e-05, + "loss": 0.0126, + "step": 1250 + }, + { + "epoch": 3.31, + "learning_rate": 3.390080428954424e-05, + "loss": 0.0091, + "step": 1251 + }, + { + "epoch": 3.31, + "learning_rate": 3.388739946380697e-05, + "loss": 0.0232, + "step": 1252 + }, + { + "epoch": 3.31, + "learning_rate": 3.387399463806971e-05, + "loss": 0.3704, + "step": 1253 + }, + { + "epoch": 3.32, + "learning_rate": 3.386058981233244e-05, + "loss": 0.0112, + "step": 1254 + }, + { + "epoch": 3.32, + "learning_rate": 3.384718498659518e-05, + "loss": 0.1709, + "step": 1255 + }, + { + "epoch": 3.32, + "learning_rate": 3.3833780160857906e-05, + "loss": 0.0109, + "step": 1256 + }, + { + "epoch": 3.33, + "learning_rate": 3.382037533512065e-05, + "loss": 0.2874, + "step": 1257 + }, + { + "epoch": 3.33, + "learning_rate": 3.3806970509383376e-05, + "loss": 0.024, + "step": 1258 + }, + { + "epoch": 3.33, + "learning_rate": 3.379356568364612e-05, + "loss": 0.0131, + "step": 1259 + }, + { + "epoch": 3.33, + "learning_rate": 3.3780160857908846e-05, + "loss": 0.2076, + "step": 1260 + }, + { + "epoch": 3.34, + "learning_rate": 3.376675603217158e-05, + "loss": 0.0083, + "step": 1261 + }, + { + "epoch": 3.34, + "learning_rate": 3.375335120643432e-05, + "loss": 0.0234, + "step": 1262 + }, + { + "epoch": 3.34, + "learning_rate": 3.373994638069705e-05, + "loss": 0.0066, + "step": 1263 + }, + { + "epoch": 3.34, + "learning_rate": 3.372654155495979e-05, + "loss": 0.3983, + "step": 1264 + }, + { + "epoch": 3.35, + "learning_rate": 3.371313672922252e-05, + "loss": 0.0648, + "step": 1265 + }, + { + "epoch": 3.35, + "learning_rate": 3.369973190348526e-05, + "loss": 0.006, + "step": 1266 + }, + { + "epoch": 3.35, + "learning_rate": 3.368632707774799e-05, + "loss": 0.0807, + "step": 1267 + }, + { + "epoch": 3.35, + "learning_rate": 3.367292225201073e-05, + "loss": 0.0975, + "step": 1268 + }, + { + "epoch": 3.36, + "learning_rate": 3.365951742627346e-05, + "loss": 0.2934, + "step": 1269 + }, + { + "epoch": 3.36, + "learning_rate": 3.36461126005362e-05, + "loss": 0.0869, + "step": 1270 + }, + { + "epoch": 3.36, + "learning_rate": 3.3632707774798926e-05, + "loss": 0.1374, + "step": 1271 + }, + { + "epoch": 3.37, + "learning_rate": 3.361930294906167e-05, + "loss": 0.3314, + "step": 1272 + }, + { + "epoch": 3.37, + "learning_rate": 3.3605898123324396e-05, + "loss": 0.0045, + "step": 1273 + }, + { + "epoch": 3.37, + "learning_rate": 3.359249329758714e-05, + "loss": 0.0536, + "step": 1274 + }, + { + "epoch": 3.37, + "learning_rate": 3.3579088471849867e-05, + "loss": 0.0564, + "step": 1275 + }, + { + "epoch": 3.38, + "learning_rate": 3.35656836461126e-05, + "loss": 0.0689, + "step": 1276 + }, + { + "epoch": 3.38, + "learning_rate": 3.355227882037534e-05, + "loss": 0.5177, + "step": 1277 + }, + { + "epoch": 3.38, + "learning_rate": 3.353887399463807e-05, + "loss": 0.0689, + "step": 1278 + }, + { + "epoch": 3.38, + "learning_rate": 3.352546916890081e-05, + "loss": 0.0664, + "step": 1279 + }, + { + "epoch": 3.39, + "learning_rate": 3.351206434316354e-05, + "loss": 0.0614, + "step": 1280 + }, + { + "epoch": 3.39, + "learning_rate": 3.349865951742627e-05, + "loss": 0.1994, + "step": 1281 + }, + { + "epoch": 3.39, + "learning_rate": 3.348525469168901e-05, + "loss": 0.4769, + "step": 1282 + }, + { + "epoch": 3.39, + "learning_rate": 3.347184986595174e-05, + "loss": 0.1851, + "step": 1283 + }, + { + "epoch": 3.4, + "learning_rate": 3.345844504021448e-05, + "loss": 0.0092, + "step": 1284 + }, + { + "epoch": 3.4, + "learning_rate": 3.344504021447721e-05, + "loss": 0.0052, + "step": 1285 + }, + { + "epoch": 3.4, + "learning_rate": 3.3431635388739946e-05, + "loss": 0.0095, + "step": 1286 + }, + { + "epoch": 3.4, + "learning_rate": 3.341823056300268e-05, + "loss": 0.0242, + "step": 1287 + }, + { + "epoch": 3.41, + "learning_rate": 3.3404825737265416e-05, + "loss": 0.0565, + "step": 1288 + }, + { + "epoch": 3.41, + "learning_rate": 3.339142091152815e-05, + "loss": 0.2645, + "step": 1289 + }, + { + "epoch": 3.41, + "learning_rate": 3.337801608579089e-05, + "loss": 0.0049, + "step": 1290 + }, + { + "epoch": 3.42, + "learning_rate": 3.336461126005362e-05, + "loss": 0.0929, + "step": 1291 + }, + { + "epoch": 3.42, + "learning_rate": 3.335120643431636e-05, + "loss": 0.3968, + "step": 1292 + }, + { + "epoch": 3.42, + "learning_rate": 3.333780160857909e-05, + "loss": 0.033, + "step": 1293 + }, + { + "epoch": 3.42, + "learning_rate": 3.332439678284183e-05, + "loss": 0.007, + "step": 1294 + }, + { + "epoch": 3.43, + "learning_rate": 3.331099195710456e-05, + "loss": 0.2552, + "step": 1295 + }, + { + "epoch": 3.43, + "learning_rate": 3.329758713136729e-05, + "loss": 0.004, + "step": 1296 + }, + { + "epoch": 3.43, + "learning_rate": 3.328418230563003e-05, + "loss": 0.136, + "step": 1297 + }, + { + "epoch": 3.43, + "learning_rate": 3.327077747989276e-05, + "loss": 0.1407, + "step": 1298 + }, + { + "epoch": 3.44, + "learning_rate": 3.32573726541555e-05, + "loss": 0.0354, + "step": 1299 + }, + { + "epoch": 3.44, + "learning_rate": 3.324396782841823e-05, + "loss": 0.6141, + "step": 1300 + }, + { + "epoch": 3.44, + "learning_rate": 3.3230563002680966e-05, + "loss": 0.2544, + "step": 1301 + }, + { + "epoch": 3.44, + "learning_rate": 3.32171581769437e-05, + "loss": 0.0046, + "step": 1302 + }, + { + "epoch": 3.45, + "learning_rate": 3.320375335120644e-05, + "loss": 0.0126, + "step": 1303 + }, + { + "epoch": 3.45, + "learning_rate": 3.319034852546917e-05, + "loss": 0.3506, + "step": 1304 + }, + { + "epoch": 3.45, + "learning_rate": 3.317694369973191e-05, + "loss": 0.3512, + "step": 1305 + }, + { + "epoch": 3.46, + "learning_rate": 3.3163538873994635e-05, + "loss": 0.3675, + "step": 1306 + }, + { + "epoch": 3.46, + "learning_rate": 3.315013404825738e-05, + "loss": 0.1676, + "step": 1307 + }, + { + "epoch": 3.46, + "learning_rate": 3.3136729222520106e-05, + "loss": 0.0307, + "step": 1308 + }, + { + "epoch": 3.46, + "learning_rate": 3.312332439678285e-05, + "loss": 0.0084, + "step": 1309 + }, + { + "epoch": 3.47, + "learning_rate": 3.3109919571045576e-05, + "loss": 0.1977, + "step": 1310 + }, + { + "epoch": 3.47, + "learning_rate": 3.309651474530831e-05, + "loss": 0.1645, + "step": 1311 + }, + { + "epoch": 3.47, + "learning_rate": 3.3083109919571046e-05, + "loss": 0.2579, + "step": 1312 + }, + { + "epoch": 3.47, + "learning_rate": 3.306970509383378e-05, + "loss": 0.1656, + "step": 1313 + }, + { + "epoch": 3.48, + "learning_rate": 3.3056300268096516e-05, + "loss": 0.0168, + "step": 1314 + }, + { + "epoch": 3.48, + "learning_rate": 3.304289544235925e-05, + "loss": 0.0291, + "step": 1315 + }, + { + "epoch": 3.48, + "learning_rate": 3.302949061662198e-05, + "loss": 0.0146, + "step": 1316 + }, + { + "epoch": 3.48, + "learning_rate": 3.301608579088472e-05, + "loss": 0.0037, + "step": 1317 + }, + { + "epoch": 3.49, + "learning_rate": 3.300268096514745e-05, + "loss": 0.0113, + "step": 1318 + }, + { + "epoch": 3.49, + "learning_rate": 3.298927613941019e-05, + "loss": 0.0734, + "step": 1319 + }, + { + "epoch": 3.49, + "learning_rate": 3.297587131367292e-05, + "loss": 0.0292, + "step": 1320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2962466487935655e-05, + "loss": 0.3875, + "step": 1321 + }, + { + "epoch": 3.5, + "learning_rate": 3.294906166219839e-05, + "loss": 0.0138, + "step": 1322 + }, + { + "epoch": 3.5, + "learning_rate": 3.2935656836461126e-05, + "loss": 0.4653, + "step": 1323 + }, + { + "epoch": 3.5, + "learning_rate": 3.292225201072386e-05, + "loss": 0.1864, + "step": 1324 + }, + { + "epoch": 3.51, + "learning_rate": 3.2908847184986596e-05, + "loss": 0.0116, + "step": 1325 + }, + { + "epoch": 3.51, + "learning_rate": 3.289544235924933e-05, + "loss": 0.014, + "step": 1326 + }, + { + "epoch": 3.51, + "learning_rate": 3.2882037533512066e-05, + "loss": 0.3344, + "step": 1327 + }, + { + "epoch": 3.51, + "learning_rate": 3.28686327077748e-05, + "loss": 0.1544, + "step": 1328 + }, + { + "epoch": 3.52, + "learning_rate": 3.2855227882037537e-05, + "loss": 0.0065, + "step": 1329 + }, + { + "epoch": 3.52, + "learning_rate": 3.284182305630027e-05, + "loss": 0.0041, + "step": 1330 + }, + { + "epoch": 3.52, + "learning_rate": 3.2828418230563e-05, + "loss": 0.0044, + "step": 1331 + }, + { + "epoch": 3.52, + "learning_rate": 3.281501340482574e-05, + "loss": 0.1808, + "step": 1332 + }, + { + "epoch": 3.53, + "learning_rate": 3.280160857908847e-05, + "loss": 0.0521, + "step": 1333 + }, + { + "epoch": 3.53, + "learning_rate": 3.278820375335121e-05, + "loss": 0.3505, + "step": 1334 + }, + { + "epoch": 3.53, + "learning_rate": 3.277479892761394e-05, + "loss": 0.2032, + "step": 1335 + }, + { + "epoch": 3.53, + "learning_rate": 3.2761394101876676e-05, + "loss": 0.004, + "step": 1336 + }, + { + "epoch": 3.54, + "learning_rate": 3.274798927613941e-05, + "loss": 0.0343, + "step": 1337 + }, + { + "epoch": 3.54, + "learning_rate": 3.2734584450402146e-05, + "loss": 0.278, + "step": 1338 + }, + { + "epoch": 3.54, + "learning_rate": 3.272117962466488e-05, + "loss": 0.0056, + "step": 1339 + }, + { + "epoch": 3.54, + "learning_rate": 3.2707774798927616e-05, + "loss": 0.1673, + "step": 1340 + }, + { + "epoch": 3.55, + "learning_rate": 3.2694369973190345e-05, + "loss": 0.0092, + "step": 1341 + }, + { + "epoch": 3.55, + "learning_rate": 3.2680965147453086e-05, + "loss": 0.0058, + "step": 1342 + }, + { + "epoch": 3.55, + "learning_rate": 3.2667560321715815e-05, + "loss": 0.097, + "step": 1343 + }, + { + "epoch": 3.56, + "learning_rate": 3.265415549597856e-05, + "loss": 0.2138, + "step": 1344 + }, + { + "epoch": 3.56, + "learning_rate": 3.2640750670241285e-05, + "loss": 0.0077, + "step": 1345 + }, + { + "epoch": 3.56, + "learning_rate": 3.262734584450402e-05, + "loss": 0.2294, + "step": 1346 + }, + { + "epoch": 3.56, + "learning_rate": 3.2613941018766755e-05, + "loss": 0.3282, + "step": 1347 + }, + { + "epoch": 3.57, + "learning_rate": 3.260053619302949e-05, + "loss": 0.233, + "step": 1348 + }, + { + "epoch": 3.57, + "learning_rate": 3.2587131367292226e-05, + "loss": 0.0379, + "step": 1349 + }, + { + "epoch": 3.57, + "learning_rate": 3.257372654155496e-05, + "loss": 0.2168, + "step": 1350 + }, + { + "epoch": 3.57, + "learning_rate": 3.2560321715817696e-05, + "loss": 0.0443, + "step": 1351 + }, + { + "epoch": 3.58, + "learning_rate": 3.254691689008043e-05, + "loss": 0.2665, + "step": 1352 + }, + { + "epoch": 3.58, + "learning_rate": 3.2533512064343166e-05, + "loss": 0.0136, + "step": 1353 + }, + { + "epoch": 3.58, + "learning_rate": 3.25201072386059e-05, + "loss": 0.0035, + "step": 1354 + }, + { + "epoch": 3.58, + "learning_rate": 3.2506702412868636e-05, + "loss": 0.2153, + "step": 1355 + }, + { + "epoch": 3.59, + "learning_rate": 3.249329758713137e-05, + "loss": 0.088, + "step": 1356 + }, + { + "epoch": 3.59, + "learning_rate": 3.247989276139411e-05, + "loss": 0.0074, + "step": 1357 + }, + { + "epoch": 3.59, + "learning_rate": 3.2466487935656835e-05, + "loss": 0.0924, + "step": 1358 + }, + { + "epoch": 3.6, + "learning_rate": 3.245308310991958e-05, + "loss": 0.0171, + "step": 1359 + }, + { + "epoch": 3.6, + "learning_rate": 3.2439678284182305e-05, + "loss": 0.0132, + "step": 1360 + }, + { + "epoch": 3.6, + "learning_rate": 3.242627345844505e-05, + "loss": 0.0583, + "step": 1361 + }, + { + "epoch": 3.6, + "learning_rate": 3.2412868632707776e-05, + "loss": 0.0038, + "step": 1362 + }, + { + "epoch": 3.61, + "learning_rate": 3.239946380697051e-05, + "loss": 0.0846, + "step": 1363 + }, + { + "epoch": 3.61, + "learning_rate": 3.2386058981233246e-05, + "loss": 0.0058, + "step": 1364 + }, + { + "epoch": 3.61, + "learning_rate": 3.237265415549598e-05, + "loss": 0.4456, + "step": 1365 + }, + { + "epoch": 3.61, + "learning_rate": 3.2359249329758716e-05, + "loss": 0.0029, + "step": 1366 + }, + { + "epoch": 3.62, + "learning_rate": 3.234584450402145e-05, + "loss": 0.2553, + "step": 1367 + }, + { + "epoch": 3.62, + "learning_rate": 3.233243967828418e-05, + "loss": 0.0936, + "step": 1368 + }, + { + "epoch": 3.62, + "learning_rate": 3.231903485254692e-05, + "loss": 0.1017, + "step": 1369 + }, + { + "epoch": 3.62, + "learning_rate": 3.230563002680965e-05, + "loss": 0.0379, + "step": 1370 + }, + { + "epoch": 3.63, + "learning_rate": 3.229222520107239e-05, + "loss": 0.0069, + "step": 1371 + }, + { + "epoch": 3.63, + "learning_rate": 3.227882037533512e-05, + "loss": 0.3235, + "step": 1372 + }, + { + "epoch": 3.63, + "learning_rate": 3.2265415549597855e-05, + "loss": 0.3796, + "step": 1373 + }, + { + "epoch": 3.63, + "learning_rate": 3.225201072386059e-05, + "loss": 0.3246, + "step": 1374 + }, + { + "epoch": 3.64, + "learning_rate": 3.2238605898123325e-05, + "loss": 0.0059, + "step": 1375 + }, + { + "epoch": 3.64, + "learning_rate": 3.222520107238606e-05, + "loss": 0.0405, + "step": 1376 + }, + { + "epoch": 3.64, + "learning_rate": 3.2211796246648796e-05, + "loss": 0.0142, + "step": 1377 + }, + { + "epoch": 3.65, + "learning_rate": 3.219839142091153e-05, + "loss": 0.4426, + "step": 1378 + }, + { + "epoch": 3.65, + "learning_rate": 3.2184986595174266e-05, + "loss": 0.0249, + "step": 1379 + }, + { + "epoch": 3.65, + "learning_rate": 3.2171581769437e-05, + "loss": 0.1053, + "step": 1380 + }, + { + "epoch": 3.65, + "learning_rate": 3.2158176943699736e-05, + "loss": 0.0179, + "step": 1381 + }, + { + "epoch": 3.66, + "learning_rate": 3.214477211796247e-05, + "loss": 0.0718, + "step": 1382 + }, + { + "epoch": 3.66, + "learning_rate": 3.21313672922252e-05, + "loss": 0.1431, + "step": 1383 + }, + { + "epoch": 3.66, + "learning_rate": 3.211796246648794e-05, + "loss": 0.2391, + "step": 1384 + }, + { + "epoch": 3.66, + "learning_rate": 3.210455764075067e-05, + "loss": 0.0053, + "step": 1385 + }, + { + "epoch": 3.67, + "learning_rate": 3.209115281501341e-05, + "loss": 0.2935, + "step": 1386 + }, + { + "epoch": 3.67, + "learning_rate": 3.207774798927614e-05, + "loss": 0.0071, + "step": 1387 + }, + { + "epoch": 3.67, + "learning_rate": 3.2064343163538875e-05, + "loss": 0.031, + "step": 1388 + }, + { + "epoch": 3.67, + "learning_rate": 3.205093833780161e-05, + "loss": 0.1989, + "step": 1389 + }, + { + "epoch": 3.68, + "learning_rate": 3.2037533512064346e-05, + "loss": 0.0533, + "step": 1390 + }, + { + "epoch": 3.68, + "learning_rate": 3.202412868632708e-05, + "loss": 0.2408, + "step": 1391 + }, + { + "epoch": 3.68, + "learning_rate": 3.2010723860589816e-05, + "loss": 0.3158, + "step": 1392 + }, + { + "epoch": 3.69, + "learning_rate": 3.1997319034852544e-05, + "loss": 0.3629, + "step": 1393 + }, + { + "epoch": 3.69, + "learning_rate": 3.1983914209115286e-05, + "loss": 0.0122, + "step": 1394 + }, + { + "epoch": 3.69, + "learning_rate": 3.1970509383378014e-05, + "loss": 0.0449, + "step": 1395 + }, + { + "epoch": 3.69, + "learning_rate": 3.1957104557640756e-05, + "loss": 0.1273, + "step": 1396 + }, + { + "epoch": 3.7, + "learning_rate": 3.1943699731903485e-05, + "loss": 0.3401, + "step": 1397 + }, + { + "epoch": 3.7, + "learning_rate": 3.193029490616622e-05, + "loss": 0.0183, + "step": 1398 + }, + { + "epoch": 3.7, + "learning_rate": 3.1916890080428955e-05, + "loss": 0.0526, + "step": 1399 + }, + { + "epoch": 3.7, + "learning_rate": 3.190348525469169e-05, + "loss": 0.5037, + "step": 1400 + }, + { + "epoch": 3.71, + "learning_rate": 3.1890080428954425e-05, + "loss": 0.0059, + "step": 1401 + }, + { + "epoch": 3.71, + "learning_rate": 3.187667560321716e-05, + "loss": 0.0266, + "step": 1402 + }, + { + "epoch": 3.71, + "learning_rate": 3.1863270777479896e-05, + "loss": 0.4095, + "step": 1403 + }, + { + "epoch": 3.71, + "learning_rate": 3.184986595174263e-05, + "loss": 0.1802, + "step": 1404 + }, + { + "epoch": 3.72, + "learning_rate": 3.1836461126005366e-05, + "loss": 0.3586, + "step": 1405 + }, + { + "epoch": 3.72, + "learning_rate": 3.18230563002681e-05, + "loss": 0.2058, + "step": 1406 + }, + { + "epoch": 3.72, + "learning_rate": 3.1809651474530836e-05, + "loss": 0.008, + "step": 1407 + }, + { + "epoch": 3.72, + "learning_rate": 3.1796246648793564e-05, + "loss": 0.0282, + "step": 1408 + }, + { + "epoch": 3.73, + "learning_rate": 3.1782841823056306e-05, + "loss": 0.0077, + "step": 1409 + }, + { + "epoch": 3.73, + "learning_rate": 3.1769436997319035e-05, + "loss": 0.3461, + "step": 1410 + }, + { + "epoch": 3.73, + "learning_rate": 3.1756032171581777e-05, + "loss": 0.0038, + "step": 1411 + }, + { + "epoch": 3.74, + "learning_rate": 3.1742627345844505e-05, + "loss": 0.0087, + "step": 1412 + }, + { + "epoch": 3.74, + "learning_rate": 3.172922252010724e-05, + "loss": 0.8254, + "step": 1413 + }, + { + "epoch": 3.74, + "learning_rate": 3.1715817694369975e-05, + "loss": 0.017, + "step": 1414 + }, + { + "epoch": 3.74, + "learning_rate": 3.170241286863271e-05, + "loss": 0.2954, + "step": 1415 + }, + { + "epoch": 3.75, + "learning_rate": 3.1689008042895445e-05, + "loss": 0.0286, + "step": 1416 + }, + { + "epoch": 3.75, + "learning_rate": 3.167560321715818e-05, + "loss": 0.0454, + "step": 1417 + }, + { + "epoch": 3.75, + "learning_rate": 3.166219839142091e-05, + "loss": 0.222, + "step": 1418 + }, + { + "epoch": 3.75, + "learning_rate": 3.164879356568365e-05, + "loss": 0.0225, + "step": 1419 + }, + { + "epoch": 3.76, + "learning_rate": 3.163538873994638e-05, + "loss": 0.2599, + "step": 1420 + }, + { + "epoch": 3.76, + "learning_rate": 3.162198391420912e-05, + "loss": 0.2343, + "step": 1421 + }, + { + "epoch": 3.76, + "learning_rate": 3.160857908847185e-05, + "loss": 0.0274, + "step": 1422 + }, + { + "epoch": 3.76, + "learning_rate": 3.1595174262734585e-05, + "loss": 0.0109, + "step": 1423 + }, + { + "epoch": 3.77, + "learning_rate": 3.158176943699732e-05, + "loss": 0.012, + "step": 1424 + }, + { + "epoch": 3.77, + "learning_rate": 3.1568364611260055e-05, + "loss": 0.0267, + "step": 1425 + }, + { + "epoch": 3.77, + "learning_rate": 3.155495978552279e-05, + "loss": 0.0116, + "step": 1426 + }, + { + "epoch": 3.78, + "learning_rate": 3.1541554959785525e-05, + "loss": 0.2563, + "step": 1427 + }, + { + "epoch": 3.78, + "learning_rate": 3.1528150134048253e-05, + "loss": 0.2149, + "step": 1428 + }, + { + "epoch": 3.78, + "learning_rate": 3.1514745308310995e-05, + "loss": 0.2099, + "step": 1429 + }, + { + "epoch": 3.78, + "learning_rate": 3.1501340482573724e-05, + "loss": 0.1445, + "step": 1430 + }, + { + "epoch": 3.79, + "learning_rate": 3.1487935656836466e-05, + "loss": 0.0069, + "step": 1431 + }, + { + "epoch": 3.79, + "learning_rate": 3.1474530831099194e-05, + "loss": 0.3583, + "step": 1432 + }, + { + "epoch": 3.79, + "learning_rate": 3.146112600536193e-05, + "loss": 0.1112, + "step": 1433 + }, + { + "epoch": 3.79, + "learning_rate": 3.1447721179624664e-05, + "loss": 0.5379, + "step": 1434 + }, + { + "epoch": 3.8, + "learning_rate": 3.14343163538874e-05, + "loss": 0.0248, + "step": 1435 + }, + { + "epoch": 3.8, + "learning_rate": 3.1420911528150135e-05, + "loss": 0.0255, + "step": 1436 + }, + { + "epoch": 3.8, + "learning_rate": 3.140750670241287e-05, + "loss": 0.3363, + "step": 1437 + }, + { + "epoch": 3.8, + "learning_rate": 3.1394101876675605e-05, + "loss": 0.2952, + "step": 1438 + }, + { + "epoch": 3.81, + "learning_rate": 3.138069705093834e-05, + "loss": 0.0337, + "step": 1439 + }, + { + "epoch": 3.81, + "learning_rate": 3.1367292225201075e-05, + "loss": 0.0157, + "step": 1440 + }, + { + "epoch": 3.81, + "learning_rate": 3.135388739946381e-05, + "loss": 0.0204, + "step": 1441 + }, + { + "epoch": 3.81, + "learning_rate": 3.1340482573726545e-05, + "loss": 0.7707, + "step": 1442 + }, + { + "epoch": 3.82, + "learning_rate": 3.1327077747989274e-05, + "loss": 0.4232, + "step": 1443 + }, + { + "epoch": 3.82, + "learning_rate": 3.1313672922252016e-05, + "loss": 0.116, + "step": 1444 + }, + { + "epoch": 3.82, + "learning_rate": 3.1300268096514744e-05, + "loss": 0.421, + "step": 1445 + }, + { + "epoch": 3.83, + "learning_rate": 3.1286863270777486e-05, + "loss": 0.0267, + "step": 1446 + }, + { + "epoch": 3.83, + "learning_rate": 3.1273458445040214e-05, + "loss": 0.0078, + "step": 1447 + }, + { + "epoch": 3.83, + "learning_rate": 3.126005361930295e-05, + "loss": 0.0996, + "step": 1448 + }, + { + "epoch": 3.83, + "learning_rate": 3.1246648793565684e-05, + "loss": 0.0389, + "step": 1449 + }, + { + "epoch": 3.84, + "learning_rate": 3.123324396782842e-05, + "loss": 0.0482, + "step": 1450 + }, + { + "epoch": 3.84, + "learning_rate": 3.1219839142091155e-05, + "loss": 0.0053, + "step": 1451 + }, + { + "epoch": 3.84, + "learning_rate": 3.120643431635389e-05, + "loss": 0.0153, + "step": 1452 + }, + { + "epoch": 3.84, + "learning_rate": 3.119302949061662e-05, + "loss": 0.008, + "step": 1453 + }, + { + "epoch": 3.85, + "learning_rate": 3.117962466487936e-05, + "loss": 0.0166, + "step": 1454 + }, + { + "epoch": 3.85, + "learning_rate": 3.116621983914209e-05, + "loss": 0.0889, + "step": 1455 + }, + { + "epoch": 3.85, + "learning_rate": 3.115281501340483e-05, + "loss": 0.0695, + "step": 1456 + }, + { + "epoch": 3.85, + "learning_rate": 3.113941018766756e-05, + "loss": 0.3353, + "step": 1457 + }, + { + "epoch": 3.86, + "learning_rate": 3.1126005361930294e-05, + "loss": 0.0729, + "step": 1458 + }, + { + "epoch": 3.86, + "learning_rate": 3.111260053619303e-05, + "loss": 0.0187, + "step": 1459 + }, + { + "epoch": 3.86, + "learning_rate": 3.1099195710455764e-05, + "loss": 0.2512, + "step": 1460 + }, + { + "epoch": 3.87, + "learning_rate": 3.10857908847185e-05, + "loss": 0.3837, + "step": 1461 + }, + { + "epoch": 3.87, + "learning_rate": 3.1072386058981234e-05, + "loss": 0.2543, + "step": 1462 + }, + { + "epoch": 3.87, + "learning_rate": 3.105898123324397e-05, + "loss": 0.1797, + "step": 1463 + }, + { + "epoch": 3.87, + "learning_rate": 3.1045576407506705e-05, + "loss": 0.3097, + "step": 1464 + }, + { + "epoch": 3.88, + "learning_rate": 3.103217158176944e-05, + "loss": 0.268, + "step": 1465 + }, + { + "epoch": 3.88, + "learning_rate": 3.1018766756032175e-05, + "loss": 0.1773, + "step": 1466 + }, + { + "epoch": 3.88, + "learning_rate": 3.100536193029491e-05, + "loss": 0.2055, + "step": 1467 + }, + { + "epoch": 3.88, + "learning_rate": 3.099195710455764e-05, + "loss": 0.0279, + "step": 1468 + }, + { + "epoch": 3.89, + "learning_rate": 3.097855227882038e-05, + "loss": 0.1263, + "step": 1469 + }, + { + "epoch": 3.89, + "learning_rate": 3.096514745308311e-05, + "loss": 0.0449, + "step": 1470 + }, + { + "epoch": 3.89, + "learning_rate": 3.095174262734585e-05, + "loss": 0.2429, + "step": 1471 + }, + { + "epoch": 3.89, + "learning_rate": 3.093833780160858e-05, + "loss": 0.1245, + "step": 1472 + }, + { + "epoch": 3.9, + "learning_rate": 3.0924932975871314e-05, + "loss": 0.1303, + "step": 1473 + }, + { + "epoch": 3.9, + "learning_rate": 3.091152815013405e-05, + "loss": 0.0303, + "step": 1474 + }, + { + "epoch": 3.9, + "learning_rate": 3.0898123324396784e-05, + "loss": 0.3279, + "step": 1475 + }, + { + "epoch": 3.9, + "learning_rate": 3.088471849865952e-05, + "loss": 0.134, + "step": 1476 + }, + { + "epoch": 3.91, + "learning_rate": 3.0871313672922255e-05, + "loss": 0.5138, + "step": 1477 + }, + { + "epoch": 3.91, + "learning_rate": 3.085790884718498e-05, + "loss": 0.0476, + "step": 1478 + }, + { + "epoch": 3.91, + "learning_rate": 3.0844504021447725e-05, + "loss": 0.1956, + "step": 1479 + }, + { + "epoch": 3.92, + "learning_rate": 3.083109919571045e-05, + "loss": 0.2061, + "step": 1480 + }, + { + "epoch": 3.92, + "learning_rate": 3.0817694369973195e-05, + "loss": 0.269, + "step": 1481 + }, + { + "epoch": 3.92, + "learning_rate": 3.0804289544235923e-05, + "loss": 0.0708, + "step": 1482 + }, + { + "epoch": 3.92, + "learning_rate": 3.0790884718498665e-05, + "loss": 0.0389, + "step": 1483 + }, + { + "epoch": 3.93, + "learning_rate": 3.0777479892761394e-05, + "loss": 0.2566, + "step": 1484 + }, + { + "epoch": 3.93, + "learning_rate": 3.076407506702413e-05, + "loss": 0.0581, + "step": 1485 + }, + { + "epoch": 3.93, + "learning_rate": 3.0750670241286864e-05, + "loss": 0.1527, + "step": 1486 + }, + { + "epoch": 3.93, + "learning_rate": 3.07372654155496e-05, + "loss": 0.3963, + "step": 1487 + }, + { + "epoch": 3.94, + "learning_rate": 3.0723860589812334e-05, + "loss": 0.2241, + "step": 1488 + }, + { + "epoch": 3.94, + "learning_rate": 3.071045576407507e-05, + "loss": 0.1275, + "step": 1489 + }, + { + "epoch": 3.94, + "learning_rate": 3.0697050938337804e-05, + "loss": 0.3148, + "step": 1490 + }, + { + "epoch": 3.94, + "learning_rate": 3.068364611260054e-05, + "loss": 0.1474, + "step": 1491 + }, + { + "epoch": 3.95, + "learning_rate": 3.0670241286863275e-05, + "loss": 0.0233, + "step": 1492 + }, + { + "epoch": 3.95, + "learning_rate": 3.065683646112601e-05, + "loss": 0.1721, + "step": 1493 + }, + { + "epoch": 3.95, + "learning_rate": 3.0643431635388745e-05, + "loss": 0.6024, + "step": 1494 + }, + { + "epoch": 3.96, + "learning_rate": 3.063002680965147e-05, + "loss": 0.1425, + "step": 1495 + }, + { + "epoch": 3.96, + "learning_rate": 3.0616621983914215e-05, + "loss": 0.0311, + "step": 1496 + }, + { + "epoch": 3.96, + "learning_rate": 3.0603217158176944e-05, + "loss": 0.0197, + "step": 1497 + }, + { + "epoch": 3.96, + "learning_rate": 3.0589812332439686e-05, + "loss": 0.0406, + "step": 1498 + }, + { + "epoch": 3.97, + "learning_rate": 3.0576407506702414e-05, + "loss": 0.054, + "step": 1499 + }, + { + "epoch": 3.97, + "learning_rate": 3.056300268096515e-05, + "loss": 0.161, + "step": 1500 + }, + { + "epoch": 3.97, + "learning_rate": 3.0549597855227884e-05, + "loss": 0.0549, + "step": 1501 + }, + { + "epoch": 3.97, + "learning_rate": 3.053619302949062e-05, + "loss": 0.1667, + "step": 1502 + }, + { + "epoch": 3.98, + "learning_rate": 3.0522788203753354e-05, + "loss": 0.1264, + "step": 1503 + }, + { + "epoch": 3.98, + "learning_rate": 3.0509383378016086e-05, + "loss": 0.0133, + "step": 1504 + }, + { + "epoch": 3.98, + "learning_rate": 3.049597855227882e-05, + "loss": 0.0655, + "step": 1505 + }, + { + "epoch": 3.98, + "learning_rate": 3.0482573726541556e-05, + "loss": 0.1054, + "step": 1506 + }, + { + "epoch": 3.99, + "learning_rate": 3.046916890080429e-05, + "loss": 0.0053, + "step": 1507 + }, + { + "epoch": 3.99, + "learning_rate": 3.0455764075067027e-05, + "loss": 0.0347, + "step": 1508 + }, + { + "epoch": 3.99, + "learning_rate": 3.0442359249329762e-05, + "loss": 0.6095, + "step": 1509 + }, + { + "epoch": 3.99, + "learning_rate": 3.0428954423592494e-05, + "loss": 0.1339, + "step": 1510 + }, + { + "epoch": 4.0, + "learning_rate": 3.0415549597855232e-05, + "loss": 0.0088, + "step": 1511 + }, + { + "epoch": 4.0, + "learning_rate": 3.0402144772117964e-05, + "loss": 0.4356, + "step": 1512 + }, + { + "epoch": 4.0, + "eval_f1": 0.7822580645161291, + "eval_loss": 0.6966613531112671, + "eval_runtime": 1.8703, + "eval_samples_per_second": 808.957, + "eval_steps_per_second": 50.794, + "step": 1512 + }, + { + "epoch": 4.0, + "learning_rate": 3.0388739946380702e-05, + "loss": 0.003, + "step": 1513 + }, + { + "epoch": 4.01, + "learning_rate": 3.0375335120643434e-05, + "loss": 0.0067, + "step": 1514 + }, + { + "epoch": 4.01, + "learning_rate": 3.0361930294906166e-05, + "loss": 0.0488, + "step": 1515 + }, + { + "epoch": 4.01, + "learning_rate": 3.0348525469168904e-05, + "loss": 0.0106, + "step": 1516 + }, + { + "epoch": 4.01, + "learning_rate": 3.0335120643431636e-05, + "loss": 0.0098, + "step": 1517 + }, + { + "epoch": 4.02, + "learning_rate": 3.0321715817694375e-05, + "loss": 0.274, + "step": 1518 + }, + { + "epoch": 4.02, + "learning_rate": 3.0308310991957106e-05, + "loss": 0.2007, + "step": 1519 + }, + { + "epoch": 4.02, + "learning_rate": 3.0294906166219838e-05, + "loss": 0.0121, + "step": 1520 + }, + { + "epoch": 4.02, + "learning_rate": 3.0281501340482577e-05, + "loss": 0.0632, + "step": 1521 + }, + { + "epoch": 4.03, + "learning_rate": 3.026809651474531e-05, + "loss": 0.0062, + "step": 1522 + }, + { + "epoch": 4.03, + "learning_rate": 3.0254691689008047e-05, + "loss": 0.0123, + "step": 1523 + }, + { + "epoch": 4.03, + "learning_rate": 3.024128686327078e-05, + "loss": 0.0063, + "step": 1524 + }, + { + "epoch": 4.03, + "learning_rate": 3.022788203753351e-05, + "loss": 0.0102, + "step": 1525 + }, + { + "epoch": 4.04, + "learning_rate": 3.021447721179625e-05, + "loss": 0.0082, + "step": 1526 + }, + { + "epoch": 4.04, + "learning_rate": 3.020107238605898e-05, + "loss": 0.3369, + "step": 1527 + }, + { + "epoch": 4.04, + "learning_rate": 3.018766756032172e-05, + "loss": 0.2587, + "step": 1528 + }, + { + "epoch": 4.04, + "learning_rate": 3.017426273458445e-05, + "loss": 0.0067, + "step": 1529 + }, + { + "epoch": 4.05, + "learning_rate": 3.0160857908847186e-05, + "loss": 0.0021, + "step": 1530 + }, + { + "epoch": 4.05, + "learning_rate": 3.014745308310992e-05, + "loss": 0.0724, + "step": 1531 + }, + { + "epoch": 4.05, + "learning_rate": 3.0134048257372656e-05, + "loss": 0.0074, + "step": 1532 + }, + { + "epoch": 4.06, + "learning_rate": 3.012064343163539e-05, + "loss": 0.0202, + "step": 1533 + }, + { + "epoch": 4.06, + "learning_rate": 3.0107238605898126e-05, + "loss": 0.1435, + "step": 1534 + }, + { + "epoch": 4.06, + "learning_rate": 3.0093833780160858e-05, + "loss": 0.0074, + "step": 1535 + }, + { + "epoch": 4.06, + "learning_rate": 3.0080428954423597e-05, + "loss": 0.4145, + "step": 1536 + }, + { + "epoch": 4.07, + "learning_rate": 3.006702412868633e-05, + "loss": 0.0186, + "step": 1537 + }, + { + "epoch": 4.07, + "learning_rate": 3.0053619302949067e-05, + "loss": 0.1648, + "step": 1538 + }, + { + "epoch": 4.07, + "learning_rate": 3.00402144772118e-05, + "loss": 0.2545, + "step": 1539 + }, + { + "epoch": 4.07, + "learning_rate": 3.002680965147453e-05, + "loss": 0.0016, + "step": 1540 + }, + { + "epoch": 4.08, + "learning_rate": 3.001340482573727e-05, + "loss": 0.0184, + "step": 1541 + }, + { + "epoch": 4.08, + "learning_rate": 3e-05, + "loss": 0.1208, + "step": 1542 + }, + { + "epoch": 4.08, + "learning_rate": 2.998659517426274e-05, + "loss": 0.0021, + "step": 1543 + }, + { + "epoch": 4.08, + "learning_rate": 2.997319034852547e-05, + "loss": 0.0092, + "step": 1544 + }, + { + "epoch": 4.09, + "learning_rate": 2.9959785522788203e-05, + "loss": 0.1514, + "step": 1545 + }, + { + "epoch": 4.09, + "learning_rate": 2.994638069705094e-05, + "loss": 0.0773, + "step": 1546 + }, + { + "epoch": 4.09, + "learning_rate": 2.9932975871313673e-05, + "loss": 0.0093, + "step": 1547 + }, + { + "epoch": 4.1, + "learning_rate": 2.991957104557641e-05, + "loss": 0.0022, + "step": 1548 + }, + { + "epoch": 4.1, + "learning_rate": 2.9906166219839143e-05, + "loss": 0.1765, + "step": 1549 + }, + { + "epoch": 4.1, + "learning_rate": 2.9892761394101875e-05, + "loss": 0.1766, + "step": 1550 + }, + { + "epoch": 4.1, + "learning_rate": 2.9879356568364614e-05, + "loss": 0.0024, + "step": 1551 + }, + { + "epoch": 4.11, + "learning_rate": 2.9865951742627345e-05, + "loss": 0.012, + "step": 1552 + }, + { + "epoch": 4.11, + "learning_rate": 2.9852546916890084e-05, + "loss": 0.0055, + "step": 1553 + }, + { + "epoch": 4.11, + "learning_rate": 2.9839142091152816e-05, + "loss": 0.0088, + "step": 1554 + }, + { + "epoch": 4.11, + "learning_rate": 2.9825737265415547e-05, + "loss": 0.0019, + "step": 1555 + }, + { + "epoch": 4.12, + "learning_rate": 2.9812332439678286e-05, + "loss": 0.0186, + "step": 1556 + }, + { + "epoch": 4.12, + "learning_rate": 2.9798927613941018e-05, + "loss": 0.25, + "step": 1557 + }, + { + "epoch": 4.12, + "learning_rate": 2.9785522788203756e-05, + "loss": 0.0129, + "step": 1558 + }, + { + "epoch": 4.12, + "learning_rate": 2.9772117962466488e-05, + "loss": 0.0048, + "step": 1559 + }, + { + "epoch": 4.13, + "learning_rate": 2.9758713136729223e-05, + "loss": 0.1153, + "step": 1560 + }, + { + "epoch": 4.13, + "learning_rate": 2.9745308310991958e-05, + "loss": 0.1871, + "step": 1561 + }, + { + "epoch": 4.13, + "learning_rate": 2.9731903485254693e-05, + "loss": 0.0087, + "step": 1562 + }, + { + "epoch": 4.13, + "learning_rate": 2.971849865951743e-05, + "loss": 0.0048, + "step": 1563 + }, + { + "epoch": 4.14, + "learning_rate": 2.9705093833780163e-05, + "loss": 0.026, + "step": 1564 + }, + { + "epoch": 4.14, + "learning_rate": 2.9691689008042895e-05, + "loss": 0.3336, + "step": 1565 + }, + { + "epoch": 4.14, + "learning_rate": 2.9678284182305634e-05, + "loss": 0.0015, + "step": 1566 + }, + { + "epoch": 4.15, + "learning_rate": 2.9664879356568365e-05, + "loss": 0.0044, + "step": 1567 + }, + { + "epoch": 4.15, + "learning_rate": 2.9651474530831104e-05, + "loss": 0.0035, + "step": 1568 + }, + { + "epoch": 4.15, + "learning_rate": 2.9638069705093836e-05, + "loss": 0.1206, + "step": 1569 + }, + { + "epoch": 4.15, + "learning_rate": 2.9624664879356567e-05, + "loss": 0.1247, + "step": 1570 + }, + { + "epoch": 4.16, + "learning_rate": 2.9611260053619306e-05, + "loss": 0.0011, + "step": 1571 + }, + { + "epoch": 4.16, + "learning_rate": 2.9597855227882038e-05, + "loss": 0.0023, + "step": 1572 + }, + { + "epoch": 4.16, + "learning_rate": 2.9584450402144776e-05, + "loss": 0.0014, + "step": 1573 + }, + { + "epoch": 4.16, + "learning_rate": 2.9571045576407508e-05, + "loss": 0.2967, + "step": 1574 + }, + { + "epoch": 4.17, + "learning_rate": 2.955764075067024e-05, + "loss": 0.0373, + "step": 1575 + }, + { + "epoch": 4.17, + "learning_rate": 2.9544235924932978e-05, + "loss": 0.3351, + "step": 1576 + }, + { + "epoch": 4.17, + "learning_rate": 2.953083109919571e-05, + "loss": 0.0025, + "step": 1577 + }, + { + "epoch": 4.17, + "learning_rate": 2.951742627345845e-05, + "loss": 0.0025, + "step": 1578 + }, + { + "epoch": 4.18, + "learning_rate": 2.950402144772118e-05, + "loss": 0.0182, + "step": 1579 + }, + { + "epoch": 4.18, + "learning_rate": 2.9490616621983912e-05, + "loss": 0.001, + "step": 1580 + }, + { + "epoch": 4.18, + "learning_rate": 2.947721179624665e-05, + "loss": 0.003, + "step": 1581 + }, + { + "epoch": 4.19, + "learning_rate": 2.9463806970509382e-05, + "loss": 0.0038, + "step": 1582 + }, + { + "epoch": 4.19, + "learning_rate": 2.945040214477212e-05, + "loss": 0.002, + "step": 1583 + }, + { + "epoch": 4.19, + "learning_rate": 2.9436997319034853e-05, + "loss": 0.1688, + "step": 1584 + }, + { + "epoch": 4.19, + "learning_rate": 2.9423592493297584e-05, + "loss": 0.0014, + "step": 1585 + }, + { + "epoch": 4.2, + "learning_rate": 2.9410187667560323e-05, + "loss": 0.2664, + "step": 1586 + }, + { + "epoch": 4.2, + "learning_rate": 2.9396782841823055e-05, + "loss": 0.0012, + "step": 1587 + }, + { + "epoch": 4.2, + "learning_rate": 2.9383378016085793e-05, + "loss": 0.0022, + "step": 1588 + }, + { + "epoch": 4.2, + "learning_rate": 2.9369973190348525e-05, + "loss": 0.0959, + "step": 1589 + }, + { + "epoch": 4.21, + "learning_rate": 2.935656836461126e-05, + "loss": 0.0839, + "step": 1590 + }, + { + "epoch": 4.21, + "learning_rate": 2.9343163538873995e-05, + "loss": 0.7405, + "step": 1591 + }, + { + "epoch": 4.21, + "learning_rate": 2.932975871313673e-05, + "loss": 0.0351, + "step": 1592 + }, + { + "epoch": 4.21, + "learning_rate": 2.9316353887399465e-05, + "loss": 0.0025, + "step": 1593 + }, + { + "epoch": 4.22, + "learning_rate": 2.93029490616622e-05, + "loss": 0.0054, + "step": 1594 + }, + { + "epoch": 4.22, + "learning_rate": 2.9289544235924932e-05, + "loss": 0.0043, + "step": 1595 + }, + { + "epoch": 4.22, + "learning_rate": 2.927613941018767e-05, + "loss": 0.1828, + "step": 1596 + }, + { + "epoch": 4.22, + "learning_rate": 2.9262734584450402e-05, + "loss": 0.0022, + "step": 1597 + }, + { + "epoch": 4.23, + "learning_rate": 2.924932975871314e-05, + "loss": 0.0051, + "step": 1598 + }, + { + "epoch": 4.23, + "learning_rate": 2.9235924932975873e-05, + "loss": 0.0025, + "step": 1599 + }, + { + "epoch": 4.23, + "learning_rate": 2.9222520107238604e-05, + "loss": 0.0018, + "step": 1600 + }, + { + "epoch": 4.24, + "learning_rate": 2.9209115281501343e-05, + "loss": 0.0348, + "step": 1601 + }, + { + "epoch": 4.24, + "learning_rate": 2.9195710455764075e-05, + "loss": 0.207, + "step": 1602 + }, + { + "epoch": 4.24, + "learning_rate": 2.9182305630026813e-05, + "loss": 0.0249, + "step": 1603 + }, + { + "epoch": 4.24, + "learning_rate": 2.9168900804289545e-05, + "loss": 0.0028, + "step": 1604 + }, + { + "epoch": 4.25, + "learning_rate": 2.9155495978552283e-05, + "loss": 0.2604, + "step": 1605 + }, + { + "epoch": 4.25, + "learning_rate": 2.9142091152815015e-05, + "loss": 0.2808, + "step": 1606 + }, + { + "epoch": 4.25, + "learning_rate": 2.9128686327077747e-05, + "loss": 0.0289, + "step": 1607 + }, + { + "epoch": 4.25, + "learning_rate": 2.9115281501340486e-05, + "loss": 0.005, + "step": 1608 + }, + { + "epoch": 4.26, + "learning_rate": 2.9101876675603217e-05, + "loss": 0.7931, + "step": 1609 + }, + { + "epoch": 4.26, + "learning_rate": 2.9088471849865956e-05, + "loss": 0.335, + "step": 1610 + }, + { + "epoch": 4.26, + "learning_rate": 2.9075067024128688e-05, + "loss": 0.2779, + "step": 1611 + }, + { + "epoch": 4.26, + "learning_rate": 2.906166219839142e-05, + "loss": 0.1649, + "step": 1612 + }, + { + "epoch": 4.27, + "learning_rate": 2.9048257372654158e-05, + "loss": 0.0081, + "step": 1613 + }, + { + "epoch": 4.27, + "learning_rate": 2.903485254691689e-05, + "loss": 0.0638, + "step": 1614 + }, + { + "epoch": 4.27, + "learning_rate": 2.9021447721179628e-05, + "loss": 0.016, + "step": 1615 + }, + { + "epoch": 4.28, + "learning_rate": 2.900804289544236e-05, + "loss": 0.0025, + "step": 1616 + }, + { + "epoch": 4.28, + "learning_rate": 2.8994638069705095e-05, + "loss": 0.0249, + "step": 1617 + }, + { + "epoch": 4.28, + "learning_rate": 2.898123324396783e-05, + "loss": 0.0291, + "step": 1618 + }, + { + "epoch": 4.28, + "learning_rate": 2.8967828418230565e-05, + "loss": 0.1773, + "step": 1619 + }, + { + "epoch": 4.29, + "learning_rate": 2.89544235924933e-05, + "loss": 0.3452, + "step": 1620 + }, + { + "epoch": 4.29, + "learning_rate": 2.8941018766756035e-05, + "loss": 0.006, + "step": 1621 + }, + { + "epoch": 4.29, + "learning_rate": 2.8927613941018767e-05, + "loss": 0.0054, + "step": 1622 + }, + { + "epoch": 4.29, + "learning_rate": 2.8914209115281506e-05, + "loss": 0.1852, + "step": 1623 + }, + { + "epoch": 4.3, + "learning_rate": 2.8900804289544237e-05, + "loss": 0.4424, + "step": 1624 + }, + { + "epoch": 4.3, + "learning_rate": 2.8887399463806976e-05, + "loss": 0.0063, + "step": 1625 + }, + { + "epoch": 4.3, + "learning_rate": 2.8873994638069708e-05, + "loss": 0.43, + "step": 1626 + }, + { + "epoch": 4.3, + "learning_rate": 2.886058981233244e-05, + "loss": 0.2283, + "step": 1627 + }, + { + "epoch": 4.31, + "learning_rate": 2.8847184986595178e-05, + "loss": 0.0519, + "step": 1628 + }, + { + "epoch": 4.31, + "learning_rate": 2.883378016085791e-05, + "loss": 0.1797, + "step": 1629 + }, + { + "epoch": 4.31, + "learning_rate": 2.8820375335120648e-05, + "loss": 0.2569, + "step": 1630 + }, + { + "epoch": 4.31, + "learning_rate": 2.880697050938338e-05, + "loss": 0.0024, + "step": 1631 + }, + { + "epoch": 4.32, + "learning_rate": 2.8793565683646112e-05, + "loss": 0.1727, + "step": 1632 + }, + { + "epoch": 4.32, + "learning_rate": 2.878016085790885e-05, + "loss": 0.0091, + "step": 1633 + }, + { + "epoch": 4.32, + "learning_rate": 2.8766756032171582e-05, + "loss": 0.2002, + "step": 1634 + }, + { + "epoch": 4.33, + "learning_rate": 2.875335120643432e-05, + "loss": 0.0217, + "step": 1635 + }, + { + "epoch": 4.33, + "learning_rate": 2.8739946380697052e-05, + "loss": 0.2163, + "step": 1636 + }, + { + "epoch": 4.33, + "learning_rate": 2.8726541554959784e-05, + "loss": 0.0065, + "step": 1637 + }, + { + "epoch": 4.33, + "learning_rate": 2.8713136729222522e-05, + "loss": 0.1567, + "step": 1638 + }, + { + "epoch": 4.34, + "learning_rate": 2.8699731903485254e-05, + "loss": 0.1775, + "step": 1639 + }, + { + "epoch": 4.34, + "learning_rate": 2.8686327077747993e-05, + "loss": 0.0116, + "step": 1640 + }, + { + "epoch": 4.34, + "learning_rate": 2.8672922252010724e-05, + "loss": 0.0114, + "step": 1641 + }, + { + "epoch": 4.34, + "learning_rate": 2.8659517426273456e-05, + "loss": 0.0264, + "step": 1642 + }, + { + "epoch": 4.35, + "learning_rate": 2.8646112600536195e-05, + "loss": 0.0172, + "step": 1643 + }, + { + "epoch": 4.35, + "learning_rate": 2.8632707774798926e-05, + "loss": 0.187, + "step": 1644 + }, + { + "epoch": 4.35, + "learning_rate": 2.8619302949061665e-05, + "loss": 0.009, + "step": 1645 + }, + { + "epoch": 4.35, + "learning_rate": 2.8605898123324397e-05, + "loss": 0.014, + "step": 1646 + }, + { + "epoch": 4.36, + "learning_rate": 2.8592493297587132e-05, + "loss": 0.1643, + "step": 1647 + }, + { + "epoch": 4.36, + "learning_rate": 2.8579088471849867e-05, + "loss": 0.2763, + "step": 1648 + }, + { + "epoch": 4.36, + "learning_rate": 2.8565683646112602e-05, + "loss": 0.0641, + "step": 1649 + }, + { + "epoch": 4.37, + "learning_rate": 2.8552278820375337e-05, + "loss": 0.6128, + "step": 1650 + }, + { + "epoch": 4.37, + "learning_rate": 2.8538873994638072e-05, + "loss": 0.0229, + "step": 1651 + }, + { + "epoch": 4.37, + "learning_rate": 2.8525469168900804e-05, + "loss": 0.0344, + "step": 1652 + }, + { + "epoch": 4.37, + "learning_rate": 2.8512064343163543e-05, + "loss": 0.018, + "step": 1653 + }, + { + "epoch": 4.38, + "learning_rate": 2.8498659517426274e-05, + "loss": 0.191, + "step": 1654 + }, + { + "epoch": 4.38, + "learning_rate": 2.8485254691689013e-05, + "loss": 0.0397, + "step": 1655 + }, + { + "epoch": 4.38, + "learning_rate": 2.8471849865951745e-05, + "loss": 0.0029, + "step": 1656 + }, + { + "epoch": 4.38, + "learning_rate": 2.8458445040214476e-05, + "loss": 0.0034, + "step": 1657 + }, + { + "epoch": 4.39, + "learning_rate": 2.8445040214477215e-05, + "loss": 0.0031, + "step": 1658 + }, + { + "epoch": 4.39, + "learning_rate": 2.8431635388739947e-05, + "loss": 0.4272, + "step": 1659 + }, + { + "epoch": 4.39, + "learning_rate": 2.8418230563002685e-05, + "loss": 0.0042, + "step": 1660 + }, + { + "epoch": 4.39, + "learning_rate": 2.8404825737265417e-05, + "loss": 0.0224, + "step": 1661 + }, + { + "epoch": 4.4, + "learning_rate": 2.839142091152815e-05, + "loss": 0.1021, + "step": 1662 + }, + { + "epoch": 4.4, + "learning_rate": 2.8378016085790887e-05, + "loss": 0.0076, + "step": 1663 + }, + { + "epoch": 4.4, + "learning_rate": 2.836461126005362e-05, + "loss": 0.084, + "step": 1664 + }, + { + "epoch": 4.4, + "learning_rate": 2.8351206434316357e-05, + "loss": 0.0321, + "step": 1665 + }, + { + "epoch": 4.41, + "learning_rate": 2.833780160857909e-05, + "loss": 0.1369, + "step": 1666 + }, + { + "epoch": 4.41, + "learning_rate": 2.832439678284182e-05, + "loss": 0.018, + "step": 1667 + }, + { + "epoch": 4.41, + "learning_rate": 2.831099195710456e-05, + "loss": 0.1886, + "step": 1668 + }, + { + "epoch": 4.42, + "learning_rate": 2.829758713136729e-05, + "loss": 0.0016, + "step": 1669 + }, + { + "epoch": 4.42, + "learning_rate": 2.828418230563003e-05, + "loss": 0.0031, + "step": 1670 + }, + { + "epoch": 4.42, + "learning_rate": 2.827077747989276e-05, + "loss": 0.0043, + "step": 1671 + }, + { + "epoch": 4.42, + "learning_rate": 2.8257372654155497e-05, + "loss": 0.1202, + "step": 1672 + }, + { + "epoch": 4.43, + "learning_rate": 2.8243967828418232e-05, + "loss": 0.1409, + "step": 1673 + }, + { + "epoch": 4.43, + "learning_rate": 2.8230563002680967e-05, + "loss": 0.0821, + "step": 1674 + }, + { + "epoch": 4.43, + "learning_rate": 2.8217158176943702e-05, + "loss": 0.0468, + "step": 1675 + }, + { + "epoch": 4.43, + "learning_rate": 2.8203753351206437e-05, + "loss": 0.0559, + "step": 1676 + }, + { + "epoch": 4.44, + "learning_rate": 2.819034852546917e-05, + "loss": 0.0192, + "step": 1677 + }, + { + "epoch": 4.44, + "learning_rate": 2.8176943699731907e-05, + "loss": 0.0024, + "step": 1678 + }, + { + "epoch": 4.44, + "learning_rate": 2.816353887399464e-05, + "loss": 0.0021, + "step": 1679 + }, + { + "epoch": 4.44, + "learning_rate": 2.8150134048257378e-05, + "loss": 0.0139, + "step": 1680 + }, + { + "epoch": 4.45, + "learning_rate": 2.813672922252011e-05, + "loss": 0.0042, + "step": 1681 + }, + { + "epoch": 4.45, + "learning_rate": 2.812332439678284e-05, + "loss": 0.1666, + "step": 1682 + }, + { + "epoch": 4.45, + "learning_rate": 2.810991957104558e-05, + "loss": 0.5925, + "step": 1683 + }, + { + "epoch": 4.46, + "learning_rate": 2.809651474530831e-05, + "loss": 0.1689, + "step": 1684 + }, + { + "epoch": 4.46, + "learning_rate": 2.808310991957105e-05, + "loss": 0.0053, + "step": 1685 + }, + { + "epoch": 4.46, + "learning_rate": 2.806970509383378e-05, + "loss": 0.0019, + "step": 1686 + }, + { + "epoch": 4.46, + "learning_rate": 2.8056300268096513e-05, + "loss": 0.0632, + "step": 1687 + }, + { + "epoch": 4.47, + "learning_rate": 2.8042895442359252e-05, + "loss": 0.0115, + "step": 1688 + }, + { + "epoch": 4.47, + "learning_rate": 2.8029490616621984e-05, + "loss": 0.002, + "step": 1689 + }, + { + "epoch": 4.47, + "learning_rate": 2.8016085790884722e-05, + "loss": 0.0021, + "step": 1690 + }, + { + "epoch": 4.47, + "learning_rate": 2.8002680965147454e-05, + "loss": 0.0079, + "step": 1691 + }, + { + "epoch": 4.48, + "learning_rate": 2.7989276139410186e-05, + "loss": 0.0016, + "step": 1692 + }, + { + "epoch": 4.48, + "learning_rate": 2.7975871313672924e-05, + "loss": 0.1824, + "step": 1693 + }, + { + "epoch": 4.48, + "learning_rate": 2.7962466487935656e-05, + "loss": 0.1025, + "step": 1694 + }, + { + "epoch": 4.48, + "learning_rate": 2.7949061662198394e-05, + "loss": 0.4274, + "step": 1695 + }, + { + "epoch": 4.49, + "learning_rate": 2.7935656836461126e-05, + "loss": 0.0834, + "step": 1696 + }, + { + "epoch": 4.49, + "learning_rate": 2.7922252010723858e-05, + "loss": 0.6412, + "step": 1697 + }, + { + "epoch": 4.49, + "learning_rate": 2.7908847184986596e-05, + "loss": 0.3051, + "step": 1698 + }, + { + "epoch": 4.49, + "learning_rate": 2.7895442359249328e-05, + "loss": 0.0909, + "step": 1699 + }, + { + "epoch": 4.5, + "learning_rate": 2.7882037533512067e-05, + "loss": 0.2655, + "step": 1700 + }, + { + "epoch": 4.5, + "learning_rate": 2.78686327077748e-05, + "loss": 0.305, + "step": 1701 + }, + { + "epoch": 4.5, + "learning_rate": 2.7855227882037534e-05, + "loss": 0.2733, + "step": 1702 + }, + { + "epoch": 4.51, + "learning_rate": 2.784182305630027e-05, + "loss": 0.0021, + "step": 1703 + }, + { + "epoch": 4.51, + "learning_rate": 2.7828418230563004e-05, + "loss": 0.0072, + "step": 1704 + }, + { + "epoch": 4.51, + "learning_rate": 2.781501340482574e-05, + "loss": 0.0027, + "step": 1705 + }, + { + "epoch": 4.51, + "learning_rate": 2.7801608579088474e-05, + "loss": 0.184, + "step": 1706 + }, + { + "epoch": 4.52, + "learning_rate": 2.7788203753351206e-05, + "loss": 0.0143, + "step": 1707 + }, + { + "epoch": 4.52, + "learning_rate": 2.7774798927613944e-05, + "loss": 0.0297, + "step": 1708 + }, + { + "epoch": 4.52, + "learning_rate": 2.7761394101876676e-05, + "loss": 0.0739, + "step": 1709 + }, + { + "epoch": 4.52, + "learning_rate": 2.7747989276139415e-05, + "loss": 0.0188, + "step": 1710 + }, + { + "epoch": 4.53, + "learning_rate": 2.7734584450402146e-05, + "loss": 0.2487, + "step": 1711 + }, + { + "epoch": 4.53, + "learning_rate": 2.7721179624664878e-05, + "loss": 0.0222, + "step": 1712 + }, + { + "epoch": 4.53, + "learning_rate": 2.7707774798927617e-05, + "loss": 0.0041, + "step": 1713 + }, + { + "epoch": 4.53, + "learning_rate": 2.769436997319035e-05, + "loss": 0.0164, + "step": 1714 + }, + { + "epoch": 4.54, + "learning_rate": 2.7680965147453087e-05, + "loss": 0.0985, + "step": 1715 + }, + { + "epoch": 4.54, + "learning_rate": 2.766756032171582e-05, + "loss": 0.0067, + "step": 1716 + }, + { + "epoch": 4.54, + "learning_rate": 2.765415549597855e-05, + "loss": 0.3304, + "step": 1717 + }, + { + "epoch": 4.54, + "learning_rate": 2.764075067024129e-05, + "loss": 0.006, + "step": 1718 + }, + { + "epoch": 4.55, + "learning_rate": 2.762734584450402e-05, + "loss": 0.0142, + "step": 1719 + }, + { + "epoch": 4.55, + "learning_rate": 2.761394101876676e-05, + "loss": 0.2205, + "step": 1720 + }, + { + "epoch": 4.55, + "learning_rate": 2.760053619302949e-05, + "loss": 0.298, + "step": 1721 + }, + { + "epoch": 4.56, + "learning_rate": 2.7587131367292223e-05, + "loss": 0.0041, + "step": 1722 + }, + { + "epoch": 4.56, + "learning_rate": 2.757372654155496e-05, + "loss": 0.0018, + "step": 1723 + }, + { + "epoch": 4.56, + "learning_rate": 2.7560321715817693e-05, + "loss": 0.0185, + "step": 1724 + }, + { + "epoch": 4.56, + "learning_rate": 2.754691689008043e-05, + "loss": 0.0042, + "step": 1725 + }, + { + "epoch": 4.57, + "learning_rate": 2.7533512064343163e-05, + "loss": 0.036, + "step": 1726 + }, + { + "epoch": 4.57, + "learning_rate": 2.7520107238605898e-05, + "loss": 0.2593, + "step": 1727 + }, + { + "epoch": 4.57, + "learning_rate": 2.7506702412868633e-05, + "loss": 0.0062, + "step": 1728 + }, + { + "epoch": 4.57, + "learning_rate": 2.749329758713137e-05, + "loss": 0.1759, + "step": 1729 + }, + { + "epoch": 4.58, + "learning_rate": 2.7479892761394104e-05, + "loss": 0.0202, + "step": 1730 + }, + { + "epoch": 4.58, + "learning_rate": 2.746648793565684e-05, + "loss": 0.2156, + "step": 1731 + }, + { + "epoch": 4.58, + "learning_rate": 2.7453083109919574e-05, + "loss": 0.4112, + "step": 1732 + }, + { + "epoch": 4.58, + "learning_rate": 2.743967828418231e-05, + "loss": 0.0037, + "step": 1733 + }, + { + "epoch": 4.59, + "learning_rate": 2.742627345844504e-05, + "loss": 0.0186, + "step": 1734 + }, + { + "epoch": 4.59, + "learning_rate": 2.741286863270778e-05, + "loss": 0.0117, + "step": 1735 + }, + { + "epoch": 4.59, + "learning_rate": 2.739946380697051e-05, + "loss": 0.0039, + "step": 1736 + }, + { + "epoch": 4.6, + "learning_rate": 2.738605898123325e-05, + "loss": 0.1185, + "step": 1737 + }, + { + "epoch": 4.6, + "learning_rate": 2.737265415549598e-05, + "loss": 0.0276, + "step": 1738 + }, + { + "epoch": 4.6, + "learning_rate": 2.7359249329758713e-05, + "loss": 0.0041, + "step": 1739 + }, + { + "epoch": 4.6, + "learning_rate": 2.734584450402145e-05, + "loss": 0.0133, + "step": 1740 + }, + { + "epoch": 4.61, + "learning_rate": 2.7332439678284183e-05, + "loss": 0.1042, + "step": 1741 + }, + { + "epoch": 4.61, + "learning_rate": 2.7319034852546922e-05, + "loss": 0.0023, + "step": 1742 + }, + { + "epoch": 4.61, + "learning_rate": 2.7305630026809654e-05, + "loss": 0.1586, + "step": 1743 + }, + { + "epoch": 4.61, + "learning_rate": 2.7292225201072385e-05, + "loss": 0.0258, + "step": 1744 + }, + { + "epoch": 4.62, + "learning_rate": 2.7278820375335124e-05, + "loss": 0.1119, + "step": 1745 + }, + { + "epoch": 4.62, + "learning_rate": 2.7265415549597856e-05, + "loss": 0.1115, + "step": 1746 + }, + { + "epoch": 4.62, + "learning_rate": 2.7252010723860594e-05, + "loss": 0.4607, + "step": 1747 + }, + { + "epoch": 4.62, + "learning_rate": 2.7238605898123326e-05, + "loss": 0.0296, + "step": 1748 + }, + { + "epoch": 4.63, + "learning_rate": 2.7225201072386058e-05, + "loss": 0.0277, + "step": 1749 + }, + { + "epoch": 4.63, + "learning_rate": 2.7211796246648796e-05, + "loss": 0.0777, + "step": 1750 + }, + { + "epoch": 4.63, + "learning_rate": 2.7198391420911528e-05, + "loss": 0.0031, + "step": 1751 + }, + { + "epoch": 4.63, + "learning_rate": 2.7184986595174266e-05, + "loss": 0.2238, + "step": 1752 + }, + { + "epoch": 4.64, + "learning_rate": 2.7171581769436998e-05, + "loss": 0.0409, + "step": 1753 + }, + { + "epoch": 4.64, + "learning_rate": 2.715817694369973e-05, + "loss": 0.0032, + "step": 1754 + }, + { + "epoch": 4.64, + "learning_rate": 2.714477211796247e-05, + "loss": 0.0113, + "step": 1755 + }, + { + "epoch": 4.65, + "learning_rate": 2.71313672922252e-05, + "loss": 0.0204, + "step": 1756 + }, + { + "epoch": 4.65, + "learning_rate": 2.711796246648794e-05, + "loss": 0.0022, + "step": 1757 + }, + { + "epoch": 4.65, + "learning_rate": 2.710455764075067e-05, + "loss": 0.0018, + "step": 1758 + }, + { + "epoch": 4.65, + "learning_rate": 2.7091152815013406e-05, + "loss": 0.263, + "step": 1759 + }, + { + "epoch": 4.66, + "learning_rate": 2.707774798927614e-05, + "loss": 0.0109, + "step": 1760 + }, + { + "epoch": 4.66, + "learning_rate": 2.7064343163538876e-05, + "loss": 0.0653, + "step": 1761 + }, + { + "epoch": 4.66, + "learning_rate": 2.705093833780161e-05, + "loss": 0.0116, + "step": 1762 + }, + { + "epoch": 4.66, + "learning_rate": 2.7037533512064346e-05, + "loss": 0.0063, + "step": 1763 + }, + { + "epoch": 4.67, + "learning_rate": 2.7024128686327078e-05, + "loss": 0.0034, + "step": 1764 + }, + { + "epoch": 4.67, + "learning_rate": 2.7010723860589816e-05, + "loss": 0.0395, + "step": 1765 + }, + { + "epoch": 4.67, + "learning_rate": 2.6997319034852548e-05, + "loss": 0.0014, + "step": 1766 + }, + { + "epoch": 4.67, + "learning_rate": 2.6983914209115287e-05, + "loss": 0.0057, + "step": 1767 + }, + { + "epoch": 4.68, + "learning_rate": 2.697050938337802e-05, + "loss": 0.0018, + "step": 1768 + }, + { + "epoch": 4.68, + "learning_rate": 2.695710455764075e-05, + "loss": 0.012, + "step": 1769 + }, + { + "epoch": 4.68, + "learning_rate": 2.694369973190349e-05, + "loss": 0.0017, + "step": 1770 + }, + { + "epoch": 4.69, + "learning_rate": 2.693029490616622e-05, + "loss": 0.0654, + "step": 1771 + }, + { + "epoch": 4.69, + "learning_rate": 2.691689008042896e-05, + "loss": 0.8002, + "step": 1772 + }, + { + "epoch": 4.69, + "learning_rate": 2.690348525469169e-05, + "loss": 0.0035, + "step": 1773 + }, + { + "epoch": 4.69, + "learning_rate": 2.6890080428954422e-05, + "loss": 0.0051, + "step": 1774 + }, + { + "epoch": 4.7, + "learning_rate": 2.687667560321716e-05, + "loss": 0.0031, + "step": 1775 + }, + { + "epoch": 4.7, + "learning_rate": 2.6863270777479893e-05, + "loss": 0.0142, + "step": 1776 + }, + { + "epoch": 4.7, + "learning_rate": 2.684986595174263e-05, + "loss": 0.0009, + "step": 1777 + }, + { + "epoch": 4.7, + "learning_rate": 2.6836461126005363e-05, + "loss": 0.0015, + "step": 1778 + }, + { + "epoch": 4.71, + "learning_rate": 2.6823056300268095e-05, + "loss": 0.3481, + "step": 1779 + }, + { + "epoch": 4.71, + "learning_rate": 2.6809651474530833e-05, + "loss": 0.3095, + "step": 1780 + }, + { + "epoch": 4.71, + "learning_rate": 2.6796246648793565e-05, + "loss": 0.2567, + "step": 1781 + }, + { + "epoch": 4.71, + "learning_rate": 2.6782841823056303e-05, + "loss": 0.0037, + "step": 1782 + }, + { + "epoch": 4.72, + "learning_rate": 2.6769436997319035e-05, + "loss": 0.001, + "step": 1783 + }, + { + "epoch": 4.72, + "learning_rate": 2.675603217158177e-05, + "loss": 0.0065, + "step": 1784 + }, + { + "epoch": 4.72, + "learning_rate": 2.6742627345844505e-05, + "loss": 0.0029, + "step": 1785 + }, + { + "epoch": 4.72, + "learning_rate": 2.672922252010724e-05, + "loss": 0.6096, + "step": 1786 + }, + { + "epoch": 4.73, + "learning_rate": 2.6715817694369976e-05, + "loss": 0.0127, + "step": 1787 + }, + { + "epoch": 4.73, + "learning_rate": 2.670241286863271e-05, + "loss": 0.0031, + "step": 1788 + }, + { + "epoch": 4.73, + "learning_rate": 2.6689008042895443e-05, + "loss": 0.2463, + "step": 1789 + }, + { + "epoch": 4.74, + "learning_rate": 2.667560321715818e-05, + "loss": 0.1022, + "step": 1790 + }, + { + "epoch": 4.74, + "learning_rate": 2.6662198391420913e-05, + "loss": 0.002, + "step": 1791 + }, + { + "epoch": 4.74, + "learning_rate": 2.664879356568365e-05, + "loss": 0.1576, + "step": 1792 + }, + { + "epoch": 4.74, + "learning_rate": 2.6635388739946383e-05, + "loss": 0.1099, + "step": 1793 + }, + { + "epoch": 4.75, + "learning_rate": 2.6621983914209115e-05, + "loss": 0.1482, + "step": 1794 + }, + { + "epoch": 4.75, + "learning_rate": 2.6608579088471853e-05, + "loss": 0.0007, + "step": 1795 + }, + { + "epoch": 4.75, + "learning_rate": 2.6595174262734585e-05, + "loss": 0.0009, + "step": 1796 + }, + { + "epoch": 4.75, + "learning_rate": 2.6581769436997324e-05, + "loss": 0.005, + "step": 1797 + }, + { + "epoch": 4.76, + "learning_rate": 2.6568364611260055e-05, + "loss": 0.1808, + "step": 1798 + }, + { + "epoch": 4.76, + "learning_rate": 2.6554959785522787e-05, + "loss": 0.0351, + "step": 1799 + }, + { + "epoch": 4.76, + "learning_rate": 2.6541554959785526e-05, + "loss": 0.2555, + "step": 1800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6528150134048257e-05, + "loss": 0.2236, + "step": 1801 + }, + { + "epoch": 4.77, + "learning_rate": 2.6514745308310996e-05, + "loss": 0.3208, + "step": 1802 + }, + { + "epoch": 4.77, + "learning_rate": 2.6501340482573728e-05, + "loss": 0.0202, + "step": 1803 + }, + { + "epoch": 4.77, + "learning_rate": 2.648793565683646e-05, + "loss": 0.0033, + "step": 1804 + }, + { + "epoch": 4.78, + "learning_rate": 2.6474530831099198e-05, + "loss": 0.001, + "step": 1805 + }, + { + "epoch": 4.78, + "learning_rate": 2.646112600536193e-05, + "loss": 0.0019, + "step": 1806 + }, + { + "epoch": 4.78, + "learning_rate": 2.6447721179624668e-05, + "loss": 0.0027, + "step": 1807 + }, + { + "epoch": 4.78, + "learning_rate": 2.64343163538874e-05, + "loss": 0.0051, + "step": 1808 + }, + { + "epoch": 4.79, + "learning_rate": 2.642091152815013e-05, + "loss": 0.1994, + "step": 1809 + }, + { + "epoch": 4.79, + "learning_rate": 2.640750670241287e-05, + "loss": 0.0372, + "step": 1810 + }, + { + "epoch": 4.79, + "learning_rate": 2.6394101876675602e-05, + "loss": 0.0678, + "step": 1811 + }, + { + "epoch": 4.79, + "learning_rate": 2.638069705093834e-05, + "loss": 0.0252, + "step": 1812 + }, + { + "epoch": 4.8, + "learning_rate": 2.6367292225201072e-05, + "loss": 0.0065, + "step": 1813 + }, + { + "epoch": 4.8, + "learning_rate": 2.6353887399463807e-05, + "loss": 0.0045, + "step": 1814 + }, + { + "epoch": 4.8, + "learning_rate": 2.6340482573726542e-05, + "loss": 0.0037, + "step": 1815 + }, + { + "epoch": 4.8, + "learning_rate": 2.6327077747989277e-05, + "loss": 0.0251, + "step": 1816 + }, + { + "epoch": 4.81, + "learning_rate": 2.6313672922252013e-05, + "loss": 0.4196, + "step": 1817 + }, + { + "epoch": 4.81, + "learning_rate": 2.6300268096514748e-05, + "loss": 0.0071, + "step": 1818 + }, + { + "epoch": 4.81, + "learning_rate": 2.628686327077748e-05, + "loss": 0.0787, + "step": 1819 + }, + { + "epoch": 4.81, + "learning_rate": 2.6273458445040218e-05, + "loss": 0.0145, + "step": 1820 + }, + { + "epoch": 4.82, + "learning_rate": 2.626005361930295e-05, + "loss": 0.009, + "step": 1821 + }, + { + "epoch": 4.82, + "learning_rate": 2.6246648793565688e-05, + "loss": 0.0027, + "step": 1822 + }, + { + "epoch": 4.82, + "learning_rate": 2.623324396782842e-05, + "loss": 0.0017, + "step": 1823 + }, + { + "epoch": 4.83, + "learning_rate": 2.6219839142091152e-05, + "loss": 0.4824, + "step": 1824 + }, + { + "epoch": 4.83, + "learning_rate": 2.620643431635389e-05, + "loss": 0.0022, + "step": 1825 + }, + { + "epoch": 4.83, + "learning_rate": 2.6193029490616622e-05, + "loss": 0.3223, + "step": 1826 + }, + { + "epoch": 4.83, + "learning_rate": 2.617962466487936e-05, + "loss": 0.2195, + "step": 1827 + }, + { + "epoch": 4.84, + "learning_rate": 2.6166219839142092e-05, + "loss": 0.0013, + "step": 1828 + }, + { + "epoch": 4.84, + "learning_rate": 2.6152815013404824e-05, + "loss": 0.0343, + "step": 1829 + }, + { + "epoch": 4.84, + "learning_rate": 2.6139410187667563e-05, + "loss": 0.0022, + "step": 1830 + }, + { + "epoch": 4.84, + "learning_rate": 2.6126005361930294e-05, + "loss": 0.0022, + "step": 1831 + }, + { + "epoch": 4.85, + "learning_rate": 2.6112600536193033e-05, + "loss": 0.4116, + "step": 1832 + }, + { + "epoch": 4.85, + "learning_rate": 2.6099195710455765e-05, + "loss": 0.0048, + "step": 1833 + }, + { + "epoch": 4.85, + "learning_rate": 2.6085790884718496e-05, + "loss": 0.5819, + "step": 1834 + }, + { + "epoch": 4.85, + "learning_rate": 2.6072386058981235e-05, + "loss": 0.1985, + "step": 1835 + }, + { + "epoch": 4.86, + "learning_rate": 2.6058981233243967e-05, + "loss": 0.0989, + "step": 1836 + }, + { + "epoch": 4.86, + "learning_rate": 2.6045576407506705e-05, + "loss": 0.341, + "step": 1837 + }, + { + "epoch": 4.86, + "learning_rate": 2.6032171581769437e-05, + "loss": 0.0044, + "step": 1838 + }, + { + "epoch": 4.87, + "learning_rate": 2.601876675603217e-05, + "loss": 0.004, + "step": 1839 + }, + { + "epoch": 4.87, + "learning_rate": 2.6005361930294907e-05, + "loss": 0.2858, + "step": 1840 + }, + { + "epoch": 4.87, + "learning_rate": 2.599195710455764e-05, + "loss": 0.0009, + "step": 1841 + }, + { + "epoch": 4.87, + "learning_rate": 2.5978552278820377e-05, + "loss": 0.0042, + "step": 1842 + }, + { + "epoch": 4.88, + "learning_rate": 2.596514745308311e-05, + "loss": 0.0045, + "step": 1843 + }, + { + "epoch": 4.88, + "learning_rate": 2.5951742627345844e-05, + "loss": 0.0144, + "step": 1844 + }, + { + "epoch": 4.88, + "learning_rate": 2.593833780160858e-05, + "loss": 0.0084, + "step": 1845 + }, + { + "epoch": 4.88, + "learning_rate": 2.5924932975871314e-05, + "loss": 0.4276, + "step": 1846 + }, + { + "epoch": 4.89, + "learning_rate": 2.591152815013405e-05, + "loss": 0.0122, + "step": 1847 + }, + { + "epoch": 4.89, + "learning_rate": 2.5898123324396785e-05, + "loss": 0.0776, + "step": 1848 + }, + { + "epoch": 4.89, + "learning_rate": 2.5884718498659516e-05, + "loss": 0.0117, + "step": 1849 + }, + { + "epoch": 4.89, + "learning_rate": 2.5871313672922255e-05, + "loss": 0.2809, + "step": 1850 + }, + { + "epoch": 4.9, + "learning_rate": 2.5857908847184987e-05, + "loss": 0.0413, + "step": 1851 + }, + { + "epoch": 4.9, + "learning_rate": 2.5844504021447725e-05, + "loss": 0.0187, + "step": 1852 + }, + { + "epoch": 4.9, + "learning_rate": 2.5831099195710457e-05, + "loss": 0.452, + "step": 1853 + }, + { + "epoch": 4.9, + "learning_rate": 2.5817694369973195e-05, + "loss": 0.0206, + "step": 1854 + }, + { + "epoch": 4.91, + "learning_rate": 2.5804289544235927e-05, + "loss": 0.1639, + "step": 1855 + }, + { + "epoch": 4.91, + "learning_rate": 2.579088471849866e-05, + "loss": 0.1865, + "step": 1856 + }, + { + "epoch": 4.91, + "learning_rate": 2.5777479892761398e-05, + "loss": 0.0022, + "step": 1857 + }, + { + "epoch": 4.92, + "learning_rate": 2.576407506702413e-05, + "loss": 0.1167, + "step": 1858 + }, + { + "epoch": 4.92, + "learning_rate": 2.5750670241286868e-05, + "loss": 0.4013, + "step": 1859 + }, + { + "epoch": 4.92, + "learning_rate": 2.57372654155496e-05, + "loss": 0.2355, + "step": 1860 + }, + { + "epoch": 4.92, + "learning_rate": 2.572386058981233e-05, + "loss": 0.0076, + "step": 1861 + }, + { + "epoch": 4.93, + "learning_rate": 2.571045576407507e-05, + "loss": 0.1612, + "step": 1862 + }, + { + "epoch": 4.93, + "learning_rate": 2.56970509383378e-05, + "loss": 0.0047, + "step": 1863 + }, + { + "epoch": 4.93, + "learning_rate": 2.568364611260054e-05, + "loss": 0.1511, + "step": 1864 + }, + { + "epoch": 4.93, + "learning_rate": 2.5670241286863272e-05, + "loss": 0.011, + "step": 1865 + }, + { + "epoch": 4.94, + "learning_rate": 2.5656836461126004e-05, + "loss": 0.1761, + "step": 1866 + }, + { + "epoch": 4.94, + "learning_rate": 2.5643431635388742e-05, + "loss": 0.004, + "step": 1867 + }, + { + "epoch": 4.94, + "learning_rate": 2.5630026809651474e-05, + "loss": 0.0036, + "step": 1868 + }, + { + "epoch": 4.94, + "learning_rate": 2.5616621983914212e-05, + "loss": 0.4345, + "step": 1869 + }, + { + "epoch": 4.95, + "learning_rate": 2.5603217158176944e-05, + "loss": 0.0034, + "step": 1870 + }, + { + "epoch": 4.95, + "learning_rate": 2.558981233243968e-05, + "loss": 0.1269, + "step": 1871 + }, + { + "epoch": 4.95, + "learning_rate": 2.5576407506702414e-05, + "loss": 0.183, + "step": 1872 + }, + { + "epoch": 4.96, + "learning_rate": 2.556300268096515e-05, + "loss": 0.008, + "step": 1873 + }, + { + "epoch": 4.96, + "learning_rate": 2.5549597855227885e-05, + "loss": 0.0035, + "step": 1874 + }, + { + "epoch": 4.96, + "learning_rate": 2.553619302949062e-05, + "loss": 0.0133, + "step": 1875 + }, + { + "epoch": 4.96, + "learning_rate": 2.552278820375335e-05, + "loss": 0.2156, + "step": 1876 + }, + { + "epoch": 4.97, + "learning_rate": 2.550938337801609e-05, + "loss": 0.0043, + "step": 1877 + }, + { + "epoch": 4.97, + "learning_rate": 2.549597855227882e-05, + "loss": 0.2614, + "step": 1878 + }, + { + "epoch": 4.97, + "learning_rate": 2.548257372654156e-05, + "loss": 0.0208, + "step": 1879 + }, + { + "epoch": 4.97, + "learning_rate": 2.5469168900804292e-05, + "loss": 0.0228, + "step": 1880 + }, + { + "epoch": 4.98, + "learning_rate": 2.5455764075067024e-05, + "loss": 0.0105, + "step": 1881 + }, + { + "epoch": 4.98, + "learning_rate": 2.5442359249329762e-05, + "loss": 0.0108, + "step": 1882 + }, + { + "epoch": 4.98, + "learning_rate": 2.5428954423592494e-05, + "loss": 0.3828, + "step": 1883 + }, + { + "epoch": 4.98, + "learning_rate": 2.5415549597855232e-05, + "loss": 0.0093, + "step": 1884 + }, + { + "epoch": 4.99, + "learning_rate": 2.5402144772117964e-05, + "loss": 0.0231, + "step": 1885 + }, + { + "epoch": 4.99, + "learning_rate": 2.5388739946380696e-05, + "loss": 0.0082, + "step": 1886 + }, + { + "epoch": 4.99, + "learning_rate": 2.5375335120643434e-05, + "loss": 0.1796, + "step": 1887 + }, + { + "epoch": 4.99, + "learning_rate": 2.5361930294906166e-05, + "loss": 0.0753, + "step": 1888 + }, + { + "epoch": 5.0, + "learning_rate": 2.5348525469168905e-05, + "loss": 0.0142, + "step": 1889 + }, + { + "epoch": 5.0, + "learning_rate": 2.5335120643431636e-05, + "loss": 0.0047, + "step": 1890 + }, + { + "epoch": 5.0, + "eval_f1": 0.7775974025974025, + "eval_loss": 0.953689694404602, + "eval_runtime": 1.8696, + "eval_samples_per_second": 809.285, + "eval_steps_per_second": 50.814, + "step": 1890 + }, + { + "epoch": 5.0, + "learning_rate": 2.5321715817694368e-05, + "loss": 0.0014, + "step": 1891 + }, + { + "epoch": 5.01, + "learning_rate": 2.5308310991957107e-05, + "loss": 0.0487, + "step": 1892 + }, + { + "epoch": 5.01, + "learning_rate": 2.529490616621984e-05, + "loss": 0.0037, + "step": 1893 + }, + { + "epoch": 5.01, + "learning_rate": 2.5281501340482577e-05, + "loss": 0.0512, + "step": 1894 + }, + { + "epoch": 5.01, + "learning_rate": 2.526809651474531e-05, + "loss": 0.134, + "step": 1895 + }, + { + "epoch": 5.02, + "learning_rate": 2.525469168900804e-05, + "loss": 0.3762, + "step": 1896 + }, + { + "epoch": 5.02, + "learning_rate": 2.524128686327078e-05, + "loss": 0.0011, + "step": 1897 + }, + { + "epoch": 5.02, + "learning_rate": 2.522788203753351e-05, + "loss": 0.0023, + "step": 1898 + }, + { + "epoch": 5.02, + "learning_rate": 2.521447721179625e-05, + "loss": 0.0526, + "step": 1899 + }, + { + "epoch": 5.03, + "learning_rate": 2.520107238605898e-05, + "loss": 0.0553, + "step": 1900 + }, + { + "epoch": 5.03, + "learning_rate": 2.5187667560321716e-05, + "loss": 0.1773, + "step": 1901 + }, + { + "epoch": 5.03, + "learning_rate": 2.517426273458445e-05, + "loss": 0.451, + "step": 1902 + }, + { + "epoch": 5.03, + "learning_rate": 2.5160857908847186e-05, + "loss": 0.0217, + "step": 1903 + }, + { + "epoch": 5.04, + "learning_rate": 2.514745308310992e-05, + "loss": 0.0728, + "step": 1904 + }, + { + "epoch": 5.04, + "learning_rate": 2.5134048257372657e-05, + "loss": 0.0009, + "step": 1905 + }, + { + "epoch": 5.04, + "learning_rate": 2.512064343163539e-05, + "loss": 0.1018, + "step": 1906 + }, + { + "epoch": 5.04, + "learning_rate": 2.5107238605898127e-05, + "loss": 0.0012, + "step": 1907 + }, + { + "epoch": 5.05, + "learning_rate": 2.509383378016086e-05, + "loss": 0.004, + "step": 1908 + }, + { + "epoch": 5.05, + "learning_rate": 2.5080428954423597e-05, + "loss": 0.0012, + "step": 1909 + }, + { + "epoch": 5.05, + "learning_rate": 2.506702412868633e-05, + "loss": 0.0128, + "step": 1910 + }, + { + "epoch": 5.06, + "learning_rate": 2.505361930294906e-05, + "loss": 0.1116, + "step": 1911 + }, + { + "epoch": 5.06, + "learning_rate": 2.50402144772118e-05, + "loss": 0.0011, + "step": 1912 + }, + { + "epoch": 5.06, + "learning_rate": 2.502680965147453e-05, + "loss": 0.0011, + "step": 1913 + }, + { + "epoch": 5.06, + "learning_rate": 2.501340482573727e-05, + "loss": 0.0897, + "step": 1914 + }, + { + "epoch": 5.07, + "learning_rate": 2.5e-05, + "loss": 0.0014, + "step": 1915 + }, + { + "epoch": 5.07, + "learning_rate": 2.4986595174262736e-05, + "loss": 0.0918, + "step": 1916 + }, + { + "epoch": 5.07, + "learning_rate": 2.497319034852547e-05, + "loss": 0.0026, + "step": 1917 + }, + { + "epoch": 5.07, + "learning_rate": 2.4959785522788203e-05, + "loss": 0.0225, + "step": 1918 + }, + { + "epoch": 5.08, + "learning_rate": 2.494638069705094e-05, + "loss": 0.2655, + "step": 1919 + }, + { + "epoch": 5.08, + "learning_rate": 2.4932975871313673e-05, + "loss": 0.0029, + "step": 1920 + }, + { + "epoch": 5.08, + "learning_rate": 2.491957104557641e-05, + "loss": 0.0006, + "step": 1921 + }, + { + "epoch": 5.08, + "learning_rate": 2.4906166219839144e-05, + "loss": 0.0008, + "step": 1922 + }, + { + "epoch": 5.09, + "learning_rate": 2.4892761394101875e-05, + "loss": 0.0012, + "step": 1923 + }, + { + "epoch": 5.09, + "learning_rate": 2.487935656836461e-05, + "loss": 0.0013, + "step": 1924 + }, + { + "epoch": 5.09, + "learning_rate": 2.4865951742627346e-05, + "loss": 0.0524, + "step": 1925 + }, + { + "epoch": 5.1, + "learning_rate": 2.485254691689008e-05, + "loss": 0.0059, + "step": 1926 + }, + { + "epoch": 5.1, + "learning_rate": 2.4839142091152816e-05, + "loss": 0.0026, + "step": 1927 + }, + { + "epoch": 5.1, + "learning_rate": 2.482573726541555e-05, + "loss": 0.0015, + "step": 1928 + }, + { + "epoch": 5.1, + "learning_rate": 2.4812332439678286e-05, + "loss": 0.0073, + "step": 1929 + }, + { + "epoch": 5.11, + "learning_rate": 2.479892761394102e-05, + "loss": 0.0008, + "step": 1930 + }, + { + "epoch": 5.11, + "learning_rate": 2.4785522788203757e-05, + "loss": 0.1519, + "step": 1931 + }, + { + "epoch": 5.11, + "learning_rate": 2.477211796246649e-05, + "loss": 0.008, + "step": 1932 + }, + { + "epoch": 5.11, + "learning_rate": 2.4758713136729223e-05, + "loss": 0.0009, + "step": 1933 + }, + { + "epoch": 5.12, + "learning_rate": 2.474530831099196e-05, + "loss": 0.0299, + "step": 1934 + }, + { + "epoch": 5.12, + "learning_rate": 2.4731903485254694e-05, + "loss": 0.1637, + "step": 1935 + }, + { + "epoch": 5.12, + "learning_rate": 2.471849865951743e-05, + "loss": 0.0369, + "step": 1936 + }, + { + "epoch": 5.12, + "learning_rate": 2.4705093833780164e-05, + "loss": 0.0057, + "step": 1937 + }, + { + "epoch": 5.13, + "learning_rate": 2.4691689008042896e-05, + "loss": 0.0035, + "step": 1938 + }, + { + "epoch": 5.13, + "learning_rate": 2.467828418230563e-05, + "loss": 0.0011, + "step": 1939 + }, + { + "epoch": 5.13, + "learning_rate": 2.4664879356568366e-05, + "loss": 0.0018, + "step": 1940 + }, + { + "epoch": 5.13, + "learning_rate": 2.46514745308311e-05, + "loss": 0.0013, + "step": 1941 + }, + { + "epoch": 5.14, + "learning_rate": 2.4638069705093836e-05, + "loss": 0.0779, + "step": 1942 + }, + { + "epoch": 5.14, + "learning_rate": 2.4624664879356568e-05, + "loss": 0.1762, + "step": 1943 + }, + { + "epoch": 5.14, + "learning_rate": 2.4611260053619303e-05, + "loss": 0.0006, + "step": 1944 + }, + { + "epoch": 5.15, + "learning_rate": 2.4597855227882038e-05, + "loss": 0.0037, + "step": 1945 + }, + { + "epoch": 5.15, + "learning_rate": 2.4584450402144773e-05, + "loss": 0.0005, + "step": 1946 + }, + { + "epoch": 5.15, + "learning_rate": 2.457104557640751e-05, + "loss": 0.0397, + "step": 1947 + }, + { + "epoch": 5.15, + "learning_rate": 2.4557640750670244e-05, + "loss": 0.0082, + "step": 1948 + }, + { + "epoch": 5.16, + "learning_rate": 2.4544235924932975e-05, + "loss": 0.0008, + "step": 1949 + }, + { + "epoch": 5.16, + "learning_rate": 2.453083109919571e-05, + "loss": 0.0219, + "step": 1950 + }, + { + "epoch": 5.16, + "learning_rate": 2.4517426273458446e-05, + "loss": 0.3966, + "step": 1951 + }, + { + "epoch": 5.16, + "learning_rate": 2.450402144772118e-05, + "loss": 0.0011, + "step": 1952 + }, + { + "epoch": 5.17, + "learning_rate": 2.4490616621983916e-05, + "loss": 0.3447, + "step": 1953 + }, + { + "epoch": 5.17, + "learning_rate": 2.4477211796246648e-05, + "loss": 0.0006, + "step": 1954 + }, + { + "epoch": 5.17, + "learning_rate": 2.4463806970509383e-05, + "loss": 0.0011, + "step": 1955 + }, + { + "epoch": 5.17, + "learning_rate": 2.4450402144772118e-05, + "loss": 0.0013, + "step": 1956 + }, + { + "epoch": 5.18, + "learning_rate": 2.4436997319034853e-05, + "loss": 0.1495, + "step": 1957 + }, + { + "epoch": 5.18, + "learning_rate": 2.4423592493297588e-05, + "loss": 0.0005, + "step": 1958 + }, + { + "epoch": 5.18, + "learning_rate": 2.4410187667560323e-05, + "loss": 0.3345, + "step": 1959 + }, + { + "epoch": 5.19, + "learning_rate": 2.439678284182306e-05, + "loss": 0.0048, + "step": 1960 + }, + { + "epoch": 5.19, + "learning_rate": 2.4383378016085793e-05, + "loss": 0.001, + "step": 1961 + }, + { + "epoch": 5.19, + "learning_rate": 2.436997319034853e-05, + "loss": 0.0025, + "step": 1962 + }, + { + "epoch": 5.19, + "learning_rate": 2.4356568364611264e-05, + "loss": 0.3215, + "step": 1963 + }, + { + "epoch": 5.2, + "learning_rate": 2.4343163538873995e-05, + "loss": 0.0197, + "step": 1964 + }, + { + "epoch": 5.2, + "learning_rate": 2.432975871313673e-05, + "loss": 0.0018, + "step": 1965 + }, + { + "epoch": 5.2, + "learning_rate": 2.4316353887399466e-05, + "loss": 0.1012, + "step": 1966 + }, + { + "epoch": 5.2, + "learning_rate": 2.43029490616622e-05, + "loss": 0.0179, + "step": 1967 + }, + { + "epoch": 5.21, + "learning_rate": 2.4289544235924936e-05, + "loss": 0.0032, + "step": 1968 + }, + { + "epoch": 5.21, + "learning_rate": 2.4276139410187668e-05, + "loss": 0.0011, + "step": 1969 + }, + { + "epoch": 5.21, + "learning_rate": 2.4262734584450403e-05, + "loss": 0.4875, + "step": 1970 + }, + { + "epoch": 5.21, + "learning_rate": 2.4249329758713138e-05, + "loss": 0.2791, + "step": 1971 + }, + { + "epoch": 5.22, + "learning_rate": 2.4235924932975873e-05, + "loss": 0.0011, + "step": 1972 + }, + { + "epoch": 5.22, + "learning_rate": 2.4222520107238608e-05, + "loss": 0.0011, + "step": 1973 + }, + { + "epoch": 5.22, + "learning_rate": 2.420911528150134e-05, + "loss": 0.0976, + "step": 1974 + }, + { + "epoch": 5.22, + "learning_rate": 2.4195710455764075e-05, + "loss": 0.3669, + "step": 1975 + }, + { + "epoch": 5.23, + "learning_rate": 2.418230563002681e-05, + "loss": 0.0022, + "step": 1976 + }, + { + "epoch": 5.23, + "learning_rate": 2.4168900804289545e-05, + "loss": 0.0015, + "step": 1977 + }, + { + "epoch": 5.23, + "learning_rate": 2.415549597855228e-05, + "loss": 0.0014, + "step": 1978 + }, + { + "epoch": 5.24, + "learning_rate": 2.4142091152815012e-05, + "loss": 0.0433, + "step": 1979 + }, + { + "epoch": 5.24, + "learning_rate": 2.4128686327077747e-05, + "loss": 0.0019, + "step": 1980 + }, + { + "epoch": 5.24, + "learning_rate": 2.4115281501340483e-05, + "loss": 0.0007, + "step": 1981 + }, + { + "epoch": 5.24, + "learning_rate": 2.4101876675603218e-05, + "loss": 0.0136, + "step": 1982 + }, + { + "epoch": 5.25, + "learning_rate": 2.4088471849865953e-05, + "loss": 0.1744, + "step": 1983 + }, + { + "epoch": 5.25, + "learning_rate": 2.4075067024128688e-05, + "loss": 0.1557, + "step": 1984 + }, + { + "epoch": 5.25, + "learning_rate": 2.4061662198391423e-05, + "loss": 0.1192, + "step": 1985 + }, + { + "epoch": 5.25, + "learning_rate": 2.4048257372654158e-05, + "loss": 0.0406, + "step": 1986 + }, + { + "epoch": 5.26, + "learning_rate": 2.4034852546916893e-05, + "loss": 0.2243, + "step": 1987 + }, + { + "epoch": 5.26, + "learning_rate": 2.402144772117963e-05, + "loss": 0.0021, + "step": 1988 + }, + { + "epoch": 5.26, + "learning_rate": 2.400804289544236e-05, + "loss": 0.002, + "step": 1989 + }, + { + "epoch": 5.26, + "learning_rate": 2.3994638069705095e-05, + "loss": 0.077, + "step": 1990 + }, + { + "epoch": 5.27, + "learning_rate": 2.398123324396783e-05, + "loss": 0.0378, + "step": 1991 + }, + { + "epoch": 5.27, + "learning_rate": 2.3967828418230566e-05, + "loss": 0.012, + "step": 1992 + }, + { + "epoch": 5.27, + "learning_rate": 2.39544235924933e-05, + "loss": 0.1386, + "step": 1993 + }, + { + "epoch": 5.28, + "learning_rate": 2.3941018766756032e-05, + "loss": 0.002, + "step": 1994 + }, + { + "epoch": 5.28, + "learning_rate": 2.3927613941018768e-05, + "loss": 0.0008, + "step": 1995 + }, + { + "epoch": 5.28, + "learning_rate": 2.3914209115281503e-05, + "loss": 0.0021, + "step": 1996 + }, + { + "epoch": 5.28, + "learning_rate": 2.3900804289544238e-05, + "loss": 0.022, + "step": 1997 + }, + { + "epoch": 5.29, + "learning_rate": 2.3887399463806973e-05, + "loss": 0.0015, + "step": 1998 + }, + { + "epoch": 5.29, + "learning_rate": 2.3873994638069705e-05, + "loss": 0.1486, + "step": 1999 + }, + { + "epoch": 5.29, + "learning_rate": 2.386058981233244e-05, + "loss": 0.2586, + "step": 2000 + }, + { + "epoch": 5.29, + "learning_rate": 2.3847184986595175e-05, + "loss": 0.0088, + "step": 2001 + }, + { + "epoch": 5.3, + "learning_rate": 2.383378016085791e-05, + "loss": 0.0044, + "step": 2002 + }, + { + "epoch": 5.3, + "learning_rate": 2.3820375335120645e-05, + "loss": 0.0015, + "step": 2003 + }, + { + "epoch": 5.3, + "learning_rate": 2.3806970509383377e-05, + "loss": 0.0008, + "step": 2004 + }, + { + "epoch": 5.3, + "learning_rate": 2.3793565683646112e-05, + "loss": 0.212, + "step": 2005 + }, + { + "epoch": 5.31, + "learning_rate": 2.3780160857908847e-05, + "loss": 0.0005, + "step": 2006 + }, + { + "epoch": 5.31, + "learning_rate": 2.3766756032171582e-05, + "loss": 0.1511, + "step": 2007 + }, + { + "epoch": 5.31, + "learning_rate": 2.3753351206434318e-05, + "loss": 0.0023, + "step": 2008 + }, + { + "epoch": 5.31, + "learning_rate": 2.3739946380697053e-05, + "loss": 0.1544, + "step": 2009 + }, + { + "epoch": 5.32, + "learning_rate": 2.3726541554959784e-05, + "loss": 0.0306, + "step": 2010 + }, + { + "epoch": 5.32, + "learning_rate": 2.371313672922252e-05, + "loss": 0.1005, + "step": 2011 + }, + { + "epoch": 5.32, + "learning_rate": 2.3699731903485255e-05, + "loss": 0.0744, + "step": 2012 + }, + { + "epoch": 5.33, + "learning_rate": 2.368632707774799e-05, + "loss": 0.0622, + "step": 2013 + }, + { + "epoch": 5.33, + "learning_rate": 2.3672922252010725e-05, + "loss": 0.0192, + "step": 2014 + }, + { + "epoch": 5.33, + "learning_rate": 2.365951742627346e-05, + "loss": 0.016, + "step": 2015 + }, + { + "epoch": 5.33, + "learning_rate": 2.3646112600536195e-05, + "loss": 0.293, + "step": 2016 + }, + { + "epoch": 5.34, + "learning_rate": 2.363270777479893e-05, + "loss": 0.0114, + "step": 2017 + }, + { + "epoch": 5.34, + "learning_rate": 2.3619302949061665e-05, + "loss": 0.1254, + "step": 2018 + }, + { + "epoch": 5.34, + "learning_rate": 2.36058981233244e-05, + "loss": 0.0638, + "step": 2019 + }, + { + "epoch": 5.34, + "learning_rate": 2.3592493297587132e-05, + "loss": 0.0192, + "step": 2020 + }, + { + "epoch": 5.35, + "learning_rate": 2.3579088471849867e-05, + "loss": 0.0069, + "step": 2021 + }, + { + "epoch": 5.35, + "learning_rate": 2.3565683646112603e-05, + "loss": 0.0573, + "step": 2022 + }, + { + "epoch": 5.35, + "learning_rate": 2.3552278820375338e-05, + "loss": 0.0039, + "step": 2023 + }, + { + "epoch": 5.35, + "learning_rate": 2.3538873994638073e-05, + "loss": 0.0079, + "step": 2024 + }, + { + "epoch": 5.36, + "learning_rate": 2.3525469168900805e-05, + "loss": 0.0063, + "step": 2025 + }, + { + "epoch": 5.36, + "learning_rate": 2.351206434316354e-05, + "loss": 0.0032, + "step": 2026 + }, + { + "epoch": 5.36, + "learning_rate": 2.3498659517426275e-05, + "loss": 0.1018, + "step": 2027 + }, + { + "epoch": 5.37, + "learning_rate": 2.348525469168901e-05, + "loss": 0.0017, + "step": 2028 + }, + { + "epoch": 5.37, + "learning_rate": 2.3471849865951745e-05, + "loss": 0.0203, + "step": 2029 + }, + { + "epoch": 5.37, + "learning_rate": 2.3458445040214477e-05, + "loss": 0.0008, + "step": 2030 + }, + { + "epoch": 5.37, + "learning_rate": 2.3445040214477212e-05, + "loss": 0.3145, + "step": 2031 + }, + { + "epoch": 5.38, + "learning_rate": 2.3431635388739947e-05, + "loss": 0.2476, + "step": 2032 + }, + { + "epoch": 5.38, + "learning_rate": 2.3418230563002682e-05, + "loss": 0.0573, + "step": 2033 + }, + { + "epoch": 5.38, + "learning_rate": 2.3404825737265417e-05, + "loss": 0.0008, + "step": 2034 + }, + { + "epoch": 5.38, + "learning_rate": 2.339142091152815e-05, + "loss": 0.0636, + "step": 2035 + }, + { + "epoch": 5.39, + "learning_rate": 2.3378016085790884e-05, + "loss": 0.001, + "step": 2036 + }, + { + "epoch": 5.39, + "learning_rate": 2.336461126005362e-05, + "loss": 0.0079, + "step": 2037 + }, + { + "epoch": 5.39, + "learning_rate": 2.3351206434316355e-05, + "loss": 0.0752, + "step": 2038 + }, + { + "epoch": 5.39, + "learning_rate": 2.333780160857909e-05, + "loss": 0.0024, + "step": 2039 + }, + { + "epoch": 5.4, + "learning_rate": 2.332439678284182e-05, + "loss": 0.0061, + "step": 2040 + }, + { + "epoch": 5.4, + "learning_rate": 2.3310991957104557e-05, + "loss": 0.2882, + "step": 2041 + }, + { + "epoch": 5.4, + "learning_rate": 2.329758713136729e-05, + "loss": 0.02, + "step": 2042 + }, + { + "epoch": 5.4, + "learning_rate": 2.3284182305630027e-05, + "loss": 0.018, + "step": 2043 + }, + { + "epoch": 5.41, + "learning_rate": 2.3270777479892762e-05, + "loss": 0.0125, + "step": 2044 + }, + { + "epoch": 5.41, + "learning_rate": 2.3257372654155497e-05, + "loss": 0.0007, + "step": 2045 + }, + { + "epoch": 5.41, + "learning_rate": 2.3243967828418232e-05, + "loss": 0.3849, + "step": 2046 + }, + { + "epoch": 5.42, + "learning_rate": 2.3230563002680967e-05, + "loss": 0.0011, + "step": 2047 + }, + { + "epoch": 5.42, + "learning_rate": 2.3217158176943702e-05, + "loss": 0.1235, + "step": 2048 + }, + { + "epoch": 5.42, + "learning_rate": 2.3203753351206438e-05, + "loss": 0.0006, + "step": 2049 + }, + { + "epoch": 5.42, + "learning_rate": 2.319034852546917e-05, + "loss": 0.0006, + "step": 2050 + }, + { + "epoch": 5.43, + "learning_rate": 2.3176943699731904e-05, + "loss": 0.0011, + "step": 2051 + }, + { + "epoch": 5.43, + "learning_rate": 2.316353887399464e-05, + "loss": 0.0011, + "step": 2052 + }, + { + "epoch": 5.43, + "learning_rate": 2.3150134048257375e-05, + "loss": 0.0032, + "step": 2053 + }, + { + "epoch": 5.43, + "learning_rate": 2.313672922252011e-05, + "loss": 0.0718, + "step": 2054 + }, + { + "epoch": 5.44, + "learning_rate": 2.312332439678284e-05, + "loss": 0.0052, + "step": 2055 + }, + { + "epoch": 5.44, + "learning_rate": 2.3109919571045577e-05, + "loss": 0.0026, + "step": 2056 + }, + { + "epoch": 5.44, + "learning_rate": 2.3096514745308312e-05, + "loss": 0.1854, + "step": 2057 + }, + { + "epoch": 5.44, + "learning_rate": 2.3083109919571047e-05, + "loss": 0.0008, + "step": 2058 + }, + { + "epoch": 5.45, + "learning_rate": 2.3069705093833782e-05, + "loss": 0.2671, + "step": 2059 + }, + { + "epoch": 5.45, + "learning_rate": 2.3056300268096514e-05, + "loss": 0.0064, + "step": 2060 + }, + { + "epoch": 5.45, + "learning_rate": 2.304289544235925e-05, + "loss": 0.0012, + "step": 2061 + }, + { + "epoch": 5.46, + "learning_rate": 2.3029490616621984e-05, + "loss": 0.0006, + "step": 2062 + }, + { + "epoch": 5.46, + "learning_rate": 2.301608579088472e-05, + "loss": 0.0008, + "step": 2063 + }, + { + "epoch": 5.46, + "learning_rate": 2.3002680965147454e-05, + "loss": 0.4998, + "step": 2064 + }, + { + "epoch": 5.46, + "learning_rate": 2.2989276139410186e-05, + "loss": 0.0037, + "step": 2065 + }, + { + "epoch": 5.47, + "learning_rate": 2.297587131367292e-05, + "loss": 0.0029, + "step": 2066 + }, + { + "epoch": 5.47, + "learning_rate": 2.2962466487935656e-05, + "loss": 0.043, + "step": 2067 + }, + { + "epoch": 5.47, + "learning_rate": 2.294906166219839e-05, + "loss": 0.0451, + "step": 2068 + }, + { + "epoch": 5.47, + "learning_rate": 2.2935656836461127e-05, + "loss": 0.0009, + "step": 2069 + }, + { + "epoch": 5.48, + "learning_rate": 2.2922252010723862e-05, + "loss": 0.002, + "step": 2070 + }, + { + "epoch": 5.48, + "learning_rate": 2.2908847184986597e-05, + "loss": 0.2744, + "step": 2071 + }, + { + "epoch": 5.48, + "learning_rate": 2.2895442359249332e-05, + "loss": 0.0146, + "step": 2072 + }, + { + "epoch": 5.48, + "learning_rate": 2.2882037533512067e-05, + "loss": 0.0011, + "step": 2073 + }, + { + "epoch": 5.49, + "learning_rate": 2.2868632707774802e-05, + "loss": 0.0421, + "step": 2074 + }, + { + "epoch": 5.49, + "learning_rate": 2.2855227882037537e-05, + "loss": 0.1518, + "step": 2075 + }, + { + "epoch": 5.49, + "learning_rate": 2.284182305630027e-05, + "loss": 0.0072, + "step": 2076 + }, + { + "epoch": 5.49, + "learning_rate": 2.2828418230563004e-05, + "loss": 0.2781, + "step": 2077 + }, + { + "epoch": 5.5, + "learning_rate": 2.281501340482574e-05, + "loss": 0.004, + "step": 2078 + }, + { + "epoch": 5.5, + "learning_rate": 2.2801608579088475e-05, + "loss": 0.1029, + "step": 2079 + }, + { + "epoch": 5.5, + "learning_rate": 2.278820375335121e-05, + "loss": 0.0526, + "step": 2080 + }, + { + "epoch": 5.51, + "learning_rate": 2.277479892761394e-05, + "loss": 0.0011, + "step": 2081 + }, + { + "epoch": 5.51, + "learning_rate": 2.2761394101876677e-05, + "loss": 0.0139, + "step": 2082 + }, + { + "epoch": 5.51, + "learning_rate": 2.274798927613941e-05, + "loss": 0.0509, + "step": 2083 + }, + { + "epoch": 5.51, + "learning_rate": 2.2734584450402147e-05, + "loss": 0.0042, + "step": 2084 + }, + { + "epoch": 5.52, + "learning_rate": 2.2721179624664882e-05, + "loss": 0.1964, + "step": 2085 + }, + { + "epoch": 5.52, + "learning_rate": 2.2707774798927614e-05, + "loss": 0.0083, + "step": 2086 + }, + { + "epoch": 5.52, + "learning_rate": 2.269436997319035e-05, + "loss": 0.101, + "step": 2087 + }, + { + "epoch": 5.52, + "learning_rate": 2.2680965147453084e-05, + "loss": 0.0094, + "step": 2088 + }, + { + "epoch": 5.53, + "learning_rate": 2.266756032171582e-05, + "loss": 0.1433, + "step": 2089 + }, + { + "epoch": 5.53, + "learning_rate": 2.2654155495978554e-05, + "loss": 0.0091, + "step": 2090 + }, + { + "epoch": 5.53, + "learning_rate": 2.2640750670241286e-05, + "loss": 0.0018, + "step": 2091 + }, + { + "epoch": 5.53, + "learning_rate": 2.262734584450402e-05, + "loss": 0.001, + "step": 2092 + }, + { + "epoch": 5.54, + "learning_rate": 2.2613941018766756e-05, + "loss": 0.3507, + "step": 2093 + }, + { + "epoch": 5.54, + "learning_rate": 2.260053619302949e-05, + "loss": 0.0527, + "step": 2094 + }, + { + "epoch": 5.54, + "learning_rate": 2.2587131367292226e-05, + "loss": 0.0015, + "step": 2095 + }, + { + "epoch": 5.54, + "learning_rate": 2.2573726541554958e-05, + "loss": 0.0195, + "step": 2096 + }, + { + "epoch": 5.55, + "learning_rate": 2.2560321715817693e-05, + "loss": 0.0007, + "step": 2097 + }, + { + "epoch": 5.55, + "learning_rate": 2.254691689008043e-05, + "loss": 0.001, + "step": 2098 + }, + { + "epoch": 5.55, + "learning_rate": 2.2533512064343164e-05, + "loss": 0.2777, + "step": 2099 + }, + { + "epoch": 5.56, + "learning_rate": 2.25201072386059e-05, + "loss": 0.0657, + "step": 2100 + }, + { + "epoch": 5.56, + "learning_rate": 2.2506702412868634e-05, + "loss": 0.159, + "step": 2101 + }, + { + "epoch": 5.56, + "learning_rate": 2.249329758713137e-05, + "loss": 0.0342, + "step": 2102 + }, + { + "epoch": 5.56, + "learning_rate": 2.2479892761394104e-05, + "loss": 0.3001, + "step": 2103 + }, + { + "epoch": 5.57, + "learning_rate": 2.246648793565684e-05, + "loss": 0.0028, + "step": 2104 + }, + { + "epoch": 5.57, + "learning_rate": 2.2453083109919574e-05, + "loss": 0.0191, + "step": 2105 + }, + { + "epoch": 5.57, + "learning_rate": 2.2439678284182306e-05, + "loss": 0.0012, + "step": 2106 + }, + { + "epoch": 5.57, + "learning_rate": 2.242627345844504e-05, + "loss": 0.2619, + "step": 2107 + }, + { + "epoch": 5.58, + "learning_rate": 2.2412868632707776e-05, + "loss": 0.001, + "step": 2108 + }, + { + "epoch": 5.58, + "learning_rate": 2.239946380697051e-05, + "loss": 0.094, + "step": 2109 + }, + { + "epoch": 5.58, + "learning_rate": 2.2386058981233247e-05, + "loss": 0.003, + "step": 2110 + }, + { + "epoch": 5.58, + "learning_rate": 2.237265415549598e-05, + "loss": 0.0528, + "step": 2111 + }, + { + "epoch": 5.59, + "learning_rate": 2.2359249329758714e-05, + "loss": 0.1252, + "step": 2112 + }, + { + "epoch": 5.59, + "learning_rate": 2.234584450402145e-05, + "loss": 0.0039, + "step": 2113 + }, + { + "epoch": 5.59, + "learning_rate": 2.2332439678284184e-05, + "loss": 0.0913, + "step": 2114 + }, + { + "epoch": 5.6, + "learning_rate": 2.231903485254692e-05, + "loss": 0.0023, + "step": 2115 + }, + { + "epoch": 5.6, + "learning_rate": 2.230563002680965e-05, + "loss": 0.0047, + "step": 2116 + }, + { + "epoch": 5.6, + "learning_rate": 2.2292225201072386e-05, + "loss": 0.0688, + "step": 2117 + }, + { + "epoch": 5.6, + "learning_rate": 2.227882037533512e-05, + "loss": 0.0013, + "step": 2118 + }, + { + "epoch": 5.61, + "learning_rate": 2.2265415549597856e-05, + "loss": 0.0012, + "step": 2119 + }, + { + "epoch": 5.61, + "learning_rate": 2.225201072386059e-05, + "loss": 0.0048, + "step": 2120 + }, + { + "epoch": 5.61, + "learning_rate": 2.2238605898123323e-05, + "loss": 0.0011, + "step": 2121 + }, + { + "epoch": 5.61, + "learning_rate": 2.2225201072386058e-05, + "loss": 0.0009, + "step": 2122 + }, + { + "epoch": 5.62, + "learning_rate": 2.2211796246648793e-05, + "loss": 0.0015, + "step": 2123 + }, + { + "epoch": 5.62, + "learning_rate": 2.2198391420911528e-05, + "loss": 0.0077, + "step": 2124 + }, + { + "epoch": 5.62, + "learning_rate": 2.2184986595174263e-05, + "loss": 0.0007, + "step": 2125 + }, + { + "epoch": 5.62, + "learning_rate": 2.2171581769437e-05, + "loss": 0.0007, + "step": 2126 + }, + { + "epoch": 5.63, + "learning_rate": 2.2158176943699734e-05, + "loss": 0.0506, + "step": 2127 + }, + { + "epoch": 5.63, + "learning_rate": 2.214477211796247e-05, + "loss": 0.0016, + "step": 2128 + }, + { + "epoch": 5.63, + "learning_rate": 2.2131367292225204e-05, + "loss": 0.0005, + "step": 2129 + }, + { + "epoch": 5.63, + "learning_rate": 2.211796246648794e-05, + "loss": 0.0049, + "step": 2130 + }, + { + "epoch": 5.64, + "learning_rate": 2.210455764075067e-05, + "loss": 0.0305, + "step": 2131 + }, + { + "epoch": 5.64, + "learning_rate": 2.2091152815013406e-05, + "loss": 0.0448, + "step": 2132 + }, + { + "epoch": 5.64, + "learning_rate": 2.207774798927614e-05, + "loss": 0.5391, + "step": 2133 + }, + { + "epoch": 5.65, + "learning_rate": 2.2064343163538876e-05, + "loss": 0.0005, + "step": 2134 + }, + { + "epoch": 5.65, + "learning_rate": 2.205093833780161e-05, + "loss": 0.0141, + "step": 2135 + }, + { + "epoch": 5.65, + "learning_rate": 2.2037533512064346e-05, + "loss": 0.3613, + "step": 2136 + }, + { + "epoch": 5.65, + "learning_rate": 2.2024128686327078e-05, + "loss": 0.0013, + "step": 2137 + }, + { + "epoch": 5.66, + "learning_rate": 2.2010723860589813e-05, + "loss": 0.002, + "step": 2138 + }, + { + "epoch": 5.66, + "learning_rate": 2.199731903485255e-05, + "loss": 0.0207, + "step": 2139 + }, + { + "epoch": 5.66, + "learning_rate": 2.1983914209115284e-05, + "loss": 0.0004, + "step": 2140 + }, + { + "epoch": 5.66, + "learning_rate": 2.197050938337802e-05, + "loss": 0.0022, + "step": 2141 + }, + { + "epoch": 5.67, + "learning_rate": 2.195710455764075e-05, + "loss": 0.5076, + "step": 2142 + }, + { + "epoch": 5.67, + "learning_rate": 2.1943699731903486e-05, + "loss": 0.0016, + "step": 2143 + }, + { + "epoch": 5.67, + "learning_rate": 2.193029490616622e-05, + "loss": 0.0014, + "step": 2144 + }, + { + "epoch": 5.67, + "learning_rate": 2.1916890080428956e-05, + "loss": 0.0101, + "step": 2145 + }, + { + "epoch": 5.68, + "learning_rate": 2.190348525469169e-05, + "loss": 0.0048, + "step": 2146 + }, + { + "epoch": 5.68, + "learning_rate": 2.1890080428954423e-05, + "loss": 0.001, + "step": 2147 + }, + { + "epoch": 5.68, + "learning_rate": 2.1876675603217158e-05, + "loss": 0.0004, + "step": 2148 + }, + { + "epoch": 5.69, + "learning_rate": 2.1863270777479893e-05, + "loss": 0.2627, + "step": 2149 + }, + { + "epoch": 5.69, + "learning_rate": 2.1849865951742628e-05, + "loss": 0.0013, + "step": 2150 + }, + { + "epoch": 5.69, + "learning_rate": 2.1836461126005363e-05, + "loss": 0.0074, + "step": 2151 + }, + { + "epoch": 5.69, + "learning_rate": 2.1823056300268095e-05, + "loss": 0.0238, + "step": 2152 + }, + { + "epoch": 5.7, + "learning_rate": 2.180965147453083e-05, + "loss": 0.0013, + "step": 2153 + }, + { + "epoch": 5.7, + "learning_rate": 2.1796246648793565e-05, + "loss": 0.0005, + "step": 2154 + }, + { + "epoch": 5.7, + "learning_rate": 2.17828418230563e-05, + "loss": 0.0006, + "step": 2155 + }, + { + "epoch": 5.7, + "learning_rate": 2.1769436997319036e-05, + "loss": 0.0442, + "step": 2156 + }, + { + "epoch": 5.71, + "learning_rate": 2.175603217158177e-05, + "loss": 0.0007, + "step": 2157 + }, + { + "epoch": 5.71, + "learning_rate": 2.1742627345844506e-05, + "loss": 0.0032, + "step": 2158 + }, + { + "epoch": 5.71, + "learning_rate": 2.172922252010724e-05, + "loss": 0.005, + "step": 2159 + }, + { + "epoch": 5.71, + "learning_rate": 2.1715817694369976e-05, + "loss": 0.0005, + "step": 2160 + }, + { + "epoch": 5.72, + "learning_rate": 2.170241286863271e-05, + "loss": 0.0005, + "step": 2161 + }, + { + "epoch": 5.72, + "learning_rate": 2.1689008042895443e-05, + "loss": 0.2023, + "step": 2162 + }, + { + "epoch": 5.72, + "learning_rate": 2.1675603217158178e-05, + "loss": 0.3146, + "step": 2163 + }, + { + "epoch": 5.72, + "learning_rate": 2.1662198391420913e-05, + "loss": 0.0035, + "step": 2164 + }, + { + "epoch": 5.73, + "learning_rate": 2.164879356568365e-05, + "loss": 0.141, + "step": 2165 + }, + { + "epoch": 5.73, + "learning_rate": 2.1635388739946383e-05, + "loss": 0.0005, + "step": 2166 + }, + { + "epoch": 5.73, + "learning_rate": 2.1621983914209115e-05, + "loss": 0.0013, + "step": 2167 + }, + { + "epoch": 5.74, + "learning_rate": 2.160857908847185e-05, + "loss": 0.0484, + "step": 2168 + }, + { + "epoch": 5.74, + "learning_rate": 2.1595174262734585e-05, + "loss": 0.0109, + "step": 2169 + }, + { + "epoch": 5.74, + "learning_rate": 2.158176943699732e-05, + "loss": 0.3307, + "step": 2170 + }, + { + "epoch": 5.74, + "learning_rate": 2.1568364611260056e-05, + "loss": 0.0013, + "step": 2171 + }, + { + "epoch": 5.75, + "learning_rate": 2.1554959785522787e-05, + "loss": 0.0005, + "step": 2172 + }, + { + "epoch": 5.75, + "learning_rate": 2.1541554959785523e-05, + "loss": 0.0004, + "step": 2173 + }, + { + "epoch": 5.75, + "learning_rate": 2.1528150134048258e-05, + "loss": 0.0744, + "step": 2174 + }, + { + "epoch": 5.75, + "learning_rate": 2.1514745308310993e-05, + "loss": 0.0008, + "step": 2175 + }, + { + "epoch": 5.76, + "learning_rate": 2.1501340482573728e-05, + "loss": 0.0013, + "step": 2176 + }, + { + "epoch": 5.76, + "learning_rate": 2.148793565683646e-05, + "loss": 0.0006, + "step": 2177 + }, + { + "epoch": 5.76, + "learning_rate": 2.1474530831099195e-05, + "loss": 0.055, + "step": 2178 + }, + { + "epoch": 5.76, + "learning_rate": 2.146112600536193e-05, + "loss": 0.0013, + "step": 2179 + }, + { + "epoch": 5.77, + "learning_rate": 2.1447721179624665e-05, + "loss": 0.0022, + "step": 2180 + }, + { + "epoch": 5.77, + "learning_rate": 2.14343163538874e-05, + "loss": 0.0009, + "step": 2181 + }, + { + "epoch": 5.77, + "learning_rate": 2.1420911528150135e-05, + "loss": 0.0011, + "step": 2182 + }, + { + "epoch": 5.78, + "learning_rate": 2.140750670241287e-05, + "loss": 0.0676, + "step": 2183 + }, + { + "epoch": 5.78, + "learning_rate": 2.1394101876675606e-05, + "loss": 0.4583, + "step": 2184 + }, + { + "epoch": 5.78, + "learning_rate": 2.138069705093834e-05, + "loss": 0.0009, + "step": 2185 + }, + { + "epoch": 5.78, + "learning_rate": 2.1367292225201076e-05, + "loss": 0.002, + "step": 2186 + }, + { + "epoch": 5.79, + "learning_rate": 2.1353887399463808e-05, + "loss": 0.7625, + "step": 2187 + }, + { + "epoch": 5.79, + "learning_rate": 2.1340482573726543e-05, + "loss": 0.4912, + "step": 2188 + }, + { + "epoch": 5.79, + "learning_rate": 2.1327077747989278e-05, + "loss": 0.0283, + "step": 2189 + }, + { + "epoch": 5.79, + "learning_rate": 2.1313672922252013e-05, + "loss": 0.0011, + "step": 2190 + }, + { + "epoch": 5.8, + "learning_rate": 2.1300268096514748e-05, + "loss": 0.0013, + "step": 2191 + }, + { + "epoch": 5.8, + "learning_rate": 2.128686327077748e-05, + "loss": 0.3384, + "step": 2192 + }, + { + "epoch": 5.8, + "learning_rate": 2.1273458445040215e-05, + "loss": 0.4533, + "step": 2193 + }, + { + "epoch": 5.8, + "learning_rate": 2.126005361930295e-05, + "loss": 0.0039, + "step": 2194 + }, + { + "epoch": 5.81, + "learning_rate": 2.1246648793565685e-05, + "loss": 0.0038, + "step": 2195 + }, + { + "epoch": 5.81, + "learning_rate": 2.123324396782842e-05, + "loss": 0.0318, + "step": 2196 + }, + { + "epoch": 5.81, + "learning_rate": 2.1219839142091156e-05, + "loss": 0.0045, + "step": 2197 + }, + { + "epoch": 5.81, + "learning_rate": 2.1206434316353887e-05, + "loss": 0.3134, + "step": 2198 + }, + { + "epoch": 5.82, + "learning_rate": 2.1193029490616622e-05, + "loss": 0.0011, + "step": 2199 + }, + { + "epoch": 5.82, + "learning_rate": 2.1179624664879358e-05, + "loss": 0.0008, + "step": 2200 + }, + { + "epoch": 5.82, + "learning_rate": 2.1166219839142093e-05, + "loss": 0.0009, + "step": 2201 + }, + { + "epoch": 5.83, + "learning_rate": 2.1152815013404828e-05, + "loss": 0.0083, + "step": 2202 + }, + { + "epoch": 5.83, + "learning_rate": 2.113941018766756e-05, + "loss": 0.0196, + "step": 2203 + }, + { + "epoch": 5.83, + "learning_rate": 2.1126005361930295e-05, + "loss": 0.0063, + "step": 2204 + }, + { + "epoch": 5.83, + "learning_rate": 2.111260053619303e-05, + "loss": 0.0064, + "step": 2205 + }, + { + "epoch": 5.84, + "learning_rate": 2.1099195710455765e-05, + "loss": 0.0143, + "step": 2206 + }, + { + "epoch": 5.84, + "learning_rate": 2.10857908847185e-05, + "loss": 0.0012, + "step": 2207 + }, + { + "epoch": 5.84, + "learning_rate": 2.1072386058981232e-05, + "loss": 0.0033, + "step": 2208 + }, + { + "epoch": 5.84, + "learning_rate": 2.1058981233243967e-05, + "loss": 0.0014, + "step": 2209 + }, + { + "epoch": 5.85, + "learning_rate": 2.1045576407506702e-05, + "loss": 0.0219, + "step": 2210 + }, + { + "epoch": 5.85, + "learning_rate": 2.1032171581769437e-05, + "loss": 0.3033, + "step": 2211 + }, + { + "epoch": 5.85, + "learning_rate": 2.1018766756032172e-05, + "loss": 0.0711, + "step": 2212 + }, + { + "epoch": 5.85, + "learning_rate": 2.1005361930294907e-05, + "loss": 0.0051, + "step": 2213 + }, + { + "epoch": 5.86, + "learning_rate": 2.0991957104557643e-05, + "loss": 0.026, + "step": 2214 + }, + { + "epoch": 5.86, + "learning_rate": 2.0978552278820378e-05, + "loss": 0.0024, + "step": 2215 + }, + { + "epoch": 5.86, + "learning_rate": 2.0965147453083113e-05, + "loss": 0.3622, + "step": 2216 + }, + { + "epoch": 5.87, + "learning_rate": 2.0951742627345848e-05, + "loss": 0.0009, + "step": 2217 + }, + { + "epoch": 5.87, + "learning_rate": 2.093833780160858e-05, + "loss": 0.0019, + "step": 2218 + }, + { + "epoch": 5.87, + "learning_rate": 2.0924932975871315e-05, + "loss": 0.3412, + "step": 2219 + }, + { + "epoch": 5.87, + "learning_rate": 2.091152815013405e-05, + "loss": 0.0197, + "step": 2220 + }, + { + "epoch": 5.88, + "learning_rate": 2.0898123324396785e-05, + "loss": 0.4229, + "step": 2221 + }, + { + "epoch": 5.88, + "learning_rate": 2.088471849865952e-05, + "loss": 0.0014, + "step": 2222 + }, + { + "epoch": 5.88, + "learning_rate": 2.0871313672922252e-05, + "loss": 0.0183, + "step": 2223 + }, + { + "epoch": 5.88, + "learning_rate": 2.0857908847184987e-05, + "loss": 0.2005, + "step": 2224 + }, + { + "epoch": 5.89, + "learning_rate": 2.0844504021447722e-05, + "loss": 0.0122, + "step": 2225 + }, + { + "epoch": 5.89, + "learning_rate": 2.0831099195710457e-05, + "loss": 0.1178, + "step": 2226 + }, + { + "epoch": 5.89, + "learning_rate": 2.0817694369973193e-05, + "loss": 0.0105, + "step": 2227 + }, + { + "epoch": 5.89, + "learning_rate": 2.0804289544235924e-05, + "loss": 0.0328, + "step": 2228 + }, + { + "epoch": 5.9, + "learning_rate": 2.079088471849866e-05, + "loss": 0.0087, + "step": 2229 + }, + { + "epoch": 5.9, + "learning_rate": 2.0777479892761395e-05, + "loss": 0.0288, + "step": 2230 + }, + { + "epoch": 5.9, + "learning_rate": 2.076407506702413e-05, + "loss": 0.0017, + "step": 2231 + }, + { + "epoch": 5.9, + "learning_rate": 2.0750670241286865e-05, + "loss": 0.002, + "step": 2232 + }, + { + "epoch": 5.91, + "learning_rate": 2.0737265415549597e-05, + "loss": 0.0024, + "step": 2233 + }, + { + "epoch": 5.91, + "learning_rate": 2.072386058981233e-05, + "loss": 0.0008, + "step": 2234 + }, + { + "epoch": 5.91, + "learning_rate": 2.0710455764075067e-05, + "loss": 0.002, + "step": 2235 + }, + { + "epoch": 5.92, + "learning_rate": 2.0697050938337802e-05, + "loss": 0.0052, + "step": 2236 + }, + { + "epoch": 5.92, + "learning_rate": 2.0683646112600537e-05, + "loss": 0.0058, + "step": 2237 + }, + { + "epoch": 5.92, + "learning_rate": 2.0670241286863272e-05, + "loss": 0.0013, + "step": 2238 + }, + { + "epoch": 5.92, + "learning_rate": 2.0656836461126007e-05, + "loss": 0.0158, + "step": 2239 + }, + { + "epoch": 5.93, + "learning_rate": 2.0643431635388742e-05, + "loss": 0.229, + "step": 2240 + }, + { + "epoch": 5.93, + "learning_rate": 2.0630026809651478e-05, + "loss": 0.1844, + "step": 2241 + }, + { + "epoch": 5.93, + "learning_rate": 2.0616621983914213e-05, + "loss": 0.2905, + "step": 2242 + }, + { + "epoch": 5.93, + "learning_rate": 2.0603217158176944e-05, + "loss": 0.0059, + "step": 2243 + }, + { + "epoch": 5.94, + "learning_rate": 2.058981233243968e-05, + "loss": 0.0007, + "step": 2244 + }, + { + "epoch": 5.94, + "learning_rate": 2.0576407506702415e-05, + "loss": 0.1638, + "step": 2245 + }, + { + "epoch": 5.94, + "learning_rate": 2.056300268096515e-05, + "loss": 0.1195, + "step": 2246 + }, + { + "epoch": 5.94, + "learning_rate": 2.0549597855227885e-05, + "loss": 0.0015, + "step": 2247 + }, + { + "epoch": 5.95, + "learning_rate": 2.0536193029490617e-05, + "loss": 0.0013, + "step": 2248 + }, + { + "epoch": 5.95, + "learning_rate": 2.0522788203753352e-05, + "loss": 0.5152, + "step": 2249 + }, + { + "epoch": 5.95, + "learning_rate": 2.0509383378016087e-05, + "loss": 0.0315, + "step": 2250 + }, + { + "epoch": 5.96, + "learning_rate": 2.0495978552278822e-05, + "loss": 0.1213, + "step": 2251 + }, + { + "epoch": 5.96, + "learning_rate": 2.0482573726541557e-05, + "loss": 0.0006, + "step": 2252 + }, + { + "epoch": 5.96, + "learning_rate": 2.046916890080429e-05, + "loss": 0.0011, + "step": 2253 + }, + { + "epoch": 5.96, + "learning_rate": 2.0455764075067024e-05, + "loss": 0.2546, + "step": 2254 + }, + { + "epoch": 5.97, + "learning_rate": 2.044235924932976e-05, + "loss": 0.1259, + "step": 2255 + }, + { + "epoch": 5.97, + "learning_rate": 2.0428954423592494e-05, + "loss": 0.0179, + "step": 2256 + }, + { + "epoch": 5.97, + "learning_rate": 2.041554959785523e-05, + "loss": 0.0257, + "step": 2257 + }, + { + "epoch": 5.97, + "learning_rate": 2.0402144772117965e-05, + "loss": 0.0092, + "step": 2258 + }, + { + "epoch": 5.98, + "learning_rate": 2.0388739946380696e-05, + "loss": 0.3231, + "step": 2259 + }, + { + "epoch": 5.98, + "learning_rate": 2.037533512064343e-05, + "loss": 0.0084, + "step": 2260 + }, + { + "epoch": 5.98, + "learning_rate": 2.0361930294906167e-05, + "loss": 0.0517, + "step": 2261 + }, + { + "epoch": 5.98, + "learning_rate": 2.0348525469168902e-05, + "loss": 0.0012, + "step": 2262 + }, + { + "epoch": 5.99, + "learning_rate": 2.0335120643431637e-05, + "loss": 0.0045, + "step": 2263 + }, + { + "epoch": 5.99, + "learning_rate": 2.032171581769437e-05, + "loss": 0.0012, + "step": 2264 + }, + { + "epoch": 5.99, + "learning_rate": 2.0308310991957104e-05, + "loss": 0.0032, + "step": 2265 + }, + { + "epoch": 5.99, + "learning_rate": 2.029490616621984e-05, + "loss": 0.0038, + "step": 2266 + }, + { + "epoch": 6.0, + "learning_rate": 2.0281501340482574e-05, + "loss": 0.2731, + "step": 2267 + }, + { + "epoch": 6.0, + "learning_rate": 2.026809651474531e-05, + "loss": 0.3122, + "step": 2268 + }, + { + "epoch": 6.0, + "eval_f1": 0.7762762762762763, + "eval_loss": 1.0977023839950562, + "eval_runtime": 1.8624, + "eval_samples_per_second": 812.383, + "eval_steps_per_second": 51.009, + "step": 2268 + }, + { + "epoch": 6.0, + "learning_rate": 2.0254691689008044e-05, + "loss": 0.0035, + "step": 2269 + }, + { + "epoch": 6.01, + "learning_rate": 2.024128686327078e-05, + "loss": 0.0709, + "step": 2270 + }, + { + "epoch": 6.01, + "learning_rate": 2.0227882037533515e-05, + "loss": 0.1548, + "step": 2271 + }, + { + "epoch": 6.01, + "learning_rate": 2.021447721179625e-05, + "loss": 0.0035, + "step": 2272 + }, + { + "epoch": 6.01, + "learning_rate": 2.0201072386058985e-05, + "loss": 0.0009, + "step": 2273 + }, + { + "epoch": 6.02, + "learning_rate": 2.0187667560321717e-05, + "loss": 0.0061, + "step": 2274 + }, + { + "epoch": 6.02, + "learning_rate": 2.0174262734584452e-05, + "loss": 0.3586, + "step": 2275 + }, + { + "epoch": 6.02, + "learning_rate": 2.0160857908847187e-05, + "loss": 0.0019, + "step": 2276 + }, + { + "epoch": 6.02, + "learning_rate": 2.0147453083109922e-05, + "loss": 0.0077, + "step": 2277 + }, + { + "epoch": 6.03, + "learning_rate": 2.0134048257372657e-05, + "loss": 0.0022, + "step": 2278 + }, + { + "epoch": 6.03, + "learning_rate": 2.012064343163539e-05, + "loss": 0.0021, + "step": 2279 + }, + { + "epoch": 6.03, + "learning_rate": 2.0107238605898124e-05, + "loss": 0.0022, + "step": 2280 + }, + { + "epoch": 6.03, + "learning_rate": 2.009383378016086e-05, + "loss": 0.1387, + "step": 2281 + }, + { + "epoch": 6.04, + "learning_rate": 2.0080428954423594e-05, + "loss": 0.0034, + "step": 2282 + }, + { + "epoch": 6.04, + "learning_rate": 2.006702412868633e-05, + "loss": 0.0128, + "step": 2283 + }, + { + "epoch": 6.04, + "learning_rate": 2.005361930294906e-05, + "loss": 0.0201, + "step": 2284 + }, + { + "epoch": 6.04, + "learning_rate": 2.0040214477211796e-05, + "loss": 0.0072, + "step": 2285 + }, + { + "epoch": 6.05, + "learning_rate": 2.002680965147453e-05, + "loss": 0.0031, + "step": 2286 + }, + { + "epoch": 6.05, + "learning_rate": 2.0013404825737267e-05, + "loss": 0.0273, + "step": 2287 + }, + { + "epoch": 6.05, + "learning_rate": 2e-05, + "loss": 0.0679, + "step": 2288 + }, + { + "epoch": 6.06, + "learning_rate": 1.9986595174262733e-05, + "loss": 0.0012, + "step": 2289 + }, + { + "epoch": 6.06, + "learning_rate": 1.997319034852547e-05, + "loss": 0.0045, + "step": 2290 + }, + { + "epoch": 6.06, + "learning_rate": 1.9959785522788204e-05, + "loss": 0.0289, + "step": 2291 + }, + { + "epoch": 6.06, + "learning_rate": 1.994638069705094e-05, + "loss": 0.1421, + "step": 2292 + }, + { + "epoch": 6.07, + "learning_rate": 1.9932975871313674e-05, + "loss": 0.0007, + "step": 2293 + }, + { + "epoch": 6.07, + "learning_rate": 1.9919571045576406e-05, + "loss": 0.0037, + "step": 2294 + }, + { + "epoch": 6.07, + "learning_rate": 1.990616621983914e-05, + "loss": 0.1566, + "step": 2295 + }, + { + "epoch": 6.07, + "learning_rate": 1.9892761394101876e-05, + "loss": 0.0008, + "step": 2296 + }, + { + "epoch": 6.08, + "learning_rate": 1.987935656836461e-05, + "loss": 0.0008, + "step": 2297 + }, + { + "epoch": 6.08, + "learning_rate": 1.9865951742627346e-05, + "loss": 0.1319, + "step": 2298 + }, + { + "epoch": 6.08, + "learning_rate": 1.985254691689008e-05, + "loss": 0.0007, + "step": 2299 + }, + { + "epoch": 6.08, + "learning_rate": 1.9839142091152816e-05, + "loss": 0.0048, + "step": 2300 + }, + { + "epoch": 6.09, + "learning_rate": 1.982573726541555e-05, + "loss": 0.223, + "step": 2301 + }, + { + "epoch": 6.09, + "learning_rate": 1.9812332439678287e-05, + "loss": 0.2188, + "step": 2302 + }, + { + "epoch": 6.09, + "learning_rate": 1.9798927613941022e-05, + "loss": 0.0209, + "step": 2303 + }, + { + "epoch": 6.1, + "learning_rate": 1.9785522788203754e-05, + "loss": 0.001, + "step": 2304 + }, + { + "epoch": 6.1, + "learning_rate": 1.977211796246649e-05, + "loss": 0.0006, + "step": 2305 + }, + { + "epoch": 6.1, + "learning_rate": 1.9758713136729224e-05, + "loss": 0.0011, + "step": 2306 + }, + { + "epoch": 6.1, + "learning_rate": 1.974530831099196e-05, + "loss": 0.0008, + "step": 2307 + }, + { + "epoch": 6.11, + "learning_rate": 1.9731903485254694e-05, + "loss": 0.0015, + "step": 2308 + }, + { + "epoch": 6.11, + "learning_rate": 1.9718498659517426e-05, + "loss": 0.0024, + "step": 2309 + }, + { + "epoch": 6.11, + "learning_rate": 1.970509383378016e-05, + "loss": 0.0006, + "step": 2310 + }, + { + "epoch": 6.11, + "learning_rate": 1.9691689008042896e-05, + "loss": 0.0007, + "step": 2311 + }, + { + "epoch": 6.12, + "learning_rate": 1.967828418230563e-05, + "loss": 0.0026, + "step": 2312 + }, + { + "epoch": 6.12, + "learning_rate": 1.9664879356568366e-05, + "loss": 0.0019, + "step": 2313 + }, + { + "epoch": 6.12, + "learning_rate": 1.9651474530831098e-05, + "loss": 0.0055, + "step": 2314 + }, + { + "epoch": 6.12, + "learning_rate": 1.9638069705093833e-05, + "loss": 0.0023, + "step": 2315 + }, + { + "epoch": 6.13, + "learning_rate": 1.962466487935657e-05, + "loss": 0.0011, + "step": 2316 + }, + { + "epoch": 6.13, + "learning_rate": 1.9611260053619303e-05, + "loss": 0.0407, + "step": 2317 + }, + { + "epoch": 6.13, + "learning_rate": 1.959785522788204e-05, + "loss": 0.046, + "step": 2318 + }, + { + "epoch": 6.13, + "learning_rate": 1.958445040214477e-05, + "loss": 0.0062, + "step": 2319 + }, + { + "epoch": 6.14, + "learning_rate": 1.9571045576407505e-05, + "loss": 0.0007, + "step": 2320 + }, + { + "epoch": 6.14, + "learning_rate": 1.955764075067024e-05, + "loss": 0.3377, + "step": 2321 + }, + { + "epoch": 6.14, + "learning_rate": 1.9544235924932976e-05, + "loss": 0.4294, + "step": 2322 + }, + { + "epoch": 6.15, + "learning_rate": 1.953083109919571e-05, + "loss": 0.0016, + "step": 2323 + }, + { + "epoch": 6.15, + "learning_rate": 1.9517426273458446e-05, + "loss": 0.0283, + "step": 2324 + }, + { + "epoch": 6.15, + "learning_rate": 1.950402144772118e-05, + "loss": 0.0005, + "step": 2325 + }, + { + "epoch": 6.15, + "learning_rate": 1.9490616621983916e-05, + "loss": 0.0011, + "step": 2326 + }, + { + "epoch": 6.16, + "learning_rate": 1.947721179624665e-05, + "loss": 0.0237, + "step": 2327 + }, + { + "epoch": 6.16, + "learning_rate": 1.9463806970509387e-05, + "loss": 0.0581, + "step": 2328 + }, + { + "epoch": 6.16, + "learning_rate": 1.945040214477212e-05, + "loss": 0.0905, + "step": 2329 + }, + { + "epoch": 6.16, + "learning_rate": 1.9436997319034853e-05, + "loss": 0.0012, + "step": 2330 + }, + { + "epoch": 6.17, + "learning_rate": 1.942359249329759e-05, + "loss": 0.0053, + "step": 2331 + }, + { + "epoch": 6.17, + "learning_rate": 1.9410187667560324e-05, + "loss": 0.0225, + "step": 2332 + }, + { + "epoch": 6.17, + "learning_rate": 1.939678284182306e-05, + "loss": 0.0374, + "step": 2333 + }, + { + "epoch": 6.17, + "learning_rate": 1.9383378016085794e-05, + "loss": 0.0006, + "step": 2334 + }, + { + "epoch": 6.18, + "learning_rate": 1.9369973190348526e-05, + "loss": 0.0008, + "step": 2335 + }, + { + "epoch": 6.18, + "learning_rate": 1.935656836461126e-05, + "loss": 0.0006, + "step": 2336 + }, + { + "epoch": 6.18, + "learning_rate": 1.9343163538873996e-05, + "loss": 0.0011, + "step": 2337 + }, + { + "epoch": 6.19, + "learning_rate": 1.932975871313673e-05, + "loss": 0.0014, + "step": 2338 + }, + { + "epoch": 6.19, + "learning_rate": 1.9316353887399466e-05, + "loss": 0.0006, + "step": 2339 + }, + { + "epoch": 6.19, + "learning_rate": 1.9302949061662198e-05, + "loss": 0.0504, + "step": 2340 + }, + { + "epoch": 6.19, + "learning_rate": 1.9289544235924933e-05, + "loss": 0.005, + "step": 2341 + }, + { + "epoch": 6.2, + "learning_rate": 1.9276139410187668e-05, + "loss": 0.2673, + "step": 2342 + }, + { + "epoch": 6.2, + "learning_rate": 1.9262734584450403e-05, + "loss": 0.2173, + "step": 2343 + }, + { + "epoch": 6.2, + "learning_rate": 1.924932975871314e-05, + "loss": 0.0235, + "step": 2344 + }, + { + "epoch": 6.2, + "learning_rate": 1.923592493297587e-05, + "loss": 0.0092, + "step": 2345 + }, + { + "epoch": 6.21, + "learning_rate": 1.9222520107238605e-05, + "loss": 0.0088, + "step": 2346 + }, + { + "epoch": 6.21, + "learning_rate": 1.920911528150134e-05, + "loss": 0.0506, + "step": 2347 + }, + { + "epoch": 6.21, + "learning_rate": 1.9195710455764076e-05, + "loss": 0.0033, + "step": 2348 + }, + { + "epoch": 6.21, + "learning_rate": 1.918230563002681e-05, + "loss": 0.0006, + "step": 2349 + }, + { + "epoch": 6.22, + "learning_rate": 1.9168900804289542e-05, + "loss": 0.0021, + "step": 2350 + }, + { + "epoch": 6.22, + "learning_rate": 1.9155495978552278e-05, + "loss": 0.0006, + "step": 2351 + }, + { + "epoch": 6.22, + "learning_rate": 1.9142091152815013e-05, + "loss": 0.0041, + "step": 2352 + }, + { + "epoch": 6.22, + "learning_rate": 1.9128686327077748e-05, + "loss": 0.0209, + "step": 2353 + }, + { + "epoch": 6.23, + "learning_rate": 1.9115281501340483e-05, + "loss": 0.0145, + "step": 2354 + }, + { + "epoch": 6.23, + "learning_rate": 1.9101876675603218e-05, + "loss": 0.0303, + "step": 2355 + }, + { + "epoch": 6.23, + "learning_rate": 1.9088471849865953e-05, + "loss": 0.0951, + "step": 2356 + }, + { + "epoch": 6.24, + "learning_rate": 1.907506702412869e-05, + "loss": 0.042, + "step": 2357 + }, + { + "epoch": 6.24, + "learning_rate": 1.9061662198391424e-05, + "loss": 0.0009, + "step": 2358 + }, + { + "epoch": 6.24, + "learning_rate": 1.904825737265416e-05, + "loss": 0.0006, + "step": 2359 + }, + { + "epoch": 6.24, + "learning_rate": 1.903485254691689e-05, + "loss": 0.0057, + "step": 2360 + }, + { + "epoch": 6.25, + "learning_rate": 1.9021447721179626e-05, + "loss": 0.0578, + "step": 2361 + }, + { + "epoch": 6.25, + "learning_rate": 1.900804289544236e-05, + "loss": 0.3295, + "step": 2362 + }, + { + "epoch": 6.25, + "learning_rate": 1.8994638069705096e-05, + "loss": 0.0005, + "step": 2363 + }, + { + "epoch": 6.25, + "learning_rate": 1.898123324396783e-05, + "loss": 0.0009, + "step": 2364 + }, + { + "epoch": 6.26, + "learning_rate": 1.8967828418230563e-05, + "loss": 0.0036, + "step": 2365 + }, + { + "epoch": 6.26, + "learning_rate": 1.8954423592493298e-05, + "loss": 0.0029, + "step": 2366 + }, + { + "epoch": 6.26, + "learning_rate": 1.8941018766756033e-05, + "loss": 0.1468, + "step": 2367 + }, + { + "epoch": 6.26, + "learning_rate": 1.8927613941018768e-05, + "loss": 0.3163, + "step": 2368 + }, + { + "epoch": 6.27, + "learning_rate": 1.8914209115281503e-05, + "loss": 0.169, + "step": 2369 + }, + { + "epoch": 6.27, + "learning_rate": 1.8900804289544235e-05, + "loss": 0.0493, + "step": 2370 + }, + { + "epoch": 6.27, + "learning_rate": 1.888739946380697e-05, + "loss": 0.0003, + "step": 2371 + }, + { + "epoch": 6.28, + "learning_rate": 1.8873994638069705e-05, + "loss": 0.0015, + "step": 2372 + }, + { + "epoch": 6.28, + "learning_rate": 1.886058981233244e-05, + "loss": 0.258, + "step": 2373 + }, + { + "epoch": 6.28, + "learning_rate": 1.8847184986595175e-05, + "loss": 0.002, + "step": 2374 + }, + { + "epoch": 6.28, + "learning_rate": 1.8833780160857907e-05, + "loss": 0.0008, + "step": 2375 + }, + { + "epoch": 6.29, + "learning_rate": 1.8820375335120642e-05, + "loss": 0.0003, + "step": 2376 + }, + { + "epoch": 6.29, + "learning_rate": 1.8806970509383377e-05, + "loss": 0.0014, + "step": 2377 + }, + { + "epoch": 6.29, + "learning_rate": 1.8793565683646113e-05, + "loss": 0.0068, + "step": 2378 + }, + { + "epoch": 6.29, + "learning_rate": 1.8780160857908848e-05, + "loss": 0.39, + "step": 2379 + }, + { + "epoch": 6.3, + "learning_rate": 1.8766756032171583e-05, + "loss": 0.0046, + "step": 2380 + }, + { + "epoch": 6.3, + "learning_rate": 1.8753351206434318e-05, + "loss": 0.0008, + "step": 2381 + }, + { + "epoch": 6.3, + "learning_rate": 1.8739946380697053e-05, + "loss": 0.0007, + "step": 2382 + }, + { + "epoch": 6.3, + "learning_rate": 1.8726541554959788e-05, + "loss": 0.0013, + "step": 2383 + }, + { + "epoch": 6.31, + "learning_rate": 1.8713136729222523e-05, + "loss": 0.0055, + "step": 2384 + }, + { + "epoch": 6.31, + "learning_rate": 1.869973190348526e-05, + "loss": 0.0014, + "step": 2385 + }, + { + "epoch": 6.31, + "learning_rate": 1.868632707774799e-05, + "loss": 0.0039, + "step": 2386 + }, + { + "epoch": 6.31, + "learning_rate": 1.8672922252010725e-05, + "loss": 0.0995, + "step": 2387 + }, + { + "epoch": 6.32, + "learning_rate": 1.865951742627346e-05, + "loss": 0.0015, + "step": 2388 + }, + { + "epoch": 6.32, + "learning_rate": 1.8646112600536196e-05, + "loss": 0.083, + "step": 2389 + }, + { + "epoch": 6.32, + "learning_rate": 1.863270777479893e-05, + "loss": 0.0044, + "step": 2390 + }, + { + "epoch": 6.33, + "learning_rate": 1.8619302949061662e-05, + "loss": 0.0105, + "step": 2391 + }, + { + "epoch": 6.33, + "learning_rate": 1.8605898123324398e-05, + "loss": 0.0005, + "step": 2392 + }, + { + "epoch": 6.33, + "learning_rate": 1.8592493297587133e-05, + "loss": 0.0014, + "step": 2393 + }, + { + "epoch": 6.33, + "learning_rate": 1.8579088471849868e-05, + "loss": 0.0114, + "step": 2394 + }, + { + "epoch": 6.34, + "learning_rate": 1.8565683646112603e-05, + "loss": 0.0334, + "step": 2395 + }, + { + "epoch": 6.34, + "learning_rate": 1.8552278820375335e-05, + "loss": 0.006, + "step": 2396 + }, + { + "epoch": 6.34, + "learning_rate": 1.853887399463807e-05, + "loss": 0.3124, + "step": 2397 + }, + { + "epoch": 6.34, + "learning_rate": 1.8525469168900805e-05, + "loss": 0.2324, + "step": 2398 + }, + { + "epoch": 6.35, + "learning_rate": 1.851206434316354e-05, + "loss": 0.0889, + "step": 2399 + }, + { + "epoch": 6.35, + "learning_rate": 1.8498659517426275e-05, + "loss": 0.0705, + "step": 2400 + }, + { + "epoch": 6.35, + "learning_rate": 1.8485254691689007e-05, + "loss": 0.0012, + "step": 2401 + }, + { + "epoch": 6.35, + "learning_rate": 1.8471849865951742e-05, + "loss": 0.0033, + "step": 2402 + }, + { + "epoch": 6.36, + "learning_rate": 1.8458445040214477e-05, + "loss": 0.0021, + "step": 2403 + }, + { + "epoch": 6.36, + "learning_rate": 1.8445040214477212e-05, + "loss": 0.0005, + "step": 2404 + }, + { + "epoch": 6.36, + "learning_rate": 1.8431635388739948e-05, + "loss": 0.0011, + "step": 2405 + }, + { + "epoch": 6.37, + "learning_rate": 1.841823056300268e-05, + "loss": 0.0006, + "step": 2406 + }, + { + "epoch": 6.37, + "learning_rate": 1.8404825737265414e-05, + "loss": 0.0726, + "step": 2407 + }, + { + "epoch": 6.37, + "learning_rate": 1.839142091152815e-05, + "loss": 0.0009, + "step": 2408 + }, + { + "epoch": 6.37, + "learning_rate": 1.8378016085790885e-05, + "loss": 0.0007, + "step": 2409 + }, + { + "epoch": 6.38, + "learning_rate": 1.836461126005362e-05, + "loss": 0.0859, + "step": 2410 + }, + { + "epoch": 6.38, + "learning_rate": 1.8351206434316355e-05, + "loss": 0.0011, + "step": 2411 + }, + { + "epoch": 6.38, + "learning_rate": 1.833780160857909e-05, + "loss": 0.6542, + "step": 2412 + }, + { + "epoch": 6.38, + "learning_rate": 1.8324396782841825e-05, + "loss": 0.2733, + "step": 2413 + }, + { + "epoch": 6.39, + "learning_rate": 1.831099195710456e-05, + "loss": 0.2825, + "step": 2414 + }, + { + "epoch": 6.39, + "learning_rate": 1.8297587131367295e-05, + "loss": 0.0012, + "step": 2415 + }, + { + "epoch": 6.39, + "learning_rate": 1.8284182305630027e-05, + "loss": 0.1404, + "step": 2416 + }, + { + "epoch": 6.39, + "learning_rate": 1.8270777479892762e-05, + "loss": 0.0006, + "step": 2417 + }, + { + "epoch": 6.4, + "learning_rate": 1.8257372654155497e-05, + "loss": 0.0007, + "step": 2418 + }, + { + "epoch": 6.4, + "learning_rate": 1.8243967828418233e-05, + "loss": 0.1429, + "step": 2419 + }, + { + "epoch": 6.4, + "learning_rate": 1.8230563002680968e-05, + "loss": 0.0008, + "step": 2420 + }, + { + "epoch": 6.4, + "learning_rate": 1.82171581769437e-05, + "loss": 0.0062, + "step": 2421 + }, + { + "epoch": 6.41, + "learning_rate": 1.8203753351206435e-05, + "loss": 0.0071, + "step": 2422 + }, + { + "epoch": 6.41, + "learning_rate": 1.819034852546917e-05, + "loss": 0.0017, + "step": 2423 + }, + { + "epoch": 6.41, + "learning_rate": 1.8176943699731905e-05, + "loss": 0.084, + "step": 2424 + }, + { + "epoch": 6.42, + "learning_rate": 1.816353887399464e-05, + "loss": 0.0011, + "step": 2425 + }, + { + "epoch": 6.42, + "learning_rate": 1.8150134048257372e-05, + "loss": 0.0255, + "step": 2426 + }, + { + "epoch": 6.42, + "learning_rate": 1.8136729222520107e-05, + "loss": 0.0009, + "step": 2427 + }, + { + "epoch": 6.42, + "learning_rate": 1.8123324396782842e-05, + "loss": 0.3105, + "step": 2428 + }, + { + "epoch": 6.43, + "learning_rate": 1.8109919571045577e-05, + "loss": 0.0046, + "step": 2429 + }, + { + "epoch": 6.43, + "learning_rate": 1.8096514745308312e-05, + "loss": 0.0089, + "step": 2430 + }, + { + "epoch": 6.43, + "learning_rate": 1.8083109919571044e-05, + "loss": 0.1176, + "step": 2431 + }, + { + "epoch": 6.43, + "learning_rate": 1.806970509383378e-05, + "loss": 0.0235, + "step": 2432 + }, + { + "epoch": 6.44, + "learning_rate": 1.8056300268096514e-05, + "loss": 0.029, + "step": 2433 + }, + { + "epoch": 6.44, + "learning_rate": 1.804289544235925e-05, + "loss": 0.0013, + "step": 2434 + }, + { + "epoch": 6.44, + "learning_rate": 1.8029490616621985e-05, + "loss": 0.0075, + "step": 2435 + }, + { + "epoch": 6.44, + "learning_rate": 1.801608579088472e-05, + "loss": 0.1744, + "step": 2436 + }, + { + "epoch": 6.45, + "learning_rate": 1.8002680965147455e-05, + "loss": 0.0017, + "step": 2437 + }, + { + "epoch": 6.45, + "learning_rate": 1.798927613941019e-05, + "loss": 0.0188, + "step": 2438 + }, + { + "epoch": 6.45, + "learning_rate": 1.7975871313672925e-05, + "loss": 0.0232, + "step": 2439 + }, + { + "epoch": 6.46, + "learning_rate": 1.796246648793566e-05, + "loss": 0.1459, + "step": 2440 + }, + { + "epoch": 6.46, + "learning_rate": 1.7949061662198392e-05, + "loss": 0.0007, + "step": 2441 + }, + { + "epoch": 6.46, + "learning_rate": 1.7935656836461127e-05, + "loss": 0.0005, + "step": 2442 + }, + { + "epoch": 6.46, + "learning_rate": 1.7922252010723862e-05, + "loss": 0.0012, + "step": 2443 + }, + { + "epoch": 6.47, + "learning_rate": 1.7908847184986597e-05, + "loss": 0.0041, + "step": 2444 + }, + { + "epoch": 6.47, + "learning_rate": 1.7895442359249332e-05, + "loss": 0.4884, + "step": 2445 + }, + { + "epoch": 6.47, + "learning_rate": 1.7882037533512068e-05, + "loss": 0.0017, + "step": 2446 + }, + { + "epoch": 6.47, + "learning_rate": 1.78686327077748e-05, + "loss": 0.0566, + "step": 2447 + }, + { + "epoch": 6.48, + "learning_rate": 1.7855227882037534e-05, + "loss": 0.012, + "step": 2448 + }, + { + "epoch": 6.48, + "learning_rate": 1.784182305630027e-05, + "loss": 0.001, + "step": 2449 + }, + { + "epoch": 6.48, + "learning_rate": 1.7828418230563005e-05, + "loss": 0.0028, + "step": 2450 + }, + { + "epoch": 6.48, + "learning_rate": 1.781501340482574e-05, + "loss": 0.4622, + "step": 2451 + }, + { + "epoch": 6.49, + "learning_rate": 1.780160857908847e-05, + "loss": 0.0042, + "step": 2452 + }, + { + "epoch": 6.49, + "learning_rate": 1.7788203753351207e-05, + "loss": 0.0176, + "step": 2453 + }, + { + "epoch": 6.49, + "learning_rate": 1.7774798927613942e-05, + "loss": 0.0012, + "step": 2454 + }, + { + "epoch": 6.49, + "learning_rate": 1.7761394101876677e-05, + "loss": 0.0344, + "step": 2455 + }, + { + "epoch": 6.5, + "learning_rate": 1.7747989276139412e-05, + "loss": 0.1278, + "step": 2456 + }, + { + "epoch": 6.5, + "learning_rate": 1.7734584450402144e-05, + "loss": 0.0017, + "step": 2457 + }, + { + "epoch": 6.5, + "learning_rate": 1.772117962466488e-05, + "loss": 0.0044, + "step": 2458 + }, + { + "epoch": 6.51, + "learning_rate": 1.7707774798927614e-05, + "loss": 0.0016, + "step": 2459 + }, + { + "epoch": 6.51, + "learning_rate": 1.769436997319035e-05, + "loss": 0.0799, + "step": 2460 + }, + { + "epoch": 6.51, + "learning_rate": 1.7680965147453084e-05, + "loss": 0.0066, + "step": 2461 + }, + { + "epoch": 6.51, + "learning_rate": 1.7667560321715816e-05, + "loss": 0.1607, + "step": 2462 + }, + { + "epoch": 6.52, + "learning_rate": 1.765415549597855e-05, + "loss": 0.0742, + "step": 2463 + }, + { + "epoch": 6.52, + "learning_rate": 1.7640750670241286e-05, + "loss": 0.0005, + "step": 2464 + }, + { + "epoch": 6.52, + "learning_rate": 1.762734584450402e-05, + "loss": 0.0006, + "step": 2465 + }, + { + "epoch": 6.52, + "learning_rate": 1.7613941018766757e-05, + "loss": 0.0019, + "step": 2466 + }, + { + "epoch": 6.53, + "learning_rate": 1.7600536193029492e-05, + "loss": 0.0009, + "step": 2467 + }, + { + "epoch": 6.53, + "learning_rate": 1.7587131367292227e-05, + "loss": 0.0023, + "step": 2468 + }, + { + "epoch": 6.53, + "learning_rate": 1.7573726541554962e-05, + "loss": 0.0202, + "step": 2469 + }, + { + "epoch": 6.53, + "learning_rate": 1.7560321715817697e-05, + "loss": 0.0223, + "step": 2470 + }, + { + "epoch": 6.54, + "learning_rate": 1.7546916890080432e-05, + "loss": 0.0009, + "step": 2471 + }, + { + "epoch": 6.54, + "learning_rate": 1.7533512064343164e-05, + "loss": 0.0014, + "step": 2472 + }, + { + "epoch": 6.54, + "learning_rate": 1.75201072386059e-05, + "loss": 0.0514, + "step": 2473 + }, + { + "epoch": 6.54, + "learning_rate": 1.7506702412868634e-05, + "loss": 0.0013, + "step": 2474 + }, + { + "epoch": 6.55, + "learning_rate": 1.749329758713137e-05, + "loss": 0.0087, + "step": 2475 + }, + { + "epoch": 6.55, + "learning_rate": 1.7479892761394105e-05, + "loss": 0.0035, + "step": 2476 + }, + { + "epoch": 6.55, + "learning_rate": 1.7466487935656836e-05, + "loss": 0.0397, + "step": 2477 + }, + { + "epoch": 6.56, + "learning_rate": 1.745308310991957e-05, + "loss": 0.0021, + "step": 2478 + }, + { + "epoch": 6.56, + "learning_rate": 1.7439678284182307e-05, + "loss": 0.052, + "step": 2479 + }, + { + "epoch": 6.56, + "learning_rate": 1.742627345844504e-05, + "loss": 0.0027, + "step": 2480 + }, + { + "epoch": 6.56, + "learning_rate": 1.7412868632707777e-05, + "loss": 0.001, + "step": 2481 + }, + { + "epoch": 6.57, + "learning_rate": 1.739946380697051e-05, + "loss": 0.2899, + "step": 2482 + }, + { + "epoch": 6.57, + "learning_rate": 1.7386058981233244e-05, + "loss": 0.0007, + "step": 2483 + }, + { + "epoch": 6.57, + "learning_rate": 1.737265415549598e-05, + "loss": 0.0704, + "step": 2484 + }, + { + "epoch": 6.57, + "learning_rate": 1.7359249329758714e-05, + "loss": 0.001, + "step": 2485 + }, + { + "epoch": 6.58, + "learning_rate": 1.734584450402145e-05, + "loss": 0.057, + "step": 2486 + }, + { + "epoch": 6.58, + "learning_rate": 1.733243967828418e-05, + "loss": 0.0002, + "step": 2487 + }, + { + "epoch": 6.58, + "learning_rate": 1.7319034852546916e-05, + "loss": 0.0064, + "step": 2488 + }, + { + "epoch": 6.58, + "learning_rate": 1.730563002680965e-05, + "loss": 0.0638, + "step": 2489 + }, + { + "epoch": 6.59, + "learning_rate": 1.7292225201072386e-05, + "loss": 0.0006, + "step": 2490 + }, + { + "epoch": 6.59, + "learning_rate": 1.727882037533512e-05, + "loss": 0.0142, + "step": 2491 + }, + { + "epoch": 6.59, + "learning_rate": 1.7265415549597856e-05, + "loss": 0.0015, + "step": 2492 + }, + { + "epoch": 6.6, + "learning_rate": 1.725201072386059e-05, + "loss": 0.0949, + "step": 2493 + }, + { + "epoch": 6.6, + "learning_rate": 1.7238605898123327e-05, + "loss": 0.0004, + "step": 2494 + }, + { + "epoch": 6.6, + "learning_rate": 1.7225201072386062e-05, + "loss": 0.0111, + "step": 2495 + }, + { + "epoch": 6.6, + "learning_rate": 1.7211796246648797e-05, + "loss": 0.0215, + "step": 2496 + }, + { + "epoch": 6.61, + "learning_rate": 1.719839142091153e-05, + "loss": 0.266, + "step": 2497 + }, + { + "epoch": 6.61, + "learning_rate": 1.7184986595174264e-05, + "loss": 0.4487, + "step": 2498 + }, + { + "epoch": 6.61, + "learning_rate": 1.7171581769437e-05, + "loss": 0.0021, + "step": 2499 + }, + { + "epoch": 6.61, + "learning_rate": 1.7158176943699734e-05, + "loss": 0.0004, + "step": 2500 + }, + { + "epoch": 6.62, + "learning_rate": 1.714477211796247e-05, + "loss": 0.0004, + "step": 2501 + }, + { + "epoch": 6.62, + "learning_rate": 1.71313672922252e-05, + "loss": 0.0011, + "step": 2502 + }, + { + "epoch": 6.62, + "learning_rate": 1.7117962466487936e-05, + "loss": 0.0006, + "step": 2503 + }, + { + "epoch": 6.62, + "learning_rate": 1.710455764075067e-05, + "loss": 0.1005, + "step": 2504 + }, + { + "epoch": 6.63, + "learning_rate": 1.7091152815013406e-05, + "loss": 0.0472, + "step": 2505 + }, + { + "epoch": 6.63, + "learning_rate": 1.707774798927614e-05, + "loss": 0.0004, + "step": 2506 + }, + { + "epoch": 6.63, + "learning_rate": 1.7064343163538877e-05, + "loss": 0.0162, + "step": 2507 + }, + { + "epoch": 6.63, + "learning_rate": 1.705093833780161e-05, + "loss": 0.004, + "step": 2508 + }, + { + "epoch": 6.64, + "learning_rate": 1.7037533512064344e-05, + "loss": 0.0007, + "step": 2509 + }, + { + "epoch": 6.64, + "learning_rate": 1.702412868632708e-05, + "loss": 0.1447, + "step": 2510 + }, + { + "epoch": 6.64, + "learning_rate": 1.7010723860589814e-05, + "loss": 0.0006, + "step": 2511 + }, + { + "epoch": 6.65, + "learning_rate": 1.699731903485255e-05, + "loss": 0.0002, + "step": 2512 + }, + { + "epoch": 6.65, + "learning_rate": 1.698391420911528e-05, + "loss": 0.0004, + "step": 2513 + }, + { + "epoch": 6.65, + "learning_rate": 1.6970509383378016e-05, + "loss": 0.0017, + "step": 2514 + }, + { + "epoch": 6.65, + "learning_rate": 1.695710455764075e-05, + "loss": 0.4581, + "step": 2515 + }, + { + "epoch": 6.66, + "learning_rate": 1.6943699731903486e-05, + "loss": 0.0005, + "step": 2516 + }, + { + "epoch": 6.66, + "learning_rate": 1.693029490616622e-05, + "loss": 0.0043, + "step": 2517 + }, + { + "epoch": 6.66, + "learning_rate": 1.6916890080428953e-05, + "loss": 0.0005, + "step": 2518 + }, + { + "epoch": 6.66, + "learning_rate": 1.6903485254691688e-05, + "loss": 0.0002, + "step": 2519 + }, + { + "epoch": 6.67, + "learning_rate": 1.6890080428954423e-05, + "loss": 0.0005, + "step": 2520 + }, + { + "epoch": 6.67, + "learning_rate": 1.687667560321716e-05, + "loss": 0.0037, + "step": 2521 + }, + { + "epoch": 6.67, + "learning_rate": 1.6863270777479893e-05, + "loss": 0.0003, + "step": 2522 + }, + { + "epoch": 6.67, + "learning_rate": 1.684986595174263e-05, + "loss": 0.0019, + "step": 2523 + }, + { + "epoch": 6.68, + "learning_rate": 1.6836461126005364e-05, + "loss": 0.0023, + "step": 2524 + }, + { + "epoch": 6.68, + "learning_rate": 1.68230563002681e-05, + "loss": 0.0004, + "step": 2525 + }, + { + "epoch": 6.68, + "learning_rate": 1.6809651474530834e-05, + "loss": 0.3317, + "step": 2526 + }, + { + "epoch": 6.69, + "learning_rate": 1.679624664879357e-05, + "loss": 0.0004, + "step": 2527 + }, + { + "epoch": 6.69, + "learning_rate": 1.67828418230563e-05, + "loss": 0.002, + "step": 2528 + }, + { + "epoch": 6.69, + "learning_rate": 1.6769436997319036e-05, + "loss": 0.0003, + "step": 2529 + }, + { + "epoch": 6.69, + "learning_rate": 1.675603217158177e-05, + "loss": 0.0007, + "step": 2530 + }, + { + "epoch": 6.7, + "learning_rate": 1.6742627345844506e-05, + "loss": 0.0012, + "step": 2531 + }, + { + "epoch": 6.7, + "learning_rate": 1.672922252010724e-05, + "loss": 0.024, + "step": 2532 + }, + { + "epoch": 6.7, + "learning_rate": 1.6715817694369973e-05, + "loss": 0.0041, + "step": 2533 + }, + { + "epoch": 6.7, + "learning_rate": 1.6702412868632708e-05, + "loss": 0.1821, + "step": 2534 + }, + { + "epoch": 6.71, + "learning_rate": 1.6689008042895443e-05, + "loss": 0.0004, + "step": 2535 + }, + { + "epoch": 6.71, + "learning_rate": 1.667560321715818e-05, + "loss": 0.0293, + "step": 2536 + }, + { + "epoch": 6.71, + "learning_rate": 1.6662198391420914e-05, + "loss": 0.0005, + "step": 2537 + }, + { + "epoch": 6.71, + "learning_rate": 1.6648793565683645e-05, + "loss": 0.0043, + "step": 2538 + }, + { + "epoch": 6.72, + "learning_rate": 1.663538873994638e-05, + "loss": 0.0112, + "step": 2539 + }, + { + "epoch": 6.72, + "learning_rate": 1.6621983914209116e-05, + "loss": 0.0003, + "step": 2540 + }, + { + "epoch": 6.72, + "learning_rate": 1.660857908847185e-05, + "loss": 0.0005, + "step": 2541 + }, + { + "epoch": 6.72, + "learning_rate": 1.6595174262734586e-05, + "loss": 0.4617, + "step": 2542 + }, + { + "epoch": 6.73, + "learning_rate": 1.6581769436997318e-05, + "loss": 0.0004, + "step": 2543 + }, + { + "epoch": 6.73, + "learning_rate": 1.6568364611260053e-05, + "loss": 0.1932, + "step": 2544 + }, + { + "epoch": 6.73, + "learning_rate": 1.6554959785522788e-05, + "loss": 0.0004, + "step": 2545 + }, + { + "epoch": 6.74, + "learning_rate": 1.6541554959785523e-05, + "loss": 0.0785, + "step": 2546 + }, + { + "epoch": 6.74, + "learning_rate": 1.6528150134048258e-05, + "loss": 0.0882, + "step": 2547 + }, + { + "epoch": 6.74, + "learning_rate": 1.651474530831099e-05, + "loss": 0.3937, + "step": 2548 + }, + { + "epoch": 6.74, + "learning_rate": 1.6501340482573725e-05, + "loss": 0.3401, + "step": 2549 + }, + { + "epoch": 6.75, + "learning_rate": 1.648793565683646e-05, + "loss": 0.026, + "step": 2550 + }, + { + "epoch": 6.75, + "learning_rate": 1.6474530831099195e-05, + "loss": 0.1959, + "step": 2551 + }, + { + "epoch": 6.75, + "learning_rate": 1.646112600536193e-05, + "loss": 0.0022, + "step": 2552 + }, + { + "epoch": 6.75, + "learning_rate": 1.6447721179624666e-05, + "loss": 0.0012, + "step": 2553 + }, + { + "epoch": 6.76, + "learning_rate": 1.64343163538874e-05, + "loss": 0.0064, + "step": 2554 + }, + { + "epoch": 6.76, + "learning_rate": 1.6420911528150136e-05, + "loss": 0.0105, + "step": 2555 + }, + { + "epoch": 6.76, + "learning_rate": 1.640750670241287e-05, + "loss": 0.0008, + "step": 2556 + }, + { + "epoch": 6.76, + "learning_rate": 1.6394101876675606e-05, + "loss": 0.0339, + "step": 2557 + }, + { + "epoch": 6.77, + "learning_rate": 1.6380697050938338e-05, + "loss": 0.1458, + "step": 2558 + }, + { + "epoch": 6.77, + "learning_rate": 1.6367292225201073e-05, + "loss": 0.2526, + "step": 2559 + }, + { + "epoch": 6.77, + "learning_rate": 1.6353887399463808e-05, + "loss": 0.038, + "step": 2560 + }, + { + "epoch": 6.78, + "learning_rate": 1.6340482573726543e-05, + "loss": 0.174, + "step": 2561 + }, + { + "epoch": 6.78, + "learning_rate": 1.632707774798928e-05, + "loss": 0.1936, + "step": 2562 + }, + { + "epoch": 6.78, + "learning_rate": 1.631367292225201e-05, + "loss": 0.0014, + "step": 2563 + }, + { + "epoch": 6.78, + "learning_rate": 1.6300268096514745e-05, + "loss": 0.0008, + "step": 2564 + }, + { + "epoch": 6.79, + "learning_rate": 1.628686327077748e-05, + "loss": 0.0616, + "step": 2565 + }, + { + "epoch": 6.79, + "learning_rate": 1.6273458445040215e-05, + "loss": 0.0029, + "step": 2566 + }, + { + "epoch": 6.79, + "learning_rate": 1.626005361930295e-05, + "loss": 0.1301, + "step": 2567 + }, + { + "epoch": 6.79, + "learning_rate": 1.6246648793565686e-05, + "loss": 0.0076, + "step": 2568 + }, + { + "epoch": 6.8, + "learning_rate": 1.6233243967828417e-05, + "loss": 0.038, + "step": 2569 + }, + { + "epoch": 6.8, + "learning_rate": 1.6219839142091153e-05, + "loss": 0.0376, + "step": 2570 + }, + { + "epoch": 6.8, + "learning_rate": 1.6206434316353888e-05, + "loss": 0.0007, + "step": 2571 + }, + { + "epoch": 6.8, + "learning_rate": 1.6193029490616623e-05, + "loss": 0.016, + "step": 2572 + }, + { + "epoch": 6.81, + "learning_rate": 1.6179624664879358e-05, + "loss": 0.0005, + "step": 2573 + }, + { + "epoch": 6.81, + "learning_rate": 1.616621983914209e-05, + "loss": 0.0024, + "step": 2574 + }, + { + "epoch": 6.81, + "learning_rate": 1.6152815013404825e-05, + "loss": 0.001, + "step": 2575 + }, + { + "epoch": 6.81, + "learning_rate": 1.613941018766756e-05, + "loss": 0.0009, + "step": 2576 + }, + { + "epoch": 6.82, + "learning_rate": 1.6126005361930295e-05, + "loss": 0.1889, + "step": 2577 + }, + { + "epoch": 6.82, + "learning_rate": 1.611260053619303e-05, + "loss": 0.5094, + "step": 2578 + }, + { + "epoch": 6.82, + "learning_rate": 1.6099195710455765e-05, + "loss": 0.017, + "step": 2579 + }, + { + "epoch": 6.83, + "learning_rate": 1.60857908847185e-05, + "loss": 0.029, + "step": 2580 + }, + { + "epoch": 6.83, + "learning_rate": 1.6072386058981236e-05, + "loss": 0.1249, + "step": 2581 + }, + { + "epoch": 6.83, + "learning_rate": 1.605898123324397e-05, + "loss": 0.2531, + "step": 2582 + }, + { + "epoch": 6.83, + "learning_rate": 1.6045576407506706e-05, + "loss": 0.0006, + "step": 2583 + }, + { + "epoch": 6.84, + "learning_rate": 1.6032171581769438e-05, + "loss": 0.0624, + "step": 2584 + }, + { + "epoch": 6.84, + "learning_rate": 1.6018766756032173e-05, + "loss": 0.0254, + "step": 2585 + }, + { + "epoch": 6.84, + "learning_rate": 1.6005361930294908e-05, + "loss": 0.0034, + "step": 2586 + }, + { + "epoch": 6.84, + "learning_rate": 1.5991957104557643e-05, + "loss": 0.0204, + "step": 2587 + }, + { + "epoch": 6.85, + "learning_rate": 1.5978552278820378e-05, + "loss": 0.002, + "step": 2588 + }, + { + "epoch": 6.85, + "learning_rate": 1.596514745308311e-05, + "loss": 0.0015, + "step": 2589 + }, + { + "epoch": 6.85, + "learning_rate": 1.5951742627345845e-05, + "loss": 0.0465, + "step": 2590 + }, + { + "epoch": 6.85, + "learning_rate": 1.593833780160858e-05, + "loss": 0.1892, + "step": 2591 + }, + { + "epoch": 6.86, + "learning_rate": 1.5924932975871315e-05, + "loss": 0.0932, + "step": 2592 + }, + { + "epoch": 6.86, + "learning_rate": 1.591152815013405e-05, + "loss": 0.0015, + "step": 2593 + }, + { + "epoch": 6.86, + "learning_rate": 1.5898123324396782e-05, + "loss": 0.0062, + "step": 2594 + }, + { + "epoch": 6.87, + "learning_rate": 1.5884718498659517e-05, + "loss": 0.0731, + "step": 2595 + }, + { + "epoch": 6.87, + "learning_rate": 1.5871313672922252e-05, + "loss": 0.002, + "step": 2596 + }, + { + "epoch": 6.87, + "learning_rate": 1.5857908847184988e-05, + "loss": 0.0484, + "step": 2597 + }, + { + "epoch": 6.87, + "learning_rate": 1.5844504021447723e-05, + "loss": 0.0082, + "step": 2598 + }, + { + "epoch": 6.88, + "learning_rate": 1.5831099195710454e-05, + "loss": 0.0213, + "step": 2599 + }, + { + "epoch": 6.88, + "learning_rate": 1.581769436997319e-05, + "loss": 0.1612, + "step": 2600 + }, + { + "epoch": 6.88, + "learning_rate": 1.5804289544235925e-05, + "loss": 0.184, + "step": 2601 + }, + { + "epoch": 6.88, + "learning_rate": 1.579088471849866e-05, + "loss": 0.1413, + "step": 2602 + }, + { + "epoch": 6.89, + "learning_rate": 1.5777479892761395e-05, + "loss": 0.0019, + "step": 2603 + }, + { + "epoch": 6.89, + "learning_rate": 1.5764075067024127e-05, + "loss": 0.0047, + "step": 2604 + }, + { + "epoch": 6.89, + "learning_rate": 1.5750670241286862e-05, + "loss": 0.0409, + "step": 2605 + }, + { + "epoch": 6.89, + "learning_rate": 1.5737265415549597e-05, + "loss": 0.0379, + "step": 2606 + }, + { + "epoch": 6.9, + "learning_rate": 1.5723860589812332e-05, + "loss": 0.0005, + "step": 2607 + }, + { + "epoch": 6.9, + "learning_rate": 1.5710455764075067e-05, + "loss": 0.0332, + "step": 2608 + }, + { + "epoch": 6.9, + "learning_rate": 1.5697050938337802e-05, + "loss": 0.0543, + "step": 2609 + }, + { + "epoch": 6.9, + "learning_rate": 1.5683646112600538e-05, + "loss": 0.0009, + "step": 2610 + }, + { + "epoch": 6.91, + "learning_rate": 1.5670241286863273e-05, + "loss": 0.016, + "step": 2611 + }, + { + "epoch": 6.91, + "learning_rate": 1.5656836461126008e-05, + "loss": 0.0035, + "step": 2612 + }, + { + "epoch": 6.91, + "learning_rate": 1.5643431635388743e-05, + "loss": 0.0713, + "step": 2613 + }, + { + "epoch": 6.92, + "learning_rate": 1.5630026809651475e-05, + "loss": 0.0022, + "step": 2614 + }, + { + "epoch": 6.92, + "learning_rate": 1.561662198391421e-05, + "loss": 0.0005, + "step": 2615 + }, + { + "epoch": 6.92, + "learning_rate": 1.5603217158176945e-05, + "loss": 0.0009, + "step": 2616 + }, + { + "epoch": 6.92, + "learning_rate": 1.558981233243968e-05, + "loss": 0.0016, + "step": 2617 + }, + { + "epoch": 6.93, + "learning_rate": 1.5576407506702415e-05, + "loss": 0.0017, + "step": 2618 + }, + { + "epoch": 6.93, + "learning_rate": 1.5563002680965147e-05, + "loss": 0.0094, + "step": 2619 + }, + { + "epoch": 6.93, + "learning_rate": 1.5549597855227882e-05, + "loss": 0.016, + "step": 2620 + }, + { + "epoch": 6.93, + "learning_rate": 1.5536193029490617e-05, + "loss": 0.0005, + "step": 2621 + }, + { + "epoch": 6.94, + "learning_rate": 1.5522788203753352e-05, + "loss": 0.0549, + "step": 2622 + }, + { + "epoch": 6.94, + "learning_rate": 1.5509383378016087e-05, + "loss": 0.3791, + "step": 2623 + }, + { + "epoch": 6.94, + "learning_rate": 1.549597855227882e-05, + "loss": 0.0003, + "step": 2624 + }, + { + "epoch": 6.94, + "learning_rate": 1.5482573726541554e-05, + "loss": 0.0774, + "step": 2625 + }, + { + "epoch": 6.95, + "learning_rate": 1.546916890080429e-05, + "loss": 0.0879, + "step": 2626 + }, + { + "epoch": 6.95, + "learning_rate": 1.5455764075067025e-05, + "loss": 0.0007, + "step": 2627 + }, + { + "epoch": 6.95, + "learning_rate": 1.544235924932976e-05, + "loss": 0.0047, + "step": 2628 + }, + { + "epoch": 6.96, + "learning_rate": 1.542895442359249e-05, + "loss": 0.0011, + "step": 2629 + }, + { + "epoch": 6.96, + "learning_rate": 1.5415549597855227e-05, + "loss": 0.0004, + "step": 2630 + }, + { + "epoch": 6.96, + "learning_rate": 1.5402144772117962e-05, + "loss": 0.4962, + "step": 2631 + }, + { + "epoch": 6.96, + "learning_rate": 1.5388739946380697e-05, + "loss": 0.1182, + "step": 2632 + }, + { + "epoch": 6.97, + "learning_rate": 1.5375335120643432e-05, + "loss": 0.0269, + "step": 2633 + }, + { + "epoch": 6.97, + "learning_rate": 1.5361930294906167e-05, + "loss": 0.0157, + "step": 2634 + }, + { + "epoch": 6.97, + "learning_rate": 1.5348525469168902e-05, + "loss": 0.0022, + "step": 2635 + }, + { + "epoch": 6.97, + "learning_rate": 1.5335120643431637e-05, + "loss": 0.3299, + "step": 2636 + }, + { + "epoch": 6.98, + "learning_rate": 1.5321715817694372e-05, + "loss": 0.0529, + "step": 2637 + }, + { + "epoch": 6.98, + "learning_rate": 1.5308310991957108e-05, + "loss": 0.1396, + "step": 2638 + }, + { + "epoch": 6.98, + "learning_rate": 1.5294906166219843e-05, + "loss": 0.0008, + "step": 2639 + }, + { + "epoch": 6.98, + "learning_rate": 1.5281501340482574e-05, + "loss": 0.0086, + "step": 2640 + }, + { + "epoch": 6.99, + "learning_rate": 1.526809651474531e-05, + "loss": 0.0036, + "step": 2641 + }, + { + "epoch": 6.99, + "learning_rate": 1.5254691689008043e-05, + "loss": 0.0149, + "step": 2642 + }, + { + "epoch": 6.99, + "learning_rate": 1.5241286863270778e-05, + "loss": 0.0011, + "step": 2643 + }, + { + "epoch": 6.99, + "learning_rate": 1.5227882037533513e-05, + "loss": 0.0003, + "step": 2644 + }, + { + "epoch": 7.0, + "learning_rate": 1.5214477211796247e-05, + "loss": 0.0064, + "step": 2645 + }, + { + "epoch": 7.0, + "learning_rate": 1.5201072386058982e-05, + "loss": 0.0281, + "step": 2646 + }, + { + "epoch": 7.0, + "eval_f1": 0.7856000000000002, + "eval_loss": 1.1071351766586304, + "eval_runtime": 1.8613, + "eval_samples_per_second": 812.89, + "eval_steps_per_second": 51.041, + "step": 2646 + }, + { + "epoch": 7.0, + "learning_rate": 1.5187667560321717e-05, + "loss": 0.0049, + "step": 2647 + }, + { + "epoch": 7.01, + "learning_rate": 1.5174262734584452e-05, + "loss": 0.001, + "step": 2648 + }, + { + "epoch": 7.01, + "learning_rate": 1.5160857908847187e-05, + "loss": 0.0004, + "step": 2649 + }, + { + "epoch": 7.01, + "learning_rate": 1.5147453083109919e-05, + "loss": 0.022, + "step": 2650 + }, + { + "epoch": 7.01, + "learning_rate": 1.5134048257372654e-05, + "loss": 0.0126, + "step": 2651 + }, + { + "epoch": 7.02, + "learning_rate": 1.512064343163539e-05, + "loss": 0.0006, + "step": 2652 + }, + { + "epoch": 7.02, + "learning_rate": 1.5107238605898124e-05, + "loss": 0.0182, + "step": 2653 + }, + { + "epoch": 7.02, + "learning_rate": 1.509383378016086e-05, + "loss": 0.0004, + "step": 2654 + }, + { + "epoch": 7.02, + "learning_rate": 1.5080428954423593e-05, + "loss": 0.2763, + "step": 2655 + }, + { + "epoch": 7.03, + "learning_rate": 1.5067024128686328e-05, + "loss": 0.0025, + "step": 2656 + }, + { + "epoch": 7.03, + "learning_rate": 1.5053619302949063e-05, + "loss": 0.0102, + "step": 2657 + }, + { + "epoch": 7.03, + "learning_rate": 1.5040214477211798e-05, + "loss": 0.008, + "step": 2658 + }, + { + "epoch": 7.03, + "learning_rate": 1.5026809651474534e-05, + "loss": 0.0005, + "step": 2659 + }, + { + "epoch": 7.04, + "learning_rate": 1.5013404825737265e-05, + "loss": 0.0006, + "step": 2660 + }, + { + "epoch": 7.04, + "learning_rate": 1.5e-05, + "loss": 0.0003, + "step": 2661 + }, + { + "epoch": 7.04, + "learning_rate": 1.4986595174262736e-05, + "loss": 0.0013, + "step": 2662 + }, + { + "epoch": 7.04, + "learning_rate": 1.497319034852547e-05, + "loss": 0.0033, + "step": 2663 + }, + { + "epoch": 7.05, + "learning_rate": 1.4959785522788206e-05, + "loss": 0.0004, + "step": 2664 + }, + { + "epoch": 7.05, + "learning_rate": 1.4946380697050938e-05, + "loss": 0.0347, + "step": 2665 + }, + { + "epoch": 7.05, + "learning_rate": 1.4932975871313673e-05, + "loss": 0.0208, + "step": 2666 + }, + { + "epoch": 7.06, + "learning_rate": 1.4919571045576408e-05, + "loss": 0.3783, + "step": 2667 + }, + { + "epoch": 7.06, + "learning_rate": 1.4906166219839143e-05, + "loss": 0.0005, + "step": 2668 + }, + { + "epoch": 7.06, + "learning_rate": 1.4892761394101878e-05, + "loss": 0.2023, + "step": 2669 + }, + { + "epoch": 7.06, + "learning_rate": 1.4879356568364611e-05, + "loss": 0.0007, + "step": 2670 + }, + { + "epoch": 7.07, + "learning_rate": 1.4865951742627347e-05, + "loss": 0.0014, + "step": 2671 + }, + { + "epoch": 7.07, + "learning_rate": 1.4852546916890082e-05, + "loss": 0.0018, + "step": 2672 + }, + { + "epoch": 7.07, + "learning_rate": 1.4839142091152817e-05, + "loss": 0.0004, + "step": 2673 + }, + { + "epoch": 7.07, + "learning_rate": 1.4825737265415552e-05, + "loss": 0.0005, + "step": 2674 + }, + { + "epoch": 7.08, + "learning_rate": 1.4812332439678284e-05, + "loss": 0.0235, + "step": 2675 + }, + { + "epoch": 7.08, + "learning_rate": 1.4798927613941019e-05, + "loss": 0.0116, + "step": 2676 + }, + { + "epoch": 7.08, + "learning_rate": 1.4785522788203754e-05, + "loss": 0.0005, + "step": 2677 + }, + { + "epoch": 7.08, + "learning_rate": 1.4772117962466489e-05, + "loss": 0.0011, + "step": 2678 + }, + { + "epoch": 7.09, + "learning_rate": 1.4758713136729224e-05, + "loss": 0.2959, + "step": 2679 + }, + { + "epoch": 7.09, + "learning_rate": 1.4745308310991956e-05, + "loss": 0.2646, + "step": 2680 + }, + { + "epoch": 7.09, + "learning_rate": 1.4731903485254691e-05, + "loss": 0.1224, + "step": 2681 + }, + { + "epoch": 7.1, + "learning_rate": 1.4718498659517426e-05, + "loss": 0.0419, + "step": 2682 + }, + { + "epoch": 7.1, + "learning_rate": 1.4705093833780161e-05, + "loss": 0.0218, + "step": 2683 + }, + { + "epoch": 7.1, + "learning_rate": 1.4691689008042897e-05, + "loss": 0.0003, + "step": 2684 + }, + { + "epoch": 7.1, + "learning_rate": 1.467828418230563e-05, + "loss": 0.0005, + "step": 2685 + }, + { + "epoch": 7.11, + "learning_rate": 1.4664879356568365e-05, + "loss": 0.2352, + "step": 2686 + }, + { + "epoch": 7.11, + "learning_rate": 1.46514745308311e-05, + "loss": 0.0004, + "step": 2687 + }, + { + "epoch": 7.11, + "learning_rate": 1.4638069705093835e-05, + "loss": 0.0004, + "step": 2688 + }, + { + "epoch": 7.11, + "learning_rate": 1.462466487935657e-05, + "loss": 0.1069, + "step": 2689 + }, + { + "epoch": 7.12, + "learning_rate": 1.4611260053619302e-05, + "loss": 0.008, + "step": 2690 + }, + { + "epoch": 7.12, + "learning_rate": 1.4597855227882037e-05, + "loss": 0.0007, + "step": 2691 + }, + { + "epoch": 7.12, + "learning_rate": 1.4584450402144772e-05, + "loss": 0.002, + "step": 2692 + }, + { + "epoch": 7.12, + "learning_rate": 1.4571045576407508e-05, + "loss": 0.0029, + "step": 2693 + }, + { + "epoch": 7.13, + "learning_rate": 1.4557640750670243e-05, + "loss": 0.0221, + "step": 2694 + }, + { + "epoch": 7.13, + "learning_rate": 1.4544235924932978e-05, + "loss": 0.0085, + "step": 2695 + }, + { + "epoch": 7.13, + "learning_rate": 1.453083109919571e-05, + "loss": 0.0009, + "step": 2696 + }, + { + "epoch": 7.13, + "learning_rate": 1.4517426273458445e-05, + "loss": 0.0018, + "step": 2697 + }, + { + "epoch": 7.14, + "learning_rate": 1.450402144772118e-05, + "loss": 0.001, + "step": 2698 + }, + { + "epoch": 7.14, + "learning_rate": 1.4490616621983915e-05, + "loss": 0.0021, + "step": 2699 + }, + { + "epoch": 7.14, + "learning_rate": 1.447721179624665e-05, + "loss": 0.0225, + "step": 2700 + }, + { + "epoch": 7.15, + "learning_rate": 1.4463806970509384e-05, + "loss": 0.0005, + "step": 2701 + }, + { + "epoch": 7.15, + "learning_rate": 1.4450402144772119e-05, + "loss": 0.0057, + "step": 2702 + }, + { + "epoch": 7.15, + "learning_rate": 1.4436997319034854e-05, + "loss": 0.0422, + "step": 2703 + }, + { + "epoch": 7.15, + "learning_rate": 1.4423592493297589e-05, + "loss": 0.0028, + "step": 2704 + }, + { + "epoch": 7.16, + "learning_rate": 1.4410187667560324e-05, + "loss": 0.0231, + "step": 2705 + }, + { + "epoch": 7.16, + "learning_rate": 1.4396782841823056e-05, + "loss": 0.1236, + "step": 2706 + }, + { + "epoch": 7.16, + "learning_rate": 1.4383378016085791e-05, + "loss": 0.0004, + "step": 2707 + }, + { + "epoch": 7.16, + "learning_rate": 1.4369973190348526e-05, + "loss": 0.0019, + "step": 2708 + }, + { + "epoch": 7.17, + "learning_rate": 1.4356568364611261e-05, + "loss": 0.0029, + "step": 2709 + }, + { + "epoch": 7.17, + "learning_rate": 1.4343163538873996e-05, + "loss": 0.0005, + "step": 2710 + }, + { + "epoch": 7.17, + "learning_rate": 1.4329758713136728e-05, + "loss": 0.0665, + "step": 2711 + }, + { + "epoch": 7.17, + "learning_rate": 1.4316353887399463e-05, + "loss": 0.0005, + "step": 2712 + }, + { + "epoch": 7.18, + "learning_rate": 1.4302949061662198e-05, + "loss": 0.0107, + "step": 2713 + }, + { + "epoch": 7.18, + "learning_rate": 1.4289544235924934e-05, + "loss": 0.0005, + "step": 2714 + }, + { + "epoch": 7.18, + "learning_rate": 1.4276139410187669e-05, + "loss": 0.1983, + "step": 2715 + }, + { + "epoch": 7.19, + "learning_rate": 1.4262734584450402e-05, + "loss": 0.0016, + "step": 2716 + }, + { + "epoch": 7.19, + "learning_rate": 1.4249329758713137e-05, + "loss": 0.0003, + "step": 2717 + }, + { + "epoch": 7.19, + "learning_rate": 1.4235924932975872e-05, + "loss": 0.0247, + "step": 2718 + }, + { + "epoch": 7.19, + "learning_rate": 1.4222520107238607e-05, + "loss": 0.0079, + "step": 2719 + }, + { + "epoch": 7.2, + "learning_rate": 1.4209115281501343e-05, + "loss": 0.0012, + "step": 2720 + }, + { + "epoch": 7.2, + "learning_rate": 1.4195710455764074e-05, + "loss": 0.0004, + "step": 2721 + }, + { + "epoch": 7.2, + "learning_rate": 1.418230563002681e-05, + "loss": 0.0004, + "step": 2722 + }, + { + "epoch": 7.2, + "learning_rate": 1.4168900804289545e-05, + "loss": 0.0051, + "step": 2723 + }, + { + "epoch": 7.21, + "learning_rate": 1.415549597855228e-05, + "loss": 0.0006, + "step": 2724 + }, + { + "epoch": 7.21, + "learning_rate": 1.4142091152815015e-05, + "loss": 0.0043, + "step": 2725 + }, + { + "epoch": 7.21, + "learning_rate": 1.4128686327077748e-05, + "loss": 0.004, + "step": 2726 + }, + { + "epoch": 7.21, + "learning_rate": 1.4115281501340483e-05, + "loss": 0.2211, + "step": 2727 + }, + { + "epoch": 7.22, + "learning_rate": 1.4101876675603219e-05, + "loss": 0.0003, + "step": 2728 + }, + { + "epoch": 7.22, + "learning_rate": 1.4088471849865954e-05, + "loss": 0.0004, + "step": 2729 + }, + { + "epoch": 7.22, + "learning_rate": 1.4075067024128689e-05, + "loss": 0.2051, + "step": 2730 + }, + { + "epoch": 7.22, + "learning_rate": 1.406166219839142e-05, + "loss": 0.0003, + "step": 2731 + }, + { + "epoch": 7.23, + "learning_rate": 1.4048257372654156e-05, + "loss": 0.0014, + "step": 2732 + }, + { + "epoch": 7.23, + "learning_rate": 1.403485254691689e-05, + "loss": 0.0007, + "step": 2733 + }, + { + "epoch": 7.23, + "learning_rate": 1.4021447721179626e-05, + "loss": 0.0068, + "step": 2734 + }, + { + "epoch": 7.24, + "learning_rate": 1.4008042895442361e-05, + "loss": 0.137, + "step": 2735 + }, + { + "epoch": 7.24, + "learning_rate": 1.3994638069705093e-05, + "loss": 0.0005, + "step": 2736 + }, + { + "epoch": 7.24, + "learning_rate": 1.3981233243967828e-05, + "loss": 0.0006, + "step": 2737 + }, + { + "epoch": 7.24, + "learning_rate": 1.3967828418230563e-05, + "loss": 0.0206, + "step": 2738 + }, + { + "epoch": 7.25, + "learning_rate": 1.3954423592493298e-05, + "loss": 0.1488, + "step": 2739 + }, + { + "epoch": 7.25, + "learning_rate": 1.3941018766756033e-05, + "loss": 0.0054, + "step": 2740 + }, + { + "epoch": 7.25, + "learning_rate": 1.3927613941018767e-05, + "loss": 0.0269, + "step": 2741 + }, + { + "epoch": 7.25, + "learning_rate": 1.3914209115281502e-05, + "loss": 0.0006, + "step": 2742 + }, + { + "epoch": 7.26, + "learning_rate": 1.3900804289544237e-05, + "loss": 0.0003, + "step": 2743 + }, + { + "epoch": 7.26, + "learning_rate": 1.3887399463806972e-05, + "loss": 0.0004, + "step": 2744 + }, + { + "epoch": 7.26, + "learning_rate": 1.3873994638069707e-05, + "loss": 0.0003, + "step": 2745 + }, + { + "epoch": 7.26, + "learning_rate": 1.3860589812332439e-05, + "loss": 0.0027, + "step": 2746 + }, + { + "epoch": 7.27, + "learning_rate": 1.3847184986595174e-05, + "loss": 0.0006, + "step": 2747 + }, + { + "epoch": 7.27, + "learning_rate": 1.383378016085791e-05, + "loss": 0.0012, + "step": 2748 + }, + { + "epoch": 7.27, + "learning_rate": 1.3820375335120644e-05, + "loss": 0.0522, + "step": 2749 + }, + { + "epoch": 7.28, + "learning_rate": 1.380697050938338e-05, + "loss": 0.0126, + "step": 2750 + }, + { + "epoch": 7.28, + "learning_rate": 1.3793565683646111e-05, + "loss": 0.0083, + "step": 2751 + }, + { + "epoch": 7.28, + "learning_rate": 1.3780160857908846e-05, + "loss": 0.074, + "step": 2752 + }, + { + "epoch": 7.28, + "learning_rate": 1.3766756032171582e-05, + "loss": 0.0002, + "step": 2753 + }, + { + "epoch": 7.29, + "learning_rate": 1.3753351206434317e-05, + "loss": 0.1009, + "step": 2754 + }, + { + "epoch": 7.29, + "learning_rate": 1.3739946380697052e-05, + "loss": 0.0021, + "step": 2755 + }, + { + "epoch": 7.29, + "learning_rate": 1.3726541554959787e-05, + "loss": 0.0082, + "step": 2756 + }, + { + "epoch": 7.29, + "learning_rate": 1.371313672922252e-05, + "loss": 0.0004, + "step": 2757 + }, + { + "epoch": 7.3, + "learning_rate": 1.3699731903485256e-05, + "loss": 0.0006, + "step": 2758 + }, + { + "epoch": 7.3, + "learning_rate": 1.368632707774799e-05, + "loss": 0.0173, + "step": 2759 + }, + { + "epoch": 7.3, + "learning_rate": 1.3672922252010726e-05, + "loss": 0.0147, + "step": 2760 + }, + { + "epoch": 7.3, + "learning_rate": 1.3659517426273461e-05, + "loss": 0.1293, + "step": 2761 + }, + { + "epoch": 7.31, + "learning_rate": 1.3646112600536193e-05, + "loss": 0.2566, + "step": 2762 + }, + { + "epoch": 7.31, + "learning_rate": 1.3632707774798928e-05, + "loss": 0.0026, + "step": 2763 + }, + { + "epoch": 7.31, + "learning_rate": 1.3619302949061663e-05, + "loss": 0.0031, + "step": 2764 + }, + { + "epoch": 7.31, + "learning_rate": 1.3605898123324398e-05, + "loss": 0.0029, + "step": 2765 + }, + { + "epoch": 7.32, + "learning_rate": 1.3592493297587133e-05, + "loss": 0.0005, + "step": 2766 + }, + { + "epoch": 7.32, + "learning_rate": 1.3579088471849865e-05, + "loss": 0.0004, + "step": 2767 + }, + { + "epoch": 7.32, + "learning_rate": 1.35656836461126e-05, + "loss": 0.0294, + "step": 2768 + }, + { + "epoch": 7.33, + "learning_rate": 1.3552278820375335e-05, + "loss": 0.0011, + "step": 2769 + }, + { + "epoch": 7.33, + "learning_rate": 1.353887399463807e-05, + "loss": 0.009, + "step": 2770 + }, + { + "epoch": 7.33, + "learning_rate": 1.3525469168900805e-05, + "loss": 0.0003, + "step": 2771 + }, + { + "epoch": 7.33, + "learning_rate": 1.3512064343163539e-05, + "loss": 0.0003, + "step": 2772 + }, + { + "epoch": 7.34, + "learning_rate": 1.3498659517426274e-05, + "loss": 0.0002, + "step": 2773 + }, + { + "epoch": 7.34, + "learning_rate": 1.348525469168901e-05, + "loss": 0.0002, + "step": 2774 + }, + { + "epoch": 7.34, + "learning_rate": 1.3471849865951744e-05, + "loss": 0.1261, + "step": 2775 + }, + { + "epoch": 7.34, + "learning_rate": 1.345844504021448e-05, + "loss": 0.0006, + "step": 2776 + }, + { + "epoch": 7.35, + "learning_rate": 1.3445040214477211e-05, + "loss": 0.0006, + "step": 2777 + }, + { + "epoch": 7.35, + "learning_rate": 1.3431635388739946e-05, + "loss": 0.0003, + "step": 2778 + }, + { + "epoch": 7.35, + "learning_rate": 1.3418230563002681e-05, + "loss": 0.0754, + "step": 2779 + }, + { + "epoch": 7.35, + "learning_rate": 1.3404825737265417e-05, + "loss": 0.0002, + "step": 2780 + }, + { + "epoch": 7.36, + "learning_rate": 1.3391420911528152e-05, + "loss": 0.0007, + "step": 2781 + }, + { + "epoch": 7.36, + "learning_rate": 1.3378016085790885e-05, + "loss": 0.0004, + "step": 2782 + }, + { + "epoch": 7.36, + "learning_rate": 1.336461126005362e-05, + "loss": 0.001, + "step": 2783 + }, + { + "epoch": 7.37, + "learning_rate": 1.3351206434316355e-05, + "loss": 0.0006, + "step": 2784 + }, + { + "epoch": 7.37, + "learning_rate": 1.333780160857909e-05, + "loss": 0.0227, + "step": 2785 + }, + { + "epoch": 7.37, + "learning_rate": 1.3324396782841826e-05, + "loss": 0.0002, + "step": 2786 + }, + { + "epoch": 7.37, + "learning_rate": 1.3310991957104557e-05, + "loss": 0.0002, + "step": 2787 + }, + { + "epoch": 7.38, + "learning_rate": 1.3297587131367293e-05, + "loss": 0.1036, + "step": 2788 + }, + { + "epoch": 7.38, + "learning_rate": 1.3284182305630028e-05, + "loss": 0.0014, + "step": 2789 + }, + { + "epoch": 7.38, + "learning_rate": 1.3270777479892763e-05, + "loss": 0.35, + "step": 2790 + }, + { + "epoch": 7.38, + "learning_rate": 1.3257372654155498e-05, + "loss": 0.0003, + "step": 2791 + }, + { + "epoch": 7.39, + "learning_rate": 1.324396782841823e-05, + "loss": 0.0182, + "step": 2792 + }, + { + "epoch": 7.39, + "learning_rate": 1.3230563002680965e-05, + "loss": 0.0038, + "step": 2793 + }, + { + "epoch": 7.39, + "learning_rate": 1.32171581769437e-05, + "loss": 0.0003, + "step": 2794 + }, + { + "epoch": 7.39, + "learning_rate": 1.3203753351206435e-05, + "loss": 0.0003, + "step": 2795 + }, + { + "epoch": 7.4, + "learning_rate": 1.319034852546917e-05, + "loss": 0.0008, + "step": 2796 + }, + { + "epoch": 7.4, + "learning_rate": 1.3176943699731904e-05, + "loss": 0.0003, + "step": 2797 + }, + { + "epoch": 7.4, + "learning_rate": 1.3163538873994639e-05, + "loss": 0.0005, + "step": 2798 + }, + { + "epoch": 7.4, + "learning_rate": 1.3150134048257374e-05, + "loss": 0.2165, + "step": 2799 + }, + { + "epoch": 7.41, + "learning_rate": 1.3136729222520109e-05, + "loss": 0.023, + "step": 2800 + }, + { + "epoch": 7.41, + "learning_rate": 1.3123324396782844e-05, + "loss": 0.0047, + "step": 2801 + }, + { + "epoch": 7.41, + "learning_rate": 1.3109919571045576e-05, + "loss": 0.1507, + "step": 2802 + }, + { + "epoch": 7.42, + "learning_rate": 1.3096514745308311e-05, + "loss": 0.2509, + "step": 2803 + }, + { + "epoch": 7.42, + "learning_rate": 1.3083109919571046e-05, + "loss": 0.0085, + "step": 2804 + }, + { + "epoch": 7.42, + "learning_rate": 1.3069705093833781e-05, + "loss": 0.2183, + "step": 2805 + }, + { + "epoch": 7.42, + "learning_rate": 1.3056300268096516e-05, + "loss": 0.0007, + "step": 2806 + }, + { + "epoch": 7.43, + "learning_rate": 1.3042895442359248e-05, + "loss": 0.0005, + "step": 2807 + }, + { + "epoch": 7.43, + "learning_rate": 1.3029490616621983e-05, + "loss": 0.1291, + "step": 2808 + }, + { + "epoch": 7.43, + "learning_rate": 1.3016085790884718e-05, + "loss": 0.1037, + "step": 2809 + }, + { + "epoch": 7.43, + "learning_rate": 1.3002680965147454e-05, + "loss": 0.0147, + "step": 2810 + }, + { + "epoch": 7.44, + "learning_rate": 1.2989276139410189e-05, + "loss": 0.0006, + "step": 2811 + }, + { + "epoch": 7.44, + "learning_rate": 1.2975871313672922e-05, + "loss": 0.0148, + "step": 2812 + }, + { + "epoch": 7.44, + "learning_rate": 1.2962466487935657e-05, + "loss": 0.0129, + "step": 2813 + }, + { + "epoch": 7.44, + "learning_rate": 1.2949061662198392e-05, + "loss": 0.0276, + "step": 2814 + }, + { + "epoch": 7.45, + "learning_rate": 1.2935656836461127e-05, + "loss": 0.0007, + "step": 2815 + }, + { + "epoch": 7.45, + "learning_rate": 1.2922252010723863e-05, + "loss": 0.0006, + "step": 2816 + }, + { + "epoch": 7.45, + "learning_rate": 1.2908847184986598e-05, + "loss": 0.0002, + "step": 2817 + }, + { + "epoch": 7.46, + "learning_rate": 1.289544235924933e-05, + "loss": 0.1274, + "step": 2818 + }, + { + "epoch": 7.46, + "learning_rate": 1.2882037533512065e-05, + "loss": 0.0009, + "step": 2819 + }, + { + "epoch": 7.46, + "learning_rate": 1.28686327077748e-05, + "loss": 0.0007, + "step": 2820 + }, + { + "epoch": 7.46, + "learning_rate": 1.2855227882037535e-05, + "loss": 0.002, + "step": 2821 + }, + { + "epoch": 7.47, + "learning_rate": 1.284182305630027e-05, + "loss": 0.0004, + "step": 2822 + }, + { + "epoch": 7.47, + "learning_rate": 1.2828418230563002e-05, + "loss": 0.0017, + "step": 2823 + }, + { + "epoch": 7.47, + "learning_rate": 1.2815013404825737e-05, + "loss": 0.001, + "step": 2824 + }, + { + "epoch": 7.47, + "learning_rate": 1.2801608579088472e-05, + "loss": 0.0106, + "step": 2825 + }, + { + "epoch": 7.48, + "learning_rate": 1.2788203753351207e-05, + "loss": 0.1158, + "step": 2826 + }, + { + "epoch": 7.48, + "learning_rate": 1.2774798927613942e-05, + "loss": 0.0004, + "step": 2827 + }, + { + "epoch": 7.48, + "learning_rate": 1.2761394101876676e-05, + "loss": 0.3214, + "step": 2828 + }, + { + "epoch": 7.48, + "learning_rate": 1.274798927613941e-05, + "loss": 0.0003, + "step": 2829 + }, + { + "epoch": 7.49, + "learning_rate": 1.2734584450402146e-05, + "loss": 0.0417, + "step": 2830 + }, + { + "epoch": 7.49, + "learning_rate": 1.2721179624664881e-05, + "loss": 0.0002, + "step": 2831 + }, + { + "epoch": 7.49, + "learning_rate": 1.2707774798927616e-05, + "loss": 0.0004, + "step": 2832 + }, + { + "epoch": 7.49, + "learning_rate": 1.2694369973190348e-05, + "loss": 0.1166, + "step": 2833 + }, + { + "epoch": 7.5, + "learning_rate": 1.2680965147453083e-05, + "loss": 0.0008, + "step": 2834 + }, + { + "epoch": 7.5, + "learning_rate": 1.2667560321715818e-05, + "loss": 0.0005, + "step": 2835 + }, + { + "epoch": 7.5, + "learning_rate": 1.2654155495978553e-05, + "loss": 0.0191, + "step": 2836 + }, + { + "epoch": 7.51, + "learning_rate": 1.2640750670241289e-05, + "loss": 0.0642, + "step": 2837 + }, + { + "epoch": 7.51, + "learning_rate": 1.262734584450402e-05, + "loss": 0.0256, + "step": 2838 + }, + { + "epoch": 7.51, + "learning_rate": 1.2613941018766755e-05, + "loss": 0.0007, + "step": 2839 + }, + { + "epoch": 7.51, + "learning_rate": 1.260053619302949e-05, + "loss": 0.049, + "step": 2840 + }, + { + "epoch": 7.52, + "learning_rate": 1.2587131367292226e-05, + "loss": 0.0012, + "step": 2841 + }, + { + "epoch": 7.52, + "learning_rate": 1.257372654155496e-05, + "loss": 0.0006, + "step": 2842 + }, + { + "epoch": 7.52, + "learning_rate": 1.2560321715817694e-05, + "loss": 0.2299, + "step": 2843 + }, + { + "epoch": 7.52, + "learning_rate": 1.254691689008043e-05, + "loss": 0.0006, + "step": 2844 + }, + { + "epoch": 7.53, + "learning_rate": 1.2533512064343164e-05, + "loss": 0.0346, + "step": 2845 + }, + { + "epoch": 7.53, + "learning_rate": 1.25201072386059e-05, + "loss": 0.0021, + "step": 2846 + }, + { + "epoch": 7.53, + "learning_rate": 1.2506702412868635e-05, + "loss": 0.0003, + "step": 2847 + }, + { + "epoch": 7.53, + "learning_rate": 1.2493297587131368e-05, + "loss": 0.135, + "step": 2848 + }, + { + "epoch": 7.54, + "learning_rate": 1.2479892761394102e-05, + "loss": 0.0003, + "step": 2849 + }, + { + "epoch": 7.54, + "learning_rate": 1.2466487935656837e-05, + "loss": 0.0005, + "step": 2850 + }, + { + "epoch": 7.54, + "learning_rate": 1.2453083109919572e-05, + "loss": 0.0005, + "step": 2851 + }, + { + "epoch": 7.54, + "learning_rate": 1.2439678284182305e-05, + "loss": 0.0007, + "step": 2852 + }, + { + "epoch": 7.55, + "learning_rate": 1.242627345844504e-05, + "loss": 0.0004, + "step": 2853 + }, + { + "epoch": 7.55, + "learning_rate": 1.2412868632707776e-05, + "loss": 0.0003, + "step": 2854 + }, + { + "epoch": 7.55, + "learning_rate": 1.239946380697051e-05, + "loss": 0.0003, + "step": 2855 + }, + { + "epoch": 7.56, + "learning_rate": 1.2386058981233246e-05, + "loss": 0.0006, + "step": 2856 + }, + { + "epoch": 7.56, + "learning_rate": 1.237265415549598e-05, + "loss": 0.0171, + "step": 2857 + }, + { + "epoch": 7.56, + "learning_rate": 1.2359249329758714e-05, + "loss": 0.1066, + "step": 2858 + }, + { + "epoch": 7.56, + "learning_rate": 1.2345844504021448e-05, + "loss": 0.0003, + "step": 2859 + }, + { + "epoch": 7.57, + "learning_rate": 1.2332439678284183e-05, + "loss": 0.1106, + "step": 2860 + }, + { + "epoch": 7.57, + "learning_rate": 1.2319034852546918e-05, + "loss": 0.0004, + "step": 2861 + }, + { + "epoch": 7.57, + "learning_rate": 1.2305630026809652e-05, + "loss": 0.0012, + "step": 2862 + }, + { + "epoch": 7.57, + "learning_rate": 1.2292225201072387e-05, + "loss": 0.0004, + "step": 2863 + }, + { + "epoch": 7.58, + "learning_rate": 1.2278820375335122e-05, + "loss": 0.0007, + "step": 2864 + }, + { + "epoch": 7.58, + "learning_rate": 1.2265415549597855e-05, + "loss": 0.0104, + "step": 2865 + }, + { + "epoch": 7.58, + "learning_rate": 1.225201072386059e-05, + "loss": 0.0003, + "step": 2866 + }, + { + "epoch": 7.58, + "learning_rate": 1.2238605898123324e-05, + "loss": 0.3976, + "step": 2867 + }, + { + "epoch": 7.59, + "learning_rate": 1.2225201072386059e-05, + "loss": 0.0003, + "step": 2868 + }, + { + "epoch": 7.59, + "learning_rate": 1.2211796246648794e-05, + "loss": 0.4433, + "step": 2869 + }, + { + "epoch": 7.59, + "learning_rate": 1.219839142091153e-05, + "loss": 0.0005, + "step": 2870 + }, + { + "epoch": 7.6, + "learning_rate": 1.2184986595174264e-05, + "loss": 0.0733, + "step": 2871 + }, + { + "epoch": 7.6, + "learning_rate": 1.2171581769436998e-05, + "loss": 0.0008, + "step": 2872 + }, + { + "epoch": 7.6, + "learning_rate": 1.2158176943699733e-05, + "loss": 0.0003, + "step": 2873 + }, + { + "epoch": 7.6, + "learning_rate": 1.2144772117962468e-05, + "loss": 0.0253, + "step": 2874 + }, + { + "epoch": 7.61, + "learning_rate": 1.2131367292225201e-05, + "loss": 0.09, + "step": 2875 + }, + { + "epoch": 7.61, + "learning_rate": 1.2117962466487937e-05, + "loss": 0.1283, + "step": 2876 + }, + { + "epoch": 7.61, + "learning_rate": 1.210455764075067e-05, + "loss": 0.0866, + "step": 2877 + }, + { + "epoch": 7.61, + "learning_rate": 1.2091152815013405e-05, + "loss": 0.0005, + "step": 2878 + }, + { + "epoch": 7.62, + "learning_rate": 1.207774798927614e-05, + "loss": 0.051, + "step": 2879 + }, + { + "epoch": 7.62, + "learning_rate": 1.2064343163538874e-05, + "loss": 0.0055, + "step": 2880 + }, + { + "epoch": 7.62, + "learning_rate": 1.2050938337801609e-05, + "loss": 0.001, + "step": 2881 + }, + { + "epoch": 7.62, + "learning_rate": 1.2037533512064344e-05, + "loss": 0.0765, + "step": 2882 + }, + { + "epoch": 7.63, + "learning_rate": 1.2024128686327079e-05, + "loss": 0.0239, + "step": 2883 + }, + { + "epoch": 7.63, + "learning_rate": 1.2010723860589814e-05, + "loss": 0.0616, + "step": 2884 + }, + { + "epoch": 7.63, + "learning_rate": 1.1997319034852548e-05, + "loss": 0.0342, + "step": 2885 + }, + { + "epoch": 7.63, + "learning_rate": 1.1983914209115283e-05, + "loss": 0.0006, + "step": 2886 + }, + { + "epoch": 7.64, + "learning_rate": 1.1970509383378016e-05, + "loss": 0.091, + "step": 2887 + }, + { + "epoch": 7.64, + "learning_rate": 1.1957104557640751e-05, + "loss": 0.0004, + "step": 2888 + }, + { + "epoch": 7.64, + "learning_rate": 1.1943699731903486e-05, + "loss": 0.0257, + "step": 2889 + }, + { + "epoch": 7.65, + "learning_rate": 1.193029490616622e-05, + "loss": 0.0422, + "step": 2890 + }, + { + "epoch": 7.65, + "learning_rate": 1.1916890080428955e-05, + "loss": 0.1861, + "step": 2891 + }, + { + "epoch": 7.65, + "learning_rate": 1.1903485254691689e-05, + "loss": 0.0003, + "step": 2892 + }, + { + "epoch": 7.65, + "learning_rate": 1.1890080428954424e-05, + "loss": 0.0678, + "step": 2893 + }, + { + "epoch": 7.66, + "learning_rate": 1.1876675603217159e-05, + "loss": 0.0005, + "step": 2894 + }, + { + "epoch": 7.66, + "learning_rate": 1.1863270777479892e-05, + "loss": 0.0234, + "step": 2895 + }, + { + "epoch": 7.66, + "learning_rate": 1.1849865951742627e-05, + "loss": 0.0007, + "step": 2896 + }, + { + "epoch": 7.66, + "learning_rate": 1.1836461126005362e-05, + "loss": 0.0963, + "step": 2897 + }, + { + "epoch": 7.67, + "learning_rate": 1.1823056300268098e-05, + "loss": 0.0132, + "step": 2898 + }, + { + "epoch": 7.67, + "learning_rate": 1.1809651474530833e-05, + "loss": 0.0019, + "step": 2899 + }, + { + "epoch": 7.67, + "learning_rate": 1.1796246648793566e-05, + "loss": 0.0219, + "step": 2900 + }, + { + "epoch": 7.67, + "learning_rate": 1.1782841823056301e-05, + "loss": 0.0062, + "step": 2901 + }, + { + "epoch": 7.68, + "learning_rate": 1.1769436997319036e-05, + "loss": 0.0003, + "step": 2902 + }, + { + "epoch": 7.68, + "learning_rate": 1.175603217158177e-05, + "loss": 0.0009, + "step": 2903 + }, + { + "epoch": 7.68, + "learning_rate": 1.1742627345844505e-05, + "loss": 0.1446, + "step": 2904 + }, + { + "epoch": 7.69, + "learning_rate": 1.1729222520107238e-05, + "loss": 0.0103, + "step": 2905 + }, + { + "epoch": 7.69, + "learning_rate": 1.1715817694369974e-05, + "loss": 0.0004, + "step": 2906 + }, + { + "epoch": 7.69, + "learning_rate": 1.1702412868632709e-05, + "loss": 0.2502, + "step": 2907 + }, + { + "epoch": 7.69, + "learning_rate": 1.1689008042895442e-05, + "loss": 0.0005, + "step": 2908 + }, + { + "epoch": 7.7, + "learning_rate": 1.1675603217158177e-05, + "loss": 0.0001, + "step": 2909 + }, + { + "epoch": 7.7, + "learning_rate": 1.166219839142091e-05, + "loss": 0.0928, + "step": 2910 + }, + { + "epoch": 7.7, + "learning_rate": 1.1648793565683646e-05, + "loss": 0.0195, + "step": 2911 + }, + { + "epoch": 7.7, + "learning_rate": 1.1635388739946381e-05, + "loss": 0.0727, + "step": 2912 + }, + { + "epoch": 7.71, + "learning_rate": 1.1621983914209116e-05, + "loss": 0.0778, + "step": 2913 + }, + { + "epoch": 7.71, + "learning_rate": 1.1608579088471851e-05, + "loss": 0.1304, + "step": 2914 + }, + { + "epoch": 7.71, + "learning_rate": 1.1595174262734585e-05, + "loss": 0.0002, + "step": 2915 + }, + { + "epoch": 7.71, + "learning_rate": 1.158176943699732e-05, + "loss": 0.0003, + "step": 2916 + }, + { + "epoch": 7.72, + "learning_rate": 1.1568364611260055e-05, + "loss": 0.0137, + "step": 2917 + }, + { + "epoch": 7.72, + "learning_rate": 1.1554959785522788e-05, + "loss": 0.0003, + "step": 2918 + }, + { + "epoch": 7.72, + "learning_rate": 1.1541554959785523e-05, + "loss": 0.0018, + "step": 2919 + }, + { + "epoch": 7.72, + "learning_rate": 1.1528150134048257e-05, + "loss": 0.0057, + "step": 2920 + }, + { + "epoch": 7.73, + "learning_rate": 1.1514745308310992e-05, + "loss": 0.0003, + "step": 2921 + }, + { + "epoch": 7.73, + "learning_rate": 1.1501340482573727e-05, + "loss": 0.0015, + "step": 2922 + }, + { + "epoch": 7.73, + "learning_rate": 1.148793565683646e-05, + "loss": 0.0004, + "step": 2923 + }, + { + "epoch": 7.74, + "learning_rate": 1.1474530831099196e-05, + "loss": 0.0005, + "step": 2924 + }, + { + "epoch": 7.74, + "learning_rate": 1.1461126005361931e-05, + "loss": 0.0345, + "step": 2925 + }, + { + "epoch": 7.74, + "learning_rate": 1.1447721179624666e-05, + "loss": 0.0878, + "step": 2926 + }, + { + "epoch": 7.74, + "learning_rate": 1.1434316353887401e-05, + "loss": 0.0003, + "step": 2927 + }, + { + "epoch": 7.75, + "learning_rate": 1.1420911528150135e-05, + "loss": 0.0732, + "step": 2928 + }, + { + "epoch": 7.75, + "learning_rate": 1.140750670241287e-05, + "loss": 0.0005, + "step": 2929 + }, + { + "epoch": 7.75, + "learning_rate": 1.1394101876675605e-05, + "loss": 0.001, + "step": 2930 + }, + { + "epoch": 7.75, + "learning_rate": 1.1380697050938338e-05, + "loss": 0.0038, + "step": 2931 + }, + { + "epoch": 7.76, + "learning_rate": 1.1367292225201073e-05, + "loss": 0.0056, + "step": 2932 + }, + { + "epoch": 7.76, + "learning_rate": 1.1353887399463807e-05, + "loss": 0.1057, + "step": 2933 + }, + { + "epoch": 7.76, + "learning_rate": 1.1340482573726542e-05, + "loss": 0.0005, + "step": 2934 + }, + { + "epoch": 7.76, + "learning_rate": 1.1327077747989277e-05, + "loss": 0.0419, + "step": 2935 + }, + { + "epoch": 7.77, + "learning_rate": 1.131367292225201e-05, + "loss": 0.0304, + "step": 2936 + }, + { + "epoch": 7.77, + "learning_rate": 1.1300268096514746e-05, + "loss": 0.0002, + "step": 2937 + }, + { + "epoch": 7.77, + "learning_rate": 1.1286863270777479e-05, + "loss": 0.0332, + "step": 2938 + }, + { + "epoch": 7.78, + "learning_rate": 1.1273458445040214e-05, + "loss": 0.1015, + "step": 2939 + }, + { + "epoch": 7.78, + "learning_rate": 1.126005361930295e-05, + "loss": 0.0008, + "step": 2940 + }, + { + "epoch": 7.78, + "learning_rate": 1.1246648793565684e-05, + "loss": 0.0273, + "step": 2941 + }, + { + "epoch": 7.78, + "learning_rate": 1.123324396782842e-05, + "loss": 0.0022, + "step": 2942 + }, + { + "epoch": 7.79, + "learning_rate": 1.1219839142091153e-05, + "loss": 0.0009, + "step": 2943 + }, + { + "epoch": 7.79, + "learning_rate": 1.1206434316353888e-05, + "loss": 0.0003, + "step": 2944 + }, + { + "epoch": 7.79, + "learning_rate": 1.1193029490616623e-05, + "loss": 0.0057, + "step": 2945 + }, + { + "epoch": 7.79, + "learning_rate": 1.1179624664879357e-05, + "loss": 0.0014, + "step": 2946 + }, + { + "epoch": 7.8, + "learning_rate": 1.1166219839142092e-05, + "loss": 0.0009, + "step": 2947 + }, + { + "epoch": 7.8, + "learning_rate": 1.1152815013404825e-05, + "loss": 0.0019, + "step": 2948 + }, + { + "epoch": 7.8, + "learning_rate": 1.113941018766756e-05, + "loss": 0.0005, + "step": 2949 + }, + { + "epoch": 7.8, + "learning_rate": 1.1126005361930296e-05, + "loss": 0.0181, + "step": 2950 + }, + { + "epoch": 7.81, + "learning_rate": 1.1112600536193029e-05, + "loss": 0.001, + "step": 2951 + }, + { + "epoch": 7.81, + "learning_rate": 1.1099195710455764e-05, + "loss": 0.0331, + "step": 2952 + }, + { + "epoch": 7.81, + "learning_rate": 1.10857908847185e-05, + "loss": 0.0003, + "step": 2953 + }, + { + "epoch": 7.81, + "learning_rate": 1.1072386058981234e-05, + "loss": 0.0002, + "step": 2954 + }, + { + "epoch": 7.82, + "learning_rate": 1.105898123324397e-05, + "loss": 0.0305, + "step": 2955 + }, + { + "epoch": 7.82, + "learning_rate": 1.1045576407506703e-05, + "loss": 0.0023, + "step": 2956 + }, + { + "epoch": 7.82, + "learning_rate": 1.1032171581769438e-05, + "loss": 0.0359, + "step": 2957 + }, + { + "epoch": 7.83, + "learning_rate": 1.1018766756032173e-05, + "loss": 0.1075, + "step": 2958 + }, + { + "epoch": 7.83, + "learning_rate": 1.1005361930294907e-05, + "loss": 0.023, + "step": 2959 + }, + { + "epoch": 7.83, + "learning_rate": 1.0991957104557642e-05, + "loss": 0.1425, + "step": 2960 + }, + { + "epoch": 7.83, + "learning_rate": 1.0978552278820375e-05, + "loss": 0.4114, + "step": 2961 + }, + { + "epoch": 7.84, + "learning_rate": 1.096514745308311e-05, + "loss": 0.0003, + "step": 2962 + }, + { + "epoch": 7.84, + "learning_rate": 1.0951742627345846e-05, + "loss": 0.2824, + "step": 2963 + }, + { + "epoch": 7.84, + "learning_rate": 1.0938337801608579e-05, + "loss": 0.0002, + "step": 2964 + }, + { + "epoch": 7.84, + "learning_rate": 1.0924932975871314e-05, + "loss": 0.0004, + "step": 2965 + }, + { + "epoch": 7.85, + "learning_rate": 1.0911528150134048e-05, + "loss": 0.0002, + "step": 2966 + }, + { + "epoch": 7.85, + "learning_rate": 1.0898123324396783e-05, + "loss": 0.0003, + "step": 2967 + }, + { + "epoch": 7.85, + "learning_rate": 1.0884718498659518e-05, + "loss": 0.0003, + "step": 2968 + }, + { + "epoch": 7.85, + "learning_rate": 1.0871313672922253e-05, + "loss": 0.2122, + "step": 2969 + }, + { + "epoch": 7.86, + "learning_rate": 1.0857908847184988e-05, + "loss": 0.0002, + "step": 2970 + }, + { + "epoch": 7.86, + "learning_rate": 1.0844504021447721e-05, + "loss": 0.0003, + "step": 2971 + }, + { + "epoch": 7.86, + "learning_rate": 1.0831099195710457e-05, + "loss": 0.0002, + "step": 2972 + }, + { + "epoch": 7.87, + "learning_rate": 1.0817694369973192e-05, + "loss": 0.0002, + "step": 2973 + }, + { + "epoch": 7.87, + "learning_rate": 1.0804289544235925e-05, + "loss": 0.001, + "step": 2974 + }, + { + "epoch": 7.87, + "learning_rate": 1.079088471849866e-05, + "loss": 0.0002, + "step": 2975 + }, + { + "epoch": 7.87, + "learning_rate": 1.0777479892761394e-05, + "loss": 0.0004, + "step": 2976 + }, + { + "epoch": 7.88, + "learning_rate": 1.0764075067024129e-05, + "loss": 0.0003, + "step": 2977 + }, + { + "epoch": 7.88, + "learning_rate": 1.0750670241286864e-05, + "loss": 0.0003, + "step": 2978 + }, + { + "epoch": 7.88, + "learning_rate": 1.0737265415549597e-05, + "loss": 0.336, + "step": 2979 + }, + { + "epoch": 7.88, + "learning_rate": 1.0723860589812333e-05, + "loss": 0.0003, + "step": 2980 + }, + { + "epoch": 7.89, + "learning_rate": 1.0710455764075068e-05, + "loss": 0.0017, + "step": 2981 + }, + { + "epoch": 7.89, + "learning_rate": 1.0697050938337803e-05, + "loss": 0.1716, + "step": 2982 + }, + { + "epoch": 7.89, + "learning_rate": 1.0683646112600538e-05, + "loss": 0.0004, + "step": 2983 + }, + { + "epoch": 7.89, + "learning_rate": 1.0670241286863271e-05, + "loss": 0.0003, + "step": 2984 + }, + { + "epoch": 7.9, + "learning_rate": 1.0656836461126007e-05, + "loss": 0.1927, + "step": 2985 + }, + { + "epoch": 7.9, + "learning_rate": 1.064343163538874e-05, + "loss": 0.0003, + "step": 2986 + }, + { + "epoch": 7.9, + "learning_rate": 1.0630026809651475e-05, + "loss": 0.0002, + "step": 2987 + }, + { + "epoch": 7.9, + "learning_rate": 1.061662198391421e-05, + "loss": 0.2357, + "step": 2988 + }, + { + "epoch": 7.91, + "learning_rate": 1.0603217158176944e-05, + "loss": 0.464, + "step": 2989 + }, + { + "epoch": 7.91, + "learning_rate": 1.0589812332439679e-05, + "loss": 0.0015, + "step": 2990 + }, + { + "epoch": 7.91, + "learning_rate": 1.0576407506702414e-05, + "loss": 0.0792, + "step": 2991 + }, + { + "epoch": 7.92, + "learning_rate": 1.0563002680965147e-05, + "loss": 0.101, + "step": 2992 + }, + { + "epoch": 7.92, + "learning_rate": 1.0549597855227882e-05, + "loss": 0.0093, + "step": 2993 + }, + { + "epoch": 7.92, + "learning_rate": 1.0536193029490616e-05, + "loss": 0.0007, + "step": 2994 + }, + { + "epoch": 7.92, + "learning_rate": 1.0522788203753351e-05, + "loss": 0.0016, + "step": 2995 + }, + { + "epoch": 7.93, + "learning_rate": 1.0509383378016086e-05, + "loss": 0.0008, + "step": 2996 + }, + { + "epoch": 7.93, + "learning_rate": 1.0495978552278821e-05, + "loss": 0.0047, + "step": 2997 + }, + { + "epoch": 7.93, + "learning_rate": 1.0482573726541556e-05, + "loss": 0.0171, + "step": 2998 + }, + { + "epoch": 7.93, + "learning_rate": 1.046916890080429e-05, + "loss": 0.3023, + "step": 2999 + }, + { + "epoch": 7.94, + "learning_rate": 1.0455764075067025e-05, + "loss": 0.0011, + "step": 3000 + }, + { + "epoch": 7.94, + "learning_rate": 1.044235924932976e-05, + "loss": 0.0816, + "step": 3001 + }, + { + "epoch": 7.94, + "learning_rate": 1.0428954423592494e-05, + "loss": 0.0025, + "step": 3002 + }, + { + "epoch": 7.94, + "learning_rate": 1.0415549597855229e-05, + "loss": 0.0094, + "step": 3003 + }, + { + "epoch": 7.95, + "learning_rate": 1.0402144772117962e-05, + "loss": 0.0644, + "step": 3004 + }, + { + "epoch": 7.95, + "learning_rate": 1.0388739946380697e-05, + "loss": 0.3261, + "step": 3005 + }, + { + "epoch": 7.95, + "learning_rate": 1.0375335120643432e-05, + "loss": 0.1332, + "step": 3006 + }, + { + "epoch": 7.96, + "learning_rate": 1.0361930294906166e-05, + "loss": 0.0067, + "step": 3007 + }, + { + "epoch": 7.96, + "learning_rate": 1.0348525469168901e-05, + "loss": 0.0008, + "step": 3008 + }, + { + "epoch": 7.96, + "learning_rate": 1.0335120643431636e-05, + "loss": 0.174, + "step": 3009 + }, + { + "epoch": 7.96, + "learning_rate": 1.0321715817694371e-05, + "loss": 0.0005, + "step": 3010 + }, + { + "epoch": 7.97, + "learning_rate": 1.0308310991957106e-05, + "loss": 0.0505, + "step": 3011 + }, + { + "epoch": 7.97, + "learning_rate": 1.029490616621984e-05, + "loss": 0.0016, + "step": 3012 + }, + { + "epoch": 7.97, + "learning_rate": 1.0281501340482575e-05, + "loss": 0.1172, + "step": 3013 + }, + { + "epoch": 7.97, + "learning_rate": 1.0268096514745308e-05, + "loss": 0.0268, + "step": 3014 + }, + { + "epoch": 7.98, + "learning_rate": 1.0254691689008044e-05, + "loss": 0.0269, + "step": 3015 + }, + { + "epoch": 7.98, + "learning_rate": 1.0241286863270779e-05, + "loss": 0.0867, + "step": 3016 + }, + { + "epoch": 7.98, + "learning_rate": 1.0227882037533512e-05, + "loss": 0.1145, + "step": 3017 + }, + { + "epoch": 7.98, + "learning_rate": 1.0214477211796247e-05, + "loss": 0.0035, + "step": 3018 + }, + { + "epoch": 7.99, + "learning_rate": 1.0201072386058982e-05, + "loss": 0.0035, + "step": 3019 + }, + { + "epoch": 7.99, + "learning_rate": 1.0187667560321716e-05, + "loss": 0.0003, + "step": 3020 + }, + { + "epoch": 7.99, + "learning_rate": 1.0174262734584451e-05, + "loss": 0.14, + "step": 3021 + }, + { + "epoch": 7.99, + "learning_rate": 1.0160857908847184e-05, + "loss": 0.1619, + "step": 3022 + }, + { + "epoch": 8.0, + "learning_rate": 1.014745308310992e-05, + "loss": 0.0006, + "step": 3023 + }, + { + "epoch": 8.0, + "learning_rate": 1.0134048257372655e-05, + "loss": 0.0004, + "step": 3024 + }, + { + "epoch": 8.0, + "eval_f1": 0.7734138972809668, + "eval_loss": 1.2510614395141602, + "eval_runtime": 1.9043, + "eval_samples_per_second": 794.53, + "eval_steps_per_second": 49.888, + "step": 3024 + }, + { + "epoch": 8.0, + "learning_rate": 1.012064343163539e-05, + "loss": 0.0008, + "step": 3025 + }, + { + "epoch": 8.01, + "learning_rate": 1.0107238605898125e-05, + "loss": 0.0308, + "step": 3026 + }, + { + "epoch": 8.01, + "learning_rate": 1.0093833780160858e-05, + "loss": 0.1509, + "step": 3027 + }, + { + "epoch": 8.01, + "learning_rate": 1.0080428954423593e-05, + "loss": 0.0256, + "step": 3028 + }, + { + "epoch": 8.01, + "learning_rate": 1.0067024128686329e-05, + "loss": 0.0013, + "step": 3029 + }, + { + "epoch": 8.02, + "learning_rate": 1.0053619302949062e-05, + "loss": 0.0002, + "step": 3030 + }, + { + "epoch": 8.02, + "learning_rate": 1.0040214477211797e-05, + "loss": 0.0615, + "step": 3031 + }, + { + "epoch": 8.02, + "learning_rate": 1.002680965147453e-05, + "loss": 0.072, + "step": 3032 + }, + { + "epoch": 8.02, + "learning_rate": 1.0013404825737266e-05, + "loss": 0.0311, + "step": 3033 + }, + { + "epoch": 8.03, + "learning_rate": 1e-05, + "loss": 0.0115, + "step": 3034 + }, + { + "epoch": 8.03, + "learning_rate": 9.986595174262734e-06, + "loss": 0.0016, + "step": 3035 + }, + { + "epoch": 8.03, + "learning_rate": 9.97319034852547e-06, + "loss": 0.0006, + "step": 3036 + }, + { + "epoch": 8.03, + "learning_rate": 9.959785522788203e-06, + "loss": 0.0009, + "step": 3037 + }, + { + "epoch": 8.04, + "learning_rate": 9.946380697050938e-06, + "loss": 0.0002, + "step": 3038 + }, + { + "epoch": 8.04, + "learning_rate": 9.932975871313673e-06, + "loss": 0.0312, + "step": 3039 + }, + { + "epoch": 8.04, + "learning_rate": 9.919571045576408e-06, + "loss": 0.0007, + "step": 3040 + }, + { + "epoch": 8.04, + "learning_rate": 9.906166219839143e-06, + "loss": 0.0168, + "step": 3041 + }, + { + "epoch": 8.05, + "learning_rate": 9.892761394101877e-06, + "loss": 0.1056, + "step": 3042 + }, + { + "epoch": 8.05, + "learning_rate": 9.879356568364612e-06, + "loss": 0.0005, + "step": 3043 + }, + { + "epoch": 8.05, + "learning_rate": 9.865951742627347e-06, + "loss": 0.0003, + "step": 3044 + }, + { + "epoch": 8.06, + "learning_rate": 9.85254691689008e-06, + "loss": 0.0407, + "step": 3045 + }, + { + "epoch": 8.06, + "learning_rate": 9.839142091152816e-06, + "loss": 0.0013, + "step": 3046 + }, + { + "epoch": 8.06, + "learning_rate": 9.825737265415549e-06, + "loss": 0.033, + "step": 3047 + }, + { + "epoch": 8.06, + "learning_rate": 9.812332439678284e-06, + "loss": 0.0007, + "step": 3048 + }, + { + "epoch": 8.07, + "learning_rate": 9.79892761394102e-06, + "loss": 0.0356, + "step": 3049 + }, + { + "epoch": 8.07, + "learning_rate": 9.785522788203753e-06, + "loss": 0.0062, + "step": 3050 + }, + { + "epoch": 8.07, + "learning_rate": 9.772117962466488e-06, + "loss": 0.0005, + "step": 3051 + }, + { + "epoch": 8.07, + "learning_rate": 9.758713136729223e-06, + "loss": 0.0133, + "step": 3052 + }, + { + "epoch": 8.08, + "learning_rate": 9.745308310991958e-06, + "loss": 0.0022, + "step": 3053 + }, + { + "epoch": 8.08, + "learning_rate": 9.731903485254693e-06, + "loss": 0.0063, + "step": 3054 + }, + { + "epoch": 8.08, + "learning_rate": 9.718498659517427e-06, + "loss": 0.3304, + "step": 3055 + }, + { + "epoch": 8.08, + "learning_rate": 9.705093833780162e-06, + "loss": 0.0004, + "step": 3056 + }, + { + "epoch": 8.09, + "learning_rate": 9.691689008042897e-06, + "loss": 0.0335, + "step": 3057 + }, + { + "epoch": 8.09, + "learning_rate": 9.67828418230563e-06, + "loss": 0.1251, + "step": 3058 + }, + { + "epoch": 8.09, + "learning_rate": 9.664879356568366e-06, + "loss": 0.0011, + "step": 3059 + }, + { + "epoch": 8.1, + "learning_rate": 9.651474530831099e-06, + "loss": 0.0003, + "step": 3060 + }, + { + "epoch": 8.1, + "learning_rate": 9.638069705093834e-06, + "loss": 0.2906, + "step": 3061 + }, + { + "epoch": 8.1, + "learning_rate": 9.62466487935657e-06, + "loss": 0.0009, + "step": 3062 + }, + { + "epoch": 8.1, + "learning_rate": 9.611260053619303e-06, + "loss": 0.0005, + "step": 3063 + }, + { + "epoch": 8.11, + "learning_rate": 9.597855227882038e-06, + "loss": 0.0107, + "step": 3064 + }, + { + "epoch": 8.11, + "learning_rate": 9.584450402144771e-06, + "loss": 0.0594, + "step": 3065 + }, + { + "epoch": 8.11, + "learning_rate": 9.571045576407506e-06, + "loss": 0.0927, + "step": 3066 + }, + { + "epoch": 8.11, + "learning_rate": 9.557640750670241e-06, + "loss": 0.1164, + "step": 3067 + }, + { + "epoch": 8.12, + "learning_rate": 9.544235924932977e-06, + "loss": 0.0002, + "step": 3068 + }, + { + "epoch": 8.12, + "learning_rate": 9.530831099195712e-06, + "loss": 0.0004, + "step": 3069 + }, + { + "epoch": 8.12, + "learning_rate": 9.517426273458445e-06, + "loss": 0.0004, + "step": 3070 + }, + { + "epoch": 8.12, + "learning_rate": 9.50402144772118e-06, + "loss": 0.0128, + "step": 3071 + }, + { + "epoch": 8.13, + "learning_rate": 9.490616621983915e-06, + "loss": 0.0004, + "step": 3072 + }, + { + "epoch": 8.13, + "learning_rate": 9.477211796246649e-06, + "loss": 0.0003, + "step": 3073 + }, + { + "epoch": 8.13, + "learning_rate": 9.463806970509384e-06, + "loss": 0.0311, + "step": 3074 + }, + { + "epoch": 8.13, + "learning_rate": 9.450402144772117e-06, + "loss": 0.0204, + "step": 3075 + }, + { + "epoch": 8.14, + "learning_rate": 9.436997319034853e-06, + "loss": 0.0026, + "step": 3076 + }, + { + "epoch": 8.14, + "learning_rate": 9.423592493297588e-06, + "loss": 0.0008, + "step": 3077 + }, + { + "epoch": 8.14, + "learning_rate": 9.410187667560321e-06, + "loss": 0.1434, + "step": 3078 + }, + { + "epoch": 8.15, + "learning_rate": 9.396782841823056e-06, + "loss": 0.0005, + "step": 3079 + }, + { + "epoch": 8.15, + "learning_rate": 9.383378016085791e-06, + "loss": 0.0003, + "step": 3080 + }, + { + "epoch": 8.15, + "learning_rate": 9.369973190348527e-06, + "loss": 0.0002, + "step": 3081 + }, + { + "epoch": 8.15, + "learning_rate": 9.356568364611262e-06, + "loss": 0.0003, + "step": 3082 + }, + { + "epoch": 8.16, + "learning_rate": 9.343163538873995e-06, + "loss": 0.0476, + "step": 3083 + }, + { + "epoch": 8.16, + "learning_rate": 9.32975871313673e-06, + "loss": 0.0002, + "step": 3084 + }, + { + "epoch": 8.16, + "learning_rate": 9.316353887399465e-06, + "loss": 0.0004, + "step": 3085 + }, + { + "epoch": 8.16, + "learning_rate": 9.302949061662199e-06, + "loss": 0.0004, + "step": 3086 + }, + { + "epoch": 8.17, + "learning_rate": 9.289544235924934e-06, + "loss": 0.195, + "step": 3087 + }, + { + "epoch": 8.17, + "learning_rate": 9.276139410187667e-06, + "loss": 0.0258, + "step": 3088 + }, + { + "epoch": 8.17, + "learning_rate": 9.262734584450403e-06, + "loss": 0.0003, + "step": 3089 + }, + { + "epoch": 8.17, + "learning_rate": 9.249329758713138e-06, + "loss": 0.0582, + "step": 3090 + }, + { + "epoch": 8.18, + "learning_rate": 9.235924932975871e-06, + "loss": 0.0192, + "step": 3091 + }, + { + "epoch": 8.18, + "learning_rate": 9.222520107238606e-06, + "loss": 0.2512, + "step": 3092 + }, + { + "epoch": 8.18, + "learning_rate": 9.20911528150134e-06, + "loss": 0.0361, + "step": 3093 + }, + { + "epoch": 8.19, + "learning_rate": 9.195710455764075e-06, + "loss": 0.0003, + "step": 3094 + }, + { + "epoch": 8.19, + "learning_rate": 9.18230563002681e-06, + "loss": 0.0004, + "step": 3095 + }, + { + "epoch": 8.19, + "learning_rate": 9.168900804289545e-06, + "loss": 0.0002, + "step": 3096 + }, + { + "epoch": 8.19, + "learning_rate": 9.15549597855228e-06, + "loss": 0.0005, + "step": 3097 + }, + { + "epoch": 8.2, + "learning_rate": 9.142091152815014e-06, + "loss": 0.0427, + "step": 3098 + }, + { + "epoch": 8.2, + "learning_rate": 9.128686327077749e-06, + "loss": 0.0295, + "step": 3099 + }, + { + "epoch": 8.2, + "learning_rate": 9.115281501340484e-06, + "loss": 0.0144, + "step": 3100 + }, + { + "epoch": 8.2, + "learning_rate": 9.101876675603217e-06, + "loss": 0.001, + "step": 3101 + }, + { + "epoch": 8.21, + "learning_rate": 9.088471849865952e-06, + "loss": 0.0004, + "step": 3102 + }, + { + "epoch": 8.21, + "learning_rate": 9.075067024128686e-06, + "loss": 0.0005, + "step": 3103 + }, + { + "epoch": 8.21, + "learning_rate": 9.061662198391421e-06, + "loss": 0.1232, + "step": 3104 + }, + { + "epoch": 8.21, + "learning_rate": 9.048257372654156e-06, + "loss": 0.0004, + "step": 3105 + }, + { + "epoch": 8.22, + "learning_rate": 9.03485254691689e-06, + "loss": 0.004, + "step": 3106 + }, + { + "epoch": 8.22, + "learning_rate": 9.021447721179625e-06, + "loss": 0.0002, + "step": 3107 + }, + { + "epoch": 8.22, + "learning_rate": 9.00804289544236e-06, + "loss": 0.0067, + "step": 3108 + }, + { + "epoch": 8.22, + "learning_rate": 8.994638069705095e-06, + "loss": 0.0003, + "step": 3109 + }, + { + "epoch": 8.23, + "learning_rate": 8.98123324396783e-06, + "loss": 0.0344, + "step": 3110 + }, + { + "epoch": 8.23, + "learning_rate": 8.967828418230564e-06, + "loss": 0.0005, + "step": 3111 + }, + { + "epoch": 8.23, + "learning_rate": 8.954423592493299e-06, + "loss": 0.0029, + "step": 3112 + }, + { + "epoch": 8.24, + "learning_rate": 8.941018766756034e-06, + "loss": 0.0002, + "step": 3113 + }, + { + "epoch": 8.24, + "learning_rate": 8.927613941018767e-06, + "loss": 0.0003, + "step": 3114 + }, + { + "epoch": 8.24, + "learning_rate": 8.914209115281502e-06, + "loss": 0.0002, + "step": 3115 + }, + { + "epoch": 8.24, + "learning_rate": 8.900804289544236e-06, + "loss": 0.0197, + "step": 3116 + }, + { + "epoch": 8.25, + "learning_rate": 8.887399463806971e-06, + "loss": 0.0002, + "step": 3117 + }, + { + "epoch": 8.25, + "learning_rate": 8.873994638069706e-06, + "loss": 0.0003, + "step": 3118 + }, + { + "epoch": 8.25, + "learning_rate": 8.86058981233244e-06, + "loss": 0.097, + "step": 3119 + }, + { + "epoch": 8.25, + "learning_rate": 8.847184986595175e-06, + "loss": 0.0014, + "step": 3120 + }, + { + "epoch": 8.26, + "learning_rate": 8.833780160857908e-06, + "loss": 0.0004, + "step": 3121 + }, + { + "epoch": 8.26, + "learning_rate": 8.820375335120643e-06, + "loss": 0.0005, + "step": 3122 + }, + { + "epoch": 8.26, + "learning_rate": 8.806970509383378e-06, + "loss": 0.0814, + "step": 3123 + }, + { + "epoch": 8.26, + "learning_rate": 8.793565683646113e-06, + "loss": 0.0024, + "step": 3124 + }, + { + "epoch": 8.27, + "learning_rate": 8.780160857908849e-06, + "loss": 0.0003, + "step": 3125 + }, + { + "epoch": 8.27, + "learning_rate": 8.766756032171582e-06, + "loss": 0.0001, + "step": 3126 + }, + { + "epoch": 8.27, + "learning_rate": 8.753351206434317e-06, + "loss": 0.0003, + "step": 3127 + }, + { + "epoch": 8.28, + "learning_rate": 8.739946380697052e-06, + "loss": 0.3459, + "step": 3128 + }, + { + "epoch": 8.28, + "learning_rate": 8.726541554959786e-06, + "loss": 0.0639, + "step": 3129 + }, + { + "epoch": 8.28, + "learning_rate": 8.71313672922252e-06, + "loss": 0.001, + "step": 3130 + }, + { + "epoch": 8.28, + "learning_rate": 8.699731903485254e-06, + "loss": 0.0073, + "step": 3131 + }, + { + "epoch": 8.29, + "learning_rate": 8.68632707774799e-06, + "loss": 0.0002, + "step": 3132 + }, + { + "epoch": 8.29, + "learning_rate": 8.672922252010725e-06, + "loss": 0.0008, + "step": 3133 + }, + { + "epoch": 8.29, + "learning_rate": 8.659517426273458e-06, + "loss": 0.0001, + "step": 3134 + }, + { + "epoch": 8.29, + "learning_rate": 8.646112600536193e-06, + "loss": 0.0002, + "step": 3135 + }, + { + "epoch": 8.3, + "learning_rate": 8.632707774798928e-06, + "loss": 0.0028, + "step": 3136 + }, + { + "epoch": 8.3, + "learning_rate": 8.619302949061663e-06, + "loss": 0.02, + "step": 3137 + }, + { + "epoch": 8.3, + "learning_rate": 8.605898123324398e-06, + "loss": 0.046, + "step": 3138 + }, + { + "epoch": 8.3, + "learning_rate": 8.592493297587132e-06, + "loss": 0.0002, + "step": 3139 + }, + { + "epoch": 8.31, + "learning_rate": 8.579088471849867e-06, + "loss": 0.0002, + "step": 3140 + }, + { + "epoch": 8.31, + "learning_rate": 8.5656836461126e-06, + "loss": 0.0695, + "step": 3141 + }, + { + "epoch": 8.31, + "learning_rate": 8.552278820375336e-06, + "loss": 0.1764, + "step": 3142 + }, + { + "epoch": 8.31, + "learning_rate": 8.53887399463807e-06, + "loss": 0.0002, + "step": 3143 + }, + { + "epoch": 8.32, + "learning_rate": 8.525469168900804e-06, + "loss": 0.0004, + "step": 3144 + }, + { + "epoch": 8.32, + "learning_rate": 8.51206434316354e-06, + "loss": 0.0207, + "step": 3145 + }, + { + "epoch": 8.32, + "learning_rate": 8.498659517426274e-06, + "loss": 0.0003, + "step": 3146 + }, + { + "epoch": 8.33, + "learning_rate": 8.485254691689008e-06, + "loss": 0.1444, + "step": 3147 + }, + { + "epoch": 8.33, + "learning_rate": 8.471849865951743e-06, + "loss": 0.0006, + "step": 3148 + }, + { + "epoch": 8.33, + "learning_rate": 8.458445040214476e-06, + "loss": 0.0002, + "step": 3149 + }, + { + "epoch": 8.33, + "learning_rate": 8.445040214477212e-06, + "loss": 0.0003, + "step": 3150 + }, + { + "epoch": 8.34, + "learning_rate": 8.431635388739947e-06, + "loss": 0.0033, + "step": 3151 + }, + { + "epoch": 8.34, + "learning_rate": 8.418230563002682e-06, + "loss": 0.0001, + "step": 3152 + }, + { + "epoch": 8.34, + "learning_rate": 8.404825737265417e-06, + "loss": 0.0002, + "step": 3153 + }, + { + "epoch": 8.34, + "learning_rate": 8.39142091152815e-06, + "loss": 0.0003, + "step": 3154 + }, + { + "epoch": 8.35, + "learning_rate": 8.378016085790886e-06, + "loss": 0.0003, + "step": 3155 + }, + { + "epoch": 8.35, + "learning_rate": 8.36461126005362e-06, + "loss": 0.0002, + "step": 3156 + }, + { + "epoch": 8.35, + "learning_rate": 8.351206434316354e-06, + "loss": 0.0003, + "step": 3157 + }, + { + "epoch": 8.35, + "learning_rate": 8.33780160857909e-06, + "loss": 0.0022, + "step": 3158 + }, + { + "epoch": 8.36, + "learning_rate": 8.324396782841823e-06, + "loss": 0.0094, + "step": 3159 + }, + { + "epoch": 8.36, + "learning_rate": 8.310991957104558e-06, + "loss": 0.039, + "step": 3160 + }, + { + "epoch": 8.36, + "learning_rate": 8.297587131367293e-06, + "loss": 0.0623, + "step": 3161 + }, + { + "epoch": 8.37, + "learning_rate": 8.284182305630026e-06, + "loss": 0.0269, + "step": 3162 + }, + { + "epoch": 8.37, + "learning_rate": 8.270777479892762e-06, + "loss": 0.2292, + "step": 3163 + }, + { + "epoch": 8.37, + "learning_rate": 8.257372654155495e-06, + "loss": 0.0814, + "step": 3164 + }, + { + "epoch": 8.37, + "learning_rate": 8.24396782841823e-06, + "loss": 0.0002, + "step": 3165 + }, + { + "epoch": 8.38, + "learning_rate": 8.230563002680965e-06, + "loss": 0.0006, + "step": 3166 + }, + { + "epoch": 8.38, + "learning_rate": 8.2171581769437e-06, + "loss": 0.0405, + "step": 3167 + }, + { + "epoch": 8.38, + "learning_rate": 8.203753351206435e-06, + "loss": 0.3745, + "step": 3168 + }, + { + "epoch": 8.38, + "learning_rate": 8.190348525469169e-06, + "loss": 0.0002, + "step": 3169 + }, + { + "epoch": 8.39, + "learning_rate": 8.176943699731904e-06, + "loss": 0.0007, + "step": 3170 + }, + { + "epoch": 8.39, + "learning_rate": 8.16353887399464e-06, + "loss": 0.0054, + "step": 3171 + }, + { + "epoch": 8.39, + "learning_rate": 8.150134048257373e-06, + "loss": 0.0027, + "step": 3172 + }, + { + "epoch": 8.39, + "learning_rate": 8.136729222520108e-06, + "loss": 0.0823, + "step": 3173 + }, + { + "epoch": 8.4, + "learning_rate": 8.123324396782843e-06, + "loss": 0.2821, + "step": 3174 + }, + { + "epoch": 8.4, + "learning_rate": 8.109919571045576e-06, + "loss": 0.2796, + "step": 3175 + }, + { + "epoch": 8.4, + "learning_rate": 8.096514745308311e-06, + "loss": 0.0004, + "step": 3176 + }, + { + "epoch": 8.4, + "learning_rate": 8.083109919571045e-06, + "loss": 0.0019, + "step": 3177 + }, + { + "epoch": 8.41, + "learning_rate": 8.06970509383378e-06, + "loss": 0.0003, + "step": 3178 + }, + { + "epoch": 8.41, + "learning_rate": 8.056300268096515e-06, + "loss": 0.0331, + "step": 3179 + }, + { + "epoch": 8.41, + "learning_rate": 8.04289544235925e-06, + "loss": 0.0002, + "step": 3180 + }, + { + "epoch": 8.42, + "learning_rate": 8.029490616621985e-06, + "loss": 0.002, + "step": 3181 + }, + { + "epoch": 8.42, + "learning_rate": 8.016085790884719e-06, + "loss": 0.0055, + "step": 3182 + }, + { + "epoch": 8.42, + "learning_rate": 8.002680965147454e-06, + "loss": 0.0002, + "step": 3183 + }, + { + "epoch": 8.42, + "learning_rate": 7.989276139410189e-06, + "loss": 0.0252, + "step": 3184 + }, + { + "epoch": 8.43, + "learning_rate": 7.975871313672923e-06, + "loss": 0.0005, + "step": 3185 + }, + { + "epoch": 8.43, + "learning_rate": 7.962466487935658e-06, + "loss": 0.0309, + "step": 3186 + }, + { + "epoch": 8.43, + "learning_rate": 7.949061662198391e-06, + "loss": 0.4315, + "step": 3187 + }, + { + "epoch": 8.43, + "learning_rate": 7.935656836461126e-06, + "loss": 0.0018, + "step": 3188 + }, + { + "epoch": 8.44, + "learning_rate": 7.922252010723861e-06, + "loss": 0.0973, + "step": 3189 + }, + { + "epoch": 8.44, + "learning_rate": 7.908847184986595e-06, + "loss": 0.0002, + "step": 3190 + }, + { + "epoch": 8.44, + "learning_rate": 7.89544235924933e-06, + "loss": 0.0123, + "step": 3191 + }, + { + "epoch": 8.44, + "learning_rate": 7.882037533512063e-06, + "loss": 0.0005, + "step": 3192 + }, + { + "epoch": 8.45, + "learning_rate": 7.868632707774798e-06, + "loss": 0.0002, + "step": 3193 + }, + { + "epoch": 8.45, + "learning_rate": 7.855227882037534e-06, + "loss": 0.002, + "step": 3194 + }, + { + "epoch": 8.45, + "learning_rate": 7.841823056300269e-06, + "loss": 0.0002, + "step": 3195 + }, + { + "epoch": 8.46, + "learning_rate": 7.828418230563004e-06, + "loss": 0.0006, + "step": 3196 + }, + { + "epoch": 8.46, + "learning_rate": 7.815013404825737e-06, + "loss": 0.0669, + "step": 3197 + }, + { + "epoch": 8.46, + "learning_rate": 7.801608579088472e-06, + "loss": 0.0002, + "step": 3198 + }, + { + "epoch": 8.46, + "learning_rate": 7.788203753351208e-06, + "loss": 0.0052, + "step": 3199 + }, + { + "epoch": 8.47, + "learning_rate": 7.774798927613941e-06, + "loss": 0.1126, + "step": 3200 + }, + { + "epoch": 8.47, + "learning_rate": 7.761394101876676e-06, + "loss": 0.0362, + "step": 3201 + }, + { + "epoch": 8.47, + "learning_rate": 7.74798927613941e-06, + "loss": 0.0002, + "step": 3202 + }, + { + "epoch": 8.47, + "learning_rate": 7.734584450402145e-06, + "loss": 0.1147, + "step": 3203 + }, + { + "epoch": 8.48, + "learning_rate": 7.72117962466488e-06, + "loss": 0.0002, + "step": 3204 + }, + { + "epoch": 8.48, + "learning_rate": 7.707774798927613e-06, + "loss": 0.323, + "step": 3205 + }, + { + "epoch": 8.48, + "learning_rate": 7.694369973190348e-06, + "loss": 0.0019, + "step": 3206 + }, + { + "epoch": 8.48, + "learning_rate": 7.680965147453084e-06, + "loss": 0.0002, + "step": 3207 + }, + { + "epoch": 8.49, + "learning_rate": 7.667560321715819e-06, + "loss": 0.0003, + "step": 3208 + }, + { + "epoch": 8.49, + "learning_rate": 7.654155495978554e-06, + "loss": 0.0313, + "step": 3209 + }, + { + "epoch": 8.49, + "learning_rate": 7.640750670241287e-06, + "loss": 0.0002, + "step": 3210 + }, + { + "epoch": 8.49, + "learning_rate": 7.6273458445040215e-06, + "loss": 0.0011, + "step": 3211 + }, + { + "epoch": 8.5, + "learning_rate": 7.613941018766757e-06, + "loss": 0.0007, + "step": 3212 + }, + { + "epoch": 8.5, + "learning_rate": 7.600536193029491e-06, + "loss": 0.0003, + "step": 3213 + }, + { + "epoch": 8.5, + "learning_rate": 7.587131367292226e-06, + "loss": 0.0002, + "step": 3214 + }, + { + "epoch": 8.51, + "learning_rate": 7.5737265415549595e-06, + "loss": 0.0002, + "step": 3215 + }, + { + "epoch": 8.51, + "learning_rate": 7.560321715817695e-06, + "loss": 0.0002, + "step": 3216 + }, + { + "epoch": 8.51, + "learning_rate": 7.54691689008043e-06, + "loss": 0.029, + "step": 3217 + }, + { + "epoch": 8.51, + "learning_rate": 7.533512064343164e-06, + "loss": 0.0009, + "step": 3218 + }, + { + "epoch": 8.52, + "learning_rate": 7.520107238605899e-06, + "loss": 0.3479, + "step": 3219 + }, + { + "epoch": 8.52, + "learning_rate": 7.506702412868633e-06, + "loss": 0.0002, + "step": 3220 + }, + { + "epoch": 8.52, + "learning_rate": 7.493297587131368e-06, + "loss": 0.0013, + "step": 3221 + }, + { + "epoch": 8.52, + "learning_rate": 7.479892761394103e-06, + "loss": 0.0263, + "step": 3222 + }, + { + "epoch": 8.53, + "learning_rate": 7.466487935656836e-06, + "loss": 0.0676, + "step": 3223 + }, + { + "epoch": 8.53, + "learning_rate": 7.4530831099195715e-06, + "loss": 0.0002, + "step": 3224 + }, + { + "epoch": 8.53, + "learning_rate": 7.439678284182306e-06, + "loss": 0.0002, + "step": 3225 + }, + { + "epoch": 8.53, + "learning_rate": 7.426273458445041e-06, + "loss": 0.0007, + "step": 3226 + }, + { + "epoch": 8.54, + "learning_rate": 7.412868632707776e-06, + "loss": 0.1148, + "step": 3227 + }, + { + "epoch": 8.54, + "learning_rate": 7.3994638069705094e-06, + "loss": 0.0002, + "step": 3228 + }, + { + "epoch": 8.54, + "learning_rate": 7.3860589812332446e-06, + "loss": 0.0929, + "step": 3229 + }, + { + "epoch": 8.54, + "learning_rate": 7.372654155495978e-06, + "loss": 0.0002, + "step": 3230 + }, + { + "epoch": 8.55, + "learning_rate": 7.359249329758713e-06, + "loss": 0.0282, + "step": 3231 + }, + { + "epoch": 8.55, + "learning_rate": 7.345844504021448e-06, + "loss": 0.1421, + "step": 3232 + }, + { + "epoch": 8.55, + "learning_rate": 7.3324396782841825e-06, + "loss": 0.0002, + "step": 3233 + }, + { + "epoch": 8.56, + "learning_rate": 7.319034852546918e-06, + "loss": 0.0175, + "step": 3234 + }, + { + "epoch": 8.56, + "learning_rate": 7.305630026809651e-06, + "loss": 0.0763, + "step": 3235 + }, + { + "epoch": 8.56, + "learning_rate": 7.292225201072386e-06, + "loss": 0.0003, + "step": 3236 + }, + { + "epoch": 8.56, + "learning_rate": 7.278820375335121e-06, + "loss": 0.0522, + "step": 3237 + }, + { + "epoch": 8.57, + "learning_rate": 7.265415549597855e-06, + "loss": 0.0264, + "step": 3238 + }, + { + "epoch": 8.57, + "learning_rate": 7.25201072386059e-06, + "loss": 0.0007, + "step": 3239 + }, + { + "epoch": 8.57, + "learning_rate": 7.238605898123325e-06, + "loss": 0.0002, + "step": 3240 + }, + { + "epoch": 8.57, + "learning_rate": 7.225201072386059e-06, + "loss": 0.0433, + "step": 3241 + }, + { + "epoch": 8.58, + "learning_rate": 7.2117962466487945e-06, + "loss": 0.0897, + "step": 3242 + }, + { + "epoch": 8.58, + "learning_rate": 7.198391420911528e-06, + "loss": 0.0601, + "step": 3243 + }, + { + "epoch": 8.58, + "learning_rate": 7.184986595174263e-06, + "loss": 0.0084, + "step": 3244 + }, + { + "epoch": 8.58, + "learning_rate": 7.171581769436998e-06, + "loss": 0.0224, + "step": 3245 + }, + { + "epoch": 8.59, + "learning_rate": 7.158176943699732e-06, + "loss": 0.0022, + "step": 3246 + }, + { + "epoch": 8.59, + "learning_rate": 7.144772117962467e-06, + "loss": 0.0019, + "step": 3247 + }, + { + "epoch": 8.59, + "learning_rate": 7.131367292225201e-06, + "loss": 0.0003, + "step": 3248 + }, + { + "epoch": 8.6, + "learning_rate": 7.117962466487936e-06, + "loss": 0.0002, + "step": 3249 + }, + { + "epoch": 8.6, + "learning_rate": 7.104557640750671e-06, + "loss": 0.0003, + "step": 3250 + }, + { + "epoch": 8.6, + "learning_rate": 7.091152815013405e-06, + "loss": 0.1219, + "step": 3251 + }, + { + "epoch": 8.6, + "learning_rate": 7.07774798927614e-06, + "loss": 0.0246, + "step": 3252 + }, + { + "epoch": 8.61, + "learning_rate": 7.064343163538874e-06, + "loss": 0.0005, + "step": 3253 + }, + { + "epoch": 8.61, + "learning_rate": 7.050938337801609e-06, + "loss": 0.0004, + "step": 3254 + }, + { + "epoch": 8.61, + "learning_rate": 7.037533512064344e-06, + "loss": 0.0002, + "step": 3255 + }, + { + "epoch": 8.61, + "learning_rate": 7.024128686327078e-06, + "loss": 0.0722, + "step": 3256 + }, + { + "epoch": 8.62, + "learning_rate": 7.010723860589813e-06, + "loss": 0.0006, + "step": 3257 + }, + { + "epoch": 8.62, + "learning_rate": 6.997319034852546e-06, + "loss": 0.0775, + "step": 3258 + }, + { + "epoch": 8.62, + "learning_rate": 6.9839142091152815e-06, + "loss": 0.1329, + "step": 3259 + }, + { + "epoch": 8.62, + "learning_rate": 6.970509383378017e-06, + "loss": 0.0318, + "step": 3260 + }, + { + "epoch": 8.63, + "learning_rate": 6.957104557640751e-06, + "loss": 0.0026, + "step": 3261 + }, + { + "epoch": 8.63, + "learning_rate": 6.943699731903486e-06, + "loss": 0.0119, + "step": 3262 + }, + { + "epoch": 8.63, + "learning_rate": 6.9302949061662195e-06, + "loss": 0.0194, + "step": 3263 + }, + { + "epoch": 8.63, + "learning_rate": 6.916890080428955e-06, + "loss": 0.1102, + "step": 3264 + }, + { + "epoch": 8.64, + "learning_rate": 6.90348525469169e-06, + "loss": 0.0002, + "step": 3265 + }, + { + "epoch": 8.64, + "learning_rate": 6.890080428954423e-06, + "loss": 0.0322, + "step": 3266 + }, + { + "epoch": 8.64, + "learning_rate": 6.876675603217158e-06, + "loss": 0.0083, + "step": 3267 + }, + { + "epoch": 8.65, + "learning_rate": 6.8632707774798935e-06, + "loss": 0.0947, + "step": 3268 + }, + { + "epoch": 8.65, + "learning_rate": 6.849865951742628e-06, + "loss": 0.0002, + "step": 3269 + }, + { + "epoch": 8.65, + "learning_rate": 6.836461126005363e-06, + "loss": 0.0039, + "step": 3270 + }, + { + "epoch": 8.65, + "learning_rate": 6.823056300268096e-06, + "loss": 0.106, + "step": 3271 + }, + { + "epoch": 8.66, + "learning_rate": 6.8096514745308315e-06, + "loss": 0.0107, + "step": 3272 + }, + { + "epoch": 8.66, + "learning_rate": 6.796246648793567e-06, + "loss": 0.0005, + "step": 3273 + }, + { + "epoch": 8.66, + "learning_rate": 6.7828418230563e-06, + "loss": 0.0066, + "step": 3274 + }, + { + "epoch": 8.66, + "learning_rate": 6.769436997319035e-06, + "loss": 0.0003, + "step": 3275 + }, + { + "epoch": 8.67, + "learning_rate": 6.7560321715817694e-06, + "loss": 0.0002, + "step": 3276 + }, + { + "epoch": 8.67, + "learning_rate": 6.742627345844505e-06, + "loss": 0.0008, + "step": 3277 + }, + { + "epoch": 8.67, + "learning_rate": 6.72922252010724e-06, + "loss": 0.0002, + "step": 3278 + }, + { + "epoch": 8.67, + "learning_rate": 6.715817694369973e-06, + "loss": 0.0002, + "step": 3279 + }, + { + "epoch": 8.68, + "learning_rate": 6.702412868632708e-06, + "loss": 0.0472, + "step": 3280 + }, + { + "epoch": 8.68, + "learning_rate": 6.6890080428954426e-06, + "loss": 0.0003, + "step": 3281 + }, + { + "epoch": 8.68, + "learning_rate": 6.675603217158178e-06, + "loss": 0.196, + "step": 3282 + }, + { + "epoch": 8.69, + "learning_rate": 6.662198391420913e-06, + "loss": 0.0351, + "step": 3283 + }, + { + "epoch": 8.69, + "learning_rate": 6.648793565683646e-06, + "loss": 0.0002, + "step": 3284 + }, + { + "epoch": 8.69, + "learning_rate": 6.635388739946381e-06, + "loss": 0.1151, + "step": 3285 + }, + { + "epoch": 8.69, + "learning_rate": 6.621983914209115e-06, + "loss": 0.0057, + "step": 3286 + }, + { + "epoch": 8.7, + "learning_rate": 6.60857908847185e-06, + "loss": 0.0002, + "step": 3287 + }, + { + "epoch": 8.7, + "learning_rate": 6.595174262734585e-06, + "loss": 0.0002, + "step": 3288 + }, + { + "epoch": 8.7, + "learning_rate": 6.581769436997319e-06, + "loss": 0.0774, + "step": 3289 + }, + { + "epoch": 8.7, + "learning_rate": 6.5683646112600545e-06, + "loss": 0.0004, + "step": 3290 + }, + { + "epoch": 8.71, + "learning_rate": 6.554959785522788e-06, + "loss": 0.4666, + "step": 3291 + }, + { + "epoch": 8.71, + "learning_rate": 6.541554959785523e-06, + "loss": 0.0003, + "step": 3292 + }, + { + "epoch": 8.71, + "learning_rate": 6.528150134048258e-06, + "loss": 0.0002, + "step": 3293 + }, + { + "epoch": 8.71, + "learning_rate": 6.514745308310992e-06, + "loss": 0.003, + "step": 3294 + }, + { + "epoch": 8.72, + "learning_rate": 6.501340482573727e-06, + "loss": 0.0009, + "step": 3295 + }, + { + "epoch": 8.72, + "learning_rate": 6.487935656836461e-06, + "loss": 0.0255, + "step": 3296 + }, + { + "epoch": 8.72, + "learning_rate": 6.474530831099196e-06, + "loss": 0.026, + "step": 3297 + }, + { + "epoch": 8.72, + "learning_rate": 6.461126005361931e-06, + "loss": 0.0011, + "step": 3298 + }, + { + "epoch": 8.73, + "learning_rate": 6.447721179624665e-06, + "loss": 0.0003, + "step": 3299 + }, + { + "epoch": 8.73, + "learning_rate": 6.4343163538874e-06, + "loss": 0.0001, + "step": 3300 + }, + { + "epoch": 8.73, + "learning_rate": 6.420911528150135e-06, + "loss": 0.0002, + "step": 3301 + }, + { + "epoch": 8.74, + "learning_rate": 6.4075067024128684e-06, + "loss": 0.0374, + "step": 3302 + }, + { + "epoch": 8.74, + "learning_rate": 6.3941018766756036e-06, + "loss": 0.0003, + "step": 3303 + }, + { + "epoch": 8.74, + "learning_rate": 6.380697050938338e-06, + "loss": 0.0003, + "step": 3304 + }, + { + "epoch": 8.74, + "learning_rate": 6.367292225201073e-06, + "loss": 0.0003, + "step": 3305 + }, + { + "epoch": 8.75, + "learning_rate": 6.353887399463808e-06, + "loss": 0.0002, + "step": 3306 + }, + { + "epoch": 8.75, + "learning_rate": 6.3404825737265416e-06, + "loss": 0.0002, + "step": 3307 + }, + { + "epoch": 8.75, + "learning_rate": 6.327077747989277e-06, + "loss": 0.0003, + "step": 3308 + }, + { + "epoch": 8.75, + "learning_rate": 6.31367292225201e-06, + "loss": 0.0002, + "step": 3309 + }, + { + "epoch": 8.76, + "learning_rate": 6.300268096514745e-06, + "loss": 0.0005, + "step": 3310 + }, + { + "epoch": 8.76, + "learning_rate": 6.28686327077748e-06, + "loss": 0.0003, + "step": 3311 + }, + { + "epoch": 8.76, + "learning_rate": 6.273458445040215e-06, + "loss": 0.0002, + "step": 3312 + }, + { + "epoch": 8.76, + "learning_rate": 6.26005361930295e-06, + "loss": 0.0848, + "step": 3313 + }, + { + "epoch": 8.77, + "learning_rate": 6.246648793565684e-06, + "loss": 0.0002, + "step": 3314 + }, + { + "epoch": 8.77, + "learning_rate": 6.233243967828418e-06, + "loss": 0.021, + "step": 3315 + }, + { + "epoch": 8.77, + "learning_rate": 6.219839142091153e-06, + "loss": 0.2761, + "step": 3316 + }, + { + "epoch": 8.78, + "learning_rate": 6.206434316353888e-06, + "loss": 0.0002, + "step": 3317 + }, + { + "epoch": 8.78, + "learning_rate": 6.193029490616623e-06, + "loss": 0.0309, + "step": 3318 + }, + { + "epoch": 8.78, + "learning_rate": 6.179624664879357e-06, + "loss": 0.0004, + "step": 3319 + }, + { + "epoch": 8.78, + "learning_rate": 6.1662198391420915e-06, + "loss": 0.0003, + "step": 3320 + }, + { + "epoch": 8.79, + "learning_rate": 6.152815013404826e-06, + "loss": 0.0059, + "step": 3321 + }, + { + "epoch": 8.79, + "learning_rate": 6.139410187667561e-06, + "loss": 0.0525, + "step": 3322 + }, + { + "epoch": 8.79, + "learning_rate": 6.126005361930295e-06, + "loss": 0.0002, + "step": 3323 + }, + { + "epoch": 8.79, + "learning_rate": 6.1126005361930295e-06, + "loss": 0.0002, + "step": 3324 + }, + { + "epoch": 8.8, + "learning_rate": 6.099195710455765e-06, + "loss": 0.0003, + "step": 3325 + }, + { + "epoch": 8.8, + "learning_rate": 6.085790884718499e-06, + "loss": 0.0026, + "step": 3326 + }, + { + "epoch": 8.8, + "learning_rate": 6.072386058981234e-06, + "loss": 0.0003, + "step": 3327 + }, + { + "epoch": 8.8, + "learning_rate": 6.058981233243968e-06, + "loss": 0.0137, + "step": 3328 + }, + { + "epoch": 8.81, + "learning_rate": 6.0455764075067026e-06, + "loss": 0.0003, + "step": 3329 + }, + { + "epoch": 8.81, + "learning_rate": 6.032171581769437e-06, + "loss": 0.0002, + "step": 3330 + }, + { + "epoch": 8.81, + "learning_rate": 6.018766756032172e-06, + "loss": 0.0003, + "step": 3331 + }, + { + "epoch": 8.81, + "learning_rate": 6.005361930294907e-06, + "loss": 0.0003, + "step": 3332 + }, + { + "epoch": 8.82, + "learning_rate": 5.991957104557641e-06, + "loss": 0.5064, + "step": 3333 + }, + { + "epoch": 8.82, + "learning_rate": 5.978552278820376e-06, + "loss": 0.0003, + "step": 3334 + }, + { + "epoch": 8.82, + "learning_rate": 5.96514745308311e-06, + "loss": 0.0064, + "step": 3335 + }, + { + "epoch": 8.83, + "learning_rate": 5.951742627345844e-06, + "loss": 0.0001, + "step": 3336 + }, + { + "epoch": 8.83, + "learning_rate": 5.938337801608579e-06, + "loss": 0.0003, + "step": 3337 + }, + { + "epoch": 8.83, + "learning_rate": 5.924932975871314e-06, + "loss": 0.0002, + "step": 3338 + }, + { + "epoch": 8.83, + "learning_rate": 5.911528150134049e-06, + "loss": 0.0004, + "step": 3339 + }, + { + "epoch": 8.84, + "learning_rate": 5.898123324396783e-06, + "loss": 0.0005, + "step": 3340 + }, + { + "epoch": 8.84, + "learning_rate": 5.884718498659518e-06, + "loss": 0.1297, + "step": 3341 + }, + { + "epoch": 8.84, + "learning_rate": 5.8713136729222525e-06, + "loss": 0.0324, + "step": 3342 + }, + { + "epoch": 8.84, + "learning_rate": 5.857908847184987e-06, + "loss": 0.0208, + "step": 3343 + }, + { + "epoch": 8.85, + "learning_rate": 5.844504021447721e-06, + "loss": 0.065, + "step": 3344 + }, + { + "epoch": 8.85, + "learning_rate": 5.831099195710455e-06, + "loss": 0.0323, + "step": 3345 + }, + { + "epoch": 8.85, + "learning_rate": 5.8176943699731905e-06, + "loss": 0.1872, + "step": 3346 + }, + { + "epoch": 8.85, + "learning_rate": 5.804289544235926e-06, + "loss": 0.0251, + "step": 3347 + }, + { + "epoch": 8.86, + "learning_rate": 5.79088471849866e-06, + "loss": 0.0002, + "step": 3348 + }, + { + "epoch": 8.86, + "learning_rate": 5.777479892761394e-06, + "loss": 0.1384, + "step": 3349 + }, + { + "epoch": 8.86, + "learning_rate": 5.7640750670241285e-06, + "loss": 0.0006, + "step": 3350 + }, + { + "epoch": 8.87, + "learning_rate": 5.750670241286864e-06, + "loss": 0.0004, + "step": 3351 + }, + { + "epoch": 8.87, + "learning_rate": 5.737265415549598e-06, + "loss": 0.0121, + "step": 3352 + }, + { + "epoch": 8.87, + "learning_rate": 5.723860589812333e-06, + "loss": 0.0007, + "step": 3353 + }, + { + "epoch": 8.87, + "learning_rate": 5.710455764075067e-06, + "loss": 0.0003, + "step": 3354 + }, + { + "epoch": 8.88, + "learning_rate": 5.697050938337802e-06, + "loss": 0.0003, + "step": 3355 + }, + { + "epoch": 8.88, + "learning_rate": 5.683646112600537e-06, + "loss": 0.009, + "step": 3356 + }, + { + "epoch": 8.88, + "learning_rate": 5.670241286863271e-06, + "loss": 0.0004, + "step": 3357 + }, + { + "epoch": 8.88, + "learning_rate": 5.656836461126005e-06, + "loss": 0.0009, + "step": 3358 + }, + { + "epoch": 8.89, + "learning_rate": 5.6434316353887395e-06, + "loss": 0.0005, + "step": 3359 + }, + { + "epoch": 8.89, + "learning_rate": 5.630026809651475e-06, + "loss": 0.0002, + "step": 3360 + }, + { + "epoch": 8.89, + "learning_rate": 5.61662198391421e-06, + "loss": 0.0319, + "step": 3361 + }, + { + "epoch": 8.89, + "learning_rate": 5.603217158176944e-06, + "loss": 0.0955, + "step": 3362 + }, + { + "epoch": 8.9, + "learning_rate": 5.589812332439678e-06, + "loss": 0.0706, + "step": 3363 + }, + { + "epoch": 8.9, + "learning_rate": 5.576407506702413e-06, + "loss": 0.0072, + "step": 3364 + }, + { + "epoch": 8.9, + "learning_rate": 5.563002680965148e-06, + "loss": 0.0002, + "step": 3365 + }, + { + "epoch": 8.9, + "learning_rate": 5.549597855227882e-06, + "loss": 0.0002, + "step": 3366 + }, + { + "epoch": 8.91, + "learning_rate": 5.536193029490617e-06, + "loss": 0.0018, + "step": 3367 + }, + { + "epoch": 8.91, + "learning_rate": 5.5227882037533515e-06, + "loss": 0.0002, + "step": 3368 + }, + { + "epoch": 8.91, + "learning_rate": 5.509383378016087e-06, + "loss": 0.0002, + "step": 3369 + }, + { + "epoch": 8.92, + "learning_rate": 5.495978552278821e-06, + "loss": 0.0001, + "step": 3370 + }, + { + "epoch": 8.92, + "learning_rate": 5.482573726541555e-06, + "loss": 0.0414, + "step": 3371 + }, + { + "epoch": 8.92, + "learning_rate": 5.4691689008042895e-06, + "loss": 0.0107, + "step": 3372 + }, + { + "epoch": 8.92, + "learning_rate": 5.455764075067024e-06, + "loss": 0.0252, + "step": 3373 + }, + { + "epoch": 8.93, + "learning_rate": 5.442359249329759e-06, + "loss": 0.0918, + "step": 3374 + }, + { + "epoch": 8.93, + "learning_rate": 5.428954423592494e-06, + "loss": 0.0016, + "step": 3375 + }, + { + "epoch": 8.93, + "learning_rate": 5.415549597855228e-06, + "loss": 0.0001, + "step": 3376 + }, + { + "epoch": 8.93, + "learning_rate": 5.402144772117963e-06, + "loss": 0.0002, + "step": 3377 + }, + { + "epoch": 8.94, + "learning_rate": 5.388739946380697e-06, + "loss": 0.0003, + "step": 3378 + }, + { + "epoch": 8.94, + "learning_rate": 5.375335120643432e-06, + "loss": 0.0002, + "step": 3379 + }, + { + "epoch": 8.94, + "learning_rate": 5.361930294906166e-06, + "loss": 0.0722, + "step": 3380 + }, + { + "epoch": 8.94, + "learning_rate": 5.348525469168901e-06, + "loss": 0.0002, + "step": 3381 + }, + { + "epoch": 8.95, + "learning_rate": 5.335120643431636e-06, + "loss": 0.0002, + "step": 3382 + }, + { + "epoch": 8.95, + "learning_rate": 5.32171581769437e-06, + "loss": 0.0001, + "step": 3383 + }, + { + "epoch": 8.95, + "learning_rate": 5.308310991957105e-06, + "loss": 0.0002, + "step": 3384 + }, + { + "epoch": 8.96, + "learning_rate": 5.294906166219839e-06, + "loss": 0.1, + "step": 3385 + }, + { + "epoch": 8.96, + "learning_rate": 5.281501340482574e-06, + "loss": 0.1151, + "step": 3386 + }, + { + "epoch": 8.96, + "learning_rate": 5.268096514745308e-06, + "loss": 0.0003, + "step": 3387 + }, + { + "epoch": 8.96, + "learning_rate": 5.254691689008043e-06, + "loss": 0.0001, + "step": 3388 + }, + { + "epoch": 8.97, + "learning_rate": 5.241286863270778e-06, + "loss": 0.0003, + "step": 3389 + }, + { + "epoch": 8.97, + "learning_rate": 5.2278820375335125e-06, + "loss": 0.0002, + "step": 3390 + }, + { + "epoch": 8.97, + "learning_rate": 5.214477211796247e-06, + "loss": 0.0896, + "step": 3391 + }, + { + "epoch": 8.97, + "learning_rate": 5.201072386058981e-06, + "loss": 0.0002, + "step": 3392 + }, + { + "epoch": 8.98, + "learning_rate": 5.187667560321716e-06, + "loss": 0.0003, + "step": 3393 + }, + { + "epoch": 8.98, + "learning_rate": 5.1742627345844505e-06, + "loss": 0.0003, + "step": 3394 + }, + { + "epoch": 8.98, + "learning_rate": 5.160857908847186e-06, + "loss": 0.0008, + "step": 3395 + }, + { + "epoch": 8.98, + "learning_rate": 5.14745308310992e-06, + "loss": 0.4041, + "step": 3396 + }, + { + "epoch": 8.99, + "learning_rate": 5.134048257372654e-06, + "loss": 0.0253, + "step": 3397 + }, + { + "epoch": 8.99, + "learning_rate": 5.120643431635389e-06, + "loss": 0.0355, + "step": 3398 + }, + { + "epoch": 8.99, + "learning_rate": 5.107238605898124e-06, + "loss": 0.0771, + "step": 3399 + }, + { + "epoch": 8.99, + "learning_rate": 5.093833780160858e-06, + "loss": 0.2133, + "step": 3400 + }, + { + "epoch": 9.0, + "learning_rate": 5.080428954423592e-06, + "loss": 0.0002, + "step": 3401 + }, + { + "epoch": 9.0, + "learning_rate": 5.067024128686327e-06, + "loss": 0.0022, + "step": 3402 + }, + { + "epoch": 9.0, + "eval_f1": 0.774885145482389, + "eval_loss": 1.3973581790924072, + "eval_runtime": 1.8849, + "eval_samples_per_second": 802.683, + "eval_steps_per_second": 50.4, + "step": 3402 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 871461619695744.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3402/training_args.bin b/checkpoint-3402/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-3402/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-378/config.json b/checkpoint-378/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-378/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-378/optimizer.pt b/checkpoint-378/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c46e9e6337f564f5947fc182eb34b6f4ed120247 --- /dev/null +++ b/checkpoint-378/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa074398f9daecc03959316b62ca7001add365c521952d028c7a2b287d5f22a5 +size 526325317 diff --git a/checkpoint-378/pytorch_model.bin b/checkpoint-378/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..71ba10a609aae9899f7320e9ba7dbd89017b9e2d --- /dev/null +++ b/checkpoint-378/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4189b8dfbd6d144d0ea9419afb0d3b4ae2d988dad19ef3e96026cfb2171324d6 +size 263167661 diff --git a/checkpoint-378/rng_state.pth b/checkpoint-378/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..018e160d6d82a04dc12de2deb86281a3984b9857 --- /dev/null +++ b/checkpoint-378/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c16d6621f3999fb48fd36cd1a8d4884af17553f086354d0efccba7386d64a5f +size 14575 diff --git a/checkpoint-378/scheduler.pt b/checkpoint-378/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..81859dd7da84fdf27e7befd0b235c915e179afe6 --- /dev/null +++ b/checkpoint-378/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a09a780b3fac62e4775ac02b2f7ef447c04f9695234110a0863ec14212f33e2 +size 627 diff --git a/checkpoint-378/trainer_state.json b/checkpoint-378/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a547ad0ed5f7f78258b7f9c941580498537369fb --- /dev/null +++ b/checkpoint-378/trainer_state.json @@ -0,0 +1,2293 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 1.0, + "global_step": 378, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 96750876293376.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-378/training_args.bin b/checkpoint-378/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-378/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-3780/config.json b/checkpoint-3780/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-3780/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-3780/optimizer.pt b/checkpoint-3780/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..086b278bf406ed588ebd49802ebcfabfd7fea164 --- /dev/null +++ b/checkpoint-3780/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6e7bbbe55f160c474696a1d2c841c8a5ab411c4db0399225cf82dc703fcf25 +size 526325317 diff --git a/checkpoint-3780/pytorch_model.bin b/checkpoint-3780/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd4f98ead533dd90c886acbda01770268e93d5d4 --- /dev/null +++ b/checkpoint-3780/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8621c9931b56d85fe08ce64972eb06377320d7a3871419ed292cbb7f27f4fa72 +size 263167661 diff --git a/checkpoint-3780/rng_state.pth b/checkpoint-3780/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7cf11a934397175a2b15072224d12772d861159 --- /dev/null +++ b/checkpoint-3780/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51f6ec7a6ce3f24ff8cb090a9410b5f04d88b3f636b31b44821ba1aad19f189 +size 14575 diff --git a/checkpoint-3780/scheduler.pt b/checkpoint-3780/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..22901d529a7b8a357f3af2c3655f1b3a5199d9b7 --- /dev/null +++ b/checkpoint-3780/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812b4eaee7b1497d09f4835395986d8e630597bdec80f756d6e4952a6cd2fed6 +size 627 diff --git a/checkpoint-3780/trainer_state.json b/checkpoint-3780/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..99f3cd3cd495779218f2ffb8ca1a23d8b2179b52 --- /dev/null +++ b/checkpoint-3780/trainer_state.json @@ -0,0 +1,22786 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 10.0, + "global_step": 3780, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + }, + { + "epoch": 2.0, + "learning_rate": 4.0522788203753354e-05, + "loss": 0.4739, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 4.050938337801609e-05, + "loss": 0.4117, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 4.0495978552278824e-05, + "loss": 0.2169, + "step": 759 + }, + { + "epoch": 2.01, + "learning_rate": 4.048257372654156e-05, + "loss": 0.1848, + "step": 760 + }, + { + "epoch": 2.01, + "learning_rate": 4.0469168900804294e-05, + "loss": 0.5066, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 4.045576407506703e-05, + "loss": 0.1784, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 4.044235924932976e-05, + "loss": 0.3869, + "step": 763 + }, + { + "epoch": 2.02, + "learning_rate": 4.04289544235925e-05, + "loss": 0.1132, + "step": 764 + }, + { + "epoch": 2.02, + "learning_rate": 4.041554959785523e-05, + "loss": 0.2724, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 4.040214477211797e-05, + "loss": 0.0983, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 4.03887399463807e-05, + "loss": 0.1831, + "step": 767 + }, + { + "epoch": 2.03, + "learning_rate": 4.037533512064343e-05, + "loss": 0.1954, + "step": 768 + }, + { + "epoch": 2.03, + "learning_rate": 4.036193029490617e-05, + "loss": 0.4738, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 4.0348525469168903e-05, + "loss": 0.3375, + "step": 770 + }, + { + "epoch": 2.04, + "learning_rate": 4.033512064343164e-05, + "loss": 0.3991, + "step": 771 + }, + { + "epoch": 2.04, + "learning_rate": 4.0321715817694374e-05, + "loss": 0.0696, + "step": 772 + }, + { + "epoch": 2.04, + "learning_rate": 4.03083109919571e-05, + "loss": 0.0982, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 4.0294906166219844e-05, + "loss": 0.2236, + "step": 774 + }, + { + "epoch": 2.05, + "learning_rate": 4.028150134048257e-05, + "loss": 0.4225, + "step": 775 + }, + { + "epoch": 2.05, + "learning_rate": 4.0268096514745314e-05, + "loss": 0.0583, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 4.025469168900804e-05, + "loss": 0.1643, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 4.024128686327078e-05, + "loss": 0.0351, + "step": 778 + }, + { + "epoch": 2.06, + "learning_rate": 4.022788203753351e-05, + "loss": 0.4496, + "step": 779 + }, + { + "epoch": 2.06, + "learning_rate": 4.021447721179625e-05, + "loss": 0.0372, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 4.020107238605898e-05, + "loss": 0.4198, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 4.018766756032172e-05, + "loss": 0.1968, + "step": 782 + }, + { + "epoch": 2.07, + "learning_rate": 4.017426273458445e-05, + "loss": 0.515, + "step": 783 + }, + { + "epoch": 2.07, + "learning_rate": 4.016085790884719e-05, + "loss": 0.408, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 4.0147453083109924e-05, + "loss": 0.3693, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 4.013404825737266e-05, + "loss": 0.0561, + "step": 786 + }, + { + "epoch": 2.08, + "learning_rate": 4.0120643431635394e-05, + "loss": 0.4011, + "step": 787 + }, + { + "epoch": 2.08, + "learning_rate": 4.010723860589812e-05, + "loss": 0.0872, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 4.0093833780160864e-05, + "loss": 0.0768, + "step": 789 + }, + { + "epoch": 2.09, + "learning_rate": 4.008042895442359e-05, + "loss": 0.0184, + "step": 790 + }, + { + "epoch": 2.09, + "learning_rate": 4.0067024128686334e-05, + "loss": 0.3287, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 4.005361930294906e-05, + "loss": 0.0262, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 4.00402144772118e-05, + "loss": 0.0248, + "step": 793 + }, + { + "epoch": 2.1, + "learning_rate": 4.002680965147453e-05, + "loss": 0.3853, + "step": 794 + }, + { + "epoch": 2.1, + "learning_rate": 4.001340482573727e-05, + "loss": 0.3512, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 4e-05, + "loss": 0.4188, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 3.998659517426274e-05, + "loss": 0.1834, + "step": 797 + }, + { + "epoch": 2.11, + "learning_rate": 3.997319034852547e-05, + "loss": 0.2074, + "step": 798 + }, + { + "epoch": 2.11, + "learning_rate": 3.995978552278821e-05, + "loss": 0.7317, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 3.994638069705094e-05, + "loss": 0.3534, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 3.993297587131368e-05, + "loss": 0.3184, + "step": 801 + }, + { + "epoch": 2.12, + "learning_rate": 3.991957104557641e-05, + "loss": 0.1088, + "step": 802 + }, + { + "epoch": 2.12, + "learning_rate": 3.990616621983914e-05, + "loss": 0.0429, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 3.989276139410188e-05, + "loss": 0.4518, + "step": 804 + }, + { + "epoch": 2.13, + "learning_rate": 3.987935656836461e-05, + "loss": 0.1746, + "step": 805 + }, + { + "epoch": 2.13, + "learning_rate": 3.986595174262735e-05, + "loss": 0.1881, + "step": 806 + }, + { + "epoch": 2.13, + "learning_rate": 3.985254691689008e-05, + "loss": 0.4111, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 3.983914209115281e-05, + "loss": 0.059, + "step": 808 + }, + { + "epoch": 2.14, + "learning_rate": 3.982573726541555e-05, + "loss": 0.0495, + "step": 809 + }, + { + "epoch": 2.14, + "learning_rate": 3.981233243967828e-05, + "loss": 0.1134, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 3.9798927613941023e-05, + "loss": 0.5469, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 3.978552278820375e-05, + "loss": 0.0581, + "step": 812 + }, + { + "epoch": 2.15, + "learning_rate": 3.977211796246649e-05, + "loss": 0.1254, + "step": 813 + }, + { + "epoch": 2.15, + "learning_rate": 3.975871313672922e-05, + "loss": 0.0679, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 3.974530831099196e-05, + "loss": 0.0463, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 3.973190348525469e-05, + "loss": 0.33, + "step": 816 + }, + { + "epoch": 2.16, + "learning_rate": 3.971849865951743e-05, + "loss": 0.2931, + "step": 817 + }, + { + "epoch": 2.16, + "learning_rate": 3.970509383378016e-05, + "loss": 0.1034, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 3.96916890080429e-05, + "loss": 0.0379, + "step": 819 + }, + { + "epoch": 2.17, + "learning_rate": 3.967828418230563e-05, + "loss": 0.0456, + "step": 820 + }, + { + "epoch": 2.17, + "learning_rate": 3.966487935656837e-05, + "loss": 0.4862, + "step": 821 + }, + { + "epoch": 2.17, + "learning_rate": 3.96514745308311e-05, + "loss": 0.0512, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 3.963806970509383e-05, + "loss": 0.0879, + "step": 823 + }, + { + "epoch": 2.18, + "learning_rate": 3.962466487935657e-05, + "loss": 0.3664, + "step": 824 + }, + { + "epoch": 2.18, + "learning_rate": 3.96112600536193e-05, + "loss": 0.0975, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 3.9597855227882044e-05, + "loss": 0.162, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 3.958445040214477e-05, + "loss": 0.1076, + "step": 827 + }, + { + "epoch": 2.19, + "learning_rate": 3.957104557640751e-05, + "loss": 0.1185, + "step": 828 + }, + { + "epoch": 2.19, + "learning_rate": 3.955764075067024e-05, + "loss": 0.0868, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 3.954423592493298e-05, + "loss": 0.0246, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 3.953083109919571e-05, + "loss": 0.2233, + "step": 831 + }, + { + "epoch": 2.2, + "learning_rate": 3.951742627345845e-05, + "loss": 0.2113, + "step": 832 + }, + { + "epoch": 2.2, + "learning_rate": 3.9504021447721176e-05, + "loss": 0.0587, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 3.949061662198392e-05, + "loss": 0.0521, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 3.9477211796246646e-05, + "loss": 0.3845, + "step": 835 + }, + { + "epoch": 2.21, + "learning_rate": 3.946380697050939e-05, + "loss": 0.1096, + "step": 836 + }, + { + "epoch": 2.21, + "learning_rate": 3.9450402144772117e-05, + "loss": 0.1488, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 3.943699731903485e-05, + "loss": 0.1937, + "step": 838 + }, + { + "epoch": 2.22, + "learning_rate": 3.942359249329759e-05, + "loss": 0.1309, + "step": 839 + }, + { + "epoch": 2.22, + "learning_rate": 3.941018766756032e-05, + "loss": 0.3271, + "step": 840 + }, + { + "epoch": 2.22, + "learning_rate": 3.939678284182306e-05, + "loss": 0.3318, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 3.938337801608579e-05, + "loss": 0.3516, + "step": 842 + }, + { + "epoch": 2.23, + "learning_rate": 3.936997319034853e-05, + "loss": 0.1641, + "step": 843 + }, + { + "epoch": 2.23, + "learning_rate": 3.935656836461126e-05, + "loss": 0.064, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 3.9343163538874e-05, + "loss": 0.1971, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 3.932975871313673e-05, + "loss": 0.1166, + "step": 846 + }, + { + "epoch": 2.24, + "learning_rate": 3.931635388739947e-05, + "loss": 0.0384, + "step": 847 + }, + { + "epoch": 2.24, + "learning_rate": 3.9302949061662196e-05, + "loss": 0.0462, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 3.928954423592494e-05, + "loss": 0.1073, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 3.9276139410187666e-05, + "loss": 0.0227, + "step": 850 + }, + { + "epoch": 2.25, + "learning_rate": 3.926273458445041e-05, + "loss": 0.0683, + "step": 851 + }, + { + "epoch": 2.25, + "learning_rate": 3.924932975871314e-05, + "loss": 0.7962, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 3.923592493297587e-05, + "loss": 0.0635, + "step": 853 + }, + { + "epoch": 2.26, + "learning_rate": 3.922252010723861e-05, + "loss": 0.0454, + "step": 854 + }, + { + "epoch": 2.26, + "learning_rate": 3.920911528150134e-05, + "loss": 0.5389, + "step": 855 + }, + { + "epoch": 2.26, + "learning_rate": 3.919571045576408e-05, + "loss": 0.1181, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 3.918230563002681e-05, + "loss": 0.1372, + "step": 857 + }, + { + "epoch": 2.27, + "learning_rate": 3.916890080428954e-05, + "loss": 0.3192, + "step": 858 + }, + { + "epoch": 2.27, + "learning_rate": 3.915549597855228e-05, + "loss": 0.3419, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 3.914209115281501e-05, + "loss": 0.0114, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 3.912868632707775e-05, + "loss": 0.1905, + "step": 861 + }, + { + "epoch": 2.28, + "learning_rate": 3.911528150134048e-05, + "loss": 0.0218, + "step": 862 + }, + { + "epoch": 2.28, + "learning_rate": 3.910187667560322e-05, + "loss": 0.4374, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 3.908847184986595e-05, + "loss": 0.1844, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 3.907506702412869e-05, + "loss": 0.2427, + "step": 865 + }, + { + "epoch": 2.29, + "learning_rate": 3.906166219839142e-05, + "loss": 0.2749, + "step": 866 + }, + { + "epoch": 2.29, + "learning_rate": 3.904825737265416e-05, + "loss": 0.2089, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 3.903485254691689e-05, + "loss": 0.06, + "step": 868 + }, + { + "epoch": 2.3, + "learning_rate": 3.902144772117963e-05, + "loss": 0.1951, + "step": 869 + }, + { + "epoch": 2.3, + "learning_rate": 3.900804289544236e-05, + "loss": 0.0252, + "step": 870 + }, + { + "epoch": 2.3, + "learning_rate": 3.89946380697051e-05, + "loss": 0.0299, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 3.898123324396783e-05, + "loss": 0.0298, + "step": 872 + }, + { + "epoch": 2.31, + "learning_rate": 3.896782841823057e-05, + "loss": 0.5186, + "step": 873 + }, + { + "epoch": 2.31, + "learning_rate": 3.89544235924933e-05, + "loss": 0.2704, + "step": 874 + }, + { + "epoch": 2.31, + "learning_rate": 3.894101876675603e-05, + "loss": 0.2435, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 3.892761394101877e-05, + "loss": 0.039, + "step": 876 + }, + { + "epoch": 2.32, + "learning_rate": 3.89142091152815e-05, + "loss": 0.0275, + "step": 877 + }, + { + "epoch": 2.32, + "learning_rate": 3.890080428954424e-05, + "loss": 0.1164, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 3.888739946380697e-05, + "loss": 0.1551, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 3.887399463806971e-05, + "loss": 0.0215, + "step": 880 + }, + { + "epoch": 2.33, + "learning_rate": 3.886058981233244e-05, + "loss": 0.0379, + "step": 881 + }, + { + "epoch": 2.33, + "learning_rate": 3.884718498659518e-05, + "loss": 0.0553, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 3.883378016085791e-05, + "loss": 0.1073, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 3.882037533512065e-05, + "loss": 0.3525, + "step": 884 + }, + { + "epoch": 2.34, + "learning_rate": 3.8806970509383376e-05, + "loss": 0.2646, + "step": 885 + }, + { + "epoch": 2.34, + "learning_rate": 3.879356568364612e-05, + "loss": 0.5758, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 3.8780160857908846e-05, + "loss": 0.9312, + "step": 887 + }, + { + "epoch": 2.35, + "learning_rate": 3.876675603217159e-05, + "loss": 0.2748, + "step": 888 + }, + { + "epoch": 2.35, + "learning_rate": 3.8753351206434316e-05, + "loss": 0.33, + "step": 889 + }, + { + "epoch": 2.35, + "learning_rate": 3.873994638069705e-05, + "loss": 0.0312, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 3.8726541554959786e-05, + "loss": 0.0449, + "step": 891 + }, + { + "epoch": 2.36, + "learning_rate": 3.871313672922252e-05, + "loss": 0.1197, + "step": 892 + }, + { + "epoch": 2.36, + "learning_rate": 3.869973190348526e-05, + "loss": 0.0913, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 3.868632707774799e-05, + "loss": 0.0284, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 3.867292225201073e-05, + "loss": 0.3769, + "step": 895 + }, + { + "epoch": 2.37, + "learning_rate": 3.865951742627346e-05, + "loss": 0.0947, + "step": 896 + }, + { + "epoch": 2.37, + "learning_rate": 3.86461126005362e-05, + "loss": 0.4282, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 3.863270777479893e-05, + "loss": 0.0049, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 3.861930294906167e-05, + "loss": 0.3632, + "step": 899 + }, + { + "epoch": 2.38, + "learning_rate": 3.8605898123324396e-05, + "loss": 0.0421, + "step": 900 + }, + { + "epoch": 2.38, + "learning_rate": 3.859249329758714e-05, + "loss": 0.5793, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 3.8579088471849866e-05, + "loss": 0.1695, + "step": 902 + }, + { + "epoch": 2.39, + "learning_rate": 3.856568364611261e-05, + "loss": 0.3082, + "step": 903 + }, + { + "epoch": 2.39, + "learning_rate": 3.8552278820375336e-05, + "loss": 0.0151, + "step": 904 + }, + { + "epoch": 2.39, + "learning_rate": 3.853887399463807e-05, + "loss": 0.3463, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 3.852546916890081e-05, + "loss": 0.4573, + "step": 906 + }, + { + "epoch": 2.4, + "learning_rate": 3.851206434316354e-05, + "loss": 0.1281, + "step": 907 + }, + { + "epoch": 2.4, + "learning_rate": 3.849865951742628e-05, + "loss": 0.3168, + "step": 908 + }, + { + "epoch": 2.4, + "learning_rate": 3.848525469168901e-05, + "loss": 0.0331, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 3.847184986595174e-05, + "loss": 0.1825, + "step": 910 + }, + { + "epoch": 2.41, + "learning_rate": 3.845844504021448e-05, + "loss": 0.6238, + "step": 911 + }, + { + "epoch": 2.41, + "learning_rate": 3.844504021447721e-05, + "loss": 0.0663, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 3.843163538873995e-05, + "loss": 0.2827, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 3.841823056300268e-05, + "loss": 0.0133, + "step": 914 + }, + { + "epoch": 2.42, + "learning_rate": 3.8404825737265416e-05, + "loss": 0.0415, + "step": 915 + }, + { + "epoch": 2.42, + "learning_rate": 3.839142091152815e-05, + "loss": 0.2969, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 3.8378016085790886e-05, + "loss": 0.0145, + "step": 917 + }, + { + "epoch": 2.43, + "learning_rate": 3.836461126005362e-05, + "loss": 0.0223, + "step": 918 + }, + { + "epoch": 2.43, + "learning_rate": 3.8351206434316357e-05, + "loss": 1.042, + "step": 919 + }, + { + "epoch": 2.43, + "learning_rate": 3.8337801608579085e-05, + "loss": 0.5061, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 3.832439678284183e-05, + "loss": 0.406, + "step": 921 + }, + { + "epoch": 2.44, + "learning_rate": 3.8310991957104555e-05, + "loss": 0.1337, + "step": 922 + }, + { + "epoch": 2.44, + "learning_rate": 3.82975871313673e-05, + "loss": 0.2057, + "step": 923 + }, + { + "epoch": 2.44, + "learning_rate": 3.8284182305630025e-05, + "loss": 0.0987, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 3.827077747989276e-05, + "loss": 0.4611, + "step": 925 + }, + { + "epoch": 2.45, + "learning_rate": 3.8257372654155496e-05, + "loss": 0.0152, + "step": 926 + }, + { + "epoch": 2.45, + "learning_rate": 3.824396782841823e-05, + "loss": 0.4478, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 3.8230563002680966e-05, + "loss": 0.0579, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 3.82171581769437e-05, + "loss": 0.1506, + "step": 929 + }, + { + "epoch": 2.46, + "learning_rate": 3.8203753351206436e-05, + "loss": 0.5514, + "step": 930 + }, + { + "epoch": 2.46, + "learning_rate": 3.819034852546917e-05, + "loss": 0.1505, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 3.8176943699731906e-05, + "loss": 0.1413, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 3.816353887399464e-05, + "loss": 0.4758, + "step": 933 + }, + { + "epoch": 2.47, + "learning_rate": 3.815013404825738e-05, + "loss": 0.0863, + "step": 934 + }, + { + "epoch": 2.47, + "learning_rate": 3.8136729222520105e-05, + "loss": 0.3561, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 3.812332439678285e-05, + "loss": 0.7784, + "step": 936 + }, + { + "epoch": 2.48, + "learning_rate": 3.8109919571045575e-05, + "loss": 0.2243, + "step": 937 + }, + { + "epoch": 2.48, + "learning_rate": 3.809651474530832e-05, + "loss": 0.1013, + "step": 938 + }, + { + "epoch": 2.48, + "learning_rate": 3.8083109919571046e-05, + "loss": 0.132, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 3.806970509383378e-05, + "loss": 0.3971, + "step": 940 + }, + { + "epoch": 2.49, + "learning_rate": 3.8056300268096516e-05, + "loss": 0.0637, + "step": 941 + }, + { + "epoch": 2.49, + "learning_rate": 3.804289544235925e-05, + "loss": 0.5178, + "step": 942 + }, + { + "epoch": 2.49, + "learning_rate": 3.8029490616621986e-05, + "loss": 0.1982, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 3.801608579088472e-05, + "loss": 0.2225, + "step": 944 + }, + { + "epoch": 2.5, + "learning_rate": 3.800268096514745e-05, + "loss": 0.1425, + "step": 945 + }, + { + "epoch": 2.5, + "learning_rate": 3.798927613941019e-05, + "loss": 0.0621, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 3.797587131367292e-05, + "loss": 0.1556, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 3.796246648793566e-05, + "loss": 0.3805, + "step": 948 + }, + { + "epoch": 2.51, + "learning_rate": 3.794906166219839e-05, + "loss": 0.7049, + "step": 949 + }, + { + "epoch": 2.51, + "learning_rate": 3.7935656836461125e-05, + "loss": 0.1055, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 3.792225201072386e-05, + "loss": 0.0489, + "step": 951 + }, + { + "epoch": 2.52, + "learning_rate": 3.7908847184986596e-05, + "loss": 0.1881, + "step": 952 + }, + { + "epoch": 2.52, + "learning_rate": 3.789544235924933e-05, + "loss": 0.0202, + "step": 953 + }, + { + "epoch": 2.52, + "learning_rate": 3.7882037533512066e-05, + "loss": 0.1043, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 3.78686327077748e-05, + "loss": 0.2093, + "step": 955 + }, + { + "epoch": 2.53, + "learning_rate": 3.7855227882037536e-05, + "loss": 0.0395, + "step": 956 + }, + { + "epoch": 2.53, + "learning_rate": 3.784182305630027e-05, + "loss": 0.1459, + "step": 957 + }, + { + "epoch": 2.53, + "learning_rate": 3.7828418230563006e-05, + "loss": 0.0338, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 3.781501340482574e-05, + "loss": 0.4741, + "step": 959 + }, + { + "epoch": 2.54, + "learning_rate": 3.780160857908847e-05, + "loss": 0.2049, + "step": 960 + }, + { + "epoch": 2.54, + "learning_rate": 3.778820375335121e-05, + "loss": 0.309, + "step": 961 + }, + { + "epoch": 2.54, + "learning_rate": 3.777479892761394e-05, + "loss": 0.0253, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 3.776139410187668e-05, + "loss": 0.4832, + "step": 963 + }, + { + "epoch": 2.55, + "learning_rate": 3.774798927613941e-05, + "loss": 0.2111, + "step": 964 + }, + { + "epoch": 2.55, + "learning_rate": 3.7734584450402145e-05, + "loss": 0.1788, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 3.772117962466488e-05, + "loss": 0.5252, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 3.7707774798927616e-05, + "loss": 0.4711, + "step": 967 + }, + { + "epoch": 2.56, + "learning_rate": 3.769436997319035e-05, + "loss": 0.5184, + "step": 968 + }, + { + "epoch": 2.56, + "learning_rate": 3.7680965147453086e-05, + "loss": 0.2164, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 3.7667560321715814e-05, + "loss": 0.5393, + "step": 970 + }, + { + "epoch": 2.57, + "learning_rate": 3.7654155495978556e-05, + "loss": 0.2588, + "step": 971 + }, + { + "epoch": 2.57, + "learning_rate": 3.7640750670241285e-05, + "loss": 0.164, + "step": 972 + }, + { + "epoch": 2.57, + "learning_rate": 3.7627345844504027e-05, + "loss": 0.2896, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 3.7613941018766755e-05, + "loss": 0.039, + "step": 974 + }, + { + "epoch": 2.58, + "learning_rate": 3.760053619302949e-05, + "loss": 0.16, + "step": 975 + }, + { + "epoch": 2.58, + "learning_rate": 3.7587131367292225e-05, + "loss": 0.1832, + "step": 976 + }, + { + "epoch": 2.58, + "learning_rate": 3.757372654155496e-05, + "loss": 0.0812, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 3.7560321715817695e-05, + "loss": 0.1476, + "step": 978 + }, + { + "epoch": 2.59, + "learning_rate": 3.754691689008043e-05, + "loss": 0.1853, + "step": 979 + }, + { + "epoch": 2.59, + "learning_rate": 3.7533512064343166e-05, + "loss": 0.2875, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 3.75201072386059e-05, + "loss": 0.1918, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 3.7506702412868636e-05, + "loss": 0.2445, + "step": 982 + }, + { + "epoch": 2.6, + "learning_rate": 3.749329758713137e-05, + "loss": 0.4653, + "step": 983 + }, + { + "epoch": 2.6, + "learning_rate": 3.7479892761394106e-05, + "loss": 0.0614, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 3.746648793565684e-05, + "loss": 0.2818, + "step": 985 + }, + { + "epoch": 2.61, + "learning_rate": 3.7453083109919576e-05, + "loss": 0.1363, + "step": 986 + }, + { + "epoch": 2.61, + "learning_rate": 3.7439678284182305e-05, + "loss": 0.3244, + "step": 987 + }, + { + "epoch": 2.61, + "learning_rate": 3.742627345844505e-05, + "loss": 0.081, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 3.7412868632707775e-05, + "loss": 0.0488, + "step": 989 + }, + { + "epoch": 2.62, + "learning_rate": 3.739946380697052e-05, + "loss": 0.2057, + "step": 990 + }, + { + "epoch": 2.62, + "learning_rate": 3.7386058981233245e-05, + "loss": 0.2598, + "step": 991 + }, + { + "epoch": 2.62, + "learning_rate": 3.737265415549598e-05, + "loss": 0.1318, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.7359249329758716e-05, + "loss": 0.0482, + "step": 993 + }, + { + "epoch": 2.63, + "learning_rate": 3.734584450402145e-05, + "loss": 0.2586, + "step": 994 + }, + { + "epoch": 2.63, + "learning_rate": 3.7332439678284186e-05, + "loss": 0.2533, + "step": 995 + }, + { + "epoch": 2.63, + "learning_rate": 3.731903485254692e-05, + "loss": 0.5292, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.730563002680965e-05, + "loss": 0.1677, + "step": 997 + }, + { + "epoch": 2.64, + "learning_rate": 3.729222520107239e-05, + "loss": 0.1869, + "step": 998 + }, + { + "epoch": 2.64, + "learning_rate": 3.727882037533512e-05, + "loss": 0.0645, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.726541554959786e-05, + "loss": 0.4107, + "step": 1000 + }, + { + "epoch": 2.65, + "learning_rate": 3.725201072386059e-05, + "loss": 0.0484, + "step": 1001 + }, + { + "epoch": 2.65, + "learning_rate": 3.7238605898123325e-05, + "loss": 0.0813, + "step": 1002 + }, + { + "epoch": 2.65, + "learning_rate": 3.722520107238606e-05, + "loss": 0.2467, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.7211796246648795e-05, + "loss": 0.0324, + "step": 1004 + }, + { + "epoch": 2.66, + "learning_rate": 3.719839142091153e-05, + "loss": 0.0536, + "step": 1005 + }, + { + "epoch": 2.66, + "learning_rate": 3.7184986595174266e-05, + "loss": 0.0399, + "step": 1006 + }, + { + "epoch": 2.66, + "learning_rate": 3.7171581769436994e-05, + "loss": 0.0257, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.7158176943699736e-05, + "loss": 0.0407, + "step": 1008 + }, + { + "epoch": 2.67, + "learning_rate": 3.7144772117962464e-05, + "loss": 0.0173, + "step": 1009 + }, + { + "epoch": 2.67, + "learning_rate": 3.7131367292225206e-05, + "loss": 0.0166, + "step": 1010 + }, + { + "epoch": 2.67, + "learning_rate": 3.7117962466487934e-05, + "loss": 0.1898, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 3.710455764075067e-05, + "loss": 0.0525, + "step": 1012 + }, + { + "epoch": 2.68, + "learning_rate": 3.7091152815013405e-05, + "loss": 0.043, + "step": 1013 + }, + { + "epoch": 2.68, + "learning_rate": 3.707774798927614e-05, + "loss": 0.3994, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 3.7064343163538875e-05, + "loss": 0.0372, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 3.705093833780161e-05, + "loss": 0.2909, + "step": 1016 + }, + { + "epoch": 2.69, + "learning_rate": 3.7037533512064345e-05, + "loss": 0.8221, + "step": 1017 + }, + { + "epoch": 2.69, + "learning_rate": 3.702412868632708e-05, + "loss": 0.0084, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 3.7010723860589815e-05, + "loss": 0.3058, + "step": 1019 + }, + { + "epoch": 2.7, + "learning_rate": 3.699731903485255e-05, + "loss": 1.0774, + "step": 1020 + }, + { + "epoch": 2.7, + "learning_rate": 3.6983914209115286e-05, + "loss": 0.2018, + "step": 1021 + }, + { + "epoch": 2.7, + "learning_rate": 3.6970509383378014e-05, + "loss": 0.0537, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 3.6957104557640756e-05, + "loss": 0.444, + "step": 1023 + }, + { + "epoch": 2.71, + "learning_rate": 3.6943699731903484e-05, + "loss": 0.6497, + "step": 1024 + }, + { + "epoch": 2.71, + "learning_rate": 3.6930294906166226e-05, + "loss": 0.0097, + "step": 1025 + }, + { + "epoch": 2.71, + "learning_rate": 3.6916890080428955e-05, + "loss": 0.0082, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 3.690348525469169e-05, + "loss": 0.0387, + "step": 1027 + }, + { + "epoch": 2.72, + "learning_rate": 3.6890080428954425e-05, + "loss": 0.3969, + "step": 1028 + }, + { + "epoch": 2.72, + "learning_rate": 3.687667560321716e-05, + "loss": 0.0136, + "step": 1029 + }, + { + "epoch": 2.72, + "learning_rate": 3.6863270777479895e-05, + "loss": 0.0099, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 3.684986595174263e-05, + "loss": 0.3509, + "step": 1031 + }, + { + "epoch": 2.73, + "learning_rate": 3.683646112600536e-05, + "loss": 0.0257, + "step": 1032 + }, + { + "epoch": 2.73, + "learning_rate": 3.68230563002681e-05, + "loss": 0.3158, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 3.680965147453083e-05, + "loss": 0.0677, + "step": 1034 + }, + { + "epoch": 2.74, + "learning_rate": 3.679624664879357e-05, + "loss": 0.3887, + "step": 1035 + }, + { + "epoch": 2.74, + "learning_rate": 3.67828418230563e-05, + "loss": 0.0714, + "step": 1036 + }, + { + "epoch": 2.74, + "learning_rate": 3.6769436997319034e-05, + "loss": 0.1066, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 3.675603217158177e-05, + "loss": 0.6238, + "step": 1038 + }, + { + "epoch": 2.75, + "learning_rate": 3.6742627345844504e-05, + "loss": 0.0405, + "step": 1039 + }, + { + "epoch": 2.75, + "learning_rate": 3.672922252010724e-05, + "loss": 0.0223, + "step": 1040 + }, + { + "epoch": 2.75, + "learning_rate": 3.6715817694369975e-05, + "loss": 0.2737, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 3.670241286863271e-05, + "loss": 0.015, + "step": 1042 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689008042895445e-05, + "loss": 0.1709, + "step": 1043 + }, + { + "epoch": 2.76, + "learning_rate": 3.667560321715818e-05, + "loss": 0.2649, + "step": 1044 + }, + { + "epoch": 2.76, + "learning_rate": 3.6662198391420915e-05, + "loss": 0.1524, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 3.664879356568365e-05, + "loss": 0.2461, + "step": 1046 + }, + { + "epoch": 2.77, + "learning_rate": 3.663538873994638e-05, + "loss": 0.3425, + "step": 1047 + }, + { + "epoch": 2.77, + "learning_rate": 3.662198391420912e-05, + "loss": 0.2689, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 3.660857908847185e-05, + "loss": 0.0066, + "step": 1049 + }, + { + "epoch": 2.78, + "learning_rate": 3.659517426273459e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 2.78, + "learning_rate": 3.658176943699732e-05, + "loss": 0.1273, + "step": 1051 + }, + { + "epoch": 2.78, + "learning_rate": 3.6568364611260054e-05, + "loss": 0.2346, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 3.655495978552279e-05, + "loss": 0.0118, + "step": 1053 + }, + { + "epoch": 2.79, + "learning_rate": 3.6541554959785525e-05, + "loss": 0.0287, + "step": 1054 + }, + { + "epoch": 2.79, + "learning_rate": 3.652815013404826e-05, + "loss": 0.264, + "step": 1055 + }, + { + "epoch": 2.79, + "learning_rate": 3.6514745308310995e-05, + "loss": 0.0216, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 3.650134048257372e-05, + "loss": 0.0261, + "step": 1057 + }, + { + "epoch": 2.8, + "learning_rate": 3.6487935656836465e-05, + "loss": 0.1911, + "step": 1058 + }, + { + "epoch": 2.8, + "learning_rate": 3.6474530831099194e-05, + "loss": 0.029, + "step": 1059 + }, + { + "epoch": 2.8, + "learning_rate": 3.6461126005361935e-05, + "loss": 0.0393, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 3.6447721179624664e-05, + "loss": 0.1044, + "step": 1061 + }, + { + "epoch": 2.81, + "learning_rate": 3.64343163538874e-05, + "loss": 0.6364, + "step": 1062 + }, + { + "epoch": 2.81, + "learning_rate": 3.6420911528150134e-05, + "loss": 0.3589, + "step": 1063 + }, + { + "epoch": 2.81, + "learning_rate": 3.640750670241287e-05, + "loss": 0.1128, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 3.6394101876675604e-05, + "loss": 0.2219, + "step": 1065 + }, + { + "epoch": 2.82, + "learning_rate": 3.638069705093834e-05, + "loss": 0.211, + "step": 1066 + }, + { + "epoch": 2.82, + "learning_rate": 3.6367292225201075e-05, + "loss": 0.387, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 3.635388739946381e-05, + "loss": 0.0725, + "step": 1068 + }, + { + "epoch": 2.83, + "learning_rate": 3.6340482573726545e-05, + "loss": 0.0268, + "step": 1069 + }, + { + "epoch": 2.83, + "learning_rate": 3.632707774798928e-05, + "loss": 0.516, + "step": 1070 + }, + { + "epoch": 2.83, + "learning_rate": 3.6313672922252015e-05, + "loss": 0.0746, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 3.6300268096514743e-05, + "loss": 0.2486, + "step": 1072 + }, + { + "epoch": 2.84, + "learning_rate": 3.6286863270777485e-05, + "loss": 0.1584, + "step": 1073 + }, + { + "epoch": 2.84, + "learning_rate": 3.6273458445040214e-05, + "loss": 0.1301, + "step": 1074 + }, + { + "epoch": 2.84, + "learning_rate": 3.6260053619302956e-05, + "loss": 0.0122, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 3.6246648793565684e-05, + "loss": 0.0215, + "step": 1076 + }, + { + "epoch": 2.85, + "learning_rate": 3.623324396782842e-05, + "loss": 0.2068, + "step": 1077 + }, + { + "epoch": 2.85, + "learning_rate": 3.6219839142091154e-05, + "loss": 0.1882, + "step": 1078 + }, + { + "epoch": 2.85, + "learning_rate": 3.620643431635389e-05, + "loss": 0.368, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 3.6193029490616625e-05, + "loss": 0.7537, + "step": 1080 + }, + { + "epoch": 2.86, + "learning_rate": 3.617962466487936e-05, + "loss": 0.1614, + "step": 1081 + }, + { + "epoch": 2.86, + "learning_rate": 3.616621983914209e-05, + "loss": 0.0585, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 3.615281501340483e-05, + "loss": 0.0535, + "step": 1083 + }, + { + "epoch": 2.87, + "learning_rate": 3.613941018766756e-05, + "loss": 0.6518, + "step": 1084 + }, + { + "epoch": 2.87, + "learning_rate": 3.61260053619303e-05, + "loss": 0.4885, + "step": 1085 + }, + { + "epoch": 2.87, + "learning_rate": 3.611260053619303e-05, + "loss": 0.4078, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.6099195710455764e-05, + "loss": 0.2101, + "step": 1087 + }, + { + "epoch": 2.88, + "learning_rate": 3.60857908847185e-05, + "loss": 0.0192, + "step": 1088 + }, + { + "epoch": 2.88, + "learning_rate": 3.6072386058981234e-05, + "loss": 0.3885, + "step": 1089 + }, + { + "epoch": 2.88, + "learning_rate": 3.605898123324397e-05, + "loss": 0.0393, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.6045576407506704e-05, + "loss": 0.2179, + "step": 1091 + }, + { + "epoch": 2.89, + "learning_rate": 3.603217158176944e-05, + "loss": 0.1814, + "step": 1092 + }, + { + "epoch": 2.89, + "learning_rate": 3.6018766756032174e-05, + "loss": 0.0647, + "step": 1093 + }, + { + "epoch": 2.89, + "learning_rate": 3.600536193029491e-05, + "loss": 0.0657, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 3.5991957104557645e-05, + "loss": 0.1062, + "step": 1095 + }, + { + "epoch": 2.9, + "learning_rate": 3.597855227882038e-05, + "loss": 0.4314, + "step": 1096 + }, + { + "epoch": 2.9, + "learning_rate": 3.596514745308311e-05, + "loss": 0.3074, + "step": 1097 + }, + { + "epoch": 2.9, + "learning_rate": 3.595174262734585e-05, + "loss": 0.0159, + "step": 1098 + }, + { + "epoch": 2.91, + "learning_rate": 3.593833780160858e-05, + "loss": 0.3829, + "step": 1099 + }, + { + "epoch": 2.91, + "learning_rate": 3.592493297587132e-05, + "loss": 0.3277, + "step": 1100 + }, + { + "epoch": 2.91, + "learning_rate": 3.591152815013405e-05, + "loss": 0.3785, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 3.5898123324396784e-05, + "loss": 0.0162, + "step": 1102 + }, + { + "epoch": 2.92, + "learning_rate": 3.588471849865952e-05, + "loss": 0.0772, + "step": 1103 + }, + { + "epoch": 2.92, + "learning_rate": 3.5871313672922254e-05, + "loss": 0.0292, + "step": 1104 + }, + { + "epoch": 2.92, + "learning_rate": 3.585790884718499e-05, + "loss": 0.0748, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 3.5844504021447724e-05, + "loss": 0.2276, + "step": 1106 + }, + { + "epoch": 2.93, + "learning_rate": 3.583109919571046e-05, + "loss": 0.3174, + "step": 1107 + }, + { + "epoch": 2.93, + "learning_rate": 3.5817694369973195e-05, + "loss": 0.134, + "step": 1108 + }, + { + "epoch": 2.93, + "learning_rate": 3.580428954423592e-05, + "loss": 0.3488, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 3.5790884718498665e-05, + "loss": 0.1003, + "step": 1110 + }, + { + "epoch": 2.94, + "learning_rate": 3.577747989276139e-05, + "loss": 0.1972, + "step": 1111 + }, + { + "epoch": 2.94, + "learning_rate": 3.5764075067024135e-05, + "loss": 0.2151, + "step": 1112 + }, + { + "epoch": 2.94, + "learning_rate": 3.5750670241286863e-05, + "loss": 0.3725, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 3.57372654155496e-05, + "loss": 0.0227, + "step": 1114 + }, + { + "epoch": 2.95, + "learning_rate": 3.5723860589812334e-05, + "loss": 0.2063, + "step": 1115 + }, + { + "epoch": 2.95, + "learning_rate": 3.571045576407507e-05, + "loss": 0.0316, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 3.5697050938337804e-05, + "loss": 0.1545, + "step": 1117 + }, + { + "epoch": 2.96, + "learning_rate": 3.568364611260054e-05, + "loss": 0.4582, + "step": 1118 + }, + { + "epoch": 2.96, + "learning_rate": 3.567024128686327e-05, + "loss": 0.5452, + "step": 1119 + }, + { + "epoch": 2.96, + "learning_rate": 3.565683646112601e-05, + "loss": 0.2009, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 3.564343163538874e-05, + "loss": 0.1201, + "step": 1121 + }, + { + "epoch": 2.97, + "learning_rate": 3.563002680965148e-05, + "loss": 0.5343, + "step": 1122 + }, + { + "epoch": 2.97, + "learning_rate": 3.561662198391421e-05, + "loss": 0.3925, + "step": 1123 + }, + { + "epoch": 2.97, + "learning_rate": 3.560321715817694e-05, + "loss": 0.0375, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 3.558981233243968e-05, + "loss": 0.0411, + "step": 1125 + }, + { + "epoch": 2.98, + "learning_rate": 3.5576407506702413e-05, + "loss": 0.0338, + "step": 1126 + }, + { + "epoch": 2.98, + "learning_rate": 3.556300268096515e-05, + "loss": 0.035, + "step": 1127 + }, + { + "epoch": 2.98, + "learning_rate": 3.5549597855227884e-05, + "loss": 0.0283, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 3.553619302949062e-05, + "loss": 0.034, + "step": 1129 + }, + { + "epoch": 2.99, + "learning_rate": 3.5522788203753354e-05, + "loss": 0.518, + "step": 1130 + }, + { + "epoch": 2.99, + "learning_rate": 3.550938337801609e-05, + "loss": 0.0241, + "step": 1131 + }, + { + "epoch": 2.99, + "learning_rate": 3.5495978552278824e-05, + "loss": 0.3143, + "step": 1132 + }, + { + "epoch": 3.0, + "learning_rate": 3.548257372654156e-05, + "loss": 0.8011, + "step": 1133 + }, + { + "epoch": 3.0, + "learning_rate": 3.546916890080429e-05, + "loss": 0.4365, + "step": 1134 + }, + { + "epoch": 3.0, + "eval_f1": 0.7867219917012448, + "eval_loss": 0.613310694694519, + "eval_runtime": 1.9007, + "eval_samples_per_second": 796.041, + "eval_steps_per_second": 49.983, + "step": 1134 + }, + { + "epoch": 3.0, + "learning_rate": 3.545576407506703e-05, + "loss": 0.0111, + "step": 1135 + }, + { + "epoch": 3.01, + "learning_rate": 3.544235924932976e-05, + "loss": 0.0166, + "step": 1136 + }, + { + "epoch": 3.01, + "learning_rate": 3.54289544235925e-05, + "loss": 0.2159, + "step": 1137 + }, + { + "epoch": 3.01, + "learning_rate": 3.541554959785523e-05, + "loss": 0.0096, + "step": 1138 + }, + { + "epoch": 3.01, + "learning_rate": 3.540214477211796e-05, + "loss": 0.1352, + "step": 1139 + }, + { + "epoch": 3.02, + "learning_rate": 3.53887399463807e-05, + "loss": 0.0195, + "step": 1140 + }, + { + "epoch": 3.02, + "learning_rate": 3.5375335120643434e-05, + "loss": 0.1579, + "step": 1141 + }, + { + "epoch": 3.02, + "learning_rate": 3.536193029490617e-05, + "loss": 0.0078, + "step": 1142 + }, + { + "epoch": 3.02, + "learning_rate": 3.5348525469168904e-05, + "loss": 0.0111, + "step": 1143 + }, + { + "epoch": 3.03, + "learning_rate": 3.533512064343163e-05, + "loss": 0.2457, + "step": 1144 + }, + { + "epoch": 3.03, + "learning_rate": 3.5321715817694374e-05, + "loss": 0.014, + "step": 1145 + }, + { + "epoch": 3.03, + "learning_rate": 3.53083109919571e-05, + "loss": 0.2021, + "step": 1146 + }, + { + "epoch": 3.03, + "learning_rate": 3.5294906166219844e-05, + "loss": 0.5334, + "step": 1147 + }, + { + "epoch": 3.04, + "learning_rate": 3.528150134048257e-05, + "loss": 0.0116, + "step": 1148 + }, + { + "epoch": 3.04, + "learning_rate": 3.526809651474531e-05, + "loss": 0.0099, + "step": 1149 + }, + { + "epoch": 3.04, + "learning_rate": 3.525469168900804e-05, + "loss": 0.2102, + "step": 1150 + }, + { + "epoch": 3.04, + "learning_rate": 3.524128686327078e-05, + "loss": 0.0093, + "step": 1151 + }, + { + "epoch": 3.05, + "learning_rate": 3.522788203753351e-05, + "loss": 0.0112, + "step": 1152 + }, + { + "epoch": 3.05, + "learning_rate": 3.521447721179625e-05, + "loss": 0.1761, + "step": 1153 + }, + { + "epoch": 3.05, + "learning_rate": 3.5201072386058984e-05, + "loss": 0.1608, + "step": 1154 + }, + { + "epoch": 3.06, + "learning_rate": 3.518766756032172e-05, + "loss": 0.2883, + "step": 1155 + }, + { + "epoch": 3.06, + "learning_rate": 3.5174262734584454e-05, + "loss": 0.0304, + "step": 1156 + }, + { + "epoch": 3.06, + "learning_rate": 3.516085790884719e-05, + "loss": 0.0623, + "step": 1157 + }, + { + "epoch": 3.06, + "learning_rate": 3.5147453083109924e-05, + "loss": 0.1824, + "step": 1158 + }, + { + "epoch": 3.07, + "learning_rate": 3.513404825737265e-05, + "loss": 0.2527, + "step": 1159 + }, + { + "epoch": 3.07, + "learning_rate": 3.5120643431635394e-05, + "loss": 0.0877, + "step": 1160 + }, + { + "epoch": 3.07, + "learning_rate": 3.510723860589812e-05, + "loss": 0.2735, + "step": 1161 + }, + { + "epoch": 3.07, + "learning_rate": 3.5093833780160865e-05, + "loss": 0.1126, + "step": 1162 + }, + { + "epoch": 3.08, + "learning_rate": 3.508042895442359e-05, + "loss": 0.2498, + "step": 1163 + }, + { + "epoch": 3.08, + "learning_rate": 3.506702412868633e-05, + "loss": 0.022, + "step": 1164 + }, + { + "epoch": 3.08, + "learning_rate": 3.505361930294906e-05, + "loss": 0.2768, + "step": 1165 + }, + { + "epoch": 3.08, + "learning_rate": 3.50402144772118e-05, + "loss": 0.0429, + "step": 1166 + }, + { + "epoch": 3.09, + "learning_rate": 3.5026809651474533e-05, + "loss": 0.0198, + "step": 1167 + }, + { + "epoch": 3.09, + "learning_rate": 3.501340482573727e-05, + "loss": 0.0097, + "step": 1168 + }, + { + "epoch": 3.09, + "learning_rate": 3.5e-05, + "loss": 0.0276, + "step": 1169 + }, + { + "epoch": 3.1, + "learning_rate": 3.498659517426274e-05, + "loss": 0.2276, + "step": 1170 + }, + { + "epoch": 3.1, + "learning_rate": 3.497319034852547e-05, + "loss": 0.0461, + "step": 1171 + }, + { + "epoch": 3.1, + "learning_rate": 3.495978552278821e-05, + "loss": 0.0103, + "step": 1172 + }, + { + "epoch": 3.1, + "learning_rate": 3.494638069705094e-05, + "loss": 0.1455, + "step": 1173 + }, + { + "epoch": 3.11, + "learning_rate": 3.493297587131367e-05, + "loss": 0.0865, + "step": 1174 + }, + { + "epoch": 3.11, + "learning_rate": 3.491957104557641e-05, + "loss": 0.3226, + "step": 1175 + }, + { + "epoch": 3.11, + "learning_rate": 3.490616621983914e-05, + "loss": 0.1744, + "step": 1176 + }, + { + "epoch": 3.11, + "learning_rate": 3.489276139410188e-05, + "loss": 0.0148, + "step": 1177 + }, + { + "epoch": 3.12, + "learning_rate": 3.487935656836461e-05, + "loss": 0.2582, + "step": 1178 + }, + { + "epoch": 3.12, + "learning_rate": 3.486595174262735e-05, + "loss": 0.2782, + "step": 1179 + }, + { + "epoch": 3.12, + "learning_rate": 3.485254691689008e-05, + "loss": 0.143, + "step": 1180 + }, + { + "epoch": 3.12, + "learning_rate": 3.483914209115282e-05, + "loss": 0.0853, + "step": 1181 + }, + { + "epoch": 3.13, + "learning_rate": 3.4825737265415554e-05, + "loss": 0.1361, + "step": 1182 + }, + { + "epoch": 3.13, + "learning_rate": 3.481233243967829e-05, + "loss": 0.0883, + "step": 1183 + }, + { + "epoch": 3.13, + "learning_rate": 3.479892761394102e-05, + "loss": 0.0116, + "step": 1184 + }, + { + "epoch": 3.13, + "learning_rate": 3.478552278820376e-05, + "loss": 0.0531, + "step": 1185 + }, + { + "epoch": 3.14, + "learning_rate": 3.477211796246649e-05, + "loss": 0.0184, + "step": 1186 + }, + { + "epoch": 3.14, + "learning_rate": 3.475871313672923e-05, + "loss": 0.1601, + "step": 1187 + }, + { + "epoch": 3.14, + "learning_rate": 3.474530831099196e-05, + "loss": 0.007, + "step": 1188 + }, + { + "epoch": 3.15, + "learning_rate": 3.473190348525469e-05, + "loss": 0.0101, + "step": 1189 + }, + { + "epoch": 3.15, + "learning_rate": 3.471849865951743e-05, + "loss": 0.2385, + "step": 1190 + }, + { + "epoch": 3.15, + "learning_rate": 3.470509383378016e-05, + "loss": 0.0075, + "step": 1191 + }, + { + "epoch": 3.15, + "learning_rate": 3.46916890080429e-05, + "loss": 0.0919, + "step": 1192 + }, + { + "epoch": 3.16, + "learning_rate": 3.467828418230563e-05, + "loss": 0.0162, + "step": 1193 + }, + { + "epoch": 3.16, + "learning_rate": 3.466487935656836e-05, + "loss": 0.2239, + "step": 1194 + }, + { + "epoch": 3.16, + "learning_rate": 3.4651474530831104e-05, + "loss": 0.5757, + "step": 1195 + }, + { + "epoch": 3.16, + "learning_rate": 3.463806970509383e-05, + "loss": 0.0774, + "step": 1196 + }, + { + "epoch": 3.17, + "learning_rate": 3.4624664879356574e-05, + "loss": 0.2124, + "step": 1197 + }, + { + "epoch": 3.17, + "learning_rate": 3.46112600536193e-05, + "loss": 0.0107, + "step": 1198 + }, + { + "epoch": 3.17, + "learning_rate": 3.459785522788204e-05, + "loss": 0.3179, + "step": 1199 + }, + { + "epoch": 3.17, + "learning_rate": 3.458445040214477e-05, + "loss": 0.0138, + "step": 1200 + }, + { + "epoch": 3.18, + "learning_rate": 3.457104557640751e-05, + "loss": 0.0094, + "step": 1201 + }, + { + "epoch": 3.18, + "learning_rate": 3.455764075067024e-05, + "loss": 0.0039, + "step": 1202 + }, + { + "epoch": 3.18, + "learning_rate": 3.454423592493298e-05, + "loss": 0.0745, + "step": 1203 + }, + { + "epoch": 3.19, + "learning_rate": 3.453083109919571e-05, + "loss": 0.0387, + "step": 1204 + }, + { + "epoch": 3.19, + "learning_rate": 3.451742627345845e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 3.19, + "learning_rate": 3.450402144772118e-05, + "loss": 0.1299, + "step": 1206 + }, + { + "epoch": 3.19, + "learning_rate": 3.449061662198392e-05, + "loss": 0.2821, + "step": 1207 + }, + { + "epoch": 3.2, + "learning_rate": 3.4477211796246653e-05, + "loss": 0.2236, + "step": 1208 + }, + { + "epoch": 3.2, + "learning_rate": 3.446380697050938e-05, + "loss": 0.1436, + "step": 1209 + }, + { + "epoch": 3.2, + "learning_rate": 3.4450402144772124e-05, + "loss": 0.1504, + "step": 1210 + }, + { + "epoch": 3.2, + "learning_rate": 3.443699731903485e-05, + "loss": 0.0415, + "step": 1211 + }, + { + "epoch": 3.21, + "learning_rate": 3.4423592493297594e-05, + "loss": 0.023, + "step": 1212 + }, + { + "epoch": 3.21, + "learning_rate": 3.441018766756032e-05, + "loss": 0.2128, + "step": 1213 + }, + { + "epoch": 3.21, + "learning_rate": 3.439678284182306e-05, + "loss": 0.0066, + "step": 1214 + }, + { + "epoch": 3.21, + "learning_rate": 3.438337801608579e-05, + "loss": 0.4345, + "step": 1215 + }, + { + "epoch": 3.22, + "learning_rate": 3.436997319034853e-05, + "loss": 0.0214, + "step": 1216 + }, + { + "epoch": 3.22, + "learning_rate": 3.435656836461126e-05, + "loss": 0.2094, + "step": 1217 + }, + { + "epoch": 3.22, + "learning_rate": 3.4343163538874e-05, + "loss": 0.0822, + "step": 1218 + }, + { + "epoch": 3.22, + "learning_rate": 3.4329758713136726e-05, + "loss": 0.1153, + "step": 1219 + }, + { + "epoch": 3.23, + "learning_rate": 3.431635388739947e-05, + "loss": 0.0059, + "step": 1220 + }, + { + "epoch": 3.23, + "learning_rate": 3.43029490616622e-05, + "loss": 0.0069, + "step": 1221 + }, + { + "epoch": 3.23, + "learning_rate": 3.428954423592494e-05, + "loss": 0.044, + "step": 1222 + }, + { + "epoch": 3.24, + "learning_rate": 3.427613941018767e-05, + "loss": 0.1975, + "step": 1223 + }, + { + "epoch": 3.24, + "learning_rate": 3.42627345844504e-05, + "loss": 0.3294, + "step": 1224 + }, + { + "epoch": 3.24, + "learning_rate": 3.424932975871314e-05, + "loss": 0.026, + "step": 1225 + }, + { + "epoch": 3.24, + "learning_rate": 3.423592493297587e-05, + "loss": 0.2666, + "step": 1226 + }, + { + "epoch": 3.25, + "learning_rate": 3.422252010723861e-05, + "loss": 0.0628, + "step": 1227 + }, + { + "epoch": 3.25, + "learning_rate": 3.420911528150134e-05, + "loss": 0.0068, + "step": 1228 + }, + { + "epoch": 3.25, + "learning_rate": 3.419571045576407e-05, + "loss": 0.0144, + "step": 1229 + }, + { + "epoch": 3.25, + "learning_rate": 3.418230563002681e-05, + "loss": 0.0029, + "step": 1230 + }, + { + "epoch": 3.26, + "learning_rate": 3.416890080428954e-05, + "loss": 0.606, + "step": 1231 + }, + { + "epoch": 3.26, + "learning_rate": 3.415549597855228e-05, + "loss": 0.2162, + "step": 1232 + }, + { + "epoch": 3.26, + "learning_rate": 3.414209115281501e-05, + "loss": 0.146, + "step": 1233 + }, + { + "epoch": 3.26, + "learning_rate": 3.412868632707775e-05, + "loss": 0.3649, + "step": 1234 + }, + { + "epoch": 3.27, + "learning_rate": 3.411528150134048e-05, + "loss": 0.0062, + "step": 1235 + }, + { + "epoch": 3.27, + "learning_rate": 3.410187667560322e-05, + "loss": 0.4097, + "step": 1236 + }, + { + "epoch": 3.27, + "learning_rate": 3.408847184986595e-05, + "loss": 0.5354, + "step": 1237 + }, + { + "epoch": 3.28, + "learning_rate": 3.407506702412869e-05, + "loss": 0.6222, + "step": 1238 + }, + { + "epoch": 3.28, + "learning_rate": 3.406166219839142e-05, + "loss": 0.0023, + "step": 1239 + }, + { + "epoch": 3.28, + "learning_rate": 3.404825737265416e-05, + "loss": 0.0247, + "step": 1240 + }, + { + "epoch": 3.28, + "learning_rate": 3.403485254691689e-05, + "loss": 0.0051, + "step": 1241 + }, + { + "epoch": 3.29, + "learning_rate": 3.402144772117963e-05, + "loss": 0.2504, + "step": 1242 + }, + { + "epoch": 3.29, + "learning_rate": 3.400804289544236e-05, + "loss": 0.0195, + "step": 1243 + }, + { + "epoch": 3.29, + "learning_rate": 3.39946380697051e-05, + "loss": 0.3706, + "step": 1244 + }, + { + "epoch": 3.29, + "learning_rate": 3.398123324396783e-05, + "loss": 0.0174, + "step": 1245 + }, + { + "epoch": 3.3, + "learning_rate": 3.396782841823056e-05, + "loss": 0.0068, + "step": 1246 + }, + { + "epoch": 3.3, + "learning_rate": 3.39544235924933e-05, + "loss": 0.3938, + "step": 1247 + }, + { + "epoch": 3.3, + "learning_rate": 3.394101876675603e-05, + "loss": 0.0114, + "step": 1248 + }, + { + "epoch": 3.3, + "learning_rate": 3.3927613941018774e-05, + "loss": 0.0088, + "step": 1249 + }, + { + "epoch": 3.31, + "learning_rate": 3.39142091152815e-05, + "loss": 0.0126, + "step": 1250 + }, + { + "epoch": 3.31, + "learning_rate": 3.390080428954424e-05, + "loss": 0.0091, + "step": 1251 + }, + { + "epoch": 3.31, + "learning_rate": 3.388739946380697e-05, + "loss": 0.0232, + "step": 1252 + }, + { + "epoch": 3.31, + "learning_rate": 3.387399463806971e-05, + "loss": 0.3704, + "step": 1253 + }, + { + "epoch": 3.32, + "learning_rate": 3.386058981233244e-05, + "loss": 0.0112, + "step": 1254 + }, + { + "epoch": 3.32, + "learning_rate": 3.384718498659518e-05, + "loss": 0.1709, + "step": 1255 + }, + { + "epoch": 3.32, + "learning_rate": 3.3833780160857906e-05, + "loss": 0.0109, + "step": 1256 + }, + { + "epoch": 3.33, + "learning_rate": 3.382037533512065e-05, + "loss": 0.2874, + "step": 1257 + }, + { + "epoch": 3.33, + "learning_rate": 3.3806970509383376e-05, + "loss": 0.024, + "step": 1258 + }, + { + "epoch": 3.33, + "learning_rate": 3.379356568364612e-05, + "loss": 0.0131, + "step": 1259 + }, + { + "epoch": 3.33, + "learning_rate": 3.3780160857908846e-05, + "loss": 0.2076, + "step": 1260 + }, + { + "epoch": 3.34, + "learning_rate": 3.376675603217158e-05, + "loss": 0.0083, + "step": 1261 + }, + { + "epoch": 3.34, + "learning_rate": 3.375335120643432e-05, + "loss": 0.0234, + "step": 1262 + }, + { + "epoch": 3.34, + "learning_rate": 3.373994638069705e-05, + "loss": 0.0066, + "step": 1263 + }, + { + "epoch": 3.34, + "learning_rate": 3.372654155495979e-05, + "loss": 0.3983, + "step": 1264 + }, + { + "epoch": 3.35, + "learning_rate": 3.371313672922252e-05, + "loss": 0.0648, + "step": 1265 + }, + { + "epoch": 3.35, + "learning_rate": 3.369973190348526e-05, + "loss": 0.006, + "step": 1266 + }, + { + "epoch": 3.35, + "learning_rate": 3.368632707774799e-05, + "loss": 0.0807, + "step": 1267 + }, + { + "epoch": 3.35, + "learning_rate": 3.367292225201073e-05, + "loss": 0.0975, + "step": 1268 + }, + { + "epoch": 3.36, + "learning_rate": 3.365951742627346e-05, + "loss": 0.2934, + "step": 1269 + }, + { + "epoch": 3.36, + "learning_rate": 3.36461126005362e-05, + "loss": 0.0869, + "step": 1270 + }, + { + "epoch": 3.36, + "learning_rate": 3.3632707774798926e-05, + "loss": 0.1374, + "step": 1271 + }, + { + "epoch": 3.37, + "learning_rate": 3.361930294906167e-05, + "loss": 0.3314, + "step": 1272 + }, + { + "epoch": 3.37, + "learning_rate": 3.3605898123324396e-05, + "loss": 0.0045, + "step": 1273 + }, + { + "epoch": 3.37, + "learning_rate": 3.359249329758714e-05, + "loss": 0.0536, + "step": 1274 + }, + { + "epoch": 3.37, + "learning_rate": 3.3579088471849867e-05, + "loss": 0.0564, + "step": 1275 + }, + { + "epoch": 3.38, + "learning_rate": 3.35656836461126e-05, + "loss": 0.0689, + "step": 1276 + }, + { + "epoch": 3.38, + "learning_rate": 3.355227882037534e-05, + "loss": 0.5177, + "step": 1277 + }, + { + "epoch": 3.38, + "learning_rate": 3.353887399463807e-05, + "loss": 0.0689, + "step": 1278 + }, + { + "epoch": 3.38, + "learning_rate": 3.352546916890081e-05, + "loss": 0.0664, + "step": 1279 + }, + { + "epoch": 3.39, + "learning_rate": 3.351206434316354e-05, + "loss": 0.0614, + "step": 1280 + }, + { + "epoch": 3.39, + "learning_rate": 3.349865951742627e-05, + "loss": 0.1994, + "step": 1281 + }, + { + "epoch": 3.39, + "learning_rate": 3.348525469168901e-05, + "loss": 0.4769, + "step": 1282 + }, + { + "epoch": 3.39, + "learning_rate": 3.347184986595174e-05, + "loss": 0.1851, + "step": 1283 + }, + { + "epoch": 3.4, + "learning_rate": 3.345844504021448e-05, + "loss": 0.0092, + "step": 1284 + }, + { + "epoch": 3.4, + "learning_rate": 3.344504021447721e-05, + "loss": 0.0052, + "step": 1285 + }, + { + "epoch": 3.4, + "learning_rate": 3.3431635388739946e-05, + "loss": 0.0095, + "step": 1286 + }, + { + "epoch": 3.4, + "learning_rate": 3.341823056300268e-05, + "loss": 0.0242, + "step": 1287 + }, + { + "epoch": 3.41, + "learning_rate": 3.3404825737265416e-05, + "loss": 0.0565, + "step": 1288 + }, + { + "epoch": 3.41, + "learning_rate": 3.339142091152815e-05, + "loss": 0.2645, + "step": 1289 + }, + { + "epoch": 3.41, + "learning_rate": 3.337801608579089e-05, + "loss": 0.0049, + "step": 1290 + }, + { + "epoch": 3.42, + "learning_rate": 3.336461126005362e-05, + "loss": 0.0929, + "step": 1291 + }, + { + "epoch": 3.42, + "learning_rate": 3.335120643431636e-05, + "loss": 0.3968, + "step": 1292 + }, + { + "epoch": 3.42, + "learning_rate": 3.333780160857909e-05, + "loss": 0.033, + "step": 1293 + }, + { + "epoch": 3.42, + "learning_rate": 3.332439678284183e-05, + "loss": 0.007, + "step": 1294 + }, + { + "epoch": 3.43, + "learning_rate": 3.331099195710456e-05, + "loss": 0.2552, + "step": 1295 + }, + { + "epoch": 3.43, + "learning_rate": 3.329758713136729e-05, + "loss": 0.004, + "step": 1296 + }, + { + "epoch": 3.43, + "learning_rate": 3.328418230563003e-05, + "loss": 0.136, + "step": 1297 + }, + { + "epoch": 3.43, + "learning_rate": 3.327077747989276e-05, + "loss": 0.1407, + "step": 1298 + }, + { + "epoch": 3.44, + "learning_rate": 3.32573726541555e-05, + "loss": 0.0354, + "step": 1299 + }, + { + "epoch": 3.44, + "learning_rate": 3.324396782841823e-05, + "loss": 0.6141, + "step": 1300 + }, + { + "epoch": 3.44, + "learning_rate": 3.3230563002680966e-05, + "loss": 0.2544, + "step": 1301 + }, + { + "epoch": 3.44, + "learning_rate": 3.32171581769437e-05, + "loss": 0.0046, + "step": 1302 + }, + { + "epoch": 3.45, + "learning_rate": 3.320375335120644e-05, + "loss": 0.0126, + "step": 1303 + }, + { + "epoch": 3.45, + "learning_rate": 3.319034852546917e-05, + "loss": 0.3506, + "step": 1304 + }, + { + "epoch": 3.45, + "learning_rate": 3.317694369973191e-05, + "loss": 0.3512, + "step": 1305 + }, + { + "epoch": 3.46, + "learning_rate": 3.3163538873994635e-05, + "loss": 0.3675, + "step": 1306 + }, + { + "epoch": 3.46, + "learning_rate": 3.315013404825738e-05, + "loss": 0.1676, + "step": 1307 + }, + { + "epoch": 3.46, + "learning_rate": 3.3136729222520106e-05, + "loss": 0.0307, + "step": 1308 + }, + { + "epoch": 3.46, + "learning_rate": 3.312332439678285e-05, + "loss": 0.0084, + "step": 1309 + }, + { + "epoch": 3.47, + "learning_rate": 3.3109919571045576e-05, + "loss": 0.1977, + "step": 1310 + }, + { + "epoch": 3.47, + "learning_rate": 3.309651474530831e-05, + "loss": 0.1645, + "step": 1311 + }, + { + "epoch": 3.47, + "learning_rate": 3.3083109919571046e-05, + "loss": 0.2579, + "step": 1312 + }, + { + "epoch": 3.47, + "learning_rate": 3.306970509383378e-05, + "loss": 0.1656, + "step": 1313 + }, + { + "epoch": 3.48, + "learning_rate": 3.3056300268096516e-05, + "loss": 0.0168, + "step": 1314 + }, + { + "epoch": 3.48, + "learning_rate": 3.304289544235925e-05, + "loss": 0.0291, + "step": 1315 + }, + { + "epoch": 3.48, + "learning_rate": 3.302949061662198e-05, + "loss": 0.0146, + "step": 1316 + }, + { + "epoch": 3.48, + "learning_rate": 3.301608579088472e-05, + "loss": 0.0037, + "step": 1317 + }, + { + "epoch": 3.49, + "learning_rate": 3.300268096514745e-05, + "loss": 0.0113, + "step": 1318 + }, + { + "epoch": 3.49, + "learning_rate": 3.298927613941019e-05, + "loss": 0.0734, + "step": 1319 + }, + { + "epoch": 3.49, + "learning_rate": 3.297587131367292e-05, + "loss": 0.0292, + "step": 1320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2962466487935655e-05, + "loss": 0.3875, + "step": 1321 + }, + { + "epoch": 3.5, + "learning_rate": 3.294906166219839e-05, + "loss": 0.0138, + "step": 1322 + }, + { + "epoch": 3.5, + "learning_rate": 3.2935656836461126e-05, + "loss": 0.4653, + "step": 1323 + }, + { + "epoch": 3.5, + "learning_rate": 3.292225201072386e-05, + "loss": 0.1864, + "step": 1324 + }, + { + "epoch": 3.51, + "learning_rate": 3.2908847184986596e-05, + "loss": 0.0116, + "step": 1325 + }, + { + "epoch": 3.51, + "learning_rate": 3.289544235924933e-05, + "loss": 0.014, + "step": 1326 + }, + { + "epoch": 3.51, + "learning_rate": 3.2882037533512066e-05, + "loss": 0.3344, + "step": 1327 + }, + { + "epoch": 3.51, + "learning_rate": 3.28686327077748e-05, + "loss": 0.1544, + "step": 1328 + }, + { + "epoch": 3.52, + "learning_rate": 3.2855227882037537e-05, + "loss": 0.0065, + "step": 1329 + }, + { + "epoch": 3.52, + "learning_rate": 3.284182305630027e-05, + "loss": 0.0041, + "step": 1330 + }, + { + "epoch": 3.52, + "learning_rate": 3.2828418230563e-05, + "loss": 0.0044, + "step": 1331 + }, + { + "epoch": 3.52, + "learning_rate": 3.281501340482574e-05, + "loss": 0.1808, + "step": 1332 + }, + { + "epoch": 3.53, + "learning_rate": 3.280160857908847e-05, + "loss": 0.0521, + "step": 1333 + }, + { + "epoch": 3.53, + "learning_rate": 3.278820375335121e-05, + "loss": 0.3505, + "step": 1334 + }, + { + "epoch": 3.53, + "learning_rate": 3.277479892761394e-05, + "loss": 0.2032, + "step": 1335 + }, + { + "epoch": 3.53, + "learning_rate": 3.2761394101876676e-05, + "loss": 0.004, + "step": 1336 + }, + { + "epoch": 3.54, + "learning_rate": 3.274798927613941e-05, + "loss": 0.0343, + "step": 1337 + }, + { + "epoch": 3.54, + "learning_rate": 3.2734584450402146e-05, + "loss": 0.278, + "step": 1338 + }, + { + "epoch": 3.54, + "learning_rate": 3.272117962466488e-05, + "loss": 0.0056, + "step": 1339 + }, + { + "epoch": 3.54, + "learning_rate": 3.2707774798927616e-05, + "loss": 0.1673, + "step": 1340 + }, + { + "epoch": 3.55, + "learning_rate": 3.2694369973190345e-05, + "loss": 0.0092, + "step": 1341 + }, + { + "epoch": 3.55, + "learning_rate": 3.2680965147453086e-05, + "loss": 0.0058, + "step": 1342 + }, + { + "epoch": 3.55, + "learning_rate": 3.2667560321715815e-05, + "loss": 0.097, + "step": 1343 + }, + { + "epoch": 3.56, + "learning_rate": 3.265415549597856e-05, + "loss": 0.2138, + "step": 1344 + }, + { + "epoch": 3.56, + "learning_rate": 3.2640750670241285e-05, + "loss": 0.0077, + "step": 1345 + }, + { + "epoch": 3.56, + "learning_rate": 3.262734584450402e-05, + "loss": 0.2294, + "step": 1346 + }, + { + "epoch": 3.56, + "learning_rate": 3.2613941018766755e-05, + "loss": 0.3282, + "step": 1347 + }, + { + "epoch": 3.57, + "learning_rate": 3.260053619302949e-05, + "loss": 0.233, + "step": 1348 + }, + { + "epoch": 3.57, + "learning_rate": 3.2587131367292226e-05, + "loss": 0.0379, + "step": 1349 + }, + { + "epoch": 3.57, + "learning_rate": 3.257372654155496e-05, + "loss": 0.2168, + "step": 1350 + }, + { + "epoch": 3.57, + "learning_rate": 3.2560321715817696e-05, + "loss": 0.0443, + "step": 1351 + }, + { + "epoch": 3.58, + "learning_rate": 3.254691689008043e-05, + "loss": 0.2665, + "step": 1352 + }, + { + "epoch": 3.58, + "learning_rate": 3.2533512064343166e-05, + "loss": 0.0136, + "step": 1353 + }, + { + "epoch": 3.58, + "learning_rate": 3.25201072386059e-05, + "loss": 0.0035, + "step": 1354 + }, + { + "epoch": 3.58, + "learning_rate": 3.2506702412868636e-05, + "loss": 0.2153, + "step": 1355 + }, + { + "epoch": 3.59, + "learning_rate": 3.249329758713137e-05, + "loss": 0.088, + "step": 1356 + }, + { + "epoch": 3.59, + "learning_rate": 3.247989276139411e-05, + "loss": 0.0074, + "step": 1357 + }, + { + "epoch": 3.59, + "learning_rate": 3.2466487935656835e-05, + "loss": 0.0924, + "step": 1358 + }, + { + "epoch": 3.6, + "learning_rate": 3.245308310991958e-05, + "loss": 0.0171, + "step": 1359 + }, + { + "epoch": 3.6, + "learning_rate": 3.2439678284182305e-05, + "loss": 0.0132, + "step": 1360 + }, + { + "epoch": 3.6, + "learning_rate": 3.242627345844505e-05, + "loss": 0.0583, + "step": 1361 + }, + { + "epoch": 3.6, + "learning_rate": 3.2412868632707776e-05, + "loss": 0.0038, + "step": 1362 + }, + { + "epoch": 3.61, + "learning_rate": 3.239946380697051e-05, + "loss": 0.0846, + "step": 1363 + }, + { + "epoch": 3.61, + "learning_rate": 3.2386058981233246e-05, + "loss": 0.0058, + "step": 1364 + }, + { + "epoch": 3.61, + "learning_rate": 3.237265415549598e-05, + "loss": 0.4456, + "step": 1365 + }, + { + "epoch": 3.61, + "learning_rate": 3.2359249329758716e-05, + "loss": 0.0029, + "step": 1366 + }, + { + "epoch": 3.62, + "learning_rate": 3.234584450402145e-05, + "loss": 0.2553, + "step": 1367 + }, + { + "epoch": 3.62, + "learning_rate": 3.233243967828418e-05, + "loss": 0.0936, + "step": 1368 + }, + { + "epoch": 3.62, + "learning_rate": 3.231903485254692e-05, + "loss": 0.1017, + "step": 1369 + }, + { + "epoch": 3.62, + "learning_rate": 3.230563002680965e-05, + "loss": 0.0379, + "step": 1370 + }, + { + "epoch": 3.63, + "learning_rate": 3.229222520107239e-05, + "loss": 0.0069, + "step": 1371 + }, + { + "epoch": 3.63, + "learning_rate": 3.227882037533512e-05, + "loss": 0.3235, + "step": 1372 + }, + { + "epoch": 3.63, + "learning_rate": 3.2265415549597855e-05, + "loss": 0.3796, + "step": 1373 + }, + { + "epoch": 3.63, + "learning_rate": 3.225201072386059e-05, + "loss": 0.3246, + "step": 1374 + }, + { + "epoch": 3.64, + "learning_rate": 3.2238605898123325e-05, + "loss": 0.0059, + "step": 1375 + }, + { + "epoch": 3.64, + "learning_rate": 3.222520107238606e-05, + "loss": 0.0405, + "step": 1376 + }, + { + "epoch": 3.64, + "learning_rate": 3.2211796246648796e-05, + "loss": 0.0142, + "step": 1377 + }, + { + "epoch": 3.65, + "learning_rate": 3.219839142091153e-05, + "loss": 0.4426, + "step": 1378 + }, + { + "epoch": 3.65, + "learning_rate": 3.2184986595174266e-05, + "loss": 0.0249, + "step": 1379 + }, + { + "epoch": 3.65, + "learning_rate": 3.2171581769437e-05, + "loss": 0.1053, + "step": 1380 + }, + { + "epoch": 3.65, + "learning_rate": 3.2158176943699736e-05, + "loss": 0.0179, + "step": 1381 + }, + { + "epoch": 3.66, + "learning_rate": 3.214477211796247e-05, + "loss": 0.0718, + "step": 1382 + }, + { + "epoch": 3.66, + "learning_rate": 3.21313672922252e-05, + "loss": 0.1431, + "step": 1383 + }, + { + "epoch": 3.66, + "learning_rate": 3.211796246648794e-05, + "loss": 0.2391, + "step": 1384 + }, + { + "epoch": 3.66, + "learning_rate": 3.210455764075067e-05, + "loss": 0.0053, + "step": 1385 + }, + { + "epoch": 3.67, + "learning_rate": 3.209115281501341e-05, + "loss": 0.2935, + "step": 1386 + }, + { + "epoch": 3.67, + "learning_rate": 3.207774798927614e-05, + "loss": 0.0071, + "step": 1387 + }, + { + "epoch": 3.67, + "learning_rate": 3.2064343163538875e-05, + "loss": 0.031, + "step": 1388 + }, + { + "epoch": 3.67, + "learning_rate": 3.205093833780161e-05, + "loss": 0.1989, + "step": 1389 + }, + { + "epoch": 3.68, + "learning_rate": 3.2037533512064346e-05, + "loss": 0.0533, + "step": 1390 + }, + { + "epoch": 3.68, + "learning_rate": 3.202412868632708e-05, + "loss": 0.2408, + "step": 1391 + }, + { + "epoch": 3.68, + "learning_rate": 3.2010723860589816e-05, + "loss": 0.3158, + "step": 1392 + }, + { + "epoch": 3.69, + "learning_rate": 3.1997319034852544e-05, + "loss": 0.3629, + "step": 1393 + }, + { + "epoch": 3.69, + "learning_rate": 3.1983914209115286e-05, + "loss": 0.0122, + "step": 1394 + }, + { + "epoch": 3.69, + "learning_rate": 3.1970509383378014e-05, + "loss": 0.0449, + "step": 1395 + }, + { + "epoch": 3.69, + "learning_rate": 3.1957104557640756e-05, + "loss": 0.1273, + "step": 1396 + }, + { + "epoch": 3.7, + "learning_rate": 3.1943699731903485e-05, + "loss": 0.3401, + "step": 1397 + }, + { + "epoch": 3.7, + "learning_rate": 3.193029490616622e-05, + "loss": 0.0183, + "step": 1398 + }, + { + "epoch": 3.7, + "learning_rate": 3.1916890080428955e-05, + "loss": 0.0526, + "step": 1399 + }, + { + "epoch": 3.7, + "learning_rate": 3.190348525469169e-05, + "loss": 0.5037, + "step": 1400 + }, + { + "epoch": 3.71, + "learning_rate": 3.1890080428954425e-05, + "loss": 0.0059, + "step": 1401 + }, + { + "epoch": 3.71, + "learning_rate": 3.187667560321716e-05, + "loss": 0.0266, + "step": 1402 + }, + { + "epoch": 3.71, + "learning_rate": 3.1863270777479896e-05, + "loss": 0.4095, + "step": 1403 + }, + { + "epoch": 3.71, + "learning_rate": 3.184986595174263e-05, + "loss": 0.1802, + "step": 1404 + }, + { + "epoch": 3.72, + "learning_rate": 3.1836461126005366e-05, + "loss": 0.3586, + "step": 1405 + }, + { + "epoch": 3.72, + "learning_rate": 3.18230563002681e-05, + "loss": 0.2058, + "step": 1406 + }, + { + "epoch": 3.72, + "learning_rate": 3.1809651474530836e-05, + "loss": 0.008, + "step": 1407 + }, + { + "epoch": 3.72, + "learning_rate": 3.1796246648793564e-05, + "loss": 0.0282, + "step": 1408 + }, + { + "epoch": 3.73, + "learning_rate": 3.1782841823056306e-05, + "loss": 0.0077, + "step": 1409 + }, + { + "epoch": 3.73, + "learning_rate": 3.1769436997319035e-05, + "loss": 0.3461, + "step": 1410 + }, + { + "epoch": 3.73, + "learning_rate": 3.1756032171581777e-05, + "loss": 0.0038, + "step": 1411 + }, + { + "epoch": 3.74, + "learning_rate": 3.1742627345844505e-05, + "loss": 0.0087, + "step": 1412 + }, + { + "epoch": 3.74, + "learning_rate": 3.172922252010724e-05, + "loss": 0.8254, + "step": 1413 + }, + { + "epoch": 3.74, + "learning_rate": 3.1715817694369975e-05, + "loss": 0.017, + "step": 1414 + }, + { + "epoch": 3.74, + "learning_rate": 3.170241286863271e-05, + "loss": 0.2954, + "step": 1415 + }, + { + "epoch": 3.75, + "learning_rate": 3.1689008042895445e-05, + "loss": 0.0286, + "step": 1416 + }, + { + "epoch": 3.75, + "learning_rate": 3.167560321715818e-05, + "loss": 0.0454, + "step": 1417 + }, + { + "epoch": 3.75, + "learning_rate": 3.166219839142091e-05, + "loss": 0.222, + "step": 1418 + }, + { + "epoch": 3.75, + "learning_rate": 3.164879356568365e-05, + "loss": 0.0225, + "step": 1419 + }, + { + "epoch": 3.76, + "learning_rate": 3.163538873994638e-05, + "loss": 0.2599, + "step": 1420 + }, + { + "epoch": 3.76, + "learning_rate": 3.162198391420912e-05, + "loss": 0.2343, + "step": 1421 + }, + { + "epoch": 3.76, + "learning_rate": 3.160857908847185e-05, + "loss": 0.0274, + "step": 1422 + }, + { + "epoch": 3.76, + "learning_rate": 3.1595174262734585e-05, + "loss": 0.0109, + "step": 1423 + }, + { + "epoch": 3.77, + "learning_rate": 3.158176943699732e-05, + "loss": 0.012, + "step": 1424 + }, + { + "epoch": 3.77, + "learning_rate": 3.1568364611260055e-05, + "loss": 0.0267, + "step": 1425 + }, + { + "epoch": 3.77, + "learning_rate": 3.155495978552279e-05, + "loss": 0.0116, + "step": 1426 + }, + { + "epoch": 3.78, + "learning_rate": 3.1541554959785525e-05, + "loss": 0.2563, + "step": 1427 + }, + { + "epoch": 3.78, + "learning_rate": 3.1528150134048253e-05, + "loss": 0.2149, + "step": 1428 + }, + { + "epoch": 3.78, + "learning_rate": 3.1514745308310995e-05, + "loss": 0.2099, + "step": 1429 + }, + { + "epoch": 3.78, + "learning_rate": 3.1501340482573724e-05, + "loss": 0.1445, + "step": 1430 + }, + { + "epoch": 3.79, + "learning_rate": 3.1487935656836466e-05, + "loss": 0.0069, + "step": 1431 + }, + { + "epoch": 3.79, + "learning_rate": 3.1474530831099194e-05, + "loss": 0.3583, + "step": 1432 + }, + { + "epoch": 3.79, + "learning_rate": 3.146112600536193e-05, + "loss": 0.1112, + "step": 1433 + }, + { + "epoch": 3.79, + "learning_rate": 3.1447721179624664e-05, + "loss": 0.5379, + "step": 1434 + }, + { + "epoch": 3.8, + "learning_rate": 3.14343163538874e-05, + "loss": 0.0248, + "step": 1435 + }, + { + "epoch": 3.8, + "learning_rate": 3.1420911528150135e-05, + "loss": 0.0255, + "step": 1436 + }, + { + "epoch": 3.8, + "learning_rate": 3.140750670241287e-05, + "loss": 0.3363, + "step": 1437 + }, + { + "epoch": 3.8, + "learning_rate": 3.1394101876675605e-05, + "loss": 0.2952, + "step": 1438 + }, + { + "epoch": 3.81, + "learning_rate": 3.138069705093834e-05, + "loss": 0.0337, + "step": 1439 + }, + { + "epoch": 3.81, + "learning_rate": 3.1367292225201075e-05, + "loss": 0.0157, + "step": 1440 + }, + { + "epoch": 3.81, + "learning_rate": 3.135388739946381e-05, + "loss": 0.0204, + "step": 1441 + }, + { + "epoch": 3.81, + "learning_rate": 3.1340482573726545e-05, + "loss": 0.7707, + "step": 1442 + }, + { + "epoch": 3.82, + "learning_rate": 3.1327077747989274e-05, + "loss": 0.4232, + "step": 1443 + }, + { + "epoch": 3.82, + "learning_rate": 3.1313672922252016e-05, + "loss": 0.116, + "step": 1444 + }, + { + "epoch": 3.82, + "learning_rate": 3.1300268096514744e-05, + "loss": 0.421, + "step": 1445 + }, + { + "epoch": 3.83, + "learning_rate": 3.1286863270777486e-05, + "loss": 0.0267, + "step": 1446 + }, + { + "epoch": 3.83, + "learning_rate": 3.1273458445040214e-05, + "loss": 0.0078, + "step": 1447 + }, + { + "epoch": 3.83, + "learning_rate": 3.126005361930295e-05, + "loss": 0.0996, + "step": 1448 + }, + { + "epoch": 3.83, + "learning_rate": 3.1246648793565684e-05, + "loss": 0.0389, + "step": 1449 + }, + { + "epoch": 3.84, + "learning_rate": 3.123324396782842e-05, + "loss": 0.0482, + "step": 1450 + }, + { + "epoch": 3.84, + "learning_rate": 3.1219839142091155e-05, + "loss": 0.0053, + "step": 1451 + }, + { + "epoch": 3.84, + "learning_rate": 3.120643431635389e-05, + "loss": 0.0153, + "step": 1452 + }, + { + "epoch": 3.84, + "learning_rate": 3.119302949061662e-05, + "loss": 0.008, + "step": 1453 + }, + { + "epoch": 3.85, + "learning_rate": 3.117962466487936e-05, + "loss": 0.0166, + "step": 1454 + }, + { + "epoch": 3.85, + "learning_rate": 3.116621983914209e-05, + "loss": 0.0889, + "step": 1455 + }, + { + "epoch": 3.85, + "learning_rate": 3.115281501340483e-05, + "loss": 0.0695, + "step": 1456 + }, + { + "epoch": 3.85, + "learning_rate": 3.113941018766756e-05, + "loss": 0.3353, + "step": 1457 + }, + { + "epoch": 3.86, + "learning_rate": 3.1126005361930294e-05, + "loss": 0.0729, + "step": 1458 + }, + { + "epoch": 3.86, + "learning_rate": 3.111260053619303e-05, + "loss": 0.0187, + "step": 1459 + }, + { + "epoch": 3.86, + "learning_rate": 3.1099195710455764e-05, + "loss": 0.2512, + "step": 1460 + }, + { + "epoch": 3.87, + "learning_rate": 3.10857908847185e-05, + "loss": 0.3837, + "step": 1461 + }, + { + "epoch": 3.87, + "learning_rate": 3.1072386058981234e-05, + "loss": 0.2543, + "step": 1462 + }, + { + "epoch": 3.87, + "learning_rate": 3.105898123324397e-05, + "loss": 0.1797, + "step": 1463 + }, + { + "epoch": 3.87, + "learning_rate": 3.1045576407506705e-05, + "loss": 0.3097, + "step": 1464 + }, + { + "epoch": 3.88, + "learning_rate": 3.103217158176944e-05, + "loss": 0.268, + "step": 1465 + }, + { + "epoch": 3.88, + "learning_rate": 3.1018766756032175e-05, + "loss": 0.1773, + "step": 1466 + }, + { + "epoch": 3.88, + "learning_rate": 3.100536193029491e-05, + "loss": 0.2055, + "step": 1467 + }, + { + "epoch": 3.88, + "learning_rate": 3.099195710455764e-05, + "loss": 0.0279, + "step": 1468 + }, + { + "epoch": 3.89, + "learning_rate": 3.097855227882038e-05, + "loss": 0.1263, + "step": 1469 + }, + { + "epoch": 3.89, + "learning_rate": 3.096514745308311e-05, + "loss": 0.0449, + "step": 1470 + }, + { + "epoch": 3.89, + "learning_rate": 3.095174262734585e-05, + "loss": 0.2429, + "step": 1471 + }, + { + "epoch": 3.89, + "learning_rate": 3.093833780160858e-05, + "loss": 0.1245, + "step": 1472 + }, + { + "epoch": 3.9, + "learning_rate": 3.0924932975871314e-05, + "loss": 0.1303, + "step": 1473 + }, + { + "epoch": 3.9, + "learning_rate": 3.091152815013405e-05, + "loss": 0.0303, + "step": 1474 + }, + { + "epoch": 3.9, + "learning_rate": 3.0898123324396784e-05, + "loss": 0.3279, + "step": 1475 + }, + { + "epoch": 3.9, + "learning_rate": 3.088471849865952e-05, + "loss": 0.134, + "step": 1476 + }, + { + "epoch": 3.91, + "learning_rate": 3.0871313672922255e-05, + "loss": 0.5138, + "step": 1477 + }, + { + "epoch": 3.91, + "learning_rate": 3.085790884718498e-05, + "loss": 0.0476, + "step": 1478 + }, + { + "epoch": 3.91, + "learning_rate": 3.0844504021447725e-05, + "loss": 0.1956, + "step": 1479 + }, + { + "epoch": 3.92, + "learning_rate": 3.083109919571045e-05, + "loss": 0.2061, + "step": 1480 + }, + { + "epoch": 3.92, + "learning_rate": 3.0817694369973195e-05, + "loss": 0.269, + "step": 1481 + }, + { + "epoch": 3.92, + "learning_rate": 3.0804289544235923e-05, + "loss": 0.0708, + "step": 1482 + }, + { + "epoch": 3.92, + "learning_rate": 3.0790884718498665e-05, + "loss": 0.0389, + "step": 1483 + }, + { + "epoch": 3.93, + "learning_rate": 3.0777479892761394e-05, + "loss": 0.2566, + "step": 1484 + }, + { + "epoch": 3.93, + "learning_rate": 3.076407506702413e-05, + "loss": 0.0581, + "step": 1485 + }, + { + "epoch": 3.93, + "learning_rate": 3.0750670241286864e-05, + "loss": 0.1527, + "step": 1486 + }, + { + "epoch": 3.93, + "learning_rate": 3.07372654155496e-05, + "loss": 0.3963, + "step": 1487 + }, + { + "epoch": 3.94, + "learning_rate": 3.0723860589812334e-05, + "loss": 0.2241, + "step": 1488 + }, + { + "epoch": 3.94, + "learning_rate": 3.071045576407507e-05, + "loss": 0.1275, + "step": 1489 + }, + { + "epoch": 3.94, + "learning_rate": 3.0697050938337804e-05, + "loss": 0.3148, + "step": 1490 + }, + { + "epoch": 3.94, + "learning_rate": 3.068364611260054e-05, + "loss": 0.1474, + "step": 1491 + }, + { + "epoch": 3.95, + "learning_rate": 3.0670241286863275e-05, + "loss": 0.0233, + "step": 1492 + }, + { + "epoch": 3.95, + "learning_rate": 3.065683646112601e-05, + "loss": 0.1721, + "step": 1493 + }, + { + "epoch": 3.95, + "learning_rate": 3.0643431635388745e-05, + "loss": 0.6024, + "step": 1494 + }, + { + "epoch": 3.96, + "learning_rate": 3.063002680965147e-05, + "loss": 0.1425, + "step": 1495 + }, + { + "epoch": 3.96, + "learning_rate": 3.0616621983914215e-05, + "loss": 0.0311, + "step": 1496 + }, + { + "epoch": 3.96, + "learning_rate": 3.0603217158176944e-05, + "loss": 0.0197, + "step": 1497 + }, + { + "epoch": 3.96, + "learning_rate": 3.0589812332439686e-05, + "loss": 0.0406, + "step": 1498 + }, + { + "epoch": 3.97, + "learning_rate": 3.0576407506702414e-05, + "loss": 0.054, + "step": 1499 + }, + { + "epoch": 3.97, + "learning_rate": 3.056300268096515e-05, + "loss": 0.161, + "step": 1500 + }, + { + "epoch": 3.97, + "learning_rate": 3.0549597855227884e-05, + "loss": 0.0549, + "step": 1501 + }, + { + "epoch": 3.97, + "learning_rate": 3.053619302949062e-05, + "loss": 0.1667, + "step": 1502 + }, + { + "epoch": 3.98, + "learning_rate": 3.0522788203753354e-05, + "loss": 0.1264, + "step": 1503 + }, + { + "epoch": 3.98, + "learning_rate": 3.0509383378016086e-05, + "loss": 0.0133, + "step": 1504 + }, + { + "epoch": 3.98, + "learning_rate": 3.049597855227882e-05, + "loss": 0.0655, + "step": 1505 + }, + { + "epoch": 3.98, + "learning_rate": 3.0482573726541556e-05, + "loss": 0.1054, + "step": 1506 + }, + { + "epoch": 3.99, + "learning_rate": 3.046916890080429e-05, + "loss": 0.0053, + "step": 1507 + }, + { + "epoch": 3.99, + "learning_rate": 3.0455764075067027e-05, + "loss": 0.0347, + "step": 1508 + }, + { + "epoch": 3.99, + "learning_rate": 3.0442359249329762e-05, + "loss": 0.6095, + "step": 1509 + }, + { + "epoch": 3.99, + "learning_rate": 3.0428954423592494e-05, + "loss": 0.1339, + "step": 1510 + }, + { + "epoch": 4.0, + "learning_rate": 3.0415549597855232e-05, + "loss": 0.0088, + "step": 1511 + }, + { + "epoch": 4.0, + "learning_rate": 3.0402144772117964e-05, + "loss": 0.4356, + "step": 1512 + }, + { + "epoch": 4.0, + "eval_f1": 0.7822580645161291, + "eval_loss": 0.6966613531112671, + "eval_runtime": 1.8703, + "eval_samples_per_second": 808.957, + "eval_steps_per_second": 50.794, + "step": 1512 + }, + { + "epoch": 4.0, + "learning_rate": 3.0388739946380702e-05, + "loss": 0.003, + "step": 1513 + }, + { + "epoch": 4.01, + "learning_rate": 3.0375335120643434e-05, + "loss": 0.0067, + "step": 1514 + }, + { + "epoch": 4.01, + "learning_rate": 3.0361930294906166e-05, + "loss": 0.0488, + "step": 1515 + }, + { + "epoch": 4.01, + "learning_rate": 3.0348525469168904e-05, + "loss": 0.0106, + "step": 1516 + }, + { + "epoch": 4.01, + "learning_rate": 3.0335120643431636e-05, + "loss": 0.0098, + "step": 1517 + }, + { + "epoch": 4.02, + "learning_rate": 3.0321715817694375e-05, + "loss": 0.274, + "step": 1518 + }, + { + "epoch": 4.02, + "learning_rate": 3.0308310991957106e-05, + "loss": 0.2007, + "step": 1519 + }, + { + "epoch": 4.02, + "learning_rate": 3.0294906166219838e-05, + "loss": 0.0121, + "step": 1520 + }, + { + "epoch": 4.02, + "learning_rate": 3.0281501340482577e-05, + "loss": 0.0632, + "step": 1521 + }, + { + "epoch": 4.03, + "learning_rate": 3.026809651474531e-05, + "loss": 0.0062, + "step": 1522 + }, + { + "epoch": 4.03, + "learning_rate": 3.0254691689008047e-05, + "loss": 0.0123, + "step": 1523 + }, + { + "epoch": 4.03, + "learning_rate": 3.024128686327078e-05, + "loss": 0.0063, + "step": 1524 + }, + { + "epoch": 4.03, + "learning_rate": 3.022788203753351e-05, + "loss": 0.0102, + "step": 1525 + }, + { + "epoch": 4.04, + "learning_rate": 3.021447721179625e-05, + "loss": 0.0082, + "step": 1526 + }, + { + "epoch": 4.04, + "learning_rate": 3.020107238605898e-05, + "loss": 0.3369, + "step": 1527 + }, + { + "epoch": 4.04, + "learning_rate": 3.018766756032172e-05, + "loss": 0.2587, + "step": 1528 + }, + { + "epoch": 4.04, + "learning_rate": 3.017426273458445e-05, + "loss": 0.0067, + "step": 1529 + }, + { + "epoch": 4.05, + "learning_rate": 3.0160857908847186e-05, + "loss": 0.0021, + "step": 1530 + }, + { + "epoch": 4.05, + "learning_rate": 3.014745308310992e-05, + "loss": 0.0724, + "step": 1531 + }, + { + "epoch": 4.05, + "learning_rate": 3.0134048257372656e-05, + "loss": 0.0074, + "step": 1532 + }, + { + "epoch": 4.06, + "learning_rate": 3.012064343163539e-05, + "loss": 0.0202, + "step": 1533 + }, + { + "epoch": 4.06, + "learning_rate": 3.0107238605898126e-05, + "loss": 0.1435, + "step": 1534 + }, + { + "epoch": 4.06, + "learning_rate": 3.0093833780160858e-05, + "loss": 0.0074, + "step": 1535 + }, + { + "epoch": 4.06, + "learning_rate": 3.0080428954423597e-05, + "loss": 0.4145, + "step": 1536 + }, + { + "epoch": 4.07, + "learning_rate": 3.006702412868633e-05, + "loss": 0.0186, + "step": 1537 + }, + { + "epoch": 4.07, + "learning_rate": 3.0053619302949067e-05, + "loss": 0.1648, + "step": 1538 + }, + { + "epoch": 4.07, + "learning_rate": 3.00402144772118e-05, + "loss": 0.2545, + "step": 1539 + }, + { + "epoch": 4.07, + "learning_rate": 3.002680965147453e-05, + "loss": 0.0016, + "step": 1540 + }, + { + "epoch": 4.08, + "learning_rate": 3.001340482573727e-05, + "loss": 0.0184, + "step": 1541 + }, + { + "epoch": 4.08, + "learning_rate": 3e-05, + "loss": 0.1208, + "step": 1542 + }, + { + "epoch": 4.08, + "learning_rate": 2.998659517426274e-05, + "loss": 0.0021, + "step": 1543 + }, + { + "epoch": 4.08, + "learning_rate": 2.997319034852547e-05, + "loss": 0.0092, + "step": 1544 + }, + { + "epoch": 4.09, + "learning_rate": 2.9959785522788203e-05, + "loss": 0.1514, + "step": 1545 + }, + { + "epoch": 4.09, + "learning_rate": 2.994638069705094e-05, + "loss": 0.0773, + "step": 1546 + }, + { + "epoch": 4.09, + "learning_rate": 2.9932975871313673e-05, + "loss": 0.0093, + "step": 1547 + }, + { + "epoch": 4.1, + "learning_rate": 2.991957104557641e-05, + "loss": 0.0022, + "step": 1548 + }, + { + "epoch": 4.1, + "learning_rate": 2.9906166219839143e-05, + "loss": 0.1765, + "step": 1549 + }, + { + "epoch": 4.1, + "learning_rate": 2.9892761394101875e-05, + "loss": 0.1766, + "step": 1550 + }, + { + "epoch": 4.1, + "learning_rate": 2.9879356568364614e-05, + "loss": 0.0024, + "step": 1551 + }, + { + "epoch": 4.11, + "learning_rate": 2.9865951742627345e-05, + "loss": 0.012, + "step": 1552 + }, + { + "epoch": 4.11, + "learning_rate": 2.9852546916890084e-05, + "loss": 0.0055, + "step": 1553 + }, + { + "epoch": 4.11, + "learning_rate": 2.9839142091152816e-05, + "loss": 0.0088, + "step": 1554 + }, + { + "epoch": 4.11, + "learning_rate": 2.9825737265415547e-05, + "loss": 0.0019, + "step": 1555 + }, + { + "epoch": 4.12, + "learning_rate": 2.9812332439678286e-05, + "loss": 0.0186, + "step": 1556 + }, + { + "epoch": 4.12, + "learning_rate": 2.9798927613941018e-05, + "loss": 0.25, + "step": 1557 + }, + { + "epoch": 4.12, + "learning_rate": 2.9785522788203756e-05, + "loss": 0.0129, + "step": 1558 + }, + { + "epoch": 4.12, + "learning_rate": 2.9772117962466488e-05, + "loss": 0.0048, + "step": 1559 + }, + { + "epoch": 4.13, + "learning_rate": 2.9758713136729223e-05, + "loss": 0.1153, + "step": 1560 + }, + { + "epoch": 4.13, + "learning_rate": 2.9745308310991958e-05, + "loss": 0.1871, + "step": 1561 + }, + { + "epoch": 4.13, + "learning_rate": 2.9731903485254693e-05, + "loss": 0.0087, + "step": 1562 + }, + { + "epoch": 4.13, + "learning_rate": 2.971849865951743e-05, + "loss": 0.0048, + "step": 1563 + }, + { + "epoch": 4.14, + "learning_rate": 2.9705093833780163e-05, + "loss": 0.026, + "step": 1564 + }, + { + "epoch": 4.14, + "learning_rate": 2.9691689008042895e-05, + "loss": 0.3336, + "step": 1565 + }, + { + "epoch": 4.14, + "learning_rate": 2.9678284182305634e-05, + "loss": 0.0015, + "step": 1566 + }, + { + "epoch": 4.15, + "learning_rate": 2.9664879356568365e-05, + "loss": 0.0044, + "step": 1567 + }, + { + "epoch": 4.15, + "learning_rate": 2.9651474530831104e-05, + "loss": 0.0035, + "step": 1568 + }, + { + "epoch": 4.15, + "learning_rate": 2.9638069705093836e-05, + "loss": 0.1206, + "step": 1569 + }, + { + "epoch": 4.15, + "learning_rate": 2.9624664879356567e-05, + "loss": 0.1247, + "step": 1570 + }, + { + "epoch": 4.16, + "learning_rate": 2.9611260053619306e-05, + "loss": 0.0011, + "step": 1571 + }, + { + "epoch": 4.16, + "learning_rate": 2.9597855227882038e-05, + "loss": 0.0023, + "step": 1572 + }, + { + "epoch": 4.16, + "learning_rate": 2.9584450402144776e-05, + "loss": 0.0014, + "step": 1573 + }, + { + "epoch": 4.16, + "learning_rate": 2.9571045576407508e-05, + "loss": 0.2967, + "step": 1574 + }, + { + "epoch": 4.17, + "learning_rate": 2.955764075067024e-05, + "loss": 0.0373, + "step": 1575 + }, + { + "epoch": 4.17, + "learning_rate": 2.9544235924932978e-05, + "loss": 0.3351, + "step": 1576 + }, + { + "epoch": 4.17, + "learning_rate": 2.953083109919571e-05, + "loss": 0.0025, + "step": 1577 + }, + { + "epoch": 4.17, + "learning_rate": 2.951742627345845e-05, + "loss": 0.0025, + "step": 1578 + }, + { + "epoch": 4.18, + "learning_rate": 2.950402144772118e-05, + "loss": 0.0182, + "step": 1579 + }, + { + "epoch": 4.18, + "learning_rate": 2.9490616621983912e-05, + "loss": 0.001, + "step": 1580 + }, + { + "epoch": 4.18, + "learning_rate": 2.947721179624665e-05, + "loss": 0.003, + "step": 1581 + }, + { + "epoch": 4.19, + "learning_rate": 2.9463806970509382e-05, + "loss": 0.0038, + "step": 1582 + }, + { + "epoch": 4.19, + "learning_rate": 2.945040214477212e-05, + "loss": 0.002, + "step": 1583 + }, + { + "epoch": 4.19, + "learning_rate": 2.9436997319034853e-05, + "loss": 0.1688, + "step": 1584 + }, + { + "epoch": 4.19, + "learning_rate": 2.9423592493297584e-05, + "loss": 0.0014, + "step": 1585 + }, + { + "epoch": 4.2, + "learning_rate": 2.9410187667560323e-05, + "loss": 0.2664, + "step": 1586 + }, + { + "epoch": 4.2, + "learning_rate": 2.9396782841823055e-05, + "loss": 0.0012, + "step": 1587 + }, + { + "epoch": 4.2, + "learning_rate": 2.9383378016085793e-05, + "loss": 0.0022, + "step": 1588 + }, + { + "epoch": 4.2, + "learning_rate": 2.9369973190348525e-05, + "loss": 0.0959, + "step": 1589 + }, + { + "epoch": 4.21, + "learning_rate": 2.935656836461126e-05, + "loss": 0.0839, + "step": 1590 + }, + { + "epoch": 4.21, + "learning_rate": 2.9343163538873995e-05, + "loss": 0.7405, + "step": 1591 + }, + { + "epoch": 4.21, + "learning_rate": 2.932975871313673e-05, + "loss": 0.0351, + "step": 1592 + }, + { + "epoch": 4.21, + "learning_rate": 2.9316353887399465e-05, + "loss": 0.0025, + "step": 1593 + }, + { + "epoch": 4.22, + "learning_rate": 2.93029490616622e-05, + "loss": 0.0054, + "step": 1594 + }, + { + "epoch": 4.22, + "learning_rate": 2.9289544235924932e-05, + "loss": 0.0043, + "step": 1595 + }, + { + "epoch": 4.22, + "learning_rate": 2.927613941018767e-05, + "loss": 0.1828, + "step": 1596 + }, + { + "epoch": 4.22, + "learning_rate": 2.9262734584450402e-05, + "loss": 0.0022, + "step": 1597 + }, + { + "epoch": 4.23, + "learning_rate": 2.924932975871314e-05, + "loss": 0.0051, + "step": 1598 + }, + { + "epoch": 4.23, + "learning_rate": 2.9235924932975873e-05, + "loss": 0.0025, + "step": 1599 + }, + { + "epoch": 4.23, + "learning_rate": 2.9222520107238604e-05, + "loss": 0.0018, + "step": 1600 + }, + { + "epoch": 4.24, + "learning_rate": 2.9209115281501343e-05, + "loss": 0.0348, + "step": 1601 + }, + { + "epoch": 4.24, + "learning_rate": 2.9195710455764075e-05, + "loss": 0.207, + "step": 1602 + }, + { + "epoch": 4.24, + "learning_rate": 2.9182305630026813e-05, + "loss": 0.0249, + "step": 1603 + }, + { + "epoch": 4.24, + "learning_rate": 2.9168900804289545e-05, + "loss": 0.0028, + "step": 1604 + }, + { + "epoch": 4.25, + "learning_rate": 2.9155495978552283e-05, + "loss": 0.2604, + "step": 1605 + }, + { + "epoch": 4.25, + "learning_rate": 2.9142091152815015e-05, + "loss": 0.2808, + "step": 1606 + }, + { + "epoch": 4.25, + "learning_rate": 2.9128686327077747e-05, + "loss": 0.0289, + "step": 1607 + }, + { + "epoch": 4.25, + "learning_rate": 2.9115281501340486e-05, + "loss": 0.005, + "step": 1608 + }, + { + "epoch": 4.26, + "learning_rate": 2.9101876675603217e-05, + "loss": 0.7931, + "step": 1609 + }, + { + "epoch": 4.26, + "learning_rate": 2.9088471849865956e-05, + "loss": 0.335, + "step": 1610 + }, + { + "epoch": 4.26, + "learning_rate": 2.9075067024128688e-05, + "loss": 0.2779, + "step": 1611 + }, + { + "epoch": 4.26, + "learning_rate": 2.906166219839142e-05, + "loss": 0.1649, + "step": 1612 + }, + { + "epoch": 4.27, + "learning_rate": 2.9048257372654158e-05, + "loss": 0.0081, + "step": 1613 + }, + { + "epoch": 4.27, + "learning_rate": 2.903485254691689e-05, + "loss": 0.0638, + "step": 1614 + }, + { + "epoch": 4.27, + "learning_rate": 2.9021447721179628e-05, + "loss": 0.016, + "step": 1615 + }, + { + "epoch": 4.28, + "learning_rate": 2.900804289544236e-05, + "loss": 0.0025, + "step": 1616 + }, + { + "epoch": 4.28, + "learning_rate": 2.8994638069705095e-05, + "loss": 0.0249, + "step": 1617 + }, + { + "epoch": 4.28, + "learning_rate": 2.898123324396783e-05, + "loss": 0.0291, + "step": 1618 + }, + { + "epoch": 4.28, + "learning_rate": 2.8967828418230565e-05, + "loss": 0.1773, + "step": 1619 + }, + { + "epoch": 4.29, + "learning_rate": 2.89544235924933e-05, + "loss": 0.3452, + "step": 1620 + }, + { + "epoch": 4.29, + "learning_rate": 2.8941018766756035e-05, + "loss": 0.006, + "step": 1621 + }, + { + "epoch": 4.29, + "learning_rate": 2.8927613941018767e-05, + "loss": 0.0054, + "step": 1622 + }, + { + "epoch": 4.29, + "learning_rate": 2.8914209115281506e-05, + "loss": 0.1852, + "step": 1623 + }, + { + "epoch": 4.3, + "learning_rate": 2.8900804289544237e-05, + "loss": 0.4424, + "step": 1624 + }, + { + "epoch": 4.3, + "learning_rate": 2.8887399463806976e-05, + "loss": 0.0063, + "step": 1625 + }, + { + "epoch": 4.3, + "learning_rate": 2.8873994638069708e-05, + "loss": 0.43, + "step": 1626 + }, + { + "epoch": 4.3, + "learning_rate": 2.886058981233244e-05, + "loss": 0.2283, + "step": 1627 + }, + { + "epoch": 4.31, + "learning_rate": 2.8847184986595178e-05, + "loss": 0.0519, + "step": 1628 + }, + { + "epoch": 4.31, + "learning_rate": 2.883378016085791e-05, + "loss": 0.1797, + "step": 1629 + }, + { + "epoch": 4.31, + "learning_rate": 2.8820375335120648e-05, + "loss": 0.2569, + "step": 1630 + }, + { + "epoch": 4.31, + "learning_rate": 2.880697050938338e-05, + "loss": 0.0024, + "step": 1631 + }, + { + "epoch": 4.32, + "learning_rate": 2.8793565683646112e-05, + "loss": 0.1727, + "step": 1632 + }, + { + "epoch": 4.32, + "learning_rate": 2.878016085790885e-05, + "loss": 0.0091, + "step": 1633 + }, + { + "epoch": 4.32, + "learning_rate": 2.8766756032171582e-05, + "loss": 0.2002, + "step": 1634 + }, + { + "epoch": 4.33, + "learning_rate": 2.875335120643432e-05, + "loss": 0.0217, + "step": 1635 + }, + { + "epoch": 4.33, + "learning_rate": 2.8739946380697052e-05, + "loss": 0.2163, + "step": 1636 + }, + { + "epoch": 4.33, + "learning_rate": 2.8726541554959784e-05, + "loss": 0.0065, + "step": 1637 + }, + { + "epoch": 4.33, + "learning_rate": 2.8713136729222522e-05, + "loss": 0.1567, + "step": 1638 + }, + { + "epoch": 4.34, + "learning_rate": 2.8699731903485254e-05, + "loss": 0.1775, + "step": 1639 + }, + { + "epoch": 4.34, + "learning_rate": 2.8686327077747993e-05, + "loss": 0.0116, + "step": 1640 + }, + { + "epoch": 4.34, + "learning_rate": 2.8672922252010724e-05, + "loss": 0.0114, + "step": 1641 + }, + { + "epoch": 4.34, + "learning_rate": 2.8659517426273456e-05, + "loss": 0.0264, + "step": 1642 + }, + { + "epoch": 4.35, + "learning_rate": 2.8646112600536195e-05, + "loss": 0.0172, + "step": 1643 + }, + { + "epoch": 4.35, + "learning_rate": 2.8632707774798926e-05, + "loss": 0.187, + "step": 1644 + }, + { + "epoch": 4.35, + "learning_rate": 2.8619302949061665e-05, + "loss": 0.009, + "step": 1645 + }, + { + "epoch": 4.35, + "learning_rate": 2.8605898123324397e-05, + "loss": 0.014, + "step": 1646 + }, + { + "epoch": 4.36, + "learning_rate": 2.8592493297587132e-05, + "loss": 0.1643, + "step": 1647 + }, + { + "epoch": 4.36, + "learning_rate": 2.8579088471849867e-05, + "loss": 0.2763, + "step": 1648 + }, + { + "epoch": 4.36, + "learning_rate": 2.8565683646112602e-05, + "loss": 0.0641, + "step": 1649 + }, + { + "epoch": 4.37, + "learning_rate": 2.8552278820375337e-05, + "loss": 0.6128, + "step": 1650 + }, + { + "epoch": 4.37, + "learning_rate": 2.8538873994638072e-05, + "loss": 0.0229, + "step": 1651 + }, + { + "epoch": 4.37, + "learning_rate": 2.8525469168900804e-05, + "loss": 0.0344, + "step": 1652 + }, + { + "epoch": 4.37, + "learning_rate": 2.8512064343163543e-05, + "loss": 0.018, + "step": 1653 + }, + { + "epoch": 4.38, + "learning_rate": 2.8498659517426274e-05, + "loss": 0.191, + "step": 1654 + }, + { + "epoch": 4.38, + "learning_rate": 2.8485254691689013e-05, + "loss": 0.0397, + "step": 1655 + }, + { + "epoch": 4.38, + "learning_rate": 2.8471849865951745e-05, + "loss": 0.0029, + "step": 1656 + }, + { + "epoch": 4.38, + "learning_rate": 2.8458445040214476e-05, + "loss": 0.0034, + "step": 1657 + }, + { + "epoch": 4.39, + "learning_rate": 2.8445040214477215e-05, + "loss": 0.0031, + "step": 1658 + }, + { + "epoch": 4.39, + "learning_rate": 2.8431635388739947e-05, + "loss": 0.4272, + "step": 1659 + }, + { + "epoch": 4.39, + "learning_rate": 2.8418230563002685e-05, + "loss": 0.0042, + "step": 1660 + }, + { + "epoch": 4.39, + "learning_rate": 2.8404825737265417e-05, + "loss": 0.0224, + "step": 1661 + }, + { + "epoch": 4.4, + "learning_rate": 2.839142091152815e-05, + "loss": 0.1021, + "step": 1662 + }, + { + "epoch": 4.4, + "learning_rate": 2.8378016085790887e-05, + "loss": 0.0076, + "step": 1663 + }, + { + "epoch": 4.4, + "learning_rate": 2.836461126005362e-05, + "loss": 0.084, + "step": 1664 + }, + { + "epoch": 4.4, + "learning_rate": 2.8351206434316357e-05, + "loss": 0.0321, + "step": 1665 + }, + { + "epoch": 4.41, + "learning_rate": 2.833780160857909e-05, + "loss": 0.1369, + "step": 1666 + }, + { + "epoch": 4.41, + "learning_rate": 2.832439678284182e-05, + "loss": 0.018, + "step": 1667 + }, + { + "epoch": 4.41, + "learning_rate": 2.831099195710456e-05, + "loss": 0.1886, + "step": 1668 + }, + { + "epoch": 4.42, + "learning_rate": 2.829758713136729e-05, + "loss": 0.0016, + "step": 1669 + }, + { + "epoch": 4.42, + "learning_rate": 2.828418230563003e-05, + "loss": 0.0031, + "step": 1670 + }, + { + "epoch": 4.42, + "learning_rate": 2.827077747989276e-05, + "loss": 0.0043, + "step": 1671 + }, + { + "epoch": 4.42, + "learning_rate": 2.8257372654155497e-05, + "loss": 0.1202, + "step": 1672 + }, + { + "epoch": 4.43, + "learning_rate": 2.8243967828418232e-05, + "loss": 0.1409, + "step": 1673 + }, + { + "epoch": 4.43, + "learning_rate": 2.8230563002680967e-05, + "loss": 0.0821, + "step": 1674 + }, + { + "epoch": 4.43, + "learning_rate": 2.8217158176943702e-05, + "loss": 0.0468, + "step": 1675 + }, + { + "epoch": 4.43, + "learning_rate": 2.8203753351206437e-05, + "loss": 0.0559, + "step": 1676 + }, + { + "epoch": 4.44, + "learning_rate": 2.819034852546917e-05, + "loss": 0.0192, + "step": 1677 + }, + { + "epoch": 4.44, + "learning_rate": 2.8176943699731907e-05, + "loss": 0.0024, + "step": 1678 + }, + { + "epoch": 4.44, + "learning_rate": 2.816353887399464e-05, + "loss": 0.0021, + "step": 1679 + }, + { + "epoch": 4.44, + "learning_rate": 2.8150134048257378e-05, + "loss": 0.0139, + "step": 1680 + }, + { + "epoch": 4.45, + "learning_rate": 2.813672922252011e-05, + "loss": 0.0042, + "step": 1681 + }, + { + "epoch": 4.45, + "learning_rate": 2.812332439678284e-05, + "loss": 0.1666, + "step": 1682 + }, + { + "epoch": 4.45, + "learning_rate": 2.810991957104558e-05, + "loss": 0.5925, + "step": 1683 + }, + { + "epoch": 4.46, + "learning_rate": 2.809651474530831e-05, + "loss": 0.1689, + "step": 1684 + }, + { + "epoch": 4.46, + "learning_rate": 2.808310991957105e-05, + "loss": 0.0053, + "step": 1685 + }, + { + "epoch": 4.46, + "learning_rate": 2.806970509383378e-05, + "loss": 0.0019, + "step": 1686 + }, + { + "epoch": 4.46, + "learning_rate": 2.8056300268096513e-05, + "loss": 0.0632, + "step": 1687 + }, + { + "epoch": 4.47, + "learning_rate": 2.8042895442359252e-05, + "loss": 0.0115, + "step": 1688 + }, + { + "epoch": 4.47, + "learning_rate": 2.8029490616621984e-05, + "loss": 0.002, + "step": 1689 + }, + { + "epoch": 4.47, + "learning_rate": 2.8016085790884722e-05, + "loss": 0.0021, + "step": 1690 + }, + { + "epoch": 4.47, + "learning_rate": 2.8002680965147454e-05, + "loss": 0.0079, + "step": 1691 + }, + { + "epoch": 4.48, + "learning_rate": 2.7989276139410186e-05, + "loss": 0.0016, + "step": 1692 + }, + { + "epoch": 4.48, + "learning_rate": 2.7975871313672924e-05, + "loss": 0.1824, + "step": 1693 + }, + { + "epoch": 4.48, + "learning_rate": 2.7962466487935656e-05, + "loss": 0.1025, + "step": 1694 + }, + { + "epoch": 4.48, + "learning_rate": 2.7949061662198394e-05, + "loss": 0.4274, + "step": 1695 + }, + { + "epoch": 4.49, + "learning_rate": 2.7935656836461126e-05, + "loss": 0.0834, + "step": 1696 + }, + { + "epoch": 4.49, + "learning_rate": 2.7922252010723858e-05, + "loss": 0.6412, + "step": 1697 + }, + { + "epoch": 4.49, + "learning_rate": 2.7908847184986596e-05, + "loss": 0.3051, + "step": 1698 + }, + { + "epoch": 4.49, + "learning_rate": 2.7895442359249328e-05, + "loss": 0.0909, + "step": 1699 + }, + { + "epoch": 4.5, + "learning_rate": 2.7882037533512067e-05, + "loss": 0.2655, + "step": 1700 + }, + { + "epoch": 4.5, + "learning_rate": 2.78686327077748e-05, + "loss": 0.305, + "step": 1701 + }, + { + "epoch": 4.5, + "learning_rate": 2.7855227882037534e-05, + "loss": 0.2733, + "step": 1702 + }, + { + "epoch": 4.51, + "learning_rate": 2.784182305630027e-05, + "loss": 0.0021, + "step": 1703 + }, + { + "epoch": 4.51, + "learning_rate": 2.7828418230563004e-05, + "loss": 0.0072, + "step": 1704 + }, + { + "epoch": 4.51, + "learning_rate": 2.781501340482574e-05, + "loss": 0.0027, + "step": 1705 + }, + { + "epoch": 4.51, + "learning_rate": 2.7801608579088474e-05, + "loss": 0.184, + "step": 1706 + }, + { + "epoch": 4.52, + "learning_rate": 2.7788203753351206e-05, + "loss": 0.0143, + "step": 1707 + }, + { + "epoch": 4.52, + "learning_rate": 2.7774798927613944e-05, + "loss": 0.0297, + "step": 1708 + }, + { + "epoch": 4.52, + "learning_rate": 2.7761394101876676e-05, + "loss": 0.0739, + "step": 1709 + }, + { + "epoch": 4.52, + "learning_rate": 2.7747989276139415e-05, + "loss": 0.0188, + "step": 1710 + }, + { + "epoch": 4.53, + "learning_rate": 2.7734584450402146e-05, + "loss": 0.2487, + "step": 1711 + }, + { + "epoch": 4.53, + "learning_rate": 2.7721179624664878e-05, + "loss": 0.0222, + "step": 1712 + }, + { + "epoch": 4.53, + "learning_rate": 2.7707774798927617e-05, + "loss": 0.0041, + "step": 1713 + }, + { + "epoch": 4.53, + "learning_rate": 2.769436997319035e-05, + "loss": 0.0164, + "step": 1714 + }, + { + "epoch": 4.54, + "learning_rate": 2.7680965147453087e-05, + "loss": 0.0985, + "step": 1715 + }, + { + "epoch": 4.54, + "learning_rate": 2.766756032171582e-05, + "loss": 0.0067, + "step": 1716 + }, + { + "epoch": 4.54, + "learning_rate": 2.765415549597855e-05, + "loss": 0.3304, + "step": 1717 + }, + { + "epoch": 4.54, + "learning_rate": 2.764075067024129e-05, + "loss": 0.006, + "step": 1718 + }, + { + "epoch": 4.55, + "learning_rate": 2.762734584450402e-05, + "loss": 0.0142, + "step": 1719 + }, + { + "epoch": 4.55, + "learning_rate": 2.761394101876676e-05, + "loss": 0.2205, + "step": 1720 + }, + { + "epoch": 4.55, + "learning_rate": 2.760053619302949e-05, + "loss": 0.298, + "step": 1721 + }, + { + "epoch": 4.56, + "learning_rate": 2.7587131367292223e-05, + "loss": 0.0041, + "step": 1722 + }, + { + "epoch": 4.56, + "learning_rate": 2.757372654155496e-05, + "loss": 0.0018, + "step": 1723 + }, + { + "epoch": 4.56, + "learning_rate": 2.7560321715817693e-05, + "loss": 0.0185, + "step": 1724 + }, + { + "epoch": 4.56, + "learning_rate": 2.754691689008043e-05, + "loss": 0.0042, + "step": 1725 + }, + { + "epoch": 4.57, + "learning_rate": 2.7533512064343163e-05, + "loss": 0.036, + "step": 1726 + }, + { + "epoch": 4.57, + "learning_rate": 2.7520107238605898e-05, + "loss": 0.2593, + "step": 1727 + }, + { + "epoch": 4.57, + "learning_rate": 2.7506702412868633e-05, + "loss": 0.0062, + "step": 1728 + }, + { + "epoch": 4.57, + "learning_rate": 2.749329758713137e-05, + "loss": 0.1759, + "step": 1729 + }, + { + "epoch": 4.58, + "learning_rate": 2.7479892761394104e-05, + "loss": 0.0202, + "step": 1730 + }, + { + "epoch": 4.58, + "learning_rate": 2.746648793565684e-05, + "loss": 0.2156, + "step": 1731 + }, + { + "epoch": 4.58, + "learning_rate": 2.7453083109919574e-05, + "loss": 0.4112, + "step": 1732 + }, + { + "epoch": 4.58, + "learning_rate": 2.743967828418231e-05, + "loss": 0.0037, + "step": 1733 + }, + { + "epoch": 4.59, + "learning_rate": 2.742627345844504e-05, + "loss": 0.0186, + "step": 1734 + }, + { + "epoch": 4.59, + "learning_rate": 2.741286863270778e-05, + "loss": 0.0117, + "step": 1735 + }, + { + "epoch": 4.59, + "learning_rate": 2.739946380697051e-05, + "loss": 0.0039, + "step": 1736 + }, + { + "epoch": 4.6, + "learning_rate": 2.738605898123325e-05, + "loss": 0.1185, + "step": 1737 + }, + { + "epoch": 4.6, + "learning_rate": 2.737265415549598e-05, + "loss": 0.0276, + "step": 1738 + }, + { + "epoch": 4.6, + "learning_rate": 2.7359249329758713e-05, + "loss": 0.0041, + "step": 1739 + }, + { + "epoch": 4.6, + "learning_rate": 2.734584450402145e-05, + "loss": 0.0133, + "step": 1740 + }, + { + "epoch": 4.61, + "learning_rate": 2.7332439678284183e-05, + "loss": 0.1042, + "step": 1741 + }, + { + "epoch": 4.61, + "learning_rate": 2.7319034852546922e-05, + "loss": 0.0023, + "step": 1742 + }, + { + "epoch": 4.61, + "learning_rate": 2.7305630026809654e-05, + "loss": 0.1586, + "step": 1743 + }, + { + "epoch": 4.61, + "learning_rate": 2.7292225201072385e-05, + "loss": 0.0258, + "step": 1744 + }, + { + "epoch": 4.62, + "learning_rate": 2.7278820375335124e-05, + "loss": 0.1119, + "step": 1745 + }, + { + "epoch": 4.62, + "learning_rate": 2.7265415549597856e-05, + "loss": 0.1115, + "step": 1746 + }, + { + "epoch": 4.62, + "learning_rate": 2.7252010723860594e-05, + "loss": 0.4607, + "step": 1747 + }, + { + "epoch": 4.62, + "learning_rate": 2.7238605898123326e-05, + "loss": 0.0296, + "step": 1748 + }, + { + "epoch": 4.63, + "learning_rate": 2.7225201072386058e-05, + "loss": 0.0277, + "step": 1749 + }, + { + "epoch": 4.63, + "learning_rate": 2.7211796246648796e-05, + "loss": 0.0777, + "step": 1750 + }, + { + "epoch": 4.63, + "learning_rate": 2.7198391420911528e-05, + "loss": 0.0031, + "step": 1751 + }, + { + "epoch": 4.63, + "learning_rate": 2.7184986595174266e-05, + "loss": 0.2238, + "step": 1752 + }, + { + "epoch": 4.64, + "learning_rate": 2.7171581769436998e-05, + "loss": 0.0409, + "step": 1753 + }, + { + "epoch": 4.64, + "learning_rate": 2.715817694369973e-05, + "loss": 0.0032, + "step": 1754 + }, + { + "epoch": 4.64, + "learning_rate": 2.714477211796247e-05, + "loss": 0.0113, + "step": 1755 + }, + { + "epoch": 4.65, + "learning_rate": 2.71313672922252e-05, + "loss": 0.0204, + "step": 1756 + }, + { + "epoch": 4.65, + "learning_rate": 2.711796246648794e-05, + "loss": 0.0022, + "step": 1757 + }, + { + "epoch": 4.65, + "learning_rate": 2.710455764075067e-05, + "loss": 0.0018, + "step": 1758 + }, + { + "epoch": 4.65, + "learning_rate": 2.7091152815013406e-05, + "loss": 0.263, + "step": 1759 + }, + { + "epoch": 4.66, + "learning_rate": 2.707774798927614e-05, + "loss": 0.0109, + "step": 1760 + }, + { + "epoch": 4.66, + "learning_rate": 2.7064343163538876e-05, + "loss": 0.0653, + "step": 1761 + }, + { + "epoch": 4.66, + "learning_rate": 2.705093833780161e-05, + "loss": 0.0116, + "step": 1762 + }, + { + "epoch": 4.66, + "learning_rate": 2.7037533512064346e-05, + "loss": 0.0063, + "step": 1763 + }, + { + "epoch": 4.67, + "learning_rate": 2.7024128686327078e-05, + "loss": 0.0034, + "step": 1764 + }, + { + "epoch": 4.67, + "learning_rate": 2.7010723860589816e-05, + "loss": 0.0395, + "step": 1765 + }, + { + "epoch": 4.67, + "learning_rate": 2.6997319034852548e-05, + "loss": 0.0014, + "step": 1766 + }, + { + "epoch": 4.67, + "learning_rate": 2.6983914209115287e-05, + "loss": 0.0057, + "step": 1767 + }, + { + "epoch": 4.68, + "learning_rate": 2.697050938337802e-05, + "loss": 0.0018, + "step": 1768 + }, + { + "epoch": 4.68, + "learning_rate": 2.695710455764075e-05, + "loss": 0.012, + "step": 1769 + }, + { + "epoch": 4.68, + "learning_rate": 2.694369973190349e-05, + "loss": 0.0017, + "step": 1770 + }, + { + "epoch": 4.69, + "learning_rate": 2.693029490616622e-05, + "loss": 0.0654, + "step": 1771 + }, + { + "epoch": 4.69, + "learning_rate": 2.691689008042896e-05, + "loss": 0.8002, + "step": 1772 + }, + { + "epoch": 4.69, + "learning_rate": 2.690348525469169e-05, + "loss": 0.0035, + "step": 1773 + }, + { + "epoch": 4.69, + "learning_rate": 2.6890080428954422e-05, + "loss": 0.0051, + "step": 1774 + }, + { + "epoch": 4.7, + "learning_rate": 2.687667560321716e-05, + "loss": 0.0031, + "step": 1775 + }, + { + "epoch": 4.7, + "learning_rate": 2.6863270777479893e-05, + "loss": 0.0142, + "step": 1776 + }, + { + "epoch": 4.7, + "learning_rate": 2.684986595174263e-05, + "loss": 0.0009, + "step": 1777 + }, + { + "epoch": 4.7, + "learning_rate": 2.6836461126005363e-05, + "loss": 0.0015, + "step": 1778 + }, + { + "epoch": 4.71, + "learning_rate": 2.6823056300268095e-05, + "loss": 0.3481, + "step": 1779 + }, + { + "epoch": 4.71, + "learning_rate": 2.6809651474530833e-05, + "loss": 0.3095, + "step": 1780 + }, + { + "epoch": 4.71, + "learning_rate": 2.6796246648793565e-05, + "loss": 0.2567, + "step": 1781 + }, + { + "epoch": 4.71, + "learning_rate": 2.6782841823056303e-05, + "loss": 0.0037, + "step": 1782 + }, + { + "epoch": 4.72, + "learning_rate": 2.6769436997319035e-05, + "loss": 0.001, + "step": 1783 + }, + { + "epoch": 4.72, + "learning_rate": 2.675603217158177e-05, + "loss": 0.0065, + "step": 1784 + }, + { + "epoch": 4.72, + "learning_rate": 2.6742627345844505e-05, + "loss": 0.0029, + "step": 1785 + }, + { + "epoch": 4.72, + "learning_rate": 2.672922252010724e-05, + "loss": 0.6096, + "step": 1786 + }, + { + "epoch": 4.73, + "learning_rate": 2.6715817694369976e-05, + "loss": 0.0127, + "step": 1787 + }, + { + "epoch": 4.73, + "learning_rate": 2.670241286863271e-05, + "loss": 0.0031, + "step": 1788 + }, + { + "epoch": 4.73, + "learning_rate": 2.6689008042895443e-05, + "loss": 0.2463, + "step": 1789 + }, + { + "epoch": 4.74, + "learning_rate": 2.667560321715818e-05, + "loss": 0.1022, + "step": 1790 + }, + { + "epoch": 4.74, + "learning_rate": 2.6662198391420913e-05, + "loss": 0.002, + "step": 1791 + }, + { + "epoch": 4.74, + "learning_rate": 2.664879356568365e-05, + "loss": 0.1576, + "step": 1792 + }, + { + "epoch": 4.74, + "learning_rate": 2.6635388739946383e-05, + "loss": 0.1099, + "step": 1793 + }, + { + "epoch": 4.75, + "learning_rate": 2.6621983914209115e-05, + "loss": 0.1482, + "step": 1794 + }, + { + "epoch": 4.75, + "learning_rate": 2.6608579088471853e-05, + "loss": 0.0007, + "step": 1795 + }, + { + "epoch": 4.75, + "learning_rate": 2.6595174262734585e-05, + "loss": 0.0009, + "step": 1796 + }, + { + "epoch": 4.75, + "learning_rate": 2.6581769436997324e-05, + "loss": 0.005, + "step": 1797 + }, + { + "epoch": 4.76, + "learning_rate": 2.6568364611260055e-05, + "loss": 0.1808, + "step": 1798 + }, + { + "epoch": 4.76, + "learning_rate": 2.6554959785522787e-05, + "loss": 0.0351, + "step": 1799 + }, + { + "epoch": 4.76, + "learning_rate": 2.6541554959785526e-05, + "loss": 0.2555, + "step": 1800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6528150134048257e-05, + "loss": 0.2236, + "step": 1801 + }, + { + "epoch": 4.77, + "learning_rate": 2.6514745308310996e-05, + "loss": 0.3208, + "step": 1802 + }, + { + "epoch": 4.77, + "learning_rate": 2.6501340482573728e-05, + "loss": 0.0202, + "step": 1803 + }, + { + "epoch": 4.77, + "learning_rate": 2.648793565683646e-05, + "loss": 0.0033, + "step": 1804 + }, + { + "epoch": 4.78, + "learning_rate": 2.6474530831099198e-05, + "loss": 0.001, + "step": 1805 + }, + { + "epoch": 4.78, + "learning_rate": 2.646112600536193e-05, + "loss": 0.0019, + "step": 1806 + }, + { + "epoch": 4.78, + "learning_rate": 2.6447721179624668e-05, + "loss": 0.0027, + "step": 1807 + }, + { + "epoch": 4.78, + "learning_rate": 2.64343163538874e-05, + "loss": 0.0051, + "step": 1808 + }, + { + "epoch": 4.79, + "learning_rate": 2.642091152815013e-05, + "loss": 0.1994, + "step": 1809 + }, + { + "epoch": 4.79, + "learning_rate": 2.640750670241287e-05, + "loss": 0.0372, + "step": 1810 + }, + { + "epoch": 4.79, + "learning_rate": 2.6394101876675602e-05, + "loss": 0.0678, + "step": 1811 + }, + { + "epoch": 4.79, + "learning_rate": 2.638069705093834e-05, + "loss": 0.0252, + "step": 1812 + }, + { + "epoch": 4.8, + "learning_rate": 2.6367292225201072e-05, + "loss": 0.0065, + "step": 1813 + }, + { + "epoch": 4.8, + "learning_rate": 2.6353887399463807e-05, + "loss": 0.0045, + "step": 1814 + }, + { + "epoch": 4.8, + "learning_rate": 2.6340482573726542e-05, + "loss": 0.0037, + "step": 1815 + }, + { + "epoch": 4.8, + "learning_rate": 2.6327077747989277e-05, + "loss": 0.0251, + "step": 1816 + }, + { + "epoch": 4.81, + "learning_rate": 2.6313672922252013e-05, + "loss": 0.4196, + "step": 1817 + }, + { + "epoch": 4.81, + "learning_rate": 2.6300268096514748e-05, + "loss": 0.0071, + "step": 1818 + }, + { + "epoch": 4.81, + "learning_rate": 2.628686327077748e-05, + "loss": 0.0787, + "step": 1819 + }, + { + "epoch": 4.81, + "learning_rate": 2.6273458445040218e-05, + "loss": 0.0145, + "step": 1820 + }, + { + "epoch": 4.82, + "learning_rate": 2.626005361930295e-05, + "loss": 0.009, + "step": 1821 + }, + { + "epoch": 4.82, + "learning_rate": 2.6246648793565688e-05, + "loss": 0.0027, + "step": 1822 + }, + { + "epoch": 4.82, + "learning_rate": 2.623324396782842e-05, + "loss": 0.0017, + "step": 1823 + }, + { + "epoch": 4.83, + "learning_rate": 2.6219839142091152e-05, + "loss": 0.4824, + "step": 1824 + }, + { + "epoch": 4.83, + "learning_rate": 2.620643431635389e-05, + "loss": 0.0022, + "step": 1825 + }, + { + "epoch": 4.83, + "learning_rate": 2.6193029490616622e-05, + "loss": 0.3223, + "step": 1826 + }, + { + "epoch": 4.83, + "learning_rate": 2.617962466487936e-05, + "loss": 0.2195, + "step": 1827 + }, + { + "epoch": 4.84, + "learning_rate": 2.6166219839142092e-05, + "loss": 0.0013, + "step": 1828 + }, + { + "epoch": 4.84, + "learning_rate": 2.6152815013404824e-05, + "loss": 0.0343, + "step": 1829 + }, + { + "epoch": 4.84, + "learning_rate": 2.6139410187667563e-05, + "loss": 0.0022, + "step": 1830 + }, + { + "epoch": 4.84, + "learning_rate": 2.6126005361930294e-05, + "loss": 0.0022, + "step": 1831 + }, + { + "epoch": 4.85, + "learning_rate": 2.6112600536193033e-05, + "loss": 0.4116, + "step": 1832 + }, + { + "epoch": 4.85, + "learning_rate": 2.6099195710455765e-05, + "loss": 0.0048, + "step": 1833 + }, + { + "epoch": 4.85, + "learning_rate": 2.6085790884718496e-05, + "loss": 0.5819, + "step": 1834 + }, + { + "epoch": 4.85, + "learning_rate": 2.6072386058981235e-05, + "loss": 0.1985, + "step": 1835 + }, + { + "epoch": 4.86, + "learning_rate": 2.6058981233243967e-05, + "loss": 0.0989, + "step": 1836 + }, + { + "epoch": 4.86, + "learning_rate": 2.6045576407506705e-05, + "loss": 0.341, + "step": 1837 + }, + { + "epoch": 4.86, + "learning_rate": 2.6032171581769437e-05, + "loss": 0.0044, + "step": 1838 + }, + { + "epoch": 4.87, + "learning_rate": 2.601876675603217e-05, + "loss": 0.004, + "step": 1839 + }, + { + "epoch": 4.87, + "learning_rate": 2.6005361930294907e-05, + "loss": 0.2858, + "step": 1840 + }, + { + "epoch": 4.87, + "learning_rate": 2.599195710455764e-05, + "loss": 0.0009, + "step": 1841 + }, + { + "epoch": 4.87, + "learning_rate": 2.5978552278820377e-05, + "loss": 0.0042, + "step": 1842 + }, + { + "epoch": 4.88, + "learning_rate": 2.596514745308311e-05, + "loss": 0.0045, + "step": 1843 + }, + { + "epoch": 4.88, + "learning_rate": 2.5951742627345844e-05, + "loss": 0.0144, + "step": 1844 + }, + { + "epoch": 4.88, + "learning_rate": 2.593833780160858e-05, + "loss": 0.0084, + "step": 1845 + }, + { + "epoch": 4.88, + "learning_rate": 2.5924932975871314e-05, + "loss": 0.4276, + "step": 1846 + }, + { + "epoch": 4.89, + "learning_rate": 2.591152815013405e-05, + "loss": 0.0122, + "step": 1847 + }, + { + "epoch": 4.89, + "learning_rate": 2.5898123324396785e-05, + "loss": 0.0776, + "step": 1848 + }, + { + "epoch": 4.89, + "learning_rate": 2.5884718498659516e-05, + "loss": 0.0117, + "step": 1849 + }, + { + "epoch": 4.89, + "learning_rate": 2.5871313672922255e-05, + "loss": 0.2809, + "step": 1850 + }, + { + "epoch": 4.9, + "learning_rate": 2.5857908847184987e-05, + "loss": 0.0413, + "step": 1851 + }, + { + "epoch": 4.9, + "learning_rate": 2.5844504021447725e-05, + "loss": 0.0187, + "step": 1852 + }, + { + "epoch": 4.9, + "learning_rate": 2.5831099195710457e-05, + "loss": 0.452, + "step": 1853 + }, + { + "epoch": 4.9, + "learning_rate": 2.5817694369973195e-05, + "loss": 0.0206, + "step": 1854 + }, + { + "epoch": 4.91, + "learning_rate": 2.5804289544235927e-05, + "loss": 0.1639, + "step": 1855 + }, + { + "epoch": 4.91, + "learning_rate": 2.579088471849866e-05, + "loss": 0.1865, + "step": 1856 + }, + { + "epoch": 4.91, + "learning_rate": 2.5777479892761398e-05, + "loss": 0.0022, + "step": 1857 + }, + { + "epoch": 4.92, + "learning_rate": 2.576407506702413e-05, + "loss": 0.1167, + "step": 1858 + }, + { + "epoch": 4.92, + "learning_rate": 2.5750670241286868e-05, + "loss": 0.4013, + "step": 1859 + }, + { + "epoch": 4.92, + "learning_rate": 2.57372654155496e-05, + "loss": 0.2355, + "step": 1860 + }, + { + "epoch": 4.92, + "learning_rate": 2.572386058981233e-05, + "loss": 0.0076, + "step": 1861 + }, + { + "epoch": 4.93, + "learning_rate": 2.571045576407507e-05, + "loss": 0.1612, + "step": 1862 + }, + { + "epoch": 4.93, + "learning_rate": 2.56970509383378e-05, + "loss": 0.0047, + "step": 1863 + }, + { + "epoch": 4.93, + "learning_rate": 2.568364611260054e-05, + "loss": 0.1511, + "step": 1864 + }, + { + "epoch": 4.93, + "learning_rate": 2.5670241286863272e-05, + "loss": 0.011, + "step": 1865 + }, + { + "epoch": 4.94, + "learning_rate": 2.5656836461126004e-05, + "loss": 0.1761, + "step": 1866 + }, + { + "epoch": 4.94, + "learning_rate": 2.5643431635388742e-05, + "loss": 0.004, + "step": 1867 + }, + { + "epoch": 4.94, + "learning_rate": 2.5630026809651474e-05, + "loss": 0.0036, + "step": 1868 + }, + { + "epoch": 4.94, + "learning_rate": 2.5616621983914212e-05, + "loss": 0.4345, + "step": 1869 + }, + { + "epoch": 4.95, + "learning_rate": 2.5603217158176944e-05, + "loss": 0.0034, + "step": 1870 + }, + { + "epoch": 4.95, + "learning_rate": 2.558981233243968e-05, + "loss": 0.1269, + "step": 1871 + }, + { + "epoch": 4.95, + "learning_rate": 2.5576407506702414e-05, + "loss": 0.183, + "step": 1872 + }, + { + "epoch": 4.96, + "learning_rate": 2.556300268096515e-05, + "loss": 0.008, + "step": 1873 + }, + { + "epoch": 4.96, + "learning_rate": 2.5549597855227885e-05, + "loss": 0.0035, + "step": 1874 + }, + { + "epoch": 4.96, + "learning_rate": 2.553619302949062e-05, + "loss": 0.0133, + "step": 1875 + }, + { + "epoch": 4.96, + "learning_rate": 2.552278820375335e-05, + "loss": 0.2156, + "step": 1876 + }, + { + "epoch": 4.97, + "learning_rate": 2.550938337801609e-05, + "loss": 0.0043, + "step": 1877 + }, + { + "epoch": 4.97, + "learning_rate": 2.549597855227882e-05, + "loss": 0.2614, + "step": 1878 + }, + { + "epoch": 4.97, + "learning_rate": 2.548257372654156e-05, + "loss": 0.0208, + "step": 1879 + }, + { + "epoch": 4.97, + "learning_rate": 2.5469168900804292e-05, + "loss": 0.0228, + "step": 1880 + }, + { + "epoch": 4.98, + "learning_rate": 2.5455764075067024e-05, + "loss": 0.0105, + "step": 1881 + }, + { + "epoch": 4.98, + "learning_rate": 2.5442359249329762e-05, + "loss": 0.0108, + "step": 1882 + }, + { + "epoch": 4.98, + "learning_rate": 2.5428954423592494e-05, + "loss": 0.3828, + "step": 1883 + }, + { + "epoch": 4.98, + "learning_rate": 2.5415549597855232e-05, + "loss": 0.0093, + "step": 1884 + }, + { + "epoch": 4.99, + "learning_rate": 2.5402144772117964e-05, + "loss": 0.0231, + "step": 1885 + }, + { + "epoch": 4.99, + "learning_rate": 2.5388739946380696e-05, + "loss": 0.0082, + "step": 1886 + }, + { + "epoch": 4.99, + "learning_rate": 2.5375335120643434e-05, + "loss": 0.1796, + "step": 1887 + }, + { + "epoch": 4.99, + "learning_rate": 2.5361930294906166e-05, + "loss": 0.0753, + "step": 1888 + }, + { + "epoch": 5.0, + "learning_rate": 2.5348525469168905e-05, + "loss": 0.0142, + "step": 1889 + }, + { + "epoch": 5.0, + "learning_rate": 2.5335120643431636e-05, + "loss": 0.0047, + "step": 1890 + }, + { + "epoch": 5.0, + "eval_f1": 0.7775974025974025, + "eval_loss": 0.953689694404602, + "eval_runtime": 1.8696, + "eval_samples_per_second": 809.285, + "eval_steps_per_second": 50.814, + "step": 1890 + }, + { + "epoch": 5.0, + "learning_rate": 2.5321715817694368e-05, + "loss": 0.0014, + "step": 1891 + }, + { + "epoch": 5.01, + "learning_rate": 2.5308310991957107e-05, + "loss": 0.0487, + "step": 1892 + }, + { + "epoch": 5.01, + "learning_rate": 2.529490616621984e-05, + "loss": 0.0037, + "step": 1893 + }, + { + "epoch": 5.01, + "learning_rate": 2.5281501340482577e-05, + "loss": 0.0512, + "step": 1894 + }, + { + "epoch": 5.01, + "learning_rate": 2.526809651474531e-05, + "loss": 0.134, + "step": 1895 + }, + { + "epoch": 5.02, + "learning_rate": 2.525469168900804e-05, + "loss": 0.3762, + "step": 1896 + }, + { + "epoch": 5.02, + "learning_rate": 2.524128686327078e-05, + "loss": 0.0011, + "step": 1897 + }, + { + "epoch": 5.02, + "learning_rate": 2.522788203753351e-05, + "loss": 0.0023, + "step": 1898 + }, + { + "epoch": 5.02, + "learning_rate": 2.521447721179625e-05, + "loss": 0.0526, + "step": 1899 + }, + { + "epoch": 5.03, + "learning_rate": 2.520107238605898e-05, + "loss": 0.0553, + "step": 1900 + }, + { + "epoch": 5.03, + "learning_rate": 2.5187667560321716e-05, + "loss": 0.1773, + "step": 1901 + }, + { + "epoch": 5.03, + "learning_rate": 2.517426273458445e-05, + "loss": 0.451, + "step": 1902 + }, + { + "epoch": 5.03, + "learning_rate": 2.5160857908847186e-05, + "loss": 0.0217, + "step": 1903 + }, + { + "epoch": 5.04, + "learning_rate": 2.514745308310992e-05, + "loss": 0.0728, + "step": 1904 + }, + { + "epoch": 5.04, + "learning_rate": 2.5134048257372657e-05, + "loss": 0.0009, + "step": 1905 + }, + { + "epoch": 5.04, + "learning_rate": 2.512064343163539e-05, + "loss": 0.1018, + "step": 1906 + }, + { + "epoch": 5.04, + "learning_rate": 2.5107238605898127e-05, + "loss": 0.0012, + "step": 1907 + }, + { + "epoch": 5.05, + "learning_rate": 2.509383378016086e-05, + "loss": 0.004, + "step": 1908 + }, + { + "epoch": 5.05, + "learning_rate": 2.5080428954423597e-05, + "loss": 0.0012, + "step": 1909 + }, + { + "epoch": 5.05, + "learning_rate": 2.506702412868633e-05, + "loss": 0.0128, + "step": 1910 + }, + { + "epoch": 5.06, + "learning_rate": 2.505361930294906e-05, + "loss": 0.1116, + "step": 1911 + }, + { + "epoch": 5.06, + "learning_rate": 2.50402144772118e-05, + "loss": 0.0011, + "step": 1912 + }, + { + "epoch": 5.06, + "learning_rate": 2.502680965147453e-05, + "loss": 0.0011, + "step": 1913 + }, + { + "epoch": 5.06, + "learning_rate": 2.501340482573727e-05, + "loss": 0.0897, + "step": 1914 + }, + { + "epoch": 5.07, + "learning_rate": 2.5e-05, + "loss": 0.0014, + "step": 1915 + }, + { + "epoch": 5.07, + "learning_rate": 2.4986595174262736e-05, + "loss": 0.0918, + "step": 1916 + }, + { + "epoch": 5.07, + "learning_rate": 2.497319034852547e-05, + "loss": 0.0026, + "step": 1917 + }, + { + "epoch": 5.07, + "learning_rate": 2.4959785522788203e-05, + "loss": 0.0225, + "step": 1918 + }, + { + "epoch": 5.08, + "learning_rate": 2.494638069705094e-05, + "loss": 0.2655, + "step": 1919 + }, + { + "epoch": 5.08, + "learning_rate": 2.4932975871313673e-05, + "loss": 0.0029, + "step": 1920 + }, + { + "epoch": 5.08, + "learning_rate": 2.491957104557641e-05, + "loss": 0.0006, + "step": 1921 + }, + { + "epoch": 5.08, + "learning_rate": 2.4906166219839144e-05, + "loss": 0.0008, + "step": 1922 + }, + { + "epoch": 5.09, + "learning_rate": 2.4892761394101875e-05, + "loss": 0.0012, + "step": 1923 + }, + { + "epoch": 5.09, + "learning_rate": 2.487935656836461e-05, + "loss": 0.0013, + "step": 1924 + }, + { + "epoch": 5.09, + "learning_rate": 2.4865951742627346e-05, + "loss": 0.0524, + "step": 1925 + }, + { + "epoch": 5.1, + "learning_rate": 2.485254691689008e-05, + "loss": 0.0059, + "step": 1926 + }, + { + "epoch": 5.1, + "learning_rate": 2.4839142091152816e-05, + "loss": 0.0026, + "step": 1927 + }, + { + "epoch": 5.1, + "learning_rate": 2.482573726541555e-05, + "loss": 0.0015, + "step": 1928 + }, + { + "epoch": 5.1, + "learning_rate": 2.4812332439678286e-05, + "loss": 0.0073, + "step": 1929 + }, + { + "epoch": 5.11, + "learning_rate": 2.479892761394102e-05, + "loss": 0.0008, + "step": 1930 + }, + { + "epoch": 5.11, + "learning_rate": 2.4785522788203757e-05, + "loss": 0.1519, + "step": 1931 + }, + { + "epoch": 5.11, + "learning_rate": 2.477211796246649e-05, + "loss": 0.008, + "step": 1932 + }, + { + "epoch": 5.11, + "learning_rate": 2.4758713136729223e-05, + "loss": 0.0009, + "step": 1933 + }, + { + "epoch": 5.12, + "learning_rate": 2.474530831099196e-05, + "loss": 0.0299, + "step": 1934 + }, + { + "epoch": 5.12, + "learning_rate": 2.4731903485254694e-05, + "loss": 0.1637, + "step": 1935 + }, + { + "epoch": 5.12, + "learning_rate": 2.471849865951743e-05, + "loss": 0.0369, + "step": 1936 + }, + { + "epoch": 5.12, + "learning_rate": 2.4705093833780164e-05, + "loss": 0.0057, + "step": 1937 + }, + { + "epoch": 5.13, + "learning_rate": 2.4691689008042896e-05, + "loss": 0.0035, + "step": 1938 + }, + { + "epoch": 5.13, + "learning_rate": 2.467828418230563e-05, + "loss": 0.0011, + "step": 1939 + }, + { + "epoch": 5.13, + "learning_rate": 2.4664879356568366e-05, + "loss": 0.0018, + "step": 1940 + }, + { + "epoch": 5.13, + "learning_rate": 2.46514745308311e-05, + "loss": 0.0013, + "step": 1941 + }, + { + "epoch": 5.14, + "learning_rate": 2.4638069705093836e-05, + "loss": 0.0779, + "step": 1942 + }, + { + "epoch": 5.14, + "learning_rate": 2.4624664879356568e-05, + "loss": 0.1762, + "step": 1943 + }, + { + "epoch": 5.14, + "learning_rate": 2.4611260053619303e-05, + "loss": 0.0006, + "step": 1944 + }, + { + "epoch": 5.15, + "learning_rate": 2.4597855227882038e-05, + "loss": 0.0037, + "step": 1945 + }, + { + "epoch": 5.15, + "learning_rate": 2.4584450402144773e-05, + "loss": 0.0005, + "step": 1946 + }, + { + "epoch": 5.15, + "learning_rate": 2.457104557640751e-05, + "loss": 0.0397, + "step": 1947 + }, + { + "epoch": 5.15, + "learning_rate": 2.4557640750670244e-05, + "loss": 0.0082, + "step": 1948 + }, + { + "epoch": 5.16, + "learning_rate": 2.4544235924932975e-05, + "loss": 0.0008, + "step": 1949 + }, + { + "epoch": 5.16, + "learning_rate": 2.453083109919571e-05, + "loss": 0.0219, + "step": 1950 + }, + { + "epoch": 5.16, + "learning_rate": 2.4517426273458446e-05, + "loss": 0.3966, + "step": 1951 + }, + { + "epoch": 5.16, + "learning_rate": 2.450402144772118e-05, + "loss": 0.0011, + "step": 1952 + }, + { + "epoch": 5.17, + "learning_rate": 2.4490616621983916e-05, + "loss": 0.3447, + "step": 1953 + }, + { + "epoch": 5.17, + "learning_rate": 2.4477211796246648e-05, + "loss": 0.0006, + "step": 1954 + }, + { + "epoch": 5.17, + "learning_rate": 2.4463806970509383e-05, + "loss": 0.0011, + "step": 1955 + }, + { + "epoch": 5.17, + "learning_rate": 2.4450402144772118e-05, + "loss": 0.0013, + "step": 1956 + }, + { + "epoch": 5.18, + "learning_rate": 2.4436997319034853e-05, + "loss": 0.1495, + "step": 1957 + }, + { + "epoch": 5.18, + "learning_rate": 2.4423592493297588e-05, + "loss": 0.0005, + "step": 1958 + }, + { + "epoch": 5.18, + "learning_rate": 2.4410187667560323e-05, + "loss": 0.3345, + "step": 1959 + }, + { + "epoch": 5.19, + "learning_rate": 2.439678284182306e-05, + "loss": 0.0048, + "step": 1960 + }, + { + "epoch": 5.19, + "learning_rate": 2.4383378016085793e-05, + "loss": 0.001, + "step": 1961 + }, + { + "epoch": 5.19, + "learning_rate": 2.436997319034853e-05, + "loss": 0.0025, + "step": 1962 + }, + { + "epoch": 5.19, + "learning_rate": 2.4356568364611264e-05, + "loss": 0.3215, + "step": 1963 + }, + { + "epoch": 5.2, + "learning_rate": 2.4343163538873995e-05, + "loss": 0.0197, + "step": 1964 + }, + { + "epoch": 5.2, + "learning_rate": 2.432975871313673e-05, + "loss": 0.0018, + "step": 1965 + }, + { + "epoch": 5.2, + "learning_rate": 2.4316353887399466e-05, + "loss": 0.1012, + "step": 1966 + }, + { + "epoch": 5.2, + "learning_rate": 2.43029490616622e-05, + "loss": 0.0179, + "step": 1967 + }, + { + "epoch": 5.21, + "learning_rate": 2.4289544235924936e-05, + "loss": 0.0032, + "step": 1968 + }, + { + "epoch": 5.21, + "learning_rate": 2.4276139410187668e-05, + "loss": 0.0011, + "step": 1969 + }, + { + "epoch": 5.21, + "learning_rate": 2.4262734584450403e-05, + "loss": 0.4875, + "step": 1970 + }, + { + "epoch": 5.21, + "learning_rate": 2.4249329758713138e-05, + "loss": 0.2791, + "step": 1971 + }, + { + "epoch": 5.22, + "learning_rate": 2.4235924932975873e-05, + "loss": 0.0011, + "step": 1972 + }, + { + "epoch": 5.22, + "learning_rate": 2.4222520107238608e-05, + "loss": 0.0011, + "step": 1973 + }, + { + "epoch": 5.22, + "learning_rate": 2.420911528150134e-05, + "loss": 0.0976, + "step": 1974 + }, + { + "epoch": 5.22, + "learning_rate": 2.4195710455764075e-05, + "loss": 0.3669, + "step": 1975 + }, + { + "epoch": 5.23, + "learning_rate": 2.418230563002681e-05, + "loss": 0.0022, + "step": 1976 + }, + { + "epoch": 5.23, + "learning_rate": 2.4168900804289545e-05, + "loss": 0.0015, + "step": 1977 + }, + { + "epoch": 5.23, + "learning_rate": 2.415549597855228e-05, + "loss": 0.0014, + "step": 1978 + }, + { + "epoch": 5.24, + "learning_rate": 2.4142091152815012e-05, + "loss": 0.0433, + "step": 1979 + }, + { + "epoch": 5.24, + "learning_rate": 2.4128686327077747e-05, + "loss": 0.0019, + "step": 1980 + }, + { + "epoch": 5.24, + "learning_rate": 2.4115281501340483e-05, + "loss": 0.0007, + "step": 1981 + }, + { + "epoch": 5.24, + "learning_rate": 2.4101876675603218e-05, + "loss": 0.0136, + "step": 1982 + }, + { + "epoch": 5.25, + "learning_rate": 2.4088471849865953e-05, + "loss": 0.1744, + "step": 1983 + }, + { + "epoch": 5.25, + "learning_rate": 2.4075067024128688e-05, + "loss": 0.1557, + "step": 1984 + }, + { + "epoch": 5.25, + "learning_rate": 2.4061662198391423e-05, + "loss": 0.1192, + "step": 1985 + }, + { + "epoch": 5.25, + "learning_rate": 2.4048257372654158e-05, + "loss": 0.0406, + "step": 1986 + }, + { + "epoch": 5.26, + "learning_rate": 2.4034852546916893e-05, + "loss": 0.2243, + "step": 1987 + }, + { + "epoch": 5.26, + "learning_rate": 2.402144772117963e-05, + "loss": 0.0021, + "step": 1988 + }, + { + "epoch": 5.26, + "learning_rate": 2.400804289544236e-05, + "loss": 0.002, + "step": 1989 + }, + { + "epoch": 5.26, + "learning_rate": 2.3994638069705095e-05, + "loss": 0.077, + "step": 1990 + }, + { + "epoch": 5.27, + "learning_rate": 2.398123324396783e-05, + "loss": 0.0378, + "step": 1991 + }, + { + "epoch": 5.27, + "learning_rate": 2.3967828418230566e-05, + "loss": 0.012, + "step": 1992 + }, + { + "epoch": 5.27, + "learning_rate": 2.39544235924933e-05, + "loss": 0.1386, + "step": 1993 + }, + { + "epoch": 5.28, + "learning_rate": 2.3941018766756032e-05, + "loss": 0.002, + "step": 1994 + }, + { + "epoch": 5.28, + "learning_rate": 2.3927613941018768e-05, + "loss": 0.0008, + "step": 1995 + }, + { + "epoch": 5.28, + "learning_rate": 2.3914209115281503e-05, + "loss": 0.0021, + "step": 1996 + }, + { + "epoch": 5.28, + "learning_rate": 2.3900804289544238e-05, + "loss": 0.022, + "step": 1997 + }, + { + "epoch": 5.29, + "learning_rate": 2.3887399463806973e-05, + "loss": 0.0015, + "step": 1998 + }, + { + "epoch": 5.29, + "learning_rate": 2.3873994638069705e-05, + "loss": 0.1486, + "step": 1999 + }, + { + "epoch": 5.29, + "learning_rate": 2.386058981233244e-05, + "loss": 0.2586, + "step": 2000 + }, + { + "epoch": 5.29, + "learning_rate": 2.3847184986595175e-05, + "loss": 0.0088, + "step": 2001 + }, + { + "epoch": 5.3, + "learning_rate": 2.383378016085791e-05, + "loss": 0.0044, + "step": 2002 + }, + { + "epoch": 5.3, + "learning_rate": 2.3820375335120645e-05, + "loss": 0.0015, + "step": 2003 + }, + { + "epoch": 5.3, + "learning_rate": 2.3806970509383377e-05, + "loss": 0.0008, + "step": 2004 + }, + { + "epoch": 5.3, + "learning_rate": 2.3793565683646112e-05, + "loss": 0.212, + "step": 2005 + }, + { + "epoch": 5.31, + "learning_rate": 2.3780160857908847e-05, + "loss": 0.0005, + "step": 2006 + }, + { + "epoch": 5.31, + "learning_rate": 2.3766756032171582e-05, + "loss": 0.1511, + "step": 2007 + }, + { + "epoch": 5.31, + "learning_rate": 2.3753351206434318e-05, + "loss": 0.0023, + "step": 2008 + }, + { + "epoch": 5.31, + "learning_rate": 2.3739946380697053e-05, + "loss": 0.1544, + "step": 2009 + }, + { + "epoch": 5.32, + "learning_rate": 2.3726541554959784e-05, + "loss": 0.0306, + "step": 2010 + }, + { + "epoch": 5.32, + "learning_rate": 2.371313672922252e-05, + "loss": 0.1005, + "step": 2011 + }, + { + "epoch": 5.32, + "learning_rate": 2.3699731903485255e-05, + "loss": 0.0744, + "step": 2012 + }, + { + "epoch": 5.33, + "learning_rate": 2.368632707774799e-05, + "loss": 0.0622, + "step": 2013 + }, + { + "epoch": 5.33, + "learning_rate": 2.3672922252010725e-05, + "loss": 0.0192, + "step": 2014 + }, + { + "epoch": 5.33, + "learning_rate": 2.365951742627346e-05, + "loss": 0.016, + "step": 2015 + }, + { + "epoch": 5.33, + "learning_rate": 2.3646112600536195e-05, + "loss": 0.293, + "step": 2016 + }, + { + "epoch": 5.34, + "learning_rate": 2.363270777479893e-05, + "loss": 0.0114, + "step": 2017 + }, + { + "epoch": 5.34, + "learning_rate": 2.3619302949061665e-05, + "loss": 0.1254, + "step": 2018 + }, + { + "epoch": 5.34, + "learning_rate": 2.36058981233244e-05, + "loss": 0.0638, + "step": 2019 + }, + { + "epoch": 5.34, + "learning_rate": 2.3592493297587132e-05, + "loss": 0.0192, + "step": 2020 + }, + { + "epoch": 5.35, + "learning_rate": 2.3579088471849867e-05, + "loss": 0.0069, + "step": 2021 + }, + { + "epoch": 5.35, + "learning_rate": 2.3565683646112603e-05, + "loss": 0.0573, + "step": 2022 + }, + { + "epoch": 5.35, + "learning_rate": 2.3552278820375338e-05, + "loss": 0.0039, + "step": 2023 + }, + { + "epoch": 5.35, + "learning_rate": 2.3538873994638073e-05, + "loss": 0.0079, + "step": 2024 + }, + { + "epoch": 5.36, + "learning_rate": 2.3525469168900805e-05, + "loss": 0.0063, + "step": 2025 + }, + { + "epoch": 5.36, + "learning_rate": 2.351206434316354e-05, + "loss": 0.0032, + "step": 2026 + }, + { + "epoch": 5.36, + "learning_rate": 2.3498659517426275e-05, + "loss": 0.1018, + "step": 2027 + }, + { + "epoch": 5.37, + "learning_rate": 2.348525469168901e-05, + "loss": 0.0017, + "step": 2028 + }, + { + "epoch": 5.37, + "learning_rate": 2.3471849865951745e-05, + "loss": 0.0203, + "step": 2029 + }, + { + "epoch": 5.37, + "learning_rate": 2.3458445040214477e-05, + "loss": 0.0008, + "step": 2030 + }, + { + "epoch": 5.37, + "learning_rate": 2.3445040214477212e-05, + "loss": 0.3145, + "step": 2031 + }, + { + "epoch": 5.38, + "learning_rate": 2.3431635388739947e-05, + "loss": 0.2476, + "step": 2032 + }, + { + "epoch": 5.38, + "learning_rate": 2.3418230563002682e-05, + "loss": 0.0573, + "step": 2033 + }, + { + "epoch": 5.38, + "learning_rate": 2.3404825737265417e-05, + "loss": 0.0008, + "step": 2034 + }, + { + "epoch": 5.38, + "learning_rate": 2.339142091152815e-05, + "loss": 0.0636, + "step": 2035 + }, + { + "epoch": 5.39, + "learning_rate": 2.3378016085790884e-05, + "loss": 0.001, + "step": 2036 + }, + { + "epoch": 5.39, + "learning_rate": 2.336461126005362e-05, + "loss": 0.0079, + "step": 2037 + }, + { + "epoch": 5.39, + "learning_rate": 2.3351206434316355e-05, + "loss": 0.0752, + "step": 2038 + }, + { + "epoch": 5.39, + "learning_rate": 2.333780160857909e-05, + "loss": 0.0024, + "step": 2039 + }, + { + "epoch": 5.4, + "learning_rate": 2.332439678284182e-05, + "loss": 0.0061, + "step": 2040 + }, + { + "epoch": 5.4, + "learning_rate": 2.3310991957104557e-05, + "loss": 0.2882, + "step": 2041 + }, + { + "epoch": 5.4, + "learning_rate": 2.329758713136729e-05, + "loss": 0.02, + "step": 2042 + }, + { + "epoch": 5.4, + "learning_rate": 2.3284182305630027e-05, + "loss": 0.018, + "step": 2043 + }, + { + "epoch": 5.41, + "learning_rate": 2.3270777479892762e-05, + "loss": 0.0125, + "step": 2044 + }, + { + "epoch": 5.41, + "learning_rate": 2.3257372654155497e-05, + "loss": 0.0007, + "step": 2045 + }, + { + "epoch": 5.41, + "learning_rate": 2.3243967828418232e-05, + "loss": 0.3849, + "step": 2046 + }, + { + "epoch": 5.42, + "learning_rate": 2.3230563002680967e-05, + "loss": 0.0011, + "step": 2047 + }, + { + "epoch": 5.42, + "learning_rate": 2.3217158176943702e-05, + "loss": 0.1235, + "step": 2048 + }, + { + "epoch": 5.42, + "learning_rate": 2.3203753351206438e-05, + "loss": 0.0006, + "step": 2049 + }, + { + "epoch": 5.42, + "learning_rate": 2.319034852546917e-05, + "loss": 0.0006, + "step": 2050 + }, + { + "epoch": 5.43, + "learning_rate": 2.3176943699731904e-05, + "loss": 0.0011, + "step": 2051 + }, + { + "epoch": 5.43, + "learning_rate": 2.316353887399464e-05, + "loss": 0.0011, + "step": 2052 + }, + { + "epoch": 5.43, + "learning_rate": 2.3150134048257375e-05, + "loss": 0.0032, + "step": 2053 + }, + { + "epoch": 5.43, + "learning_rate": 2.313672922252011e-05, + "loss": 0.0718, + "step": 2054 + }, + { + "epoch": 5.44, + "learning_rate": 2.312332439678284e-05, + "loss": 0.0052, + "step": 2055 + }, + { + "epoch": 5.44, + "learning_rate": 2.3109919571045577e-05, + "loss": 0.0026, + "step": 2056 + }, + { + "epoch": 5.44, + "learning_rate": 2.3096514745308312e-05, + "loss": 0.1854, + "step": 2057 + }, + { + "epoch": 5.44, + "learning_rate": 2.3083109919571047e-05, + "loss": 0.0008, + "step": 2058 + }, + { + "epoch": 5.45, + "learning_rate": 2.3069705093833782e-05, + "loss": 0.2671, + "step": 2059 + }, + { + "epoch": 5.45, + "learning_rate": 2.3056300268096514e-05, + "loss": 0.0064, + "step": 2060 + }, + { + "epoch": 5.45, + "learning_rate": 2.304289544235925e-05, + "loss": 0.0012, + "step": 2061 + }, + { + "epoch": 5.46, + "learning_rate": 2.3029490616621984e-05, + "loss": 0.0006, + "step": 2062 + }, + { + "epoch": 5.46, + "learning_rate": 2.301608579088472e-05, + "loss": 0.0008, + "step": 2063 + }, + { + "epoch": 5.46, + "learning_rate": 2.3002680965147454e-05, + "loss": 0.4998, + "step": 2064 + }, + { + "epoch": 5.46, + "learning_rate": 2.2989276139410186e-05, + "loss": 0.0037, + "step": 2065 + }, + { + "epoch": 5.47, + "learning_rate": 2.297587131367292e-05, + "loss": 0.0029, + "step": 2066 + }, + { + "epoch": 5.47, + "learning_rate": 2.2962466487935656e-05, + "loss": 0.043, + "step": 2067 + }, + { + "epoch": 5.47, + "learning_rate": 2.294906166219839e-05, + "loss": 0.0451, + "step": 2068 + }, + { + "epoch": 5.47, + "learning_rate": 2.2935656836461127e-05, + "loss": 0.0009, + "step": 2069 + }, + { + "epoch": 5.48, + "learning_rate": 2.2922252010723862e-05, + "loss": 0.002, + "step": 2070 + }, + { + "epoch": 5.48, + "learning_rate": 2.2908847184986597e-05, + "loss": 0.2744, + "step": 2071 + }, + { + "epoch": 5.48, + "learning_rate": 2.2895442359249332e-05, + "loss": 0.0146, + "step": 2072 + }, + { + "epoch": 5.48, + "learning_rate": 2.2882037533512067e-05, + "loss": 0.0011, + "step": 2073 + }, + { + "epoch": 5.49, + "learning_rate": 2.2868632707774802e-05, + "loss": 0.0421, + "step": 2074 + }, + { + "epoch": 5.49, + "learning_rate": 2.2855227882037537e-05, + "loss": 0.1518, + "step": 2075 + }, + { + "epoch": 5.49, + "learning_rate": 2.284182305630027e-05, + "loss": 0.0072, + "step": 2076 + }, + { + "epoch": 5.49, + "learning_rate": 2.2828418230563004e-05, + "loss": 0.2781, + "step": 2077 + }, + { + "epoch": 5.5, + "learning_rate": 2.281501340482574e-05, + "loss": 0.004, + "step": 2078 + }, + { + "epoch": 5.5, + "learning_rate": 2.2801608579088475e-05, + "loss": 0.1029, + "step": 2079 + }, + { + "epoch": 5.5, + "learning_rate": 2.278820375335121e-05, + "loss": 0.0526, + "step": 2080 + }, + { + "epoch": 5.51, + "learning_rate": 2.277479892761394e-05, + "loss": 0.0011, + "step": 2081 + }, + { + "epoch": 5.51, + "learning_rate": 2.2761394101876677e-05, + "loss": 0.0139, + "step": 2082 + }, + { + "epoch": 5.51, + "learning_rate": 2.274798927613941e-05, + "loss": 0.0509, + "step": 2083 + }, + { + "epoch": 5.51, + "learning_rate": 2.2734584450402147e-05, + "loss": 0.0042, + "step": 2084 + }, + { + "epoch": 5.52, + "learning_rate": 2.2721179624664882e-05, + "loss": 0.1964, + "step": 2085 + }, + { + "epoch": 5.52, + "learning_rate": 2.2707774798927614e-05, + "loss": 0.0083, + "step": 2086 + }, + { + "epoch": 5.52, + "learning_rate": 2.269436997319035e-05, + "loss": 0.101, + "step": 2087 + }, + { + "epoch": 5.52, + "learning_rate": 2.2680965147453084e-05, + "loss": 0.0094, + "step": 2088 + }, + { + "epoch": 5.53, + "learning_rate": 2.266756032171582e-05, + "loss": 0.1433, + "step": 2089 + }, + { + "epoch": 5.53, + "learning_rate": 2.2654155495978554e-05, + "loss": 0.0091, + "step": 2090 + }, + { + "epoch": 5.53, + "learning_rate": 2.2640750670241286e-05, + "loss": 0.0018, + "step": 2091 + }, + { + "epoch": 5.53, + "learning_rate": 2.262734584450402e-05, + "loss": 0.001, + "step": 2092 + }, + { + "epoch": 5.54, + "learning_rate": 2.2613941018766756e-05, + "loss": 0.3507, + "step": 2093 + }, + { + "epoch": 5.54, + "learning_rate": 2.260053619302949e-05, + "loss": 0.0527, + "step": 2094 + }, + { + "epoch": 5.54, + "learning_rate": 2.2587131367292226e-05, + "loss": 0.0015, + "step": 2095 + }, + { + "epoch": 5.54, + "learning_rate": 2.2573726541554958e-05, + "loss": 0.0195, + "step": 2096 + }, + { + "epoch": 5.55, + "learning_rate": 2.2560321715817693e-05, + "loss": 0.0007, + "step": 2097 + }, + { + "epoch": 5.55, + "learning_rate": 2.254691689008043e-05, + "loss": 0.001, + "step": 2098 + }, + { + "epoch": 5.55, + "learning_rate": 2.2533512064343164e-05, + "loss": 0.2777, + "step": 2099 + }, + { + "epoch": 5.56, + "learning_rate": 2.25201072386059e-05, + "loss": 0.0657, + "step": 2100 + }, + { + "epoch": 5.56, + "learning_rate": 2.2506702412868634e-05, + "loss": 0.159, + "step": 2101 + }, + { + "epoch": 5.56, + "learning_rate": 2.249329758713137e-05, + "loss": 0.0342, + "step": 2102 + }, + { + "epoch": 5.56, + "learning_rate": 2.2479892761394104e-05, + "loss": 0.3001, + "step": 2103 + }, + { + "epoch": 5.57, + "learning_rate": 2.246648793565684e-05, + "loss": 0.0028, + "step": 2104 + }, + { + "epoch": 5.57, + "learning_rate": 2.2453083109919574e-05, + "loss": 0.0191, + "step": 2105 + }, + { + "epoch": 5.57, + "learning_rate": 2.2439678284182306e-05, + "loss": 0.0012, + "step": 2106 + }, + { + "epoch": 5.57, + "learning_rate": 2.242627345844504e-05, + "loss": 0.2619, + "step": 2107 + }, + { + "epoch": 5.58, + "learning_rate": 2.2412868632707776e-05, + "loss": 0.001, + "step": 2108 + }, + { + "epoch": 5.58, + "learning_rate": 2.239946380697051e-05, + "loss": 0.094, + "step": 2109 + }, + { + "epoch": 5.58, + "learning_rate": 2.2386058981233247e-05, + "loss": 0.003, + "step": 2110 + }, + { + "epoch": 5.58, + "learning_rate": 2.237265415549598e-05, + "loss": 0.0528, + "step": 2111 + }, + { + "epoch": 5.59, + "learning_rate": 2.2359249329758714e-05, + "loss": 0.1252, + "step": 2112 + }, + { + "epoch": 5.59, + "learning_rate": 2.234584450402145e-05, + "loss": 0.0039, + "step": 2113 + }, + { + "epoch": 5.59, + "learning_rate": 2.2332439678284184e-05, + "loss": 0.0913, + "step": 2114 + }, + { + "epoch": 5.6, + "learning_rate": 2.231903485254692e-05, + "loss": 0.0023, + "step": 2115 + }, + { + "epoch": 5.6, + "learning_rate": 2.230563002680965e-05, + "loss": 0.0047, + "step": 2116 + }, + { + "epoch": 5.6, + "learning_rate": 2.2292225201072386e-05, + "loss": 0.0688, + "step": 2117 + }, + { + "epoch": 5.6, + "learning_rate": 2.227882037533512e-05, + "loss": 0.0013, + "step": 2118 + }, + { + "epoch": 5.61, + "learning_rate": 2.2265415549597856e-05, + "loss": 0.0012, + "step": 2119 + }, + { + "epoch": 5.61, + "learning_rate": 2.225201072386059e-05, + "loss": 0.0048, + "step": 2120 + }, + { + "epoch": 5.61, + "learning_rate": 2.2238605898123323e-05, + "loss": 0.0011, + "step": 2121 + }, + { + "epoch": 5.61, + "learning_rate": 2.2225201072386058e-05, + "loss": 0.0009, + "step": 2122 + }, + { + "epoch": 5.62, + "learning_rate": 2.2211796246648793e-05, + "loss": 0.0015, + "step": 2123 + }, + { + "epoch": 5.62, + "learning_rate": 2.2198391420911528e-05, + "loss": 0.0077, + "step": 2124 + }, + { + "epoch": 5.62, + "learning_rate": 2.2184986595174263e-05, + "loss": 0.0007, + "step": 2125 + }, + { + "epoch": 5.62, + "learning_rate": 2.2171581769437e-05, + "loss": 0.0007, + "step": 2126 + }, + { + "epoch": 5.63, + "learning_rate": 2.2158176943699734e-05, + "loss": 0.0506, + "step": 2127 + }, + { + "epoch": 5.63, + "learning_rate": 2.214477211796247e-05, + "loss": 0.0016, + "step": 2128 + }, + { + "epoch": 5.63, + "learning_rate": 2.2131367292225204e-05, + "loss": 0.0005, + "step": 2129 + }, + { + "epoch": 5.63, + "learning_rate": 2.211796246648794e-05, + "loss": 0.0049, + "step": 2130 + }, + { + "epoch": 5.64, + "learning_rate": 2.210455764075067e-05, + "loss": 0.0305, + "step": 2131 + }, + { + "epoch": 5.64, + "learning_rate": 2.2091152815013406e-05, + "loss": 0.0448, + "step": 2132 + }, + { + "epoch": 5.64, + "learning_rate": 2.207774798927614e-05, + "loss": 0.5391, + "step": 2133 + }, + { + "epoch": 5.65, + "learning_rate": 2.2064343163538876e-05, + "loss": 0.0005, + "step": 2134 + }, + { + "epoch": 5.65, + "learning_rate": 2.205093833780161e-05, + "loss": 0.0141, + "step": 2135 + }, + { + "epoch": 5.65, + "learning_rate": 2.2037533512064346e-05, + "loss": 0.3613, + "step": 2136 + }, + { + "epoch": 5.65, + "learning_rate": 2.2024128686327078e-05, + "loss": 0.0013, + "step": 2137 + }, + { + "epoch": 5.66, + "learning_rate": 2.2010723860589813e-05, + "loss": 0.002, + "step": 2138 + }, + { + "epoch": 5.66, + "learning_rate": 2.199731903485255e-05, + "loss": 0.0207, + "step": 2139 + }, + { + "epoch": 5.66, + "learning_rate": 2.1983914209115284e-05, + "loss": 0.0004, + "step": 2140 + }, + { + "epoch": 5.66, + "learning_rate": 2.197050938337802e-05, + "loss": 0.0022, + "step": 2141 + }, + { + "epoch": 5.67, + "learning_rate": 2.195710455764075e-05, + "loss": 0.5076, + "step": 2142 + }, + { + "epoch": 5.67, + "learning_rate": 2.1943699731903486e-05, + "loss": 0.0016, + "step": 2143 + }, + { + "epoch": 5.67, + "learning_rate": 2.193029490616622e-05, + "loss": 0.0014, + "step": 2144 + }, + { + "epoch": 5.67, + "learning_rate": 2.1916890080428956e-05, + "loss": 0.0101, + "step": 2145 + }, + { + "epoch": 5.68, + "learning_rate": 2.190348525469169e-05, + "loss": 0.0048, + "step": 2146 + }, + { + "epoch": 5.68, + "learning_rate": 2.1890080428954423e-05, + "loss": 0.001, + "step": 2147 + }, + { + "epoch": 5.68, + "learning_rate": 2.1876675603217158e-05, + "loss": 0.0004, + "step": 2148 + }, + { + "epoch": 5.69, + "learning_rate": 2.1863270777479893e-05, + "loss": 0.2627, + "step": 2149 + }, + { + "epoch": 5.69, + "learning_rate": 2.1849865951742628e-05, + "loss": 0.0013, + "step": 2150 + }, + { + "epoch": 5.69, + "learning_rate": 2.1836461126005363e-05, + "loss": 0.0074, + "step": 2151 + }, + { + "epoch": 5.69, + "learning_rate": 2.1823056300268095e-05, + "loss": 0.0238, + "step": 2152 + }, + { + "epoch": 5.7, + "learning_rate": 2.180965147453083e-05, + "loss": 0.0013, + "step": 2153 + }, + { + "epoch": 5.7, + "learning_rate": 2.1796246648793565e-05, + "loss": 0.0005, + "step": 2154 + }, + { + "epoch": 5.7, + "learning_rate": 2.17828418230563e-05, + "loss": 0.0006, + "step": 2155 + }, + { + "epoch": 5.7, + "learning_rate": 2.1769436997319036e-05, + "loss": 0.0442, + "step": 2156 + }, + { + "epoch": 5.71, + "learning_rate": 2.175603217158177e-05, + "loss": 0.0007, + "step": 2157 + }, + { + "epoch": 5.71, + "learning_rate": 2.1742627345844506e-05, + "loss": 0.0032, + "step": 2158 + }, + { + "epoch": 5.71, + "learning_rate": 2.172922252010724e-05, + "loss": 0.005, + "step": 2159 + }, + { + "epoch": 5.71, + "learning_rate": 2.1715817694369976e-05, + "loss": 0.0005, + "step": 2160 + }, + { + "epoch": 5.72, + "learning_rate": 2.170241286863271e-05, + "loss": 0.0005, + "step": 2161 + }, + { + "epoch": 5.72, + "learning_rate": 2.1689008042895443e-05, + "loss": 0.2023, + "step": 2162 + }, + { + "epoch": 5.72, + "learning_rate": 2.1675603217158178e-05, + "loss": 0.3146, + "step": 2163 + }, + { + "epoch": 5.72, + "learning_rate": 2.1662198391420913e-05, + "loss": 0.0035, + "step": 2164 + }, + { + "epoch": 5.73, + "learning_rate": 2.164879356568365e-05, + "loss": 0.141, + "step": 2165 + }, + { + "epoch": 5.73, + "learning_rate": 2.1635388739946383e-05, + "loss": 0.0005, + "step": 2166 + }, + { + "epoch": 5.73, + "learning_rate": 2.1621983914209115e-05, + "loss": 0.0013, + "step": 2167 + }, + { + "epoch": 5.74, + "learning_rate": 2.160857908847185e-05, + "loss": 0.0484, + "step": 2168 + }, + { + "epoch": 5.74, + "learning_rate": 2.1595174262734585e-05, + "loss": 0.0109, + "step": 2169 + }, + { + "epoch": 5.74, + "learning_rate": 2.158176943699732e-05, + "loss": 0.3307, + "step": 2170 + }, + { + "epoch": 5.74, + "learning_rate": 2.1568364611260056e-05, + "loss": 0.0013, + "step": 2171 + }, + { + "epoch": 5.75, + "learning_rate": 2.1554959785522787e-05, + "loss": 0.0005, + "step": 2172 + }, + { + "epoch": 5.75, + "learning_rate": 2.1541554959785523e-05, + "loss": 0.0004, + "step": 2173 + }, + { + "epoch": 5.75, + "learning_rate": 2.1528150134048258e-05, + "loss": 0.0744, + "step": 2174 + }, + { + "epoch": 5.75, + "learning_rate": 2.1514745308310993e-05, + "loss": 0.0008, + "step": 2175 + }, + { + "epoch": 5.76, + "learning_rate": 2.1501340482573728e-05, + "loss": 0.0013, + "step": 2176 + }, + { + "epoch": 5.76, + "learning_rate": 2.148793565683646e-05, + "loss": 0.0006, + "step": 2177 + }, + { + "epoch": 5.76, + "learning_rate": 2.1474530831099195e-05, + "loss": 0.055, + "step": 2178 + }, + { + "epoch": 5.76, + "learning_rate": 2.146112600536193e-05, + "loss": 0.0013, + "step": 2179 + }, + { + "epoch": 5.77, + "learning_rate": 2.1447721179624665e-05, + "loss": 0.0022, + "step": 2180 + }, + { + "epoch": 5.77, + "learning_rate": 2.14343163538874e-05, + "loss": 0.0009, + "step": 2181 + }, + { + "epoch": 5.77, + "learning_rate": 2.1420911528150135e-05, + "loss": 0.0011, + "step": 2182 + }, + { + "epoch": 5.78, + "learning_rate": 2.140750670241287e-05, + "loss": 0.0676, + "step": 2183 + }, + { + "epoch": 5.78, + "learning_rate": 2.1394101876675606e-05, + "loss": 0.4583, + "step": 2184 + }, + { + "epoch": 5.78, + "learning_rate": 2.138069705093834e-05, + "loss": 0.0009, + "step": 2185 + }, + { + "epoch": 5.78, + "learning_rate": 2.1367292225201076e-05, + "loss": 0.002, + "step": 2186 + }, + { + "epoch": 5.79, + "learning_rate": 2.1353887399463808e-05, + "loss": 0.7625, + "step": 2187 + }, + { + "epoch": 5.79, + "learning_rate": 2.1340482573726543e-05, + "loss": 0.4912, + "step": 2188 + }, + { + "epoch": 5.79, + "learning_rate": 2.1327077747989278e-05, + "loss": 0.0283, + "step": 2189 + }, + { + "epoch": 5.79, + "learning_rate": 2.1313672922252013e-05, + "loss": 0.0011, + "step": 2190 + }, + { + "epoch": 5.8, + "learning_rate": 2.1300268096514748e-05, + "loss": 0.0013, + "step": 2191 + }, + { + "epoch": 5.8, + "learning_rate": 2.128686327077748e-05, + "loss": 0.3384, + "step": 2192 + }, + { + "epoch": 5.8, + "learning_rate": 2.1273458445040215e-05, + "loss": 0.4533, + "step": 2193 + }, + { + "epoch": 5.8, + "learning_rate": 2.126005361930295e-05, + "loss": 0.0039, + "step": 2194 + }, + { + "epoch": 5.81, + "learning_rate": 2.1246648793565685e-05, + "loss": 0.0038, + "step": 2195 + }, + { + "epoch": 5.81, + "learning_rate": 2.123324396782842e-05, + "loss": 0.0318, + "step": 2196 + }, + { + "epoch": 5.81, + "learning_rate": 2.1219839142091156e-05, + "loss": 0.0045, + "step": 2197 + }, + { + "epoch": 5.81, + "learning_rate": 2.1206434316353887e-05, + "loss": 0.3134, + "step": 2198 + }, + { + "epoch": 5.82, + "learning_rate": 2.1193029490616622e-05, + "loss": 0.0011, + "step": 2199 + }, + { + "epoch": 5.82, + "learning_rate": 2.1179624664879358e-05, + "loss": 0.0008, + "step": 2200 + }, + { + "epoch": 5.82, + "learning_rate": 2.1166219839142093e-05, + "loss": 0.0009, + "step": 2201 + }, + { + "epoch": 5.83, + "learning_rate": 2.1152815013404828e-05, + "loss": 0.0083, + "step": 2202 + }, + { + "epoch": 5.83, + "learning_rate": 2.113941018766756e-05, + "loss": 0.0196, + "step": 2203 + }, + { + "epoch": 5.83, + "learning_rate": 2.1126005361930295e-05, + "loss": 0.0063, + "step": 2204 + }, + { + "epoch": 5.83, + "learning_rate": 2.111260053619303e-05, + "loss": 0.0064, + "step": 2205 + }, + { + "epoch": 5.84, + "learning_rate": 2.1099195710455765e-05, + "loss": 0.0143, + "step": 2206 + }, + { + "epoch": 5.84, + "learning_rate": 2.10857908847185e-05, + "loss": 0.0012, + "step": 2207 + }, + { + "epoch": 5.84, + "learning_rate": 2.1072386058981232e-05, + "loss": 0.0033, + "step": 2208 + }, + { + "epoch": 5.84, + "learning_rate": 2.1058981233243967e-05, + "loss": 0.0014, + "step": 2209 + }, + { + "epoch": 5.85, + "learning_rate": 2.1045576407506702e-05, + "loss": 0.0219, + "step": 2210 + }, + { + "epoch": 5.85, + "learning_rate": 2.1032171581769437e-05, + "loss": 0.3033, + "step": 2211 + }, + { + "epoch": 5.85, + "learning_rate": 2.1018766756032172e-05, + "loss": 0.0711, + "step": 2212 + }, + { + "epoch": 5.85, + "learning_rate": 2.1005361930294907e-05, + "loss": 0.0051, + "step": 2213 + }, + { + "epoch": 5.86, + "learning_rate": 2.0991957104557643e-05, + "loss": 0.026, + "step": 2214 + }, + { + "epoch": 5.86, + "learning_rate": 2.0978552278820378e-05, + "loss": 0.0024, + "step": 2215 + }, + { + "epoch": 5.86, + "learning_rate": 2.0965147453083113e-05, + "loss": 0.3622, + "step": 2216 + }, + { + "epoch": 5.87, + "learning_rate": 2.0951742627345848e-05, + "loss": 0.0009, + "step": 2217 + }, + { + "epoch": 5.87, + "learning_rate": 2.093833780160858e-05, + "loss": 0.0019, + "step": 2218 + }, + { + "epoch": 5.87, + "learning_rate": 2.0924932975871315e-05, + "loss": 0.3412, + "step": 2219 + }, + { + "epoch": 5.87, + "learning_rate": 2.091152815013405e-05, + "loss": 0.0197, + "step": 2220 + }, + { + "epoch": 5.88, + "learning_rate": 2.0898123324396785e-05, + "loss": 0.4229, + "step": 2221 + }, + { + "epoch": 5.88, + "learning_rate": 2.088471849865952e-05, + "loss": 0.0014, + "step": 2222 + }, + { + "epoch": 5.88, + "learning_rate": 2.0871313672922252e-05, + "loss": 0.0183, + "step": 2223 + }, + { + "epoch": 5.88, + "learning_rate": 2.0857908847184987e-05, + "loss": 0.2005, + "step": 2224 + }, + { + "epoch": 5.89, + "learning_rate": 2.0844504021447722e-05, + "loss": 0.0122, + "step": 2225 + }, + { + "epoch": 5.89, + "learning_rate": 2.0831099195710457e-05, + "loss": 0.1178, + "step": 2226 + }, + { + "epoch": 5.89, + "learning_rate": 2.0817694369973193e-05, + "loss": 0.0105, + "step": 2227 + }, + { + "epoch": 5.89, + "learning_rate": 2.0804289544235924e-05, + "loss": 0.0328, + "step": 2228 + }, + { + "epoch": 5.9, + "learning_rate": 2.079088471849866e-05, + "loss": 0.0087, + "step": 2229 + }, + { + "epoch": 5.9, + "learning_rate": 2.0777479892761395e-05, + "loss": 0.0288, + "step": 2230 + }, + { + "epoch": 5.9, + "learning_rate": 2.076407506702413e-05, + "loss": 0.0017, + "step": 2231 + }, + { + "epoch": 5.9, + "learning_rate": 2.0750670241286865e-05, + "loss": 0.002, + "step": 2232 + }, + { + "epoch": 5.91, + "learning_rate": 2.0737265415549597e-05, + "loss": 0.0024, + "step": 2233 + }, + { + "epoch": 5.91, + "learning_rate": 2.072386058981233e-05, + "loss": 0.0008, + "step": 2234 + }, + { + "epoch": 5.91, + "learning_rate": 2.0710455764075067e-05, + "loss": 0.002, + "step": 2235 + }, + { + "epoch": 5.92, + "learning_rate": 2.0697050938337802e-05, + "loss": 0.0052, + "step": 2236 + }, + { + "epoch": 5.92, + "learning_rate": 2.0683646112600537e-05, + "loss": 0.0058, + "step": 2237 + }, + { + "epoch": 5.92, + "learning_rate": 2.0670241286863272e-05, + "loss": 0.0013, + "step": 2238 + }, + { + "epoch": 5.92, + "learning_rate": 2.0656836461126007e-05, + "loss": 0.0158, + "step": 2239 + }, + { + "epoch": 5.93, + "learning_rate": 2.0643431635388742e-05, + "loss": 0.229, + "step": 2240 + }, + { + "epoch": 5.93, + "learning_rate": 2.0630026809651478e-05, + "loss": 0.1844, + "step": 2241 + }, + { + "epoch": 5.93, + "learning_rate": 2.0616621983914213e-05, + "loss": 0.2905, + "step": 2242 + }, + { + "epoch": 5.93, + "learning_rate": 2.0603217158176944e-05, + "loss": 0.0059, + "step": 2243 + }, + { + "epoch": 5.94, + "learning_rate": 2.058981233243968e-05, + "loss": 0.0007, + "step": 2244 + }, + { + "epoch": 5.94, + "learning_rate": 2.0576407506702415e-05, + "loss": 0.1638, + "step": 2245 + }, + { + "epoch": 5.94, + "learning_rate": 2.056300268096515e-05, + "loss": 0.1195, + "step": 2246 + }, + { + "epoch": 5.94, + "learning_rate": 2.0549597855227885e-05, + "loss": 0.0015, + "step": 2247 + }, + { + "epoch": 5.95, + "learning_rate": 2.0536193029490617e-05, + "loss": 0.0013, + "step": 2248 + }, + { + "epoch": 5.95, + "learning_rate": 2.0522788203753352e-05, + "loss": 0.5152, + "step": 2249 + }, + { + "epoch": 5.95, + "learning_rate": 2.0509383378016087e-05, + "loss": 0.0315, + "step": 2250 + }, + { + "epoch": 5.96, + "learning_rate": 2.0495978552278822e-05, + "loss": 0.1213, + "step": 2251 + }, + { + "epoch": 5.96, + "learning_rate": 2.0482573726541557e-05, + "loss": 0.0006, + "step": 2252 + }, + { + "epoch": 5.96, + "learning_rate": 2.046916890080429e-05, + "loss": 0.0011, + "step": 2253 + }, + { + "epoch": 5.96, + "learning_rate": 2.0455764075067024e-05, + "loss": 0.2546, + "step": 2254 + }, + { + "epoch": 5.97, + "learning_rate": 2.044235924932976e-05, + "loss": 0.1259, + "step": 2255 + }, + { + "epoch": 5.97, + "learning_rate": 2.0428954423592494e-05, + "loss": 0.0179, + "step": 2256 + }, + { + "epoch": 5.97, + "learning_rate": 2.041554959785523e-05, + "loss": 0.0257, + "step": 2257 + }, + { + "epoch": 5.97, + "learning_rate": 2.0402144772117965e-05, + "loss": 0.0092, + "step": 2258 + }, + { + "epoch": 5.98, + "learning_rate": 2.0388739946380696e-05, + "loss": 0.3231, + "step": 2259 + }, + { + "epoch": 5.98, + "learning_rate": 2.037533512064343e-05, + "loss": 0.0084, + "step": 2260 + }, + { + "epoch": 5.98, + "learning_rate": 2.0361930294906167e-05, + "loss": 0.0517, + "step": 2261 + }, + { + "epoch": 5.98, + "learning_rate": 2.0348525469168902e-05, + "loss": 0.0012, + "step": 2262 + }, + { + "epoch": 5.99, + "learning_rate": 2.0335120643431637e-05, + "loss": 0.0045, + "step": 2263 + }, + { + "epoch": 5.99, + "learning_rate": 2.032171581769437e-05, + "loss": 0.0012, + "step": 2264 + }, + { + "epoch": 5.99, + "learning_rate": 2.0308310991957104e-05, + "loss": 0.0032, + "step": 2265 + }, + { + "epoch": 5.99, + "learning_rate": 2.029490616621984e-05, + "loss": 0.0038, + "step": 2266 + }, + { + "epoch": 6.0, + "learning_rate": 2.0281501340482574e-05, + "loss": 0.2731, + "step": 2267 + }, + { + "epoch": 6.0, + "learning_rate": 2.026809651474531e-05, + "loss": 0.3122, + "step": 2268 + }, + { + "epoch": 6.0, + "eval_f1": 0.7762762762762763, + "eval_loss": 1.0977023839950562, + "eval_runtime": 1.8624, + "eval_samples_per_second": 812.383, + "eval_steps_per_second": 51.009, + "step": 2268 + }, + { + "epoch": 6.0, + "learning_rate": 2.0254691689008044e-05, + "loss": 0.0035, + "step": 2269 + }, + { + "epoch": 6.01, + "learning_rate": 2.024128686327078e-05, + "loss": 0.0709, + "step": 2270 + }, + { + "epoch": 6.01, + "learning_rate": 2.0227882037533515e-05, + "loss": 0.1548, + "step": 2271 + }, + { + "epoch": 6.01, + "learning_rate": 2.021447721179625e-05, + "loss": 0.0035, + "step": 2272 + }, + { + "epoch": 6.01, + "learning_rate": 2.0201072386058985e-05, + "loss": 0.0009, + "step": 2273 + }, + { + "epoch": 6.02, + "learning_rate": 2.0187667560321717e-05, + "loss": 0.0061, + "step": 2274 + }, + { + "epoch": 6.02, + "learning_rate": 2.0174262734584452e-05, + "loss": 0.3586, + "step": 2275 + }, + { + "epoch": 6.02, + "learning_rate": 2.0160857908847187e-05, + "loss": 0.0019, + "step": 2276 + }, + { + "epoch": 6.02, + "learning_rate": 2.0147453083109922e-05, + "loss": 0.0077, + "step": 2277 + }, + { + "epoch": 6.03, + "learning_rate": 2.0134048257372657e-05, + "loss": 0.0022, + "step": 2278 + }, + { + "epoch": 6.03, + "learning_rate": 2.012064343163539e-05, + "loss": 0.0021, + "step": 2279 + }, + { + "epoch": 6.03, + "learning_rate": 2.0107238605898124e-05, + "loss": 0.0022, + "step": 2280 + }, + { + "epoch": 6.03, + "learning_rate": 2.009383378016086e-05, + "loss": 0.1387, + "step": 2281 + }, + { + "epoch": 6.04, + "learning_rate": 2.0080428954423594e-05, + "loss": 0.0034, + "step": 2282 + }, + { + "epoch": 6.04, + "learning_rate": 2.006702412868633e-05, + "loss": 0.0128, + "step": 2283 + }, + { + "epoch": 6.04, + "learning_rate": 2.005361930294906e-05, + "loss": 0.0201, + "step": 2284 + }, + { + "epoch": 6.04, + "learning_rate": 2.0040214477211796e-05, + "loss": 0.0072, + "step": 2285 + }, + { + "epoch": 6.05, + "learning_rate": 2.002680965147453e-05, + "loss": 0.0031, + "step": 2286 + }, + { + "epoch": 6.05, + "learning_rate": 2.0013404825737267e-05, + "loss": 0.0273, + "step": 2287 + }, + { + "epoch": 6.05, + "learning_rate": 2e-05, + "loss": 0.0679, + "step": 2288 + }, + { + "epoch": 6.06, + "learning_rate": 1.9986595174262733e-05, + "loss": 0.0012, + "step": 2289 + }, + { + "epoch": 6.06, + "learning_rate": 1.997319034852547e-05, + "loss": 0.0045, + "step": 2290 + }, + { + "epoch": 6.06, + "learning_rate": 1.9959785522788204e-05, + "loss": 0.0289, + "step": 2291 + }, + { + "epoch": 6.06, + "learning_rate": 1.994638069705094e-05, + "loss": 0.1421, + "step": 2292 + }, + { + "epoch": 6.07, + "learning_rate": 1.9932975871313674e-05, + "loss": 0.0007, + "step": 2293 + }, + { + "epoch": 6.07, + "learning_rate": 1.9919571045576406e-05, + "loss": 0.0037, + "step": 2294 + }, + { + "epoch": 6.07, + "learning_rate": 1.990616621983914e-05, + "loss": 0.1566, + "step": 2295 + }, + { + "epoch": 6.07, + "learning_rate": 1.9892761394101876e-05, + "loss": 0.0008, + "step": 2296 + }, + { + "epoch": 6.08, + "learning_rate": 1.987935656836461e-05, + "loss": 0.0008, + "step": 2297 + }, + { + "epoch": 6.08, + "learning_rate": 1.9865951742627346e-05, + "loss": 0.1319, + "step": 2298 + }, + { + "epoch": 6.08, + "learning_rate": 1.985254691689008e-05, + "loss": 0.0007, + "step": 2299 + }, + { + "epoch": 6.08, + "learning_rate": 1.9839142091152816e-05, + "loss": 0.0048, + "step": 2300 + }, + { + "epoch": 6.09, + "learning_rate": 1.982573726541555e-05, + "loss": 0.223, + "step": 2301 + }, + { + "epoch": 6.09, + "learning_rate": 1.9812332439678287e-05, + "loss": 0.2188, + "step": 2302 + }, + { + "epoch": 6.09, + "learning_rate": 1.9798927613941022e-05, + "loss": 0.0209, + "step": 2303 + }, + { + "epoch": 6.1, + "learning_rate": 1.9785522788203754e-05, + "loss": 0.001, + "step": 2304 + }, + { + "epoch": 6.1, + "learning_rate": 1.977211796246649e-05, + "loss": 0.0006, + "step": 2305 + }, + { + "epoch": 6.1, + "learning_rate": 1.9758713136729224e-05, + "loss": 0.0011, + "step": 2306 + }, + { + "epoch": 6.1, + "learning_rate": 1.974530831099196e-05, + "loss": 0.0008, + "step": 2307 + }, + { + "epoch": 6.11, + "learning_rate": 1.9731903485254694e-05, + "loss": 0.0015, + "step": 2308 + }, + { + "epoch": 6.11, + "learning_rate": 1.9718498659517426e-05, + "loss": 0.0024, + "step": 2309 + }, + { + "epoch": 6.11, + "learning_rate": 1.970509383378016e-05, + "loss": 0.0006, + "step": 2310 + }, + { + "epoch": 6.11, + "learning_rate": 1.9691689008042896e-05, + "loss": 0.0007, + "step": 2311 + }, + { + "epoch": 6.12, + "learning_rate": 1.967828418230563e-05, + "loss": 0.0026, + "step": 2312 + }, + { + "epoch": 6.12, + "learning_rate": 1.9664879356568366e-05, + "loss": 0.0019, + "step": 2313 + }, + { + "epoch": 6.12, + "learning_rate": 1.9651474530831098e-05, + "loss": 0.0055, + "step": 2314 + }, + { + "epoch": 6.12, + "learning_rate": 1.9638069705093833e-05, + "loss": 0.0023, + "step": 2315 + }, + { + "epoch": 6.13, + "learning_rate": 1.962466487935657e-05, + "loss": 0.0011, + "step": 2316 + }, + { + "epoch": 6.13, + "learning_rate": 1.9611260053619303e-05, + "loss": 0.0407, + "step": 2317 + }, + { + "epoch": 6.13, + "learning_rate": 1.959785522788204e-05, + "loss": 0.046, + "step": 2318 + }, + { + "epoch": 6.13, + "learning_rate": 1.958445040214477e-05, + "loss": 0.0062, + "step": 2319 + }, + { + "epoch": 6.14, + "learning_rate": 1.9571045576407505e-05, + "loss": 0.0007, + "step": 2320 + }, + { + "epoch": 6.14, + "learning_rate": 1.955764075067024e-05, + "loss": 0.3377, + "step": 2321 + }, + { + "epoch": 6.14, + "learning_rate": 1.9544235924932976e-05, + "loss": 0.4294, + "step": 2322 + }, + { + "epoch": 6.15, + "learning_rate": 1.953083109919571e-05, + "loss": 0.0016, + "step": 2323 + }, + { + "epoch": 6.15, + "learning_rate": 1.9517426273458446e-05, + "loss": 0.0283, + "step": 2324 + }, + { + "epoch": 6.15, + "learning_rate": 1.950402144772118e-05, + "loss": 0.0005, + "step": 2325 + }, + { + "epoch": 6.15, + "learning_rate": 1.9490616621983916e-05, + "loss": 0.0011, + "step": 2326 + }, + { + "epoch": 6.16, + "learning_rate": 1.947721179624665e-05, + "loss": 0.0237, + "step": 2327 + }, + { + "epoch": 6.16, + "learning_rate": 1.9463806970509387e-05, + "loss": 0.0581, + "step": 2328 + }, + { + "epoch": 6.16, + "learning_rate": 1.945040214477212e-05, + "loss": 0.0905, + "step": 2329 + }, + { + "epoch": 6.16, + "learning_rate": 1.9436997319034853e-05, + "loss": 0.0012, + "step": 2330 + }, + { + "epoch": 6.17, + "learning_rate": 1.942359249329759e-05, + "loss": 0.0053, + "step": 2331 + }, + { + "epoch": 6.17, + "learning_rate": 1.9410187667560324e-05, + "loss": 0.0225, + "step": 2332 + }, + { + "epoch": 6.17, + "learning_rate": 1.939678284182306e-05, + "loss": 0.0374, + "step": 2333 + }, + { + "epoch": 6.17, + "learning_rate": 1.9383378016085794e-05, + "loss": 0.0006, + "step": 2334 + }, + { + "epoch": 6.18, + "learning_rate": 1.9369973190348526e-05, + "loss": 0.0008, + "step": 2335 + }, + { + "epoch": 6.18, + "learning_rate": 1.935656836461126e-05, + "loss": 0.0006, + "step": 2336 + }, + { + "epoch": 6.18, + "learning_rate": 1.9343163538873996e-05, + "loss": 0.0011, + "step": 2337 + }, + { + "epoch": 6.19, + "learning_rate": 1.932975871313673e-05, + "loss": 0.0014, + "step": 2338 + }, + { + "epoch": 6.19, + "learning_rate": 1.9316353887399466e-05, + "loss": 0.0006, + "step": 2339 + }, + { + "epoch": 6.19, + "learning_rate": 1.9302949061662198e-05, + "loss": 0.0504, + "step": 2340 + }, + { + "epoch": 6.19, + "learning_rate": 1.9289544235924933e-05, + "loss": 0.005, + "step": 2341 + }, + { + "epoch": 6.2, + "learning_rate": 1.9276139410187668e-05, + "loss": 0.2673, + "step": 2342 + }, + { + "epoch": 6.2, + "learning_rate": 1.9262734584450403e-05, + "loss": 0.2173, + "step": 2343 + }, + { + "epoch": 6.2, + "learning_rate": 1.924932975871314e-05, + "loss": 0.0235, + "step": 2344 + }, + { + "epoch": 6.2, + "learning_rate": 1.923592493297587e-05, + "loss": 0.0092, + "step": 2345 + }, + { + "epoch": 6.21, + "learning_rate": 1.9222520107238605e-05, + "loss": 0.0088, + "step": 2346 + }, + { + "epoch": 6.21, + "learning_rate": 1.920911528150134e-05, + "loss": 0.0506, + "step": 2347 + }, + { + "epoch": 6.21, + "learning_rate": 1.9195710455764076e-05, + "loss": 0.0033, + "step": 2348 + }, + { + "epoch": 6.21, + "learning_rate": 1.918230563002681e-05, + "loss": 0.0006, + "step": 2349 + }, + { + "epoch": 6.22, + "learning_rate": 1.9168900804289542e-05, + "loss": 0.0021, + "step": 2350 + }, + { + "epoch": 6.22, + "learning_rate": 1.9155495978552278e-05, + "loss": 0.0006, + "step": 2351 + }, + { + "epoch": 6.22, + "learning_rate": 1.9142091152815013e-05, + "loss": 0.0041, + "step": 2352 + }, + { + "epoch": 6.22, + "learning_rate": 1.9128686327077748e-05, + "loss": 0.0209, + "step": 2353 + }, + { + "epoch": 6.23, + "learning_rate": 1.9115281501340483e-05, + "loss": 0.0145, + "step": 2354 + }, + { + "epoch": 6.23, + "learning_rate": 1.9101876675603218e-05, + "loss": 0.0303, + "step": 2355 + }, + { + "epoch": 6.23, + "learning_rate": 1.9088471849865953e-05, + "loss": 0.0951, + "step": 2356 + }, + { + "epoch": 6.24, + "learning_rate": 1.907506702412869e-05, + "loss": 0.042, + "step": 2357 + }, + { + "epoch": 6.24, + "learning_rate": 1.9061662198391424e-05, + "loss": 0.0009, + "step": 2358 + }, + { + "epoch": 6.24, + "learning_rate": 1.904825737265416e-05, + "loss": 0.0006, + "step": 2359 + }, + { + "epoch": 6.24, + "learning_rate": 1.903485254691689e-05, + "loss": 0.0057, + "step": 2360 + }, + { + "epoch": 6.25, + "learning_rate": 1.9021447721179626e-05, + "loss": 0.0578, + "step": 2361 + }, + { + "epoch": 6.25, + "learning_rate": 1.900804289544236e-05, + "loss": 0.3295, + "step": 2362 + }, + { + "epoch": 6.25, + "learning_rate": 1.8994638069705096e-05, + "loss": 0.0005, + "step": 2363 + }, + { + "epoch": 6.25, + "learning_rate": 1.898123324396783e-05, + "loss": 0.0009, + "step": 2364 + }, + { + "epoch": 6.26, + "learning_rate": 1.8967828418230563e-05, + "loss": 0.0036, + "step": 2365 + }, + { + "epoch": 6.26, + "learning_rate": 1.8954423592493298e-05, + "loss": 0.0029, + "step": 2366 + }, + { + "epoch": 6.26, + "learning_rate": 1.8941018766756033e-05, + "loss": 0.1468, + "step": 2367 + }, + { + "epoch": 6.26, + "learning_rate": 1.8927613941018768e-05, + "loss": 0.3163, + "step": 2368 + }, + { + "epoch": 6.27, + "learning_rate": 1.8914209115281503e-05, + "loss": 0.169, + "step": 2369 + }, + { + "epoch": 6.27, + "learning_rate": 1.8900804289544235e-05, + "loss": 0.0493, + "step": 2370 + }, + { + "epoch": 6.27, + "learning_rate": 1.888739946380697e-05, + "loss": 0.0003, + "step": 2371 + }, + { + "epoch": 6.28, + "learning_rate": 1.8873994638069705e-05, + "loss": 0.0015, + "step": 2372 + }, + { + "epoch": 6.28, + "learning_rate": 1.886058981233244e-05, + "loss": 0.258, + "step": 2373 + }, + { + "epoch": 6.28, + "learning_rate": 1.8847184986595175e-05, + "loss": 0.002, + "step": 2374 + }, + { + "epoch": 6.28, + "learning_rate": 1.8833780160857907e-05, + "loss": 0.0008, + "step": 2375 + }, + { + "epoch": 6.29, + "learning_rate": 1.8820375335120642e-05, + "loss": 0.0003, + "step": 2376 + }, + { + "epoch": 6.29, + "learning_rate": 1.8806970509383377e-05, + "loss": 0.0014, + "step": 2377 + }, + { + "epoch": 6.29, + "learning_rate": 1.8793565683646113e-05, + "loss": 0.0068, + "step": 2378 + }, + { + "epoch": 6.29, + "learning_rate": 1.8780160857908848e-05, + "loss": 0.39, + "step": 2379 + }, + { + "epoch": 6.3, + "learning_rate": 1.8766756032171583e-05, + "loss": 0.0046, + "step": 2380 + }, + { + "epoch": 6.3, + "learning_rate": 1.8753351206434318e-05, + "loss": 0.0008, + "step": 2381 + }, + { + "epoch": 6.3, + "learning_rate": 1.8739946380697053e-05, + "loss": 0.0007, + "step": 2382 + }, + { + "epoch": 6.3, + "learning_rate": 1.8726541554959788e-05, + "loss": 0.0013, + "step": 2383 + }, + { + "epoch": 6.31, + "learning_rate": 1.8713136729222523e-05, + "loss": 0.0055, + "step": 2384 + }, + { + "epoch": 6.31, + "learning_rate": 1.869973190348526e-05, + "loss": 0.0014, + "step": 2385 + }, + { + "epoch": 6.31, + "learning_rate": 1.868632707774799e-05, + "loss": 0.0039, + "step": 2386 + }, + { + "epoch": 6.31, + "learning_rate": 1.8672922252010725e-05, + "loss": 0.0995, + "step": 2387 + }, + { + "epoch": 6.32, + "learning_rate": 1.865951742627346e-05, + "loss": 0.0015, + "step": 2388 + }, + { + "epoch": 6.32, + "learning_rate": 1.8646112600536196e-05, + "loss": 0.083, + "step": 2389 + }, + { + "epoch": 6.32, + "learning_rate": 1.863270777479893e-05, + "loss": 0.0044, + "step": 2390 + }, + { + "epoch": 6.33, + "learning_rate": 1.8619302949061662e-05, + "loss": 0.0105, + "step": 2391 + }, + { + "epoch": 6.33, + "learning_rate": 1.8605898123324398e-05, + "loss": 0.0005, + "step": 2392 + }, + { + "epoch": 6.33, + "learning_rate": 1.8592493297587133e-05, + "loss": 0.0014, + "step": 2393 + }, + { + "epoch": 6.33, + "learning_rate": 1.8579088471849868e-05, + "loss": 0.0114, + "step": 2394 + }, + { + "epoch": 6.34, + "learning_rate": 1.8565683646112603e-05, + "loss": 0.0334, + "step": 2395 + }, + { + "epoch": 6.34, + "learning_rate": 1.8552278820375335e-05, + "loss": 0.006, + "step": 2396 + }, + { + "epoch": 6.34, + "learning_rate": 1.853887399463807e-05, + "loss": 0.3124, + "step": 2397 + }, + { + "epoch": 6.34, + "learning_rate": 1.8525469168900805e-05, + "loss": 0.2324, + "step": 2398 + }, + { + "epoch": 6.35, + "learning_rate": 1.851206434316354e-05, + "loss": 0.0889, + "step": 2399 + }, + { + "epoch": 6.35, + "learning_rate": 1.8498659517426275e-05, + "loss": 0.0705, + "step": 2400 + }, + { + "epoch": 6.35, + "learning_rate": 1.8485254691689007e-05, + "loss": 0.0012, + "step": 2401 + }, + { + "epoch": 6.35, + "learning_rate": 1.8471849865951742e-05, + "loss": 0.0033, + "step": 2402 + }, + { + "epoch": 6.36, + "learning_rate": 1.8458445040214477e-05, + "loss": 0.0021, + "step": 2403 + }, + { + "epoch": 6.36, + "learning_rate": 1.8445040214477212e-05, + "loss": 0.0005, + "step": 2404 + }, + { + "epoch": 6.36, + "learning_rate": 1.8431635388739948e-05, + "loss": 0.0011, + "step": 2405 + }, + { + "epoch": 6.37, + "learning_rate": 1.841823056300268e-05, + "loss": 0.0006, + "step": 2406 + }, + { + "epoch": 6.37, + "learning_rate": 1.8404825737265414e-05, + "loss": 0.0726, + "step": 2407 + }, + { + "epoch": 6.37, + "learning_rate": 1.839142091152815e-05, + "loss": 0.0009, + "step": 2408 + }, + { + "epoch": 6.37, + "learning_rate": 1.8378016085790885e-05, + "loss": 0.0007, + "step": 2409 + }, + { + "epoch": 6.38, + "learning_rate": 1.836461126005362e-05, + "loss": 0.0859, + "step": 2410 + }, + { + "epoch": 6.38, + "learning_rate": 1.8351206434316355e-05, + "loss": 0.0011, + "step": 2411 + }, + { + "epoch": 6.38, + "learning_rate": 1.833780160857909e-05, + "loss": 0.6542, + "step": 2412 + }, + { + "epoch": 6.38, + "learning_rate": 1.8324396782841825e-05, + "loss": 0.2733, + "step": 2413 + }, + { + "epoch": 6.39, + "learning_rate": 1.831099195710456e-05, + "loss": 0.2825, + "step": 2414 + }, + { + "epoch": 6.39, + "learning_rate": 1.8297587131367295e-05, + "loss": 0.0012, + "step": 2415 + }, + { + "epoch": 6.39, + "learning_rate": 1.8284182305630027e-05, + "loss": 0.1404, + "step": 2416 + }, + { + "epoch": 6.39, + "learning_rate": 1.8270777479892762e-05, + "loss": 0.0006, + "step": 2417 + }, + { + "epoch": 6.4, + "learning_rate": 1.8257372654155497e-05, + "loss": 0.0007, + "step": 2418 + }, + { + "epoch": 6.4, + "learning_rate": 1.8243967828418233e-05, + "loss": 0.1429, + "step": 2419 + }, + { + "epoch": 6.4, + "learning_rate": 1.8230563002680968e-05, + "loss": 0.0008, + "step": 2420 + }, + { + "epoch": 6.4, + "learning_rate": 1.82171581769437e-05, + "loss": 0.0062, + "step": 2421 + }, + { + "epoch": 6.41, + "learning_rate": 1.8203753351206435e-05, + "loss": 0.0071, + "step": 2422 + }, + { + "epoch": 6.41, + "learning_rate": 1.819034852546917e-05, + "loss": 0.0017, + "step": 2423 + }, + { + "epoch": 6.41, + "learning_rate": 1.8176943699731905e-05, + "loss": 0.084, + "step": 2424 + }, + { + "epoch": 6.42, + "learning_rate": 1.816353887399464e-05, + "loss": 0.0011, + "step": 2425 + }, + { + "epoch": 6.42, + "learning_rate": 1.8150134048257372e-05, + "loss": 0.0255, + "step": 2426 + }, + { + "epoch": 6.42, + "learning_rate": 1.8136729222520107e-05, + "loss": 0.0009, + "step": 2427 + }, + { + "epoch": 6.42, + "learning_rate": 1.8123324396782842e-05, + "loss": 0.3105, + "step": 2428 + }, + { + "epoch": 6.43, + "learning_rate": 1.8109919571045577e-05, + "loss": 0.0046, + "step": 2429 + }, + { + "epoch": 6.43, + "learning_rate": 1.8096514745308312e-05, + "loss": 0.0089, + "step": 2430 + }, + { + "epoch": 6.43, + "learning_rate": 1.8083109919571044e-05, + "loss": 0.1176, + "step": 2431 + }, + { + "epoch": 6.43, + "learning_rate": 1.806970509383378e-05, + "loss": 0.0235, + "step": 2432 + }, + { + "epoch": 6.44, + "learning_rate": 1.8056300268096514e-05, + "loss": 0.029, + "step": 2433 + }, + { + "epoch": 6.44, + "learning_rate": 1.804289544235925e-05, + "loss": 0.0013, + "step": 2434 + }, + { + "epoch": 6.44, + "learning_rate": 1.8029490616621985e-05, + "loss": 0.0075, + "step": 2435 + }, + { + "epoch": 6.44, + "learning_rate": 1.801608579088472e-05, + "loss": 0.1744, + "step": 2436 + }, + { + "epoch": 6.45, + "learning_rate": 1.8002680965147455e-05, + "loss": 0.0017, + "step": 2437 + }, + { + "epoch": 6.45, + "learning_rate": 1.798927613941019e-05, + "loss": 0.0188, + "step": 2438 + }, + { + "epoch": 6.45, + "learning_rate": 1.7975871313672925e-05, + "loss": 0.0232, + "step": 2439 + }, + { + "epoch": 6.46, + "learning_rate": 1.796246648793566e-05, + "loss": 0.1459, + "step": 2440 + }, + { + "epoch": 6.46, + "learning_rate": 1.7949061662198392e-05, + "loss": 0.0007, + "step": 2441 + }, + { + "epoch": 6.46, + "learning_rate": 1.7935656836461127e-05, + "loss": 0.0005, + "step": 2442 + }, + { + "epoch": 6.46, + "learning_rate": 1.7922252010723862e-05, + "loss": 0.0012, + "step": 2443 + }, + { + "epoch": 6.47, + "learning_rate": 1.7908847184986597e-05, + "loss": 0.0041, + "step": 2444 + }, + { + "epoch": 6.47, + "learning_rate": 1.7895442359249332e-05, + "loss": 0.4884, + "step": 2445 + }, + { + "epoch": 6.47, + "learning_rate": 1.7882037533512068e-05, + "loss": 0.0017, + "step": 2446 + }, + { + "epoch": 6.47, + "learning_rate": 1.78686327077748e-05, + "loss": 0.0566, + "step": 2447 + }, + { + "epoch": 6.48, + "learning_rate": 1.7855227882037534e-05, + "loss": 0.012, + "step": 2448 + }, + { + "epoch": 6.48, + "learning_rate": 1.784182305630027e-05, + "loss": 0.001, + "step": 2449 + }, + { + "epoch": 6.48, + "learning_rate": 1.7828418230563005e-05, + "loss": 0.0028, + "step": 2450 + }, + { + "epoch": 6.48, + "learning_rate": 1.781501340482574e-05, + "loss": 0.4622, + "step": 2451 + }, + { + "epoch": 6.49, + "learning_rate": 1.780160857908847e-05, + "loss": 0.0042, + "step": 2452 + }, + { + "epoch": 6.49, + "learning_rate": 1.7788203753351207e-05, + "loss": 0.0176, + "step": 2453 + }, + { + "epoch": 6.49, + "learning_rate": 1.7774798927613942e-05, + "loss": 0.0012, + "step": 2454 + }, + { + "epoch": 6.49, + "learning_rate": 1.7761394101876677e-05, + "loss": 0.0344, + "step": 2455 + }, + { + "epoch": 6.5, + "learning_rate": 1.7747989276139412e-05, + "loss": 0.1278, + "step": 2456 + }, + { + "epoch": 6.5, + "learning_rate": 1.7734584450402144e-05, + "loss": 0.0017, + "step": 2457 + }, + { + "epoch": 6.5, + "learning_rate": 1.772117962466488e-05, + "loss": 0.0044, + "step": 2458 + }, + { + "epoch": 6.51, + "learning_rate": 1.7707774798927614e-05, + "loss": 0.0016, + "step": 2459 + }, + { + "epoch": 6.51, + "learning_rate": 1.769436997319035e-05, + "loss": 0.0799, + "step": 2460 + }, + { + "epoch": 6.51, + "learning_rate": 1.7680965147453084e-05, + "loss": 0.0066, + "step": 2461 + }, + { + "epoch": 6.51, + "learning_rate": 1.7667560321715816e-05, + "loss": 0.1607, + "step": 2462 + }, + { + "epoch": 6.52, + "learning_rate": 1.765415549597855e-05, + "loss": 0.0742, + "step": 2463 + }, + { + "epoch": 6.52, + "learning_rate": 1.7640750670241286e-05, + "loss": 0.0005, + "step": 2464 + }, + { + "epoch": 6.52, + "learning_rate": 1.762734584450402e-05, + "loss": 0.0006, + "step": 2465 + }, + { + "epoch": 6.52, + "learning_rate": 1.7613941018766757e-05, + "loss": 0.0019, + "step": 2466 + }, + { + "epoch": 6.53, + "learning_rate": 1.7600536193029492e-05, + "loss": 0.0009, + "step": 2467 + }, + { + "epoch": 6.53, + "learning_rate": 1.7587131367292227e-05, + "loss": 0.0023, + "step": 2468 + }, + { + "epoch": 6.53, + "learning_rate": 1.7573726541554962e-05, + "loss": 0.0202, + "step": 2469 + }, + { + "epoch": 6.53, + "learning_rate": 1.7560321715817697e-05, + "loss": 0.0223, + "step": 2470 + }, + { + "epoch": 6.54, + "learning_rate": 1.7546916890080432e-05, + "loss": 0.0009, + "step": 2471 + }, + { + "epoch": 6.54, + "learning_rate": 1.7533512064343164e-05, + "loss": 0.0014, + "step": 2472 + }, + { + "epoch": 6.54, + "learning_rate": 1.75201072386059e-05, + "loss": 0.0514, + "step": 2473 + }, + { + "epoch": 6.54, + "learning_rate": 1.7506702412868634e-05, + "loss": 0.0013, + "step": 2474 + }, + { + "epoch": 6.55, + "learning_rate": 1.749329758713137e-05, + "loss": 0.0087, + "step": 2475 + }, + { + "epoch": 6.55, + "learning_rate": 1.7479892761394105e-05, + "loss": 0.0035, + "step": 2476 + }, + { + "epoch": 6.55, + "learning_rate": 1.7466487935656836e-05, + "loss": 0.0397, + "step": 2477 + }, + { + "epoch": 6.56, + "learning_rate": 1.745308310991957e-05, + "loss": 0.0021, + "step": 2478 + }, + { + "epoch": 6.56, + "learning_rate": 1.7439678284182307e-05, + "loss": 0.052, + "step": 2479 + }, + { + "epoch": 6.56, + "learning_rate": 1.742627345844504e-05, + "loss": 0.0027, + "step": 2480 + }, + { + "epoch": 6.56, + "learning_rate": 1.7412868632707777e-05, + "loss": 0.001, + "step": 2481 + }, + { + "epoch": 6.57, + "learning_rate": 1.739946380697051e-05, + "loss": 0.2899, + "step": 2482 + }, + { + "epoch": 6.57, + "learning_rate": 1.7386058981233244e-05, + "loss": 0.0007, + "step": 2483 + }, + { + "epoch": 6.57, + "learning_rate": 1.737265415549598e-05, + "loss": 0.0704, + "step": 2484 + }, + { + "epoch": 6.57, + "learning_rate": 1.7359249329758714e-05, + "loss": 0.001, + "step": 2485 + }, + { + "epoch": 6.58, + "learning_rate": 1.734584450402145e-05, + "loss": 0.057, + "step": 2486 + }, + { + "epoch": 6.58, + "learning_rate": 1.733243967828418e-05, + "loss": 0.0002, + "step": 2487 + }, + { + "epoch": 6.58, + "learning_rate": 1.7319034852546916e-05, + "loss": 0.0064, + "step": 2488 + }, + { + "epoch": 6.58, + "learning_rate": 1.730563002680965e-05, + "loss": 0.0638, + "step": 2489 + }, + { + "epoch": 6.59, + "learning_rate": 1.7292225201072386e-05, + "loss": 0.0006, + "step": 2490 + }, + { + "epoch": 6.59, + "learning_rate": 1.727882037533512e-05, + "loss": 0.0142, + "step": 2491 + }, + { + "epoch": 6.59, + "learning_rate": 1.7265415549597856e-05, + "loss": 0.0015, + "step": 2492 + }, + { + "epoch": 6.6, + "learning_rate": 1.725201072386059e-05, + "loss": 0.0949, + "step": 2493 + }, + { + "epoch": 6.6, + "learning_rate": 1.7238605898123327e-05, + "loss": 0.0004, + "step": 2494 + }, + { + "epoch": 6.6, + "learning_rate": 1.7225201072386062e-05, + "loss": 0.0111, + "step": 2495 + }, + { + "epoch": 6.6, + "learning_rate": 1.7211796246648797e-05, + "loss": 0.0215, + "step": 2496 + }, + { + "epoch": 6.61, + "learning_rate": 1.719839142091153e-05, + "loss": 0.266, + "step": 2497 + }, + { + "epoch": 6.61, + "learning_rate": 1.7184986595174264e-05, + "loss": 0.4487, + "step": 2498 + }, + { + "epoch": 6.61, + "learning_rate": 1.7171581769437e-05, + "loss": 0.0021, + "step": 2499 + }, + { + "epoch": 6.61, + "learning_rate": 1.7158176943699734e-05, + "loss": 0.0004, + "step": 2500 + }, + { + "epoch": 6.62, + "learning_rate": 1.714477211796247e-05, + "loss": 0.0004, + "step": 2501 + }, + { + "epoch": 6.62, + "learning_rate": 1.71313672922252e-05, + "loss": 0.0011, + "step": 2502 + }, + { + "epoch": 6.62, + "learning_rate": 1.7117962466487936e-05, + "loss": 0.0006, + "step": 2503 + }, + { + "epoch": 6.62, + "learning_rate": 1.710455764075067e-05, + "loss": 0.1005, + "step": 2504 + }, + { + "epoch": 6.63, + "learning_rate": 1.7091152815013406e-05, + "loss": 0.0472, + "step": 2505 + }, + { + "epoch": 6.63, + "learning_rate": 1.707774798927614e-05, + "loss": 0.0004, + "step": 2506 + }, + { + "epoch": 6.63, + "learning_rate": 1.7064343163538877e-05, + "loss": 0.0162, + "step": 2507 + }, + { + "epoch": 6.63, + "learning_rate": 1.705093833780161e-05, + "loss": 0.004, + "step": 2508 + }, + { + "epoch": 6.64, + "learning_rate": 1.7037533512064344e-05, + "loss": 0.0007, + "step": 2509 + }, + { + "epoch": 6.64, + "learning_rate": 1.702412868632708e-05, + "loss": 0.1447, + "step": 2510 + }, + { + "epoch": 6.64, + "learning_rate": 1.7010723860589814e-05, + "loss": 0.0006, + "step": 2511 + }, + { + "epoch": 6.65, + "learning_rate": 1.699731903485255e-05, + "loss": 0.0002, + "step": 2512 + }, + { + "epoch": 6.65, + "learning_rate": 1.698391420911528e-05, + "loss": 0.0004, + "step": 2513 + }, + { + "epoch": 6.65, + "learning_rate": 1.6970509383378016e-05, + "loss": 0.0017, + "step": 2514 + }, + { + "epoch": 6.65, + "learning_rate": 1.695710455764075e-05, + "loss": 0.4581, + "step": 2515 + }, + { + "epoch": 6.66, + "learning_rate": 1.6943699731903486e-05, + "loss": 0.0005, + "step": 2516 + }, + { + "epoch": 6.66, + "learning_rate": 1.693029490616622e-05, + "loss": 0.0043, + "step": 2517 + }, + { + "epoch": 6.66, + "learning_rate": 1.6916890080428953e-05, + "loss": 0.0005, + "step": 2518 + }, + { + "epoch": 6.66, + "learning_rate": 1.6903485254691688e-05, + "loss": 0.0002, + "step": 2519 + }, + { + "epoch": 6.67, + "learning_rate": 1.6890080428954423e-05, + "loss": 0.0005, + "step": 2520 + }, + { + "epoch": 6.67, + "learning_rate": 1.687667560321716e-05, + "loss": 0.0037, + "step": 2521 + }, + { + "epoch": 6.67, + "learning_rate": 1.6863270777479893e-05, + "loss": 0.0003, + "step": 2522 + }, + { + "epoch": 6.67, + "learning_rate": 1.684986595174263e-05, + "loss": 0.0019, + "step": 2523 + }, + { + "epoch": 6.68, + "learning_rate": 1.6836461126005364e-05, + "loss": 0.0023, + "step": 2524 + }, + { + "epoch": 6.68, + "learning_rate": 1.68230563002681e-05, + "loss": 0.0004, + "step": 2525 + }, + { + "epoch": 6.68, + "learning_rate": 1.6809651474530834e-05, + "loss": 0.3317, + "step": 2526 + }, + { + "epoch": 6.69, + "learning_rate": 1.679624664879357e-05, + "loss": 0.0004, + "step": 2527 + }, + { + "epoch": 6.69, + "learning_rate": 1.67828418230563e-05, + "loss": 0.002, + "step": 2528 + }, + { + "epoch": 6.69, + "learning_rate": 1.6769436997319036e-05, + "loss": 0.0003, + "step": 2529 + }, + { + "epoch": 6.69, + "learning_rate": 1.675603217158177e-05, + "loss": 0.0007, + "step": 2530 + }, + { + "epoch": 6.7, + "learning_rate": 1.6742627345844506e-05, + "loss": 0.0012, + "step": 2531 + }, + { + "epoch": 6.7, + "learning_rate": 1.672922252010724e-05, + "loss": 0.024, + "step": 2532 + }, + { + "epoch": 6.7, + "learning_rate": 1.6715817694369973e-05, + "loss": 0.0041, + "step": 2533 + }, + { + "epoch": 6.7, + "learning_rate": 1.6702412868632708e-05, + "loss": 0.1821, + "step": 2534 + }, + { + "epoch": 6.71, + "learning_rate": 1.6689008042895443e-05, + "loss": 0.0004, + "step": 2535 + }, + { + "epoch": 6.71, + "learning_rate": 1.667560321715818e-05, + "loss": 0.0293, + "step": 2536 + }, + { + "epoch": 6.71, + "learning_rate": 1.6662198391420914e-05, + "loss": 0.0005, + "step": 2537 + }, + { + "epoch": 6.71, + "learning_rate": 1.6648793565683645e-05, + "loss": 0.0043, + "step": 2538 + }, + { + "epoch": 6.72, + "learning_rate": 1.663538873994638e-05, + "loss": 0.0112, + "step": 2539 + }, + { + "epoch": 6.72, + "learning_rate": 1.6621983914209116e-05, + "loss": 0.0003, + "step": 2540 + }, + { + "epoch": 6.72, + "learning_rate": 1.660857908847185e-05, + "loss": 0.0005, + "step": 2541 + }, + { + "epoch": 6.72, + "learning_rate": 1.6595174262734586e-05, + "loss": 0.4617, + "step": 2542 + }, + { + "epoch": 6.73, + "learning_rate": 1.6581769436997318e-05, + "loss": 0.0004, + "step": 2543 + }, + { + "epoch": 6.73, + "learning_rate": 1.6568364611260053e-05, + "loss": 0.1932, + "step": 2544 + }, + { + "epoch": 6.73, + "learning_rate": 1.6554959785522788e-05, + "loss": 0.0004, + "step": 2545 + }, + { + "epoch": 6.74, + "learning_rate": 1.6541554959785523e-05, + "loss": 0.0785, + "step": 2546 + }, + { + "epoch": 6.74, + "learning_rate": 1.6528150134048258e-05, + "loss": 0.0882, + "step": 2547 + }, + { + "epoch": 6.74, + "learning_rate": 1.651474530831099e-05, + "loss": 0.3937, + "step": 2548 + }, + { + "epoch": 6.74, + "learning_rate": 1.6501340482573725e-05, + "loss": 0.3401, + "step": 2549 + }, + { + "epoch": 6.75, + "learning_rate": 1.648793565683646e-05, + "loss": 0.026, + "step": 2550 + }, + { + "epoch": 6.75, + "learning_rate": 1.6474530831099195e-05, + "loss": 0.1959, + "step": 2551 + }, + { + "epoch": 6.75, + "learning_rate": 1.646112600536193e-05, + "loss": 0.0022, + "step": 2552 + }, + { + "epoch": 6.75, + "learning_rate": 1.6447721179624666e-05, + "loss": 0.0012, + "step": 2553 + }, + { + "epoch": 6.76, + "learning_rate": 1.64343163538874e-05, + "loss": 0.0064, + "step": 2554 + }, + { + "epoch": 6.76, + "learning_rate": 1.6420911528150136e-05, + "loss": 0.0105, + "step": 2555 + }, + { + "epoch": 6.76, + "learning_rate": 1.640750670241287e-05, + "loss": 0.0008, + "step": 2556 + }, + { + "epoch": 6.76, + "learning_rate": 1.6394101876675606e-05, + "loss": 0.0339, + "step": 2557 + }, + { + "epoch": 6.77, + "learning_rate": 1.6380697050938338e-05, + "loss": 0.1458, + "step": 2558 + }, + { + "epoch": 6.77, + "learning_rate": 1.6367292225201073e-05, + "loss": 0.2526, + "step": 2559 + }, + { + "epoch": 6.77, + "learning_rate": 1.6353887399463808e-05, + "loss": 0.038, + "step": 2560 + }, + { + "epoch": 6.78, + "learning_rate": 1.6340482573726543e-05, + "loss": 0.174, + "step": 2561 + }, + { + "epoch": 6.78, + "learning_rate": 1.632707774798928e-05, + "loss": 0.1936, + "step": 2562 + }, + { + "epoch": 6.78, + "learning_rate": 1.631367292225201e-05, + "loss": 0.0014, + "step": 2563 + }, + { + "epoch": 6.78, + "learning_rate": 1.6300268096514745e-05, + "loss": 0.0008, + "step": 2564 + }, + { + "epoch": 6.79, + "learning_rate": 1.628686327077748e-05, + "loss": 0.0616, + "step": 2565 + }, + { + "epoch": 6.79, + "learning_rate": 1.6273458445040215e-05, + "loss": 0.0029, + "step": 2566 + }, + { + "epoch": 6.79, + "learning_rate": 1.626005361930295e-05, + "loss": 0.1301, + "step": 2567 + }, + { + "epoch": 6.79, + "learning_rate": 1.6246648793565686e-05, + "loss": 0.0076, + "step": 2568 + }, + { + "epoch": 6.8, + "learning_rate": 1.6233243967828417e-05, + "loss": 0.038, + "step": 2569 + }, + { + "epoch": 6.8, + "learning_rate": 1.6219839142091153e-05, + "loss": 0.0376, + "step": 2570 + }, + { + "epoch": 6.8, + "learning_rate": 1.6206434316353888e-05, + "loss": 0.0007, + "step": 2571 + }, + { + "epoch": 6.8, + "learning_rate": 1.6193029490616623e-05, + "loss": 0.016, + "step": 2572 + }, + { + "epoch": 6.81, + "learning_rate": 1.6179624664879358e-05, + "loss": 0.0005, + "step": 2573 + }, + { + "epoch": 6.81, + "learning_rate": 1.616621983914209e-05, + "loss": 0.0024, + "step": 2574 + }, + { + "epoch": 6.81, + "learning_rate": 1.6152815013404825e-05, + "loss": 0.001, + "step": 2575 + }, + { + "epoch": 6.81, + "learning_rate": 1.613941018766756e-05, + "loss": 0.0009, + "step": 2576 + }, + { + "epoch": 6.82, + "learning_rate": 1.6126005361930295e-05, + "loss": 0.1889, + "step": 2577 + }, + { + "epoch": 6.82, + "learning_rate": 1.611260053619303e-05, + "loss": 0.5094, + "step": 2578 + }, + { + "epoch": 6.82, + "learning_rate": 1.6099195710455765e-05, + "loss": 0.017, + "step": 2579 + }, + { + "epoch": 6.83, + "learning_rate": 1.60857908847185e-05, + "loss": 0.029, + "step": 2580 + }, + { + "epoch": 6.83, + "learning_rate": 1.6072386058981236e-05, + "loss": 0.1249, + "step": 2581 + }, + { + "epoch": 6.83, + "learning_rate": 1.605898123324397e-05, + "loss": 0.2531, + "step": 2582 + }, + { + "epoch": 6.83, + "learning_rate": 1.6045576407506706e-05, + "loss": 0.0006, + "step": 2583 + }, + { + "epoch": 6.84, + "learning_rate": 1.6032171581769438e-05, + "loss": 0.0624, + "step": 2584 + }, + { + "epoch": 6.84, + "learning_rate": 1.6018766756032173e-05, + "loss": 0.0254, + "step": 2585 + }, + { + "epoch": 6.84, + "learning_rate": 1.6005361930294908e-05, + "loss": 0.0034, + "step": 2586 + }, + { + "epoch": 6.84, + "learning_rate": 1.5991957104557643e-05, + "loss": 0.0204, + "step": 2587 + }, + { + "epoch": 6.85, + "learning_rate": 1.5978552278820378e-05, + "loss": 0.002, + "step": 2588 + }, + { + "epoch": 6.85, + "learning_rate": 1.596514745308311e-05, + "loss": 0.0015, + "step": 2589 + }, + { + "epoch": 6.85, + "learning_rate": 1.5951742627345845e-05, + "loss": 0.0465, + "step": 2590 + }, + { + "epoch": 6.85, + "learning_rate": 1.593833780160858e-05, + "loss": 0.1892, + "step": 2591 + }, + { + "epoch": 6.86, + "learning_rate": 1.5924932975871315e-05, + "loss": 0.0932, + "step": 2592 + }, + { + "epoch": 6.86, + "learning_rate": 1.591152815013405e-05, + "loss": 0.0015, + "step": 2593 + }, + { + "epoch": 6.86, + "learning_rate": 1.5898123324396782e-05, + "loss": 0.0062, + "step": 2594 + }, + { + "epoch": 6.87, + "learning_rate": 1.5884718498659517e-05, + "loss": 0.0731, + "step": 2595 + }, + { + "epoch": 6.87, + "learning_rate": 1.5871313672922252e-05, + "loss": 0.002, + "step": 2596 + }, + { + "epoch": 6.87, + "learning_rate": 1.5857908847184988e-05, + "loss": 0.0484, + "step": 2597 + }, + { + "epoch": 6.87, + "learning_rate": 1.5844504021447723e-05, + "loss": 0.0082, + "step": 2598 + }, + { + "epoch": 6.88, + "learning_rate": 1.5831099195710454e-05, + "loss": 0.0213, + "step": 2599 + }, + { + "epoch": 6.88, + "learning_rate": 1.581769436997319e-05, + "loss": 0.1612, + "step": 2600 + }, + { + "epoch": 6.88, + "learning_rate": 1.5804289544235925e-05, + "loss": 0.184, + "step": 2601 + }, + { + "epoch": 6.88, + "learning_rate": 1.579088471849866e-05, + "loss": 0.1413, + "step": 2602 + }, + { + "epoch": 6.89, + "learning_rate": 1.5777479892761395e-05, + "loss": 0.0019, + "step": 2603 + }, + { + "epoch": 6.89, + "learning_rate": 1.5764075067024127e-05, + "loss": 0.0047, + "step": 2604 + }, + { + "epoch": 6.89, + "learning_rate": 1.5750670241286862e-05, + "loss": 0.0409, + "step": 2605 + }, + { + "epoch": 6.89, + "learning_rate": 1.5737265415549597e-05, + "loss": 0.0379, + "step": 2606 + }, + { + "epoch": 6.9, + "learning_rate": 1.5723860589812332e-05, + "loss": 0.0005, + "step": 2607 + }, + { + "epoch": 6.9, + "learning_rate": 1.5710455764075067e-05, + "loss": 0.0332, + "step": 2608 + }, + { + "epoch": 6.9, + "learning_rate": 1.5697050938337802e-05, + "loss": 0.0543, + "step": 2609 + }, + { + "epoch": 6.9, + "learning_rate": 1.5683646112600538e-05, + "loss": 0.0009, + "step": 2610 + }, + { + "epoch": 6.91, + "learning_rate": 1.5670241286863273e-05, + "loss": 0.016, + "step": 2611 + }, + { + "epoch": 6.91, + "learning_rate": 1.5656836461126008e-05, + "loss": 0.0035, + "step": 2612 + }, + { + "epoch": 6.91, + "learning_rate": 1.5643431635388743e-05, + "loss": 0.0713, + "step": 2613 + }, + { + "epoch": 6.92, + "learning_rate": 1.5630026809651475e-05, + "loss": 0.0022, + "step": 2614 + }, + { + "epoch": 6.92, + "learning_rate": 1.561662198391421e-05, + "loss": 0.0005, + "step": 2615 + }, + { + "epoch": 6.92, + "learning_rate": 1.5603217158176945e-05, + "loss": 0.0009, + "step": 2616 + }, + { + "epoch": 6.92, + "learning_rate": 1.558981233243968e-05, + "loss": 0.0016, + "step": 2617 + }, + { + "epoch": 6.93, + "learning_rate": 1.5576407506702415e-05, + "loss": 0.0017, + "step": 2618 + }, + { + "epoch": 6.93, + "learning_rate": 1.5563002680965147e-05, + "loss": 0.0094, + "step": 2619 + }, + { + "epoch": 6.93, + "learning_rate": 1.5549597855227882e-05, + "loss": 0.016, + "step": 2620 + }, + { + "epoch": 6.93, + "learning_rate": 1.5536193029490617e-05, + "loss": 0.0005, + "step": 2621 + }, + { + "epoch": 6.94, + "learning_rate": 1.5522788203753352e-05, + "loss": 0.0549, + "step": 2622 + }, + { + "epoch": 6.94, + "learning_rate": 1.5509383378016087e-05, + "loss": 0.3791, + "step": 2623 + }, + { + "epoch": 6.94, + "learning_rate": 1.549597855227882e-05, + "loss": 0.0003, + "step": 2624 + }, + { + "epoch": 6.94, + "learning_rate": 1.5482573726541554e-05, + "loss": 0.0774, + "step": 2625 + }, + { + "epoch": 6.95, + "learning_rate": 1.546916890080429e-05, + "loss": 0.0879, + "step": 2626 + }, + { + "epoch": 6.95, + "learning_rate": 1.5455764075067025e-05, + "loss": 0.0007, + "step": 2627 + }, + { + "epoch": 6.95, + "learning_rate": 1.544235924932976e-05, + "loss": 0.0047, + "step": 2628 + }, + { + "epoch": 6.96, + "learning_rate": 1.542895442359249e-05, + "loss": 0.0011, + "step": 2629 + }, + { + "epoch": 6.96, + "learning_rate": 1.5415549597855227e-05, + "loss": 0.0004, + "step": 2630 + }, + { + "epoch": 6.96, + "learning_rate": 1.5402144772117962e-05, + "loss": 0.4962, + "step": 2631 + }, + { + "epoch": 6.96, + "learning_rate": 1.5388739946380697e-05, + "loss": 0.1182, + "step": 2632 + }, + { + "epoch": 6.97, + "learning_rate": 1.5375335120643432e-05, + "loss": 0.0269, + "step": 2633 + }, + { + "epoch": 6.97, + "learning_rate": 1.5361930294906167e-05, + "loss": 0.0157, + "step": 2634 + }, + { + "epoch": 6.97, + "learning_rate": 1.5348525469168902e-05, + "loss": 0.0022, + "step": 2635 + }, + { + "epoch": 6.97, + "learning_rate": 1.5335120643431637e-05, + "loss": 0.3299, + "step": 2636 + }, + { + "epoch": 6.98, + "learning_rate": 1.5321715817694372e-05, + "loss": 0.0529, + "step": 2637 + }, + { + "epoch": 6.98, + "learning_rate": 1.5308310991957108e-05, + "loss": 0.1396, + "step": 2638 + }, + { + "epoch": 6.98, + "learning_rate": 1.5294906166219843e-05, + "loss": 0.0008, + "step": 2639 + }, + { + "epoch": 6.98, + "learning_rate": 1.5281501340482574e-05, + "loss": 0.0086, + "step": 2640 + }, + { + "epoch": 6.99, + "learning_rate": 1.526809651474531e-05, + "loss": 0.0036, + "step": 2641 + }, + { + "epoch": 6.99, + "learning_rate": 1.5254691689008043e-05, + "loss": 0.0149, + "step": 2642 + }, + { + "epoch": 6.99, + "learning_rate": 1.5241286863270778e-05, + "loss": 0.0011, + "step": 2643 + }, + { + "epoch": 6.99, + "learning_rate": 1.5227882037533513e-05, + "loss": 0.0003, + "step": 2644 + }, + { + "epoch": 7.0, + "learning_rate": 1.5214477211796247e-05, + "loss": 0.0064, + "step": 2645 + }, + { + "epoch": 7.0, + "learning_rate": 1.5201072386058982e-05, + "loss": 0.0281, + "step": 2646 + }, + { + "epoch": 7.0, + "eval_f1": 0.7856000000000002, + "eval_loss": 1.1071351766586304, + "eval_runtime": 1.8613, + "eval_samples_per_second": 812.89, + "eval_steps_per_second": 51.041, + "step": 2646 + }, + { + "epoch": 7.0, + "learning_rate": 1.5187667560321717e-05, + "loss": 0.0049, + "step": 2647 + }, + { + "epoch": 7.01, + "learning_rate": 1.5174262734584452e-05, + "loss": 0.001, + "step": 2648 + }, + { + "epoch": 7.01, + "learning_rate": 1.5160857908847187e-05, + "loss": 0.0004, + "step": 2649 + }, + { + "epoch": 7.01, + "learning_rate": 1.5147453083109919e-05, + "loss": 0.022, + "step": 2650 + }, + { + "epoch": 7.01, + "learning_rate": 1.5134048257372654e-05, + "loss": 0.0126, + "step": 2651 + }, + { + "epoch": 7.02, + "learning_rate": 1.512064343163539e-05, + "loss": 0.0006, + "step": 2652 + }, + { + "epoch": 7.02, + "learning_rate": 1.5107238605898124e-05, + "loss": 0.0182, + "step": 2653 + }, + { + "epoch": 7.02, + "learning_rate": 1.509383378016086e-05, + "loss": 0.0004, + "step": 2654 + }, + { + "epoch": 7.02, + "learning_rate": 1.5080428954423593e-05, + "loss": 0.2763, + "step": 2655 + }, + { + "epoch": 7.03, + "learning_rate": 1.5067024128686328e-05, + "loss": 0.0025, + "step": 2656 + }, + { + "epoch": 7.03, + "learning_rate": 1.5053619302949063e-05, + "loss": 0.0102, + "step": 2657 + }, + { + "epoch": 7.03, + "learning_rate": 1.5040214477211798e-05, + "loss": 0.008, + "step": 2658 + }, + { + "epoch": 7.03, + "learning_rate": 1.5026809651474534e-05, + "loss": 0.0005, + "step": 2659 + }, + { + "epoch": 7.04, + "learning_rate": 1.5013404825737265e-05, + "loss": 0.0006, + "step": 2660 + }, + { + "epoch": 7.04, + "learning_rate": 1.5e-05, + "loss": 0.0003, + "step": 2661 + }, + { + "epoch": 7.04, + "learning_rate": 1.4986595174262736e-05, + "loss": 0.0013, + "step": 2662 + }, + { + "epoch": 7.04, + "learning_rate": 1.497319034852547e-05, + "loss": 0.0033, + "step": 2663 + }, + { + "epoch": 7.05, + "learning_rate": 1.4959785522788206e-05, + "loss": 0.0004, + "step": 2664 + }, + { + "epoch": 7.05, + "learning_rate": 1.4946380697050938e-05, + "loss": 0.0347, + "step": 2665 + }, + { + "epoch": 7.05, + "learning_rate": 1.4932975871313673e-05, + "loss": 0.0208, + "step": 2666 + }, + { + "epoch": 7.06, + "learning_rate": 1.4919571045576408e-05, + "loss": 0.3783, + "step": 2667 + }, + { + "epoch": 7.06, + "learning_rate": 1.4906166219839143e-05, + "loss": 0.0005, + "step": 2668 + }, + { + "epoch": 7.06, + "learning_rate": 1.4892761394101878e-05, + "loss": 0.2023, + "step": 2669 + }, + { + "epoch": 7.06, + "learning_rate": 1.4879356568364611e-05, + "loss": 0.0007, + "step": 2670 + }, + { + "epoch": 7.07, + "learning_rate": 1.4865951742627347e-05, + "loss": 0.0014, + "step": 2671 + }, + { + "epoch": 7.07, + "learning_rate": 1.4852546916890082e-05, + "loss": 0.0018, + "step": 2672 + }, + { + "epoch": 7.07, + "learning_rate": 1.4839142091152817e-05, + "loss": 0.0004, + "step": 2673 + }, + { + "epoch": 7.07, + "learning_rate": 1.4825737265415552e-05, + "loss": 0.0005, + "step": 2674 + }, + { + "epoch": 7.08, + "learning_rate": 1.4812332439678284e-05, + "loss": 0.0235, + "step": 2675 + }, + { + "epoch": 7.08, + "learning_rate": 1.4798927613941019e-05, + "loss": 0.0116, + "step": 2676 + }, + { + "epoch": 7.08, + "learning_rate": 1.4785522788203754e-05, + "loss": 0.0005, + "step": 2677 + }, + { + "epoch": 7.08, + "learning_rate": 1.4772117962466489e-05, + "loss": 0.0011, + "step": 2678 + }, + { + "epoch": 7.09, + "learning_rate": 1.4758713136729224e-05, + "loss": 0.2959, + "step": 2679 + }, + { + "epoch": 7.09, + "learning_rate": 1.4745308310991956e-05, + "loss": 0.2646, + "step": 2680 + }, + { + "epoch": 7.09, + "learning_rate": 1.4731903485254691e-05, + "loss": 0.1224, + "step": 2681 + }, + { + "epoch": 7.1, + "learning_rate": 1.4718498659517426e-05, + "loss": 0.0419, + "step": 2682 + }, + { + "epoch": 7.1, + "learning_rate": 1.4705093833780161e-05, + "loss": 0.0218, + "step": 2683 + }, + { + "epoch": 7.1, + "learning_rate": 1.4691689008042897e-05, + "loss": 0.0003, + "step": 2684 + }, + { + "epoch": 7.1, + "learning_rate": 1.467828418230563e-05, + "loss": 0.0005, + "step": 2685 + }, + { + "epoch": 7.11, + "learning_rate": 1.4664879356568365e-05, + "loss": 0.2352, + "step": 2686 + }, + { + "epoch": 7.11, + "learning_rate": 1.46514745308311e-05, + "loss": 0.0004, + "step": 2687 + }, + { + "epoch": 7.11, + "learning_rate": 1.4638069705093835e-05, + "loss": 0.0004, + "step": 2688 + }, + { + "epoch": 7.11, + "learning_rate": 1.462466487935657e-05, + "loss": 0.1069, + "step": 2689 + }, + { + "epoch": 7.12, + "learning_rate": 1.4611260053619302e-05, + "loss": 0.008, + "step": 2690 + }, + { + "epoch": 7.12, + "learning_rate": 1.4597855227882037e-05, + "loss": 0.0007, + "step": 2691 + }, + { + "epoch": 7.12, + "learning_rate": 1.4584450402144772e-05, + "loss": 0.002, + "step": 2692 + }, + { + "epoch": 7.12, + "learning_rate": 1.4571045576407508e-05, + "loss": 0.0029, + "step": 2693 + }, + { + "epoch": 7.13, + "learning_rate": 1.4557640750670243e-05, + "loss": 0.0221, + "step": 2694 + }, + { + "epoch": 7.13, + "learning_rate": 1.4544235924932978e-05, + "loss": 0.0085, + "step": 2695 + }, + { + "epoch": 7.13, + "learning_rate": 1.453083109919571e-05, + "loss": 0.0009, + "step": 2696 + }, + { + "epoch": 7.13, + "learning_rate": 1.4517426273458445e-05, + "loss": 0.0018, + "step": 2697 + }, + { + "epoch": 7.14, + "learning_rate": 1.450402144772118e-05, + "loss": 0.001, + "step": 2698 + }, + { + "epoch": 7.14, + "learning_rate": 1.4490616621983915e-05, + "loss": 0.0021, + "step": 2699 + }, + { + "epoch": 7.14, + "learning_rate": 1.447721179624665e-05, + "loss": 0.0225, + "step": 2700 + }, + { + "epoch": 7.15, + "learning_rate": 1.4463806970509384e-05, + "loss": 0.0005, + "step": 2701 + }, + { + "epoch": 7.15, + "learning_rate": 1.4450402144772119e-05, + "loss": 0.0057, + "step": 2702 + }, + { + "epoch": 7.15, + "learning_rate": 1.4436997319034854e-05, + "loss": 0.0422, + "step": 2703 + }, + { + "epoch": 7.15, + "learning_rate": 1.4423592493297589e-05, + "loss": 0.0028, + "step": 2704 + }, + { + "epoch": 7.16, + "learning_rate": 1.4410187667560324e-05, + "loss": 0.0231, + "step": 2705 + }, + { + "epoch": 7.16, + "learning_rate": 1.4396782841823056e-05, + "loss": 0.1236, + "step": 2706 + }, + { + "epoch": 7.16, + "learning_rate": 1.4383378016085791e-05, + "loss": 0.0004, + "step": 2707 + }, + { + "epoch": 7.16, + "learning_rate": 1.4369973190348526e-05, + "loss": 0.0019, + "step": 2708 + }, + { + "epoch": 7.17, + "learning_rate": 1.4356568364611261e-05, + "loss": 0.0029, + "step": 2709 + }, + { + "epoch": 7.17, + "learning_rate": 1.4343163538873996e-05, + "loss": 0.0005, + "step": 2710 + }, + { + "epoch": 7.17, + "learning_rate": 1.4329758713136728e-05, + "loss": 0.0665, + "step": 2711 + }, + { + "epoch": 7.17, + "learning_rate": 1.4316353887399463e-05, + "loss": 0.0005, + "step": 2712 + }, + { + "epoch": 7.18, + "learning_rate": 1.4302949061662198e-05, + "loss": 0.0107, + "step": 2713 + }, + { + "epoch": 7.18, + "learning_rate": 1.4289544235924934e-05, + "loss": 0.0005, + "step": 2714 + }, + { + "epoch": 7.18, + "learning_rate": 1.4276139410187669e-05, + "loss": 0.1983, + "step": 2715 + }, + { + "epoch": 7.19, + "learning_rate": 1.4262734584450402e-05, + "loss": 0.0016, + "step": 2716 + }, + { + "epoch": 7.19, + "learning_rate": 1.4249329758713137e-05, + "loss": 0.0003, + "step": 2717 + }, + { + "epoch": 7.19, + "learning_rate": 1.4235924932975872e-05, + "loss": 0.0247, + "step": 2718 + }, + { + "epoch": 7.19, + "learning_rate": 1.4222520107238607e-05, + "loss": 0.0079, + "step": 2719 + }, + { + "epoch": 7.2, + "learning_rate": 1.4209115281501343e-05, + "loss": 0.0012, + "step": 2720 + }, + { + "epoch": 7.2, + "learning_rate": 1.4195710455764074e-05, + "loss": 0.0004, + "step": 2721 + }, + { + "epoch": 7.2, + "learning_rate": 1.418230563002681e-05, + "loss": 0.0004, + "step": 2722 + }, + { + "epoch": 7.2, + "learning_rate": 1.4168900804289545e-05, + "loss": 0.0051, + "step": 2723 + }, + { + "epoch": 7.21, + "learning_rate": 1.415549597855228e-05, + "loss": 0.0006, + "step": 2724 + }, + { + "epoch": 7.21, + "learning_rate": 1.4142091152815015e-05, + "loss": 0.0043, + "step": 2725 + }, + { + "epoch": 7.21, + "learning_rate": 1.4128686327077748e-05, + "loss": 0.004, + "step": 2726 + }, + { + "epoch": 7.21, + "learning_rate": 1.4115281501340483e-05, + "loss": 0.2211, + "step": 2727 + }, + { + "epoch": 7.22, + "learning_rate": 1.4101876675603219e-05, + "loss": 0.0003, + "step": 2728 + }, + { + "epoch": 7.22, + "learning_rate": 1.4088471849865954e-05, + "loss": 0.0004, + "step": 2729 + }, + { + "epoch": 7.22, + "learning_rate": 1.4075067024128689e-05, + "loss": 0.2051, + "step": 2730 + }, + { + "epoch": 7.22, + "learning_rate": 1.406166219839142e-05, + "loss": 0.0003, + "step": 2731 + }, + { + "epoch": 7.23, + "learning_rate": 1.4048257372654156e-05, + "loss": 0.0014, + "step": 2732 + }, + { + "epoch": 7.23, + "learning_rate": 1.403485254691689e-05, + "loss": 0.0007, + "step": 2733 + }, + { + "epoch": 7.23, + "learning_rate": 1.4021447721179626e-05, + "loss": 0.0068, + "step": 2734 + }, + { + "epoch": 7.24, + "learning_rate": 1.4008042895442361e-05, + "loss": 0.137, + "step": 2735 + }, + { + "epoch": 7.24, + "learning_rate": 1.3994638069705093e-05, + "loss": 0.0005, + "step": 2736 + }, + { + "epoch": 7.24, + "learning_rate": 1.3981233243967828e-05, + "loss": 0.0006, + "step": 2737 + }, + { + "epoch": 7.24, + "learning_rate": 1.3967828418230563e-05, + "loss": 0.0206, + "step": 2738 + }, + { + "epoch": 7.25, + "learning_rate": 1.3954423592493298e-05, + "loss": 0.1488, + "step": 2739 + }, + { + "epoch": 7.25, + "learning_rate": 1.3941018766756033e-05, + "loss": 0.0054, + "step": 2740 + }, + { + "epoch": 7.25, + "learning_rate": 1.3927613941018767e-05, + "loss": 0.0269, + "step": 2741 + }, + { + "epoch": 7.25, + "learning_rate": 1.3914209115281502e-05, + "loss": 0.0006, + "step": 2742 + }, + { + "epoch": 7.26, + "learning_rate": 1.3900804289544237e-05, + "loss": 0.0003, + "step": 2743 + }, + { + "epoch": 7.26, + "learning_rate": 1.3887399463806972e-05, + "loss": 0.0004, + "step": 2744 + }, + { + "epoch": 7.26, + "learning_rate": 1.3873994638069707e-05, + "loss": 0.0003, + "step": 2745 + }, + { + "epoch": 7.26, + "learning_rate": 1.3860589812332439e-05, + "loss": 0.0027, + "step": 2746 + }, + { + "epoch": 7.27, + "learning_rate": 1.3847184986595174e-05, + "loss": 0.0006, + "step": 2747 + }, + { + "epoch": 7.27, + "learning_rate": 1.383378016085791e-05, + "loss": 0.0012, + "step": 2748 + }, + { + "epoch": 7.27, + "learning_rate": 1.3820375335120644e-05, + "loss": 0.0522, + "step": 2749 + }, + { + "epoch": 7.28, + "learning_rate": 1.380697050938338e-05, + "loss": 0.0126, + "step": 2750 + }, + { + "epoch": 7.28, + "learning_rate": 1.3793565683646111e-05, + "loss": 0.0083, + "step": 2751 + }, + { + "epoch": 7.28, + "learning_rate": 1.3780160857908846e-05, + "loss": 0.074, + "step": 2752 + }, + { + "epoch": 7.28, + "learning_rate": 1.3766756032171582e-05, + "loss": 0.0002, + "step": 2753 + }, + { + "epoch": 7.29, + "learning_rate": 1.3753351206434317e-05, + "loss": 0.1009, + "step": 2754 + }, + { + "epoch": 7.29, + "learning_rate": 1.3739946380697052e-05, + "loss": 0.0021, + "step": 2755 + }, + { + "epoch": 7.29, + "learning_rate": 1.3726541554959787e-05, + "loss": 0.0082, + "step": 2756 + }, + { + "epoch": 7.29, + "learning_rate": 1.371313672922252e-05, + "loss": 0.0004, + "step": 2757 + }, + { + "epoch": 7.3, + "learning_rate": 1.3699731903485256e-05, + "loss": 0.0006, + "step": 2758 + }, + { + "epoch": 7.3, + "learning_rate": 1.368632707774799e-05, + "loss": 0.0173, + "step": 2759 + }, + { + "epoch": 7.3, + "learning_rate": 1.3672922252010726e-05, + "loss": 0.0147, + "step": 2760 + }, + { + "epoch": 7.3, + "learning_rate": 1.3659517426273461e-05, + "loss": 0.1293, + "step": 2761 + }, + { + "epoch": 7.31, + "learning_rate": 1.3646112600536193e-05, + "loss": 0.2566, + "step": 2762 + }, + { + "epoch": 7.31, + "learning_rate": 1.3632707774798928e-05, + "loss": 0.0026, + "step": 2763 + }, + { + "epoch": 7.31, + "learning_rate": 1.3619302949061663e-05, + "loss": 0.0031, + "step": 2764 + }, + { + "epoch": 7.31, + "learning_rate": 1.3605898123324398e-05, + "loss": 0.0029, + "step": 2765 + }, + { + "epoch": 7.32, + "learning_rate": 1.3592493297587133e-05, + "loss": 0.0005, + "step": 2766 + }, + { + "epoch": 7.32, + "learning_rate": 1.3579088471849865e-05, + "loss": 0.0004, + "step": 2767 + }, + { + "epoch": 7.32, + "learning_rate": 1.35656836461126e-05, + "loss": 0.0294, + "step": 2768 + }, + { + "epoch": 7.33, + "learning_rate": 1.3552278820375335e-05, + "loss": 0.0011, + "step": 2769 + }, + { + "epoch": 7.33, + "learning_rate": 1.353887399463807e-05, + "loss": 0.009, + "step": 2770 + }, + { + "epoch": 7.33, + "learning_rate": 1.3525469168900805e-05, + "loss": 0.0003, + "step": 2771 + }, + { + "epoch": 7.33, + "learning_rate": 1.3512064343163539e-05, + "loss": 0.0003, + "step": 2772 + }, + { + "epoch": 7.34, + "learning_rate": 1.3498659517426274e-05, + "loss": 0.0002, + "step": 2773 + }, + { + "epoch": 7.34, + "learning_rate": 1.348525469168901e-05, + "loss": 0.0002, + "step": 2774 + }, + { + "epoch": 7.34, + "learning_rate": 1.3471849865951744e-05, + "loss": 0.1261, + "step": 2775 + }, + { + "epoch": 7.34, + "learning_rate": 1.345844504021448e-05, + "loss": 0.0006, + "step": 2776 + }, + { + "epoch": 7.35, + "learning_rate": 1.3445040214477211e-05, + "loss": 0.0006, + "step": 2777 + }, + { + "epoch": 7.35, + "learning_rate": 1.3431635388739946e-05, + "loss": 0.0003, + "step": 2778 + }, + { + "epoch": 7.35, + "learning_rate": 1.3418230563002681e-05, + "loss": 0.0754, + "step": 2779 + }, + { + "epoch": 7.35, + "learning_rate": 1.3404825737265417e-05, + "loss": 0.0002, + "step": 2780 + }, + { + "epoch": 7.36, + "learning_rate": 1.3391420911528152e-05, + "loss": 0.0007, + "step": 2781 + }, + { + "epoch": 7.36, + "learning_rate": 1.3378016085790885e-05, + "loss": 0.0004, + "step": 2782 + }, + { + "epoch": 7.36, + "learning_rate": 1.336461126005362e-05, + "loss": 0.001, + "step": 2783 + }, + { + "epoch": 7.37, + "learning_rate": 1.3351206434316355e-05, + "loss": 0.0006, + "step": 2784 + }, + { + "epoch": 7.37, + "learning_rate": 1.333780160857909e-05, + "loss": 0.0227, + "step": 2785 + }, + { + "epoch": 7.37, + "learning_rate": 1.3324396782841826e-05, + "loss": 0.0002, + "step": 2786 + }, + { + "epoch": 7.37, + "learning_rate": 1.3310991957104557e-05, + "loss": 0.0002, + "step": 2787 + }, + { + "epoch": 7.38, + "learning_rate": 1.3297587131367293e-05, + "loss": 0.1036, + "step": 2788 + }, + { + "epoch": 7.38, + "learning_rate": 1.3284182305630028e-05, + "loss": 0.0014, + "step": 2789 + }, + { + "epoch": 7.38, + "learning_rate": 1.3270777479892763e-05, + "loss": 0.35, + "step": 2790 + }, + { + "epoch": 7.38, + "learning_rate": 1.3257372654155498e-05, + "loss": 0.0003, + "step": 2791 + }, + { + "epoch": 7.39, + "learning_rate": 1.324396782841823e-05, + "loss": 0.0182, + "step": 2792 + }, + { + "epoch": 7.39, + "learning_rate": 1.3230563002680965e-05, + "loss": 0.0038, + "step": 2793 + }, + { + "epoch": 7.39, + "learning_rate": 1.32171581769437e-05, + "loss": 0.0003, + "step": 2794 + }, + { + "epoch": 7.39, + "learning_rate": 1.3203753351206435e-05, + "loss": 0.0003, + "step": 2795 + }, + { + "epoch": 7.4, + "learning_rate": 1.319034852546917e-05, + "loss": 0.0008, + "step": 2796 + }, + { + "epoch": 7.4, + "learning_rate": 1.3176943699731904e-05, + "loss": 0.0003, + "step": 2797 + }, + { + "epoch": 7.4, + "learning_rate": 1.3163538873994639e-05, + "loss": 0.0005, + "step": 2798 + }, + { + "epoch": 7.4, + "learning_rate": 1.3150134048257374e-05, + "loss": 0.2165, + "step": 2799 + }, + { + "epoch": 7.41, + "learning_rate": 1.3136729222520109e-05, + "loss": 0.023, + "step": 2800 + }, + { + "epoch": 7.41, + "learning_rate": 1.3123324396782844e-05, + "loss": 0.0047, + "step": 2801 + }, + { + "epoch": 7.41, + "learning_rate": 1.3109919571045576e-05, + "loss": 0.1507, + "step": 2802 + }, + { + "epoch": 7.42, + "learning_rate": 1.3096514745308311e-05, + "loss": 0.2509, + "step": 2803 + }, + { + "epoch": 7.42, + "learning_rate": 1.3083109919571046e-05, + "loss": 0.0085, + "step": 2804 + }, + { + "epoch": 7.42, + "learning_rate": 1.3069705093833781e-05, + "loss": 0.2183, + "step": 2805 + }, + { + "epoch": 7.42, + "learning_rate": 1.3056300268096516e-05, + "loss": 0.0007, + "step": 2806 + }, + { + "epoch": 7.43, + "learning_rate": 1.3042895442359248e-05, + "loss": 0.0005, + "step": 2807 + }, + { + "epoch": 7.43, + "learning_rate": 1.3029490616621983e-05, + "loss": 0.1291, + "step": 2808 + }, + { + "epoch": 7.43, + "learning_rate": 1.3016085790884718e-05, + "loss": 0.1037, + "step": 2809 + }, + { + "epoch": 7.43, + "learning_rate": 1.3002680965147454e-05, + "loss": 0.0147, + "step": 2810 + }, + { + "epoch": 7.44, + "learning_rate": 1.2989276139410189e-05, + "loss": 0.0006, + "step": 2811 + }, + { + "epoch": 7.44, + "learning_rate": 1.2975871313672922e-05, + "loss": 0.0148, + "step": 2812 + }, + { + "epoch": 7.44, + "learning_rate": 1.2962466487935657e-05, + "loss": 0.0129, + "step": 2813 + }, + { + "epoch": 7.44, + "learning_rate": 1.2949061662198392e-05, + "loss": 0.0276, + "step": 2814 + }, + { + "epoch": 7.45, + "learning_rate": 1.2935656836461127e-05, + "loss": 0.0007, + "step": 2815 + }, + { + "epoch": 7.45, + "learning_rate": 1.2922252010723863e-05, + "loss": 0.0006, + "step": 2816 + }, + { + "epoch": 7.45, + "learning_rate": 1.2908847184986598e-05, + "loss": 0.0002, + "step": 2817 + }, + { + "epoch": 7.46, + "learning_rate": 1.289544235924933e-05, + "loss": 0.1274, + "step": 2818 + }, + { + "epoch": 7.46, + "learning_rate": 1.2882037533512065e-05, + "loss": 0.0009, + "step": 2819 + }, + { + "epoch": 7.46, + "learning_rate": 1.28686327077748e-05, + "loss": 0.0007, + "step": 2820 + }, + { + "epoch": 7.46, + "learning_rate": 1.2855227882037535e-05, + "loss": 0.002, + "step": 2821 + }, + { + "epoch": 7.47, + "learning_rate": 1.284182305630027e-05, + "loss": 0.0004, + "step": 2822 + }, + { + "epoch": 7.47, + "learning_rate": 1.2828418230563002e-05, + "loss": 0.0017, + "step": 2823 + }, + { + "epoch": 7.47, + "learning_rate": 1.2815013404825737e-05, + "loss": 0.001, + "step": 2824 + }, + { + "epoch": 7.47, + "learning_rate": 1.2801608579088472e-05, + "loss": 0.0106, + "step": 2825 + }, + { + "epoch": 7.48, + "learning_rate": 1.2788203753351207e-05, + "loss": 0.1158, + "step": 2826 + }, + { + "epoch": 7.48, + "learning_rate": 1.2774798927613942e-05, + "loss": 0.0004, + "step": 2827 + }, + { + "epoch": 7.48, + "learning_rate": 1.2761394101876676e-05, + "loss": 0.3214, + "step": 2828 + }, + { + "epoch": 7.48, + "learning_rate": 1.274798927613941e-05, + "loss": 0.0003, + "step": 2829 + }, + { + "epoch": 7.49, + "learning_rate": 1.2734584450402146e-05, + "loss": 0.0417, + "step": 2830 + }, + { + "epoch": 7.49, + "learning_rate": 1.2721179624664881e-05, + "loss": 0.0002, + "step": 2831 + }, + { + "epoch": 7.49, + "learning_rate": 1.2707774798927616e-05, + "loss": 0.0004, + "step": 2832 + }, + { + "epoch": 7.49, + "learning_rate": 1.2694369973190348e-05, + "loss": 0.1166, + "step": 2833 + }, + { + "epoch": 7.5, + "learning_rate": 1.2680965147453083e-05, + "loss": 0.0008, + "step": 2834 + }, + { + "epoch": 7.5, + "learning_rate": 1.2667560321715818e-05, + "loss": 0.0005, + "step": 2835 + }, + { + "epoch": 7.5, + "learning_rate": 1.2654155495978553e-05, + "loss": 0.0191, + "step": 2836 + }, + { + "epoch": 7.51, + "learning_rate": 1.2640750670241289e-05, + "loss": 0.0642, + "step": 2837 + }, + { + "epoch": 7.51, + "learning_rate": 1.262734584450402e-05, + "loss": 0.0256, + "step": 2838 + }, + { + "epoch": 7.51, + "learning_rate": 1.2613941018766755e-05, + "loss": 0.0007, + "step": 2839 + }, + { + "epoch": 7.51, + "learning_rate": 1.260053619302949e-05, + "loss": 0.049, + "step": 2840 + }, + { + "epoch": 7.52, + "learning_rate": 1.2587131367292226e-05, + "loss": 0.0012, + "step": 2841 + }, + { + "epoch": 7.52, + "learning_rate": 1.257372654155496e-05, + "loss": 0.0006, + "step": 2842 + }, + { + "epoch": 7.52, + "learning_rate": 1.2560321715817694e-05, + "loss": 0.2299, + "step": 2843 + }, + { + "epoch": 7.52, + "learning_rate": 1.254691689008043e-05, + "loss": 0.0006, + "step": 2844 + }, + { + "epoch": 7.53, + "learning_rate": 1.2533512064343164e-05, + "loss": 0.0346, + "step": 2845 + }, + { + "epoch": 7.53, + "learning_rate": 1.25201072386059e-05, + "loss": 0.0021, + "step": 2846 + }, + { + "epoch": 7.53, + "learning_rate": 1.2506702412868635e-05, + "loss": 0.0003, + "step": 2847 + }, + { + "epoch": 7.53, + "learning_rate": 1.2493297587131368e-05, + "loss": 0.135, + "step": 2848 + }, + { + "epoch": 7.54, + "learning_rate": 1.2479892761394102e-05, + "loss": 0.0003, + "step": 2849 + }, + { + "epoch": 7.54, + "learning_rate": 1.2466487935656837e-05, + "loss": 0.0005, + "step": 2850 + }, + { + "epoch": 7.54, + "learning_rate": 1.2453083109919572e-05, + "loss": 0.0005, + "step": 2851 + }, + { + "epoch": 7.54, + "learning_rate": 1.2439678284182305e-05, + "loss": 0.0007, + "step": 2852 + }, + { + "epoch": 7.55, + "learning_rate": 1.242627345844504e-05, + "loss": 0.0004, + "step": 2853 + }, + { + "epoch": 7.55, + "learning_rate": 1.2412868632707776e-05, + "loss": 0.0003, + "step": 2854 + }, + { + "epoch": 7.55, + "learning_rate": 1.239946380697051e-05, + "loss": 0.0003, + "step": 2855 + }, + { + "epoch": 7.56, + "learning_rate": 1.2386058981233246e-05, + "loss": 0.0006, + "step": 2856 + }, + { + "epoch": 7.56, + "learning_rate": 1.237265415549598e-05, + "loss": 0.0171, + "step": 2857 + }, + { + "epoch": 7.56, + "learning_rate": 1.2359249329758714e-05, + "loss": 0.1066, + "step": 2858 + }, + { + "epoch": 7.56, + "learning_rate": 1.2345844504021448e-05, + "loss": 0.0003, + "step": 2859 + }, + { + "epoch": 7.57, + "learning_rate": 1.2332439678284183e-05, + "loss": 0.1106, + "step": 2860 + }, + { + "epoch": 7.57, + "learning_rate": 1.2319034852546918e-05, + "loss": 0.0004, + "step": 2861 + }, + { + "epoch": 7.57, + "learning_rate": 1.2305630026809652e-05, + "loss": 0.0012, + "step": 2862 + }, + { + "epoch": 7.57, + "learning_rate": 1.2292225201072387e-05, + "loss": 0.0004, + "step": 2863 + }, + { + "epoch": 7.58, + "learning_rate": 1.2278820375335122e-05, + "loss": 0.0007, + "step": 2864 + }, + { + "epoch": 7.58, + "learning_rate": 1.2265415549597855e-05, + "loss": 0.0104, + "step": 2865 + }, + { + "epoch": 7.58, + "learning_rate": 1.225201072386059e-05, + "loss": 0.0003, + "step": 2866 + }, + { + "epoch": 7.58, + "learning_rate": 1.2238605898123324e-05, + "loss": 0.3976, + "step": 2867 + }, + { + "epoch": 7.59, + "learning_rate": 1.2225201072386059e-05, + "loss": 0.0003, + "step": 2868 + }, + { + "epoch": 7.59, + "learning_rate": 1.2211796246648794e-05, + "loss": 0.4433, + "step": 2869 + }, + { + "epoch": 7.59, + "learning_rate": 1.219839142091153e-05, + "loss": 0.0005, + "step": 2870 + }, + { + "epoch": 7.6, + "learning_rate": 1.2184986595174264e-05, + "loss": 0.0733, + "step": 2871 + }, + { + "epoch": 7.6, + "learning_rate": 1.2171581769436998e-05, + "loss": 0.0008, + "step": 2872 + }, + { + "epoch": 7.6, + "learning_rate": 1.2158176943699733e-05, + "loss": 0.0003, + "step": 2873 + }, + { + "epoch": 7.6, + "learning_rate": 1.2144772117962468e-05, + "loss": 0.0253, + "step": 2874 + }, + { + "epoch": 7.61, + "learning_rate": 1.2131367292225201e-05, + "loss": 0.09, + "step": 2875 + }, + { + "epoch": 7.61, + "learning_rate": 1.2117962466487937e-05, + "loss": 0.1283, + "step": 2876 + }, + { + "epoch": 7.61, + "learning_rate": 1.210455764075067e-05, + "loss": 0.0866, + "step": 2877 + }, + { + "epoch": 7.61, + "learning_rate": 1.2091152815013405e-05, + "loss": 0.0005, + "step": 2878 + }, + { + "epoch": 7.62, + "learning_rate": 1.207774798927614e-05, + "loss": 0.051, + "step": 2879 + }, + { + "epoch": 7.62, + "learning_rate": 1.2064343163538874e-05, + "loss": 0.0055, + "step": 2880 + }, + { + "epoch": 7.62, + "learning_rate": 1.2050938337801609e-05, + "loss": 0.001, + "step": 2881 + }, + { + "epoch": 7.62, + "learning_rate": 1.2037533512064344e-05, + "loss": 0.0765, + "step": 2882 + }, + { + "epoch": 7.63, + "learning_rate": 1.2024128686327079e-05, + "loss": 0.0239, + "step": 2883 + }, + { + "epoch": 7.63, + "learning_rate": 1.2010723860589814e-05, + "loss": 0.0616, + "step": 2884 + }, + { + "epoch": 7.63, + "learning_rate": 1.1997319034852548e-05, + "loss": 0.0342, + "step": 2885 + }, + { + "epoch": 7.63, + "learning_rate": 1.1983914209115283e-05, + "loss": 0.0006, + "step": 2886 + }, + { + "epoch": 7.64, + "learning_rate": 1.1970509383378016e-05, + "loss": 0.091, + "step": 2887 + }, + { + "epoch": 7.64, + "learning_rate": 1.1957104557640751e-05, + "loss": 0.0004, + "step": 2888 + }, + { + "epoch": 7.64, + "learning_rate": 1.1943699731903486e-05, + "loss": 0.0257, + "step": 2889 + }, + { + "epoch": 7.65, + "learning_rate": 1.193029490616622e-05, + "loss": 0.0422, + "step": 2890 + }, + { + "epoch": 7.65, + "learning_rate": 1.1916890080428955e-05, + "loss": 0.1861, + "step": 2891 + }, + { + "epoch": 7.65, + "learning_rate": 1.1903485254691689e-05, + "loss": 0.0003, + "step": 2892 + }, + { + "epoch": 7.65, + "learning_rate": 1.1890080428954424e-05, + "loss": 0.0678, + "step": 2893 + }, + { + "epoch": 7.66, + "learning_rate": 1.1876675603217159e-05, + "loss": 0.0005, + "step": 2894 + }, + { + "epoch": 7.66, + "learning_rate": 1.1863270777479892e-05, + "loss": 0.0234, + "step": 2895 + }, + { + "epoch": 7.66, + "learning_rate": 1.1849865951742627e-05, + "loss": 0.0007, + "step": 2896 + }, + { + "epoch": 7.66, + "learning_rate": 1.1836461126005362e-05, + "loss": 0.0963, + "step": 2897 + }, + { + "epoch": 7.67, + "learning_rate": 1.1823056300268098e-05, + "loss": 0.0132, + "step": 2898 + }, + { + "epoch": 7.67, + "learning_rate": 1.1809651474530833e-05, + "loss": 0.0019, + "step": 2899 + }, + { + "epoch": 7.67, + "learning_rate": 1.1796246648793566e-05, + "loss": 0.0219, + "step": 2900 + }, + { + "epoch": 7.67, + "learning_rate": 1.1782841823056301e-05, + "loss": 0.0062, + "step": 2901 + }, + { + "epoch": 7.68, + "learning_rate": 1.1769436997319036e-05, + "loss": 0.0003, + "step": 2902 + }, + { + "epoch": 7.68, + "learning_rate": 1.175603217158177e-05, + "loss": 0.0009, + "step": 2903 + }, + { + "epoch": 7.68, + "learning_rate": 1.1742627345844505e-05, + "loss": 0.1446, + "step": 2904 + }, + { + "epoch": 7.69, + "learning_rate": 1.1729222520107238e-05, + "loss": 0.0103, + "step": 2905 + }, + { + "epoch": 7.69, + "learning_rate": 1.1715817694369974e-05, + "loss": 0.0004, + "step": 2906 + }, + { + "epoch": 7.69, + "learning_rate": 1.1702412868632709e-05, + "loss": 0.2502, + "step": 2907 + }, + { + "epoch": 7.69, + "learning_rate": 1.1689008042895442e-05, + "loss": 0.0005, + "step": 2908 + }, + { + "epoch": 7.7, + "learning_rate": 1.1675603217158177e-05, + "loss": 0.0001, + "step": 2909 + }, + { + "epoch": 7.7, + "learning_rate": 1.166219839142091e-05, + "loss": 0.0928, + "step": 2910 + }, + { + "epoch": 7.7, + "learning_rate": 1.1648793565683646e-05, + "loss": 0.0195, + "step": 2911 + }, + { + "epoch": 7.7, + "learning_rate": 1.1635388739946381e-05, + "loss": 0.0727, + "step": 2912 + }, + { + "epoch": 7.71, + "learning_rate": 1.1621983914209116e-05, + "loss": 0.0778, + "step": 2913 + }, + { + "epoch": 7.71, + "learning_rate": 1.1608579088471851e-05, + "loss": 0.1304, + "step": 2914 + }, + { + "epoch": 7.71, + "learning_rate": 1.1595174262734585e-05, + "loss": 0.0002, + "step": 2915 + }, + { + "epoch": 7.71, + "learning_rate": 1.158176943699732e-05, + "loss": 0.0003, + "step": 2916 + }, + { + "epoch": 7.72, + "learning_rate": 1.1568364611260055e-05, + "loss": 0.0137, + "step": 2917 + }, + { + "epoch": 7.72, + "learning_rate": 1.1554959785522788e-05, + "loss": 0.0003, + "step": 2918 + }, + { + "epoch": 7.72, + "learning_rate": 1.1541554959785523e-05, + "loss": 0.0018, + "step": 2919 + }, + { + "epoch": 7.72, + "learning_rate": 1.1528150134048257e-05, + "loss": 0.0057, + "step": 2920 + }, + { + "epoch": 7.73, + "learning_rate": 1.1514745308310992e-05, + "loss": 0.0003, + "step": 2921 + }, + { + "epoch": 7.73, + "learning_rate": 1.1501340482573727e-05, + "loss": 0.0015, + "step": 2922 + }, + { + "epoch": 7.73, + "learning_rate": 1.148793565683646e-05, + "loss": 0.0004, + "step": 2923 + }, + { + "epoch": 7.74, + "learning_rate": 1.1474530831099196e-05, + "loss": 0.0005, + "step": 2924 + }, + { + "epoch": 7.74, + "learning_rate": 1.1461126005361931e-05, + "loss": 0.0345, + "step": 2925 + }, + { + "epoch": 7.74, + "learning_rate": 1.1447721179624666e-05, + "loss": 0.0878, + "step": 2926 + }, + { + "epoch": 7.74, + "learning_rate": 1.1434316353887401e-05, + "loss": 0.0003, + "step": 2927 + }, + { + "epoch": 7.75, + "learning_rate": 1.1420911528150135e-05, + "loss": 0.0732, + "step": 2928 + }, + { + "epoch": 7.75, + "learning_rate": 1.140750670241287e-05, + "loss": 0.0005, + "step": 2929 + }, + { + "epoch": 7.75, + "learning_rate": 1.1394101876675605e-05, + "loss": 0.001, + "step": 2930 + }, + { + "epoch": 7.75, + "learning_rate": 1.1380697050938338e-05, + "loss": 0.0038, + "step": 2931 + }, + { + "epoch": 7.76, + "learning_rate": 1.1367292225201073e-05, + "loss": 0.0056, + "step": 2932 + }, + { + "epoch": 7.76, + "learning_rate": 1.1353887399463807e-05, + "loss": 0.1057, + "step": 2933 + }, + { + "epoch": 7.76, + "learning_rate": 1.1340482573726542e-05, + "loss": 0.0005, + "step": 2934 + }, + { + "epoch": 7.76, + "learning_rate": 1.1327077747989277e-05, + "loss": 0.0419, + "step": 2935 + }, + { + "epoch": 7.77, + "learning_rate": 1.131367292225201e-05, + "loss": 0.0304, + "step": 2936 + }, + { + "epoch": 7.77, + "learning_rate": 1.1300268096514746e-05, + "loss": 0.0002, + "step": 2937 + }, + { + "epoch": 7.77, + "learning_rate": 1.1286863270777479e-05, + "loss": 0.0332, + "step": 2938 + }, + { + "epoch": 7.78, + "learning_rate": 1.1273458445040214e-05, + "loss": 0.1015, + "step": 2939 + }, + { + "epoch": 7.78, + "learning_rate": 1.126005361930295e-05, + "loss": 0.0008, + "step": 2940 + }, + { + "epoch": 7.78, + "learning_rate": 1.1246648793565684e-05, + "loss": 0.0273, + "step": 2941 + }, + { + "epoch": 7.78, + "learning_rate": 1.123324396782842e-05, + "loss": 0.0022, + "step": 2942 + }, + { + "epoch": 7.79, + "learning_rate": 1.1219839142091153e-05, + "loss": 0.0009, + "step": 2943 + }, + { + "epoch": 7.79, + "learning_rate": 1.1206434316353888e-05, + "loss": 0.0003, + "step": 2944 + }, + { + "epoch": 7.79, + "learning_rate": 1.1193029490616623e-05, + "loss": 0.0057, + "step": 2945 + }, + { + "epoch": 7.79, + "learning_rate": 1.1179624664879357e-05, + "loss": 0.0014, + "step": 2946 + }, + { + "epoch": 7.8, + "learning_rate": 1.1166219839142092e-05, + "loss": 0.0009, + "step": 2947 + }, + { + "epoch": 7.8, + "learning_rate": 1.1152815013404825e-05, + "loss": 0.0019, + "step": 2948 + }, + { + "epoch": 7.8, + "learning_rate": 1.113941018766756e-05, + "loss": 0.0005, + "step": 2949 + }, + { + "epoch": 7.8, + "learning_rate": 1.1126005361930296e-05, + "loss": 0.0181, + "step": 2950 + }, + { + "epoch": 7.81, + "learning_rate": 1.1112600536193029e-05, + "loss": 0.001, + "step": 2951 + }, + { + "epoch": 7.81, + "learning_rate": 1.1099195710455764e-05, + "loss": 0.0331, + "step": 2952 + }, + { + "epoch": 7.81, + "learning_rate": 1.10857908847185e-05, + "loss": 0.0003, + "step": 2953 + }, + { + "epoch": 7.81, + "learning_rate": 1.1072386058981234e-05, + "loss": 0.0002, + "step": 2954 + }, + { + "epoch": 7.82, + "learning_rate": 1.105898123324397e-05, + "loss": 0.0305, + "step": 2955 + }, + { + "epoch": 7.82, + "learning_rate": 1.1045576407506703e-05, + "loss": 0.0023, + "step": 2956 + }, + { + "epoch": 7.82, + "learning_rate": 1.1032171581769438e-05, + "loss": 0.0359, + "step": 2957 + }, + { + "epoch": 7.83, + "learning_rate": 1.1018766756032173e-05, + "loss": 0.1075, + "step": 2958 + }, + { + "epoch": 7.83, + "learning_rate": 1.1005361930294907e-05, + "loss": 0.023, + "step": 2959 + }, + { + "epoch": 7.83, + "learning_rate": 1.0991957104557642e-05, + "loss": 0.1425, + "step": 2960 + }, + { + "epoch": 7.83, + "learning_rate": 1.0978552278820375e-05, + "loss": 0.4114, + "step": 2961 + }, + { + "epoch": 7.84, + "learning_rate": 1.096514745308311e-05, + "loss": 0.0003, + "step": 2962 + }, + { + "epoch": 7.84, + "learning_rate": 1.0951742627345846e-05, + "loss": 0.2824, + "step": 2963 + }, + { + "epoch": 7.84, + "learning_rate": 1.0938337801608579e-05, + "loss": 0.0002, + "step": 2964 + }, + { + "epoch": 7.84, + "learning_rate": 1.0924932975871314e-05, + "loss": 0.0004, + "step": 2965 + }, + { + "epoch": 7.85, + "learning_rate": 1.0911528150134048e-05, + "loss": 0.0002, + "step": 2966 + }, + { + "epoch": 7.85, + "learning_rate": 1.0898123324396783e-05, + "loss": 0.0003, + "step": 2967 + }, + { + "epoch": 7.85, + "learning_rate": 1.0884718498659518e-05, + "loss": 0.0003, + "step": 2968 + }, + { + "epoch": 7.85, + "learning_rate": 1.0871313672922253e-05, + "loss": 0.2122, + "step": 2969 + }, + { + "epoch": 7.86, + "learning_rate": 1.0857908847184988e-05, + "loss": 0.0002, + "step": 2970 + }, + { + "epoch": 7.86, + "learning_rate": 1.0844504021447721e-05, + "loss": 0.0003, + "step": 2971 + }, + { + "epoch": 7.86, + "learning_rate": 1.0831099195710457e-05, + "loss": 0.0002, + "step": 2972 + }, + { + "epoch": 7.87, + "learning_rate": 1.0817694369973192e-05, + "loss": 0.0002, + "step": 2973 + }, + { + "epoch": 7.87, + "learning_rate": 1.0804289544235925e-05, + "loss": 0.001, + "step": 2974 + }, + { + "epoch": 7.87, + "learning_rate": 1.079088471849866e-05, + "loss": 0.0002, + "step": 2975 + }, + { + "epoch": 7.87, + "learning_rate": 1.0777479892761394e-05, + "loss": 0.0004, + "step": 2976 + }, + { + "epoch": 7.88, + "learning_rate": 1.0764075067024129e-05, + "loss": 0.0003, + "step": 2977 + }, + { + "epoch": 7.88, + "learning_rate": 1.0750670241286864e-05, + "loss": 0.0003, + "step": 2978 + }, + { + "epoch": 7.88, + "learning_rate": 1.0737265415549597e-05, + "loss": 0.336, + "step": 2979 + }, + { + "epoch": 7.88, + "learning_rate": 1.0723860589812333e-05, + "loss": 0.0003, + "step": 2980 + }, + { + "epoch": 7.89, + "learning_rate": 1.0710455764075068e-05, + "loss": 0.0017, + "step": 2981 + }, + { + "epoch": 7.89, + "learning_rate": 1.0697050938337803e-05, + "loss": 0.1716, + "step": 2982 + }, + { + "epoch": 7.89, + "learning_rate": 1.0683646112600538e-05, + "loss": 0.0004, + "step": 2983 + }, + { + "epoch": 7.89, + "learning_rate": 1.0670241286863271e-05, + "loss": 0.0003, + "step": 2984 + }, + { + "epoch": 7.9, + "learning_rate": 1.0656836461126007e-05, + "loss": 0.1927, + "step": 2985 + }, + { + "epoch": 7.9, + "learning_rate": 1.064343163538874e-05, + "loss": 0.0003, + "step": 2986 + }, + { + "epoch": 7.9, + "learning_rate": 1.0630026809651475e-05, + "loss": 0.0002, + "step": 2987 + }, + { + "epoch": 7.9, + "learning_rate": 1.061662198391421e-05, + "loss": 0.2357, + "step": 2988 + }, + { + "epoch": 7.91, + "learning_rate": 1.0603217158176944e-05, + "loss": 0.464, + "step": 2989 + }, + { + "epoch": 7.91, + "learning_rate": 1.0589812332439679e-05, + "loss": 0.0015, + "step": 2990 + }, + { + "epoch": 7.91, + "learning_rate": 1.0576407506702414e-05, + "loss": 0.0792, + "step": 2991 + }, + { + "epoch": 7.92, + "learning_rate": 1.0563002680965147e-05, + "loss": 0.101, + "step": 2992 + }, + { + "epoch": 7.92, + "learning_rate": 1.0549597855227882e-05, + "loss": 0.0093, + "step": 2993 + }, + { + "epoch": 7.92, + "learning_rate": 1.0536193029490616e-05, + "loss": 0.0007, + "step": 2994 + }, + { + "epoch": 7.92, + "learning_rate": 1.0522788203753351e-05, + "loss": 0.0016, + "step": 2995 + }, + { + "epoch": 7.93, + "learning_rate": 1.0509383378016086e-05, + "loss": 0.0008, + "step": 2996 + }, + { + "epoch": 7.93, + "learning_rate": 1.0495978552278821e-05, + "loss": 0.0047, + "step": 2997 + }, + { + "epoch": 7.93, + "learning_rate": 1.0482573726541556e-05, + "loss": 0.0171, + "step": 2998 + }, + { + "epoch": 7.93, + "learning_rate": 1.046916890080429e-05, + "loss": 0.3023, + "step": 2999 + }, + { + "epoch": 7.94, + "learning_rate": 1.0455764075067025e-05, + "loss": 0.0011, + "step": 3000 + }, + { + "epoch": 7.94, + "learning_rate": 1.044235924932976e-05, + "loss": 0.0816, + "step": 3001 + }, + { + "epoch": 7.94, + "learning_rate": 1.0428954423592494e-05, + "loss": 0.0025, + "step": 3002 + }, + { + "epoch": 7.94, + "learning_rate": 1.0415549597855229e-05, + "loss": 0.0094, + "step": 3003 + }, + { + "epoch": 7.95, + "learning_rate": 1.0402144772117962e-05, + "loss": 0.0644, + "step": 3004 + }, + { + "epoch": 7.95, + "learning_rate": 1.0388739946380697e-05, + "loss": 0.3261, + "step": 3005 + }, + { + "epoch": 7.95, + "learning_rate": 1.0375335120643432e-05, + "loss": 0.1332, + "step": 3006 + }, + { + "epoch": 7.96, + "learning_rate": 1.0361930294906166e-05, + "loss": 0.0067, + "step": 3007 + }, + { + "epoch": 7.96, + "learning_rate": 1.0348525469168901e-05, + "loss": 0.0008, + "step": 3008 + }, + { + "epoch": 7.96, + "learning_rate": 1.0335120643431636e-05, + "loss": 0.174, + "step": 3009 + }, + { + "epoch": 7.96, + "learning_rate": 1.0321715817694371e-05, + "loss": 0.0005, + "step": 3010 + }, + { + "epoch": 7.97, + "learning_rate": 1.0308310991957106e-05, + "loss": 0.0505, + "step": 3011 + }, + { + "epoch": 7.97, + "learning_rate": 1.029490616621984e-05, + "loss": 0.0016, + "step": 3012 + }, + { + "epoch": 7.97, + "learning_rate": 1.0281501340482575e-05, + "loss": 0.1172, + "step": 3013 + }, + { + "epoch": 7.97, + "learning_rate": 1.0268096514745308e-05, + "loss": 0.0268, + "step": 3014 + }, + { + "epoch": 7.98, + "learning_rate": 1.0254691689008044e-05, + "loss": 0.0269, + "step": 3015 + }, + { + "epoch": 7.98, + "learning_rate": 1.0241286863270779e-05, + "loss": 0.0867, + "step": 3016 + }, + { + "epoch": 7.98, + "learning_rate": 1.0227882037533512e-05, + "loss": 0.1145, + "step": 3017 + }, + { + "epoch": 7.98, + "learning_rate": 1.0214477211796247e-05, + "loss": 0.0035, + "step": 3018 + }, + { + "epoch": 7.99, + "learning_rate": 1.0201072386058982e-05, + "loss": 0.0035, + "step": 3019 + }, + { + "epoch": 7.99, + "learning_rate": 1.0187667560321716e-05, + "loss": 0.0003, + "step": 3020 + }, + { + "epoch": 7.99, + "learning_rate": 1.0174262734584451e-05, + "loss": 0.14, + "step": 3021 + }, + { + "epoch": 7.99, + "learning_rate": 1.0160857908847184e-05, + "loss": 0.1619, + "step": 3022 + }, + { + "epoch": 8.0, + "learning_rate": 1.014745308310992e-05, + "loss": 0.0006, + "step": 3023 + }, + { + "epoch": 8.0, + "learning_rate": 1.0134048257372655e-05, + "loss": 0.0004, + "step": 3024 + }, + { + "epoch": 8.0, + "eval_f1": 0.7734138972809668, + "eval_loss": 1.2510614395141602, + "eval_runtime": 1.9043, + "eval_samples_per_second": 794.53, + "eval_steps_per_second": 49.888, + "step": 3024 + }, + { + "epoch": 8.0, + "learning_rate": 1.012064343163539e-05, + "loss": 0.0008, + "step": 3025 + }, + { + "epoch": 8.01, + "learning_rate": 1.0107238605898125e-05, + "loss": 0.0308, + "step": 3026 + }, + { + "epoch": 8.01, + "learning_rate": 1.0093833780160858e-05, + "loss": 0.1509, + "step": 3027 + }, + { + "epoch": 8.01, + "learning_rate": 1.0080428954423593e-05, + "loss": 0.0256, + "step": 3028 + }, + { + "epoch": 8.01, + "learning_rate": 1.0067024128686329e-05, + "loss": 0.0013, + "step": 3029 + }, + { + "epoch": 8.02, + "learning_rate": 1.0053619302949062e-05, + "loss": 0.0002, + "step": 3030 + }, + { + "epoch": 8.02, + "learning_rate": 1.0040214477211797e-05, + "loss": 0.0615, + "step": 3031 + }, + { + "epoch": 8.02, + "learning_rate": 1.002680965147453e-05, + "loss": 0.072, + "step": 3032 + }, + { + "epoch": 8.02, + "learning_rate": 1.0013404825737266e-05, + "loss": 0.0311, + "step": 3033 + }, + { + "epoch": 8.03, + "learning_rate": 1e-05, + "loss": 0.0115, + "step": 3034 + }, + { + "epoch": 8.03, + "learning_rate": 9.986595174262734e-06, + "loss": 0.0016, + "step": 3035 + }, + { + "epoch": 8.03, + "learning_rate": 9.97319034852547e-06, + "loss": 0.0006, + "step": 3036 + }, + { + "epoch": 8.03, + "learning_rate": 9.959785522788203e-06, + "loss": 0.0009, + "step": 3037 + }, + { + "epoch": 8.04, + "learning_rate": 9.946380697050938e-06, + "loss": 0.0002, + "step": 3038 + }, + { + "epoch": 8.04, + "learning_rate": 9.932975871313673e-06, + "loss": 0.0312, + "step": 3039 + }, + { + "epoch": 8.04, + "learning_rate": 9.919571045576408e-06, + "loss": 0.0007, + "step": 3040 + }, + { + "epoch": 8.04, + "learning_rate": 9.906166219839143e-06, + "loss": 0.0168, + "step": 3041 + }, + { + "epoch": 8.05, + "learning_rate": 9.892761394101877e-06, + "loss": 0.1056, + "step": 3042 + }, + { + "epoch": 8.05, + "learning_rate": 9.879356568364612e-06, + "loss": 0.0005, + "step": 3043 + }, + { + "epoch": 8.05, + "learning_rate": 9.865951742627347e-06, + "loss": 0.0003, + "step": 3044 + }, + { + "epoch": 8.06, + "learning_rate": 9.85254691689008e-06, + "loss": 0.0407, + "step": 3045 + }, + { + "epoch": 8.06, + "learning_rate": 9.839142091152816e-06, + "loss": 0.0013, + "step": 3046 + }, + { + "epoch": 8.06, + "learning_rate": 9.825737265415549e-06, + "loss": 0.033, + "step": 3047 + }, + { + "epoch": 8.06, + "learning_rate": 9.812332439678284e-06, + "loss": 0.0007, + "step": 3048 + }, + { + "epoch": 8.07, + "learning_rate": 9.79892761394102e-06, + "loss": 0.0356, + "step": 3049 + }, + { + "epoch": 8.07, + "learning_rate": 9.785522788203753e-06, + "loss": 0.0062, + "step": 3050 + }, + { + "epoch": 8.07, + "learning_rate": 9.772117962466488e-06, + "loss": 0.0005, + "step": 3051 + }, + { + "epoch": 8.07, + "learning_rate": 9.758713136729223e-06, + "loss": 0.0133, + "step": 3052 + }, + { + "epoch": 8.08, + "learning_rate": 9.745308310991958e-06, + "loss": 0.0022, + "step": 3053 + }, + { + "epoch": 8.08, + "learning_rate": 9.731903485254693e-06, + "loss": 0.0063, + "step": 3054 + }, + { + "epoch": 8.08, + "learning_rate": 9.718498659517427e-06, + "loss": 0.3304, + "step": 3055 + }, + { + "epoch": 8.08, + "learning_rate": 9.705093833780162e-06, + "loss": 0.0004, + "step": 3056 + }, + { + "epoch": 8.09, + "learning_rate": 9.691689008042897e-06, + "loss": 0.0335, + "step": 3057 + }, + { + "epoch": 8.09, + "learning_rate": 9.67828418230563e-06, + "loss": 0.1251, + "step": 3058 + }, + { + "epoch": 8.09, + "learning_rate": 9.664879356568366e-06, + "loss": 0.0011, + "step": 3059 + }, + { + "epoch": 8.1, + "learning_rate": 9.651474530831099e-06, + "loss": 0.0003, + "step": 3060 + }, + { + "epoch": 8.1, + "learning_rate": 9.638069705093834e-06, + "loss": 0.2906, + "step": 3061 + }, + { + "epoch": 8.1, + "learning_rate": 9.62466487935657e-06, + "loss": 0.0009, + "step": 3062 + }, + { + "epoch": 8.1, + "learning_rate": 9.611260053619303e-06, + "loss": 0.0005, + "step": 3063 + }, + { + "epoch": 8.11, + "learning_rate": 9.597855227882038e-06, + "loss": 0.0107, + "step": 3064 + }, + { + "epoch": 8.11, + "learning_rate": 9.584450402144771e-06, + "loss": 0.0594, + "step": 3065 + }, + { + "epoch": 8.11, + "learning_rate": 9.571045576407506e-06, + "loss": 0.0927, + "step": 3066 + }, + { + "epoch": 8.11, + "learning_rate": 9.557640750670241e-06, + "loss": 0.1164, + "step": 3067 + }, + { + "epoch": 8.12, + "learning_rate": 9.544235924932977e-06, + "loss": 0.0002, + "step": 3068 + }, + { + "epoch": 8.12, + "learning_rate": 9.530831099195712e-06, + "loss": 0.0004, + "step": 3069 + }, + { + "epoch": 8.12, + "learning_rate": 9.517426273458445e-06, + "loss": 0.0004, + "step": 3070 + }, + { + "epoch": 8.12, + "learning_rate": 9.50402144772118e-06, + "loss": 0.0128, + "step": 3071 + }, + { + "epoch": 8.13, + "learning_rate": 9.490616621983915e-06, + "loss": 0.0004, + "step": 3072 + }, + { + "epoch": 8.13, + "learning_rate": 9.477211796246649e-06, + "loss": 0.0003, + "step": 3073 + }, + { + "epoch": 8.13, + "learning_rate": 9.463806970509384e-06, + "loss": 0.0311, + "step": 3074 + }, + { + "epoch": 8.13, + "learning_rate": 9.450402144772117e-06, + "loss": 0.0204, + "step": 3075 + }, + { + "epoch": 8.14, + "learning_rate": 9.436997319034853e-06, + "loss": 0.0026, + "step": 3076 + }, + { + "epoch": 8.14, + "learning_rate": 9.423592493297588e-06, + "loss": 0.0008, + "step": 3077 + }, + { + "epoch": 8.14, + "learning_rate": 9.410187667560321e-06, + "loss": 0.1434, + "step": 3078 + }, + { + "epoch": 8.15, + "learning_rate": 9.396782841823056e-06, + "loss": 0.0005, + "step": 3079 + }, + { + "epoch": 8.15, + "learning_rate": 9.383378016085791e-06, + "loss": 0.0003, + "step": 3080 + }, + { + "epoch": 8.15, + "learning_rate": 9.369973190348527e-06, + "loss": 0.0002, + "step": 3081 + }, + { + "epoch": 8.15, + "learning_rate": 9.356568364611262e-06, + "loss": 0.0003, + "step": 3082 + }, + { + "epoch": 8.16, + "learning_rate": 9.343163538873995e-06, + "loss": 0.0476, + "step": 3083 + }, + { + "epoch": 8.16, + "learning_rate": 9.32975871313673e-06, + "loss": 0.0002, + "step": 3084 + }, + { + "epoch": 8.16, + "learning_rate": 9.316353887399465e-06, + "loss": 0.0004, + "step": 3085 + }, + { + "epoch": 8.16, + "learning_rate": 9.302949061662199e-06, + "loss": 0.0004, + "step": 3086 + }, + { + "epoch": 8.17, + "learning_rate": 9.289544235924934e-06, + "loss": 0.195, + "step": 3087 + }, + { + "epoch": 8.17, + "learning_rate": 9.276139410187667e-06, + "loss": 0.0258, + "step": 3088 + }, + { + "epoch": 8.17, + "learning_rate": 9.262734584450403e-06, + "loss": 0.0003, + "step": 3089 + }, + { + "epoch": 8.17, + "learning_rate": 9.249329758713138e-06, + "loss": 0.0582, + "step": 3090 + }, + { + "epoch": 8.18, + "learning_rate": 9.235924932975871e-06, + "loss": 0.0192, + "step": 3091 + }, + { + "epoch": 8.18, + "learning_rate": 9.222520107238606e-06, + "loss": 0.2512, + "step": 3092 + }, + { + "epoch": 8.18, + "learning_rate": 9.20911528150134e-06, + "loss": 0.0361, + "step": 3093 + }, + { + "epoch": 8.19, + "learning_rate": 9.195710455764075e-06, + "loss": 0.0003, + "step": 3094 + }, + { + "epoch": 8.19, + "learning_rate": 9.18230563002681e-06, + "loss": 0.0004, + "step": 3095 + }, + { + "epoch": 8.19, + "learning_rate": 9.168900804289545e-06, + "loss": 0.0002, + "step": 3096 + }, + { + "epoch": 8.19, + "learning_rate": 9.15549597855228e-06, + "loss": 0.0005, + "step": 3097 + }, + { + "epoch": 8.2, + "learning_rate": 9.142091152815014e-06, + "loss": 0.0427, + "step": 3098 + }, + { + "epoch": 8.2, + "learning_rate": 9.128686327077749e-06, + "loss": 0.0295, + "step": 3099 + }, + { + "epoch": 8.2, + "learning_rate": 9.115281501340484e-06, + "loss": 0.0144, + "step": 3100 + }, + { + "epoch": 8.2, + "learning_rate": 9.101876675603217e-06, + "loss": 0.001, + "step": 3101 + }, + { + "epoch": 8.21, + "learning_rate": 9.088471849865952e-06, + "loss": 0.0004, + "step": 3102 + }, + { + "epoch": 8.21, + "learning_rate": 9.075067024128686e-06, + "loss": 0.0005, + "step": 3103 + }, + { + "epoch": 8.21, + "learning_rate": 9.061662198391421e-06, + "loss": 0.1232, + "step": 3104 + }, + { + "epoch": 8.21, + "learning_rate": 9.048257372654156e-06, + "loss": 0.0004, + "step": 3105 + }, + { + "epoch": 8.22, + "learning_rate": 9.03485254691689e-06, + "loss": 0.004, + "step": 3106 + }, + { + "epoch": 8.22, + "learning_rate": 9.021447721179625e-06, + "loss": 0.0002, + "step": 3107 + }, + { + "epoch": 8.22, + "learning_rate": 9.00804289544236e-06, + "loss": 0.0067, + "step": 3108 + }, + { + "epoch": 8.22, + "learning_rate": 8.994638069705095e-06, + "loss": 0.0003, + "step": 3109 + }, + { + "epoch": 8.23, + "learning_rate": 8.98123324396783e-06, + "loss": 0.0344, + "step": 3110 + }, + { + "epoch": 8.23, + "learning_rate": 8.967828418230564e-06, + "loss": 0.0005, + "step": 3111 + }, + { + "epoch": 8.23, + "learning_rate": 8.954423592493299e-06, + "loss": 0.0029, + "step": 3112 + }, + { + "epoch": 8.24, + "learning_rate": 8.941018766756034e-06, + "loss": 0.0002, + "step": 3113 + }, + { + "epoch": 8.24, + "learning_rate": 8.927613941018767e-06, + "loss": 0.0003, + "step": 3114 + }, + { + "epoch": 8.24, + "learning_rate": 8.914209115281502e-06, + "loss": 0.0002, + "step": 3115 + }, + { + "epoch": 8.24, + "learning_rate": 8.900804289544236e-06, + "loss": 0.0197, + "step": 3116 + }, + { + "epoch": 8.25, + "learning_rate": 8.887399463806971e-06, + "loss": 0.0002, + "step": 3117 + }, + { + "epoch": 8.25, + "learning_rate": 8.873994638069706e-06, + "loss": 0.0003, + "step": 3118 + }, + { + "epoch": 8.25, + "learning_rate": 8.86058981233244e-06, + "loss": 0.097, + "step": 3119 + }, + { + "epoch": 8.25, + "learning_rate": 8.847184986595175e-06, + "loss": 0.0014, + "step": 3120 + }, + { + "epoch": 8.26, + "learning_rate": 8.833780160857908e-06, + "loss": 0.0004, + "step": 3121 + }, + { + "epoch": 8.26, + "learning_rate": 8.820375335120643e-06, + "loss": 0.0005, + "step": 3122 + }, + { + "epoch": 8.26, + "learning_rate": 8.806970509383378e-06, + "loss": 0.0814, + "step": 3123 + }, + { + "epoch": 8.26, + "learning_rate": 8.793565683646113e-06, + "loss": 0.0024, + "step": 3124 + }, + { + "epoch": 8.27, + "learning_rate": 8.780160857908849e-06, + "loss": 0.0003, + "step": 3125 + }, + { + "epoch": 8.27, + "learning_rate": 8.766756032171582e-06, + "loss": 0.0001, + "step": 3126 + }, + { + "epoch": 8.27, + "learning_rate": 8.753351206434317e-06, + "loss": 0.0003, + "step": 3127 + }, + { + "epoch": 8.28, + "learning_rate": 8.739946380697052e-06, + "loss": 0.3459, + "step": 3128 + }, + { + "epoch": 8.28, + "learning_rate": 8.726541554959786e-06, + "loss": 0.0639, + "step": 3129 + }, + { + "epoch": 8.28, + "learning_rate": 8.71313672922252e-06, + "loss": 0.001, + "step": 3130 + }, + { + "epoch": 8.28, + "learning_rate": 8.699731903485254e-06, + "loss": 0.0073, + "step": 3131 + }, + { + "epoch": 8.29, + "learning_rate": 8.68632707774799e-06, + "loss": 0.0002, + "step": 3132 + }, + { + "epoch": 8.29, + "learning_rate": 8.672922252010725e-06, + "loss": 0.0008, + "step": 3133 + }, + { + "epoch": 8.29, + "learning_rate": 8.659517426273458e-06, + "loss": 0.0001, + "step": 3134 + }, + { + "epoch": 8.29, + "learning_rate": 8.646112600536193e-06, + "loss": 0.0002, + "step": 3135 + }, + { + "epoch": 8.3, + "learning_rate": 8.632707774798928e-06, + "loss": 0.0028, + "step": 3136 + }, + { + "epoch": 8.3, + "learning_rate": 8.619302949061663e-06, + "loss": 0.02, + "step": 3137 + }, + { + "epoch": 8.3, + "learning_rate": 8.605898123324398e-06, + "loss": 0.046, + "step": 3138 + }, + { + "epoch": 8.3, + "learning_rate": 8.592493297587132e-06, + "loss": 0.0002, + "step": 3139 + }, + { + "epoch": 8.31, + "learning_rate": 8.579088471849867e-06, + "loss": 0.0002, + "step": 3140 + }, + { + "epoch": 8.31, + "learning_rate": 8.5656836461126e-06, + "loss": 0.0695, + "step": 3141 + }, + { + "epoch": 8.31, + "learning_rate": 8.552278820375336e-06, + "loss": 0.1764, + "step": 3142 + }, + { + "epoch": 8.31, + "learning_rate": 8.53887399463807e-06, + "loss": 0.0002, + "step": 3143 + }, + { + "epoch": 8.32, + "learning_rate": 8.525469168900804e-06, + "loss": 0.0004, + "step": 3144 + }, + { + "epoch": 8.32, + "learning_rate": 8.51206434316354e-06, + "loss": 0.0207, + "step": 3145 + }, + { + "epoch": 8.32, + "learning_rate": 8.498659517426274e-06, + "loss": 0.0003, + "step": 3146 + }, + { + "epoch": 8.33, + "learning_rate": 8.485254691689008e-06, + "loss": 0.1444, + "step": 3147 + }, + { + "epoch": 8.33, + "learning_rate": 8.471849865951743e-06, + "loss": 0.0006, + "step": 3148 + }, + { + "epoch": 8.33, + "learning_rate": 8.458445040214476e-06, + "loss": 0.0002, + "step": 3149 + }, + { + "epoch": 8.33, + "learning_rate": 8.445040214477212e-06, + "loss": 0.0003, + "step": 3150 + }, + { + "epoch": 8.34, + "learning_rate": 8.431635388739947e-06, + "loss": 0.0033, + "step": 3151 + }, + { + "epoch": 8.34, + "learning_rate": 8.418230563002682e-06, + "loss": 0.0001, + "step": 3152 + }, + { + "epoch": 8.34, + "learning_rate": 8.404825737265417e-06, + "loss": 0.0002, + "step": 3153 + }, + { + "epoch": 8.34, + "learning_rate": 8.39142091152815e-06, + "loss": 0.0003, + "step": 3154 + }, + { + "epoch": 8.35, + "learning_rate": 8.378016085790886e-06, + "loss": 0.0003, + "step": 3155 + }, + { + "epoch": 8.35, + "learning_rate": 8.36461126005362e-06, + "loss": 0.0002, + "step": 3156 + }, + { + "epoch": 8.35, + "learning_rate": 8.351206434316354e-06, + "loss": 0.0003, + "step": 3157 + }, + { + "epoch": 8.35, + "learning_rate": 8.33780160857909e-06, + "loss": 0.0022, + "step": 3158 + }, + { + "epoch": 8.36, + "learning_rate": 8.324396782841823e-06, + "loss": 0.0094, + "step": 3159 + }, + { + "epoch": 8.36, + "learning_rate": 8.310991957104558e-06, + "loss": 0.039, + "step": 3160 + }, + { + "epoch": 8.36, + "learning_rate": 8.297587131367293e-06, + "loss": 0.0623, + "step": 3161 + }, + { + "epoch": 8.37, + "learning_rate": 8.284182305630026e-06, + "loss": 0.0269, + "step": 3162 + }, + { + "epoch": 8.37, + "learning_rate": 8.270777479892762e-06, + "loss": 0.2292, + "step": 3163 + }, + { + "epoch": 8.37, + "learning_rate": 8.257372654155495e-06, + "loss": 0.0814, + "step": 3164 + }, + { + "epoch": 8.37, + "learning_rate": 8.24396782841823e-06, + "loss": 0.0002, + "step": 3165 + }, + { + "epoch": 8.38, + "learning_rate": 8.230563002680965e-06, + "loss": 0.0006, + "step": 3166 + }, + { + "epoch": 8.38, + "learning_rate": 8.2171581769437e-06, + "loss": 0.0405, + "step": 3167 + }, + { + "epoch": 8.38, + "learning_rate": 8.203753351206435e-06, + "loss": 0.3745, + "step": 3168 + }, + { + "epoch": 8.38, + "learning_rate": 8.190348525469169e-06, + "loss": 0.0002, + "step": 3169 + }, + { + "epoch": 8.39, + "learning_rate": 8.176943699731904e-06, + "loss": 0.0007, + "step": 3170 + }, + { + "epoch": 8.39, + "learning_rate": 8.16353887399464e-06, + "loss": 0.0054, + "step": 3171 + }, + { + "epoch": 8.39, + "learning_rate": 8.150134048257373e-06, + "loss": 0.0027, + "step": 3172 + }, + { + "epoch": 8.39, + "learning_rate": 8.136729222520108e-06, + "loss": 0.0823, + "step": 3173 + }, + { + "epoch": 8.4, + "learning_rate": 8.123324396782843e-06, + "loss": 0.2821, + "step": 3174 + }, + { + "epoch": 8.4, + "learning_rate": 8.109919571045576e-06, + "loss": 0.2796, + "step": 3175 + }, + { + "epoch": 8.4, + "learning_rate": 8.096514745308311e-06, + "loss": 0.0004, + "step": 3176 + }, + { + "epoch": 8.4, + "learning_rate": 8.083109919571045e-06, + "loss": 0.0019, + "step": 3177 + }, + { + "epoch": 8.41, + "learning_rate": 8.06970509383378e-06, + "loss": 0.0003, + "step": 3178 + }, + { + "epoch": 8.41, + "learning_rate": 8.056300268096515e-06, + "loss": 0.0331, + "step": 3179 + }, + { + "epoch": 8.41, + "learning_rate": 8.04289544235925e-06, + "loss": 0.0002, + "step": 3180 + }, + { + "epoch": 8.42, + "learning_rate": 8.029490616621985e-06, + "loss": 0.002, + "step": 3181 + }, + { + "epoch": 8.42, + "learning_rate": 8.016085790884719e-06, + "loss": 0.0055, + "step": 3182 + }, + { + "epoch": 8.42, + "learning_rate": 8.002680965147454e-06, + "loss": 0.0002, + "step": 3183 + }, + { + "epoch": 8.42, + "learning_rate": 7.989276139410189e-06, + "loss": 0.0252, + "step": 3184 + }, + { + "epoch": 8.43, + "learning_rate": 7.975871313672923e-06, + "loss": 0.0005, + "step": 3185 + }, + { + "epoch": 8.43, + "learning_rate": 7.962466487935658e-06, + "loss": 0.0309, + "step": 3186 + }, + { + "epoch": 8.43, + "learning_rate": 7.949061662198391e-06, + "loss": 0.4315, + "step": 3187 + }, + { + "epoch": 8.43, + "learning_rate": 7.935656836461126e-06, + "loss": 0.0018, + "step": 3188 + }, + { + "epoch": 8.44, + "learning_rate": 7.922252010723861e-06, + "loss": 0.0973, + "step": 3189 + }, + { + "epoch": 8.44, + "learning_rate": 7.908847184986595e-06, + "loss": 0.0002, + "step": 3190 + }, + { + "epoch": 8.44, + "learning_rate": 7.89544235924933e-06, + "loss": 0.0123, + "step": 3191 + }, + { + "epoch": 8.44, + "learning_rate": 7.882037533512063e-06, + "loss": 0.0005, + "step": 3192 + }, + { + "epoch": 8.45, + "learning_rate": 7.868632707774798e-06, + "loss": 0.0002, + "step": 3193 + }, + { + "epoch": 8.45, + "learning_rate": 7.855227882037534e-06, + "loss": 0.002, + "step": 3194 + }, + { + "epoch": 8.45, + "learning_rate": 7.841823056300269e-06, + "loss": 0.0002, + "step": 3195 + }, + { + "epoch": 8.46, + "learning_rate": 7.828418230563004e-06, + "loss": 0.0006, + "step": 3196 + }, + { + "epoch": 8.46, + "learning_rate": 7.815013404825737e-06, + "loss": 0.0669, + "step": 3197 + }, + { + "epoch": 8.46, + "learning_rate": 7.801608579088472e-06, + "loss": 0.0002, + "step": 3198 + }, + { + "epoch": 8.46, + "learning_rate": 7.788203753351208e-06, + "loss": 0.0052, + "step": 3199 + }, + { + "epoch": 8.47, + "learning_rate": 7.774798927613941e-06, + "loss": 0.1126, + "step": 3200 + }, + { + "epoch": 8.47, + "learning_rate": 7.761394101876676e-06, + "loss": 0.0362, + "step": 3201 + }, + { + "epoch": 8.47, + "learning_rate": 7.74798927613941e-06, + "loss": 0.0002, + "step": 3202 + }, + { + "epoch": 8.47, + "learning_rate": 7.734584450402145e-06, + "loss": 0.1147, + "step": 3203 + }, + { + "epoch": 8.48, + "learning_rate": 7.72117962466488e-06, + "loss": 0.0002, + "step": 3204 + }, + { + "epoch": 8.48, + "learning_rate": 7.707774798927613e-06, + "loss": 0.323, + "step": 3205 + }, + { + "epoch": 8.48, + "learning_rate": 7.694369973190348e-06, + "loss": 0.0019, + "step": 3206 + }, + { + "epoch": 8.48, + "learning_rate": 7.680965147453084e-06, + "loss": 0.0002, + "step": 3207 + }, + { + "epoch": 8.49, + "learning_rate": 7.667560321715819e-06, + "loss": 0.0003, + "step": 3208 + }, + { + "epoch": 8.49, + "learning_rate": 7.654155495978554e-06, + "loss": 0.0313, + "step": 3209 + }, + { + "epoch": 8.49, + "learning_rate": 7.640750670241287e-06, + "loss": 0.0002, + "step": 3210 + }, + { + "epoch": 8.49, + "learning_rate": 7.6273458445040215e-06, + "loss": 0.0011, + "step": 3211 + }, + { + "epoch": 8.5, + "learning_rate": 7.613941018766757e-06, + "loss": 0.0007, + "step": 3212 + }, + { + "epoch": 8.5, + "learning_rate": 7.600536193029491e-06, + "loss": 0.0003, + "step": 3213 + }, + { + "epoch": 8.5, + "learning_rate": 7.587131367292226e-06, + "loss": 0.0002, + "step": 3214 + }, + { + "epoch": 8.51, + "learning_rate": 7.5737265415549595e-06, + "loss": 0.0002, + "step": 3215 + }, + { + "epoch": 8.51, + "learning_rate": 7.560321715817695e-06, + "loss": 0.0002, + "step": 3216 + }, + { + "epoch": 8.51, + "learning_rate": 7.54691689008043e-06, + "loss": 0.029, + "step": 3217 + }, + { + "epoch": 8.51, + "learning_rate": 7.533512064343164e-06, + "loss": 0.0009, + "step": 3218 + }, + { + "epoch": 8.52, + "learning_rate": 7.520107238605899e-06, + "loss": 0.3479, + "step": 3219 + }, + { + "epoch": 8.52, + "learning_rate": 7.506702412868633e-06, + "loss": 0.0002, + "step": 3220 + }, + { + "epoch": 8.52, + "learning_rate": 7.493297587131368e-06, + "loss": 0.0013, + "step": 3221 + }, + { + "epoch": 8.52, + "learning_rate": 7.479892761394103e-06, + "loss": 0.0263, + "step": 3222 + }, + { + "epoch": 8.53, + "learning_rate": 7.466487935656836e-06, + "loss": 0.0676, + "step": 3223 + }, + { + "epoch": 8.53, + "learning_rate": 7.4530831099195715e-06, + "loss": 0.0002, + "step": 3224 + }, + { + "epoch": 8.53, + "learning_rate": 7.439678284182306e-06, + "loss": 0.0002, + "step": 3225 + }, + { + "epoch": 8.53, + "learning_rate": 7.426273458445041e-06, + "loss": 0.0007, + "step": 3226 + }, + { + "epoch": 8.54, + "learning_rate": 7.412868632707776e-06, + "loss": 0.1148, + "step": 3227 + }, + { + "epoch": 8.54, + "learning_rate": 7.3994638069705094e-06, + "loss": 0.0002, + "step": 3228 + }, + { + "epoch": 8.54, + "learning_rate": 7.3860589812332446e-06, + "loss": 0.0929, + "step": 3229 + }, + { + "epoch": 8.54, + "learning_rate": 7.372654155495978e-06, + "loss": 0.0002, + "step": 3230 + }, + { + "epoch": 8.55, + "learning_rate": 7.359249329758713e-06, + "loss": 0.0282, + "step": 3231 + }, + { + "epoch": 8.55, + "learning_rate": 7.345844504021448e-06, + "loss": 0.1421, + "step": 3232 + }, + { + "epoch": 8.55, + "learning_rate": 7.3324396782841825e-06, + "loss": 0.0002, + "step": 3233 + }, + { + "epoch": 8.56, + "learning_rate": 7.319034852546918e-06, + "loss": 0.0175, + "step": 3234 + }, + { + "epoch": 8.56, + "learning_rate": 7.305630026809651e-06, + "loss": 0.0763, + "step": 3235 + }, + { + "epoch": 8.56, + "learning_rate": 7.292225201072386e-06, + "loss": 0.0003, + "step": 3236 + }, + { + "epoch": 8.56, + "learning_rate": 7.278820375335121e-06, + "loss": 0.0522, + "step": 3237 + }, + { + "epoch": 8.57, + "learning_rate": 7.265415549597855e-06, + "loss": 0.0264, + "step": 3238 + }, + { + "epoch": 8.57, + "learning_rate": 7.25201072386059e-06, + "loss": 0.0007, + "step": 3239 + }, + { + "epoch": 8.57, + "learning_rate": 7.238605898123325e-06, + "loss": 0.0002, + "step": 3240 + }, + { + "epoch": 8.57, + "learning_rate": 7.225201072386059e-06, + "loss": 0.0433, + "step": 3241 + }, + { + "epoch": 8.58, + "learning_rate": 7.2117962466487945e-06, + "loss": 0.0897, + "step": 3242 + }, + { + "epoch": 8.58, + "learning_rate": 7.198391420911528e-06, + "loss": 0.0601, + "step": 3243 + }, + { + "epoch": 8.58, + "learning_rate": 7.184986595174263e-06, + "loss": 0.0084, + "step": 3244 + }, + { + "epoch": 8.58, + "learning_rate": 7.171581769436998e-06, + "loss": 0.0224, + "step": 3245 + }, + { + "epoch": 8.59, + "learning_rate": 7.158176943699732e-06, + "loss": 0.0022, + "step": 3246 + }, + { + "epoch": 8.59, + "learning_rate": 7.144772117962467e-06, + "loss": 0.0019, + "step": 3247 + }, + { + "epoch": 8.59, + "learning_rate": 7.131367292225201e-06, + "loss": 0.0003, + "step": 3248 + }, + { + "epoch": 8.6, + "learning_rate": 7.117962466487936e-06, + "loss": 0.0002, + "step": 3249 + }, + { + "epoch": 8.6, + "learning_rate": 7.104557640750671e-06, + "loss": 0.0003, + "step": 3250 + }, + { + "epoch": 8.6, + "learning_rate": 7.091152815013405e-06, + "loss": 0.1219, + "step": 3251 + }, + { + "epoch": 8.6, + "learning_rate": 7.07774798927614e-06, + "loss": 0.0246, + "step": 3252 + }, + { + "epoch": 8.61, + "learning_rate": 7.064343163538874e-06, + "loss": 0.0005, + "step": 3253 + }, + { + "epoch": 8.61, + "learning_rate": 7.050938337801609e-06, + "loss": 0.0004, + "step": 3254 + }, + { + "epoch": 8.61, + "learning_rate": 7.037533512064344e-06, + "loss": 0.0002, + "step": 3255 + }, + { + "epoch": 8.61, + "learning_rate": 7.024128686327078e-06, + "loss": 0.0722, + "step": 3256 + }, + { + "epoch": 8.62, + "learning_rate": 7.010723860589813e-06, + "loss": 0.0006, + "step": 3257 + }, + { + "epoch": 8.62, + "learning_rate": 6.997319034852546e-06, + "loss": 0.0775, + "step": 3258 + }, + { + "epoch": 8.62, + "learning_rate": 6.9839142091152815e-06, + "loss": 0.1329, + "step": 3259 + }, + { + "epoch": 8.62, + "learning_rate": 6.970509383378017e-06, + "loss": 0.0318, + "step": 3260 + }, + { + "epoch": 8.63, + "learning_rate": 6.957104557640751e-06, + "loss": 0.0026, + "step": 3261 + }, + { + "epoch": 8.63, + "learning_rate": 6.943699731903486e-06, + "loss": 0.0119, + "step": 3262 + }, + { + "epoch": 8.63, + "learning_rate": 6.9302949061662195e-06, + "loss": 0.0194, + "step": 3263 + }, + { + "epoch": 8.63, + "learning_rate": 6.916890080428955e-06, + "loss": 0.1102, + "step": 3264 + }, + { + "epoch": 8.64, + "learning_rate": 6.90348525469169e-06, + "loss": 0.0002, + "step": 3265 + }, + { + "epoch": 8.64, + "learning_rate": 6.890080428954423e-06, + "loss": 0.0322, + "step": 3266 + }, + { + "epoch": 8.64, + "learning_rate": 6.876675603217158e-06, + "loss": 0.0083, + "step": 3267 + }, + { + "epoch": 8.65, + "learning_rate": 6.8632707774798935e-06, + "loss": 0.0947, + "step": 3268 + }, + { + "epoch": 8.65, + "learning_rate": 6.849865951742628e-06, + "loss": 0.0002, + "step": 3269 + }, + { + "epoch": 8.65, + "learning_rate": 6.836461126005363e-06, + "loss": 0.0039, + "step": 3270 + }, + { + "epoch": 8.65, + "learning_rate": 6.823056300268096e-06, + "loss": 0.106, + "step": 3271 + }, + { + "epoch": 8.66, + "learning_rate": 6.8096514745308315e-06, + "loss": 0.0107, + "step": 3272 + }, + { + "epoch": 8.66, + "learning_rate": 6.796246648793567e-06, + "loss": 0.0005, + "step": 3273 + }, + { + "epoch": 8.66, + "learning_rate": 6.7828418230563e-06, + "loss": 0.0066, + "step": 3274 + }, + { + "epoch": 8.66, + "learning_rate": 6.769436997319035e-06, + "loss": 0.0003, + "step": 3275 + }, + { + "epoch": 8.67, + "learning_rate": 6.7560321715817694e-06, + "loss": 0.0002, + "step": 3276 + }, + { + "epoch": 8.67, + "learning_rate": 6.742627345844505e-06, + "loss": 0.0008, + "step": 3277 + }, + { + "epoch": 8.67, + "learning_rate": 6.72922252010724e-06, + "loss": 0.0002, + "step": 3278 + }, + { + "epoch": 8.67, + "learning_rate": 6.715817694369973e-06, + "loss": 0.0002, + "step": 3279 + }, + { + "epoch": 8.68, + "learning_rate": 6.702412868632708e-06, + "loss": 0.0472, + "step": 3280 + }, + { + "epoch": 8.68, + "learning_rate": 6.6890080428954426e-06, + "loss": 0.0003, + "step": 3281 + }, + { + "epoch": 8.68, + "learning_rate": 6.675603217158178e-06, + "loss": 0.196, + "step": 3282 + }, + { + "epoch": 8.69, + "learning_rate": 6.662198391420913e-06, + "loss": 0.0351, + "step": 3283 + }, + { + "epoch": 8.69, + "learning_rate": 6.648793565683646e-06, + "loss": 0.0002, + "step": 3284 + }, + { + "epoch": 8.69, + "learning_rate": 6.635388739946381e-06, + "loss": 0.1151, + "step": 3285 + }, + { + "epoch": 8.69, + "learning_rate": 6.621983914209115e-06, + "loss": 0.0057, + "step": 3286 + }, + { + "epoch": 8.7, + "learning_rate": 6.60857908847185e-06, + "loss": 0.0002, + "step": 3287 + }, + { + "epoch": 8.7, + "learning_rate": 6.595174262734585e-06, + "loss": 0.0002, + "step": 3288 + }, + { + "epoch": 8.7, + "learning_rate": 6.581769436997319e-06, + "loss": 0.0774, + "step": 3289 + }, + { + "epoch": 8.7, + "learning_rate": 6.5683646112600545e-06, + "loss": 0.0004, + "step": 3290 + }, + { + "epoch": 8.71, + "learning_rate": 6.554959785522788e-06, + "loss": 0.4666, + "step": 3291 + }, + { + "epoch": 8.71, + "learning_rate": 6.541554959785523e-06, + "loss": 0.0003, + "step": 3292 + }, + { + "epoch": 8.71, + "learning_rate": 6.528150134048258e-06, + "loss": 0.0002, + "step": 3293 + }, + { + "epoch": 8.71, + "learning_rate": 6.514745308310992e-06, + "loss": 0.003, + "step": 3294 + }, + { + "epoch": 8.72, + "learning_rate": 6.501340482573727e-06, + "loss": 0.0009, + "step": 3295 + }, + { + "epoch": 8.72, + "learning_rate": 6.487935656836461e-06, + "loss": 0.0255, + "step": 3296 + }, + { + "epoch": 8.72, + "learning_rate": 6.474530831099196e-06, + "loss": 0.026, + "step": 3297 + }, + { + "epoch": 8.72, + "learning_rate": 6.461126005361931e-06, + "loss": 0.0011, + "step": 3298 + }, + { + "epoch": 8.73, + "learning_rate": 6.447721179624665e-06, + "loss": 0.0003, + "step": 3299 + }, + { + "epoch": 8.73, + "learning_rate": 6.4343163538874e-06, + "loss": 0.0001, + "step": 3300 + }, + { + "epoch": 8.73, + "learning_rate": 6.420911528150135e-06, + "loss": 0.0002, + "step": 3301 + }, + { + "epoch": 8.74, + "learning_rate": 6.4075067024128684e-06, + "loss": 0.0374, + "step": 3302 + }, + { + "epoch": 8.74, + "learning_rate": 6.3941018766756036e-06, + "loss": 0.0003, + "step": 3303 + }, + { + "epoch": 8.74, + "learning_rate": 6.380697050938338e-06, + "loss": 0.0003, + "step": 3304 + }, + { + "epoch": 8.74, + "learning_rate": 6.367292225201073e-06, + "loss": 0.0003, + "step": 3305 + }, + { + "epoch": 8.75, + "learning_rate": 6.353887399463808e-06, + "loss": 0.0002, + "step": 3306 + }, + { + "epoch": 8.75, + "learning_rate": 6.3404825737265416e-06, + "loss": 0.0002, + "step": 3307 + }, + { + "epoch": 8.75, + "learning_rate": 6.327077747989277e-06, + "loss": 0.0003, + "step": 3308 + }, + { + "epoch": 8.75, + "learning_rate": 6.31367292225201e-06, + "loss": 0.0002, + "step": 3309 + }, + { + "epoch": 8.76, + "learning_rate": 6.300268096514745e-06, + "loss": 0.0005, + "step": 3310 + }, + { + "epoch": 8.76, + "learning_rate": 6.28686327077748e-06, + "loss": 0.0003, + "step": 3311 + }, + { + "epoch": 8.76, + "learning_rate": 6.273458445040215e-06, + "loss": 0.0002, + "step": 3312 + }, + { + "epoch": 8.76, + "learning_rate": 6.26005361930295e-06, + "loss": 0.0848, + "step": 3313 + }, + { + "epoch": 8.77, + "learning_rate": 6.246648793565684e-06, + "loss": 0.0002, + "step": 3314 + }, + { + "epoch": 8.77, + "learning_rate": 6.233243967828418e-06, + "loss": 0.021, + "step": 3315 + }, + { + "epoch": 8.77, + "learning_rate": 6.219839142091153e-06, + "loss": 0.2761, + "step": 3316 + }, + { + "epoch": 8.78, + "learning_rate": 6.206434316353888e-06, + "loss": 0.0002, + "step": 3317 + }, + { + "epoch": 8.78, + "learning_rate": 6.193029490616623e-06, + "loss": 0.0309, + "step": 3318 + }, + { + "epoch": 8.78, + "learning_rate": 6.179624664879357e-06, + "loss": 0.0004, + "step": 3319 + }, + { + "epoch": 8.78, + "learning_rate": 6.1662198391420915e-06, + "loss": 0.0003, + "step": 3320 + }, + { + "epoch": 8.79, + "learning_rate": 6.152815013404826e-06, + "loss": 0.0059, + "step": 3321 + }, + { + "epoch": 8.79, + "learning_rate": 6.139410187667561e-06, + "loss": 0.0525, + "step": 3322 + }, + { + "epoch": 8.79, + "learning_rate": 6.126005361930295e-06, + "loss": 0.0002, + "step": 3323 + }, + { + "epoch": 8.79, + "learning_rate": 6.1126005361930295e-06, + "loss": 0.0002, + "step": 3324 + }, + { + "epoch": 8.8, + "learning_rate": 6.099195710455765e-06, + "loss": 0.0003, + "step": 3325 + }, + { + "epoch": 8.8, + "learning_rate": 6.085790884718499e-06, + "loss": 0.0026, + "step": 3326 + }, + { + "epoch": 8.8, + "learning_rate": 6.072386058981234e-06, + "loss": 0.0003, + "step": 3327 + }, + { + "epoch": 8.8, + "learning_rate": 6.058981233243968e-06, + "loss": 0.0137, + "step": 3328 + }, + { + "epoch": 8.81, + "learning_rate": 6.0455764075067026e-06, + "loss": 0.0003, + "step": 3329 + }, + { + "epoch": 8.81, + "learning_rate": 6.032171581769437e-06, + "loss": 0.0002, + "step": 3330 + }, + { + "epoch": 8.81, + "learning_rate": 6.018766756032172e-06, + "loss": 0.0003, + "step": 3331 + }, + { + "epoch": 8.81, + "learning_rate": 6.005361930294907e-06, + "loss": 0.0003, + "step": 3332 + }, + { + "epoch": 8.82, + "learning_rate": 5.991957104557641e-06, + "loss": 0.5064, + "step": 3333 + }, + { + "epoch": 8.82, + "learning_rate": 5.978552278820376e-06, + "loss": 0.0003, + "step": 3334 + }, + { + "epoch": 8.82, + "learning_rate": 5.96514745308311e-06, + "loss": 0.0064, + "step": 3335 + }, + { + "epoch": 8.83, + "learning_rate": 5.951742627345844e-06, + "loss": 0.0001, + "step": 3336 + }, + { + "epoch": 8.83, + "learning_rate": 5.938337801608579e-06, + "loss": 0.0003, + "step": 3337 + }, + { + "epoch": 8.83, + "learning_rate": 5.924932975871314e-06, + "loss": 0.0002, + "step": 3338 + }, + { + "epoch": 8.83, + "learning_rate": 5.911528150134049e-06, + "loss": 0.0004, + "step": 3339 + }, + { + "epoch": 8.84, + "learning_rate": 5.898123324396783e-06, + "loss": 0.0005, + "step": 3340 + }, + { + "epoch": 8.84, + "learning_rate": 5.884718498659518e-06, + "loss": 0.1297, + "step": 3341 + }, + { + "epoch": 8.84, + "learning_rate": 5.8713136729222525e-06, + "loss": 0.0324, + "step": 3342 + }, + { + "epoch": 8.84, + "learning_rate": 5.857908847184987e-06, + "loss": 0.0208, + "step": 3343 + }, + { + "epoch": 8.85, + "learning_rate": 5.844504021447721e-06, + "loss": 0.065, + "step": 3344 + }, + { + "epoch": 8.85, + "learning_rate": 5.831099195710455e-06, + "loss": 0.0323, + "step": 3345 + }, + { + "epoch": 8.85, + "learning_rate": 5.8176943699731905e-06, + "loss": 0.1872, + "step": 3346 + }, + { + "epoch": 8.85, + "learning_rate": 5.804289544235926e-06, + "loss": 0.0251, + "step": 3347 + }, + { + "epoch": 8.86, + "learning_rate": 5.79088471849866e-06, + "loss": 0.0002, + "step": 3348 + }, + { + "epoch": 8.86, + "learning_rate": 5.777479892761394e-06, + "loss": 0.1384, + "step": 3349 + }, + { + "epoch": 8.86, + "learning_rate": 5.7640750670241285e-06, + "loss": 0.0006, + "step": 3350 + }, + { + "epoch": 8.87, + "learning_rate": 5.750670241286864e-06, + "loss": 0.0004, + "step": 3351 + }, + { + "epoch": 8.87, + "learning_rate": 5.737265415549598e-06, + "loss": 0.0121, + "step": 3352 + }, + { + "epoch": 8.87, + "learning_rate": 5.723860589812333e-06, + "loss": 0.0007, + "step": 3353 + }, + { + "epoch": 8.87, + "learning_rate": 5.710455764075067e-06, + "loss": 0.0003, + "step": 3354 + }, + { + "epoch": 8.88, + "learning_rate": 5.697050938337802e-06, + "loss": 0.0003, + "step": 3355 + }, + { + "epoch": 8.88, + "learning_rate": 5.683646112600537e-06, + "loss": 0.009, + "step": 3356 + }, + { + "epoch": 8.88, + "learning_rate": 5.670241286863271e-06, + "loss": 0.0004, + "step": 3357 + }, + { + "epoch": 8.88, + "learning_rate": 5.656836461126005e-06, + "loss": 0.0009, + "step": 3358 + }, + { + "epoch": 8.89, + "learning_rate": 5.6434316353887395e-06, + "loss": 0.0005, + "step": 3359 + }, + { + "epoch": 8.89, + "learning_rate": 5.630026809651475e-06, + "loss": 0.0002, + "step": 3360 + }, + { + "epoch": 8.89, + "learning_rate": 5.61662198391421e-06, + "loss": 0.0319, + "step": 3361 + }, + { + "epoch": 8.89, + "learning_rate": 5.603217158176944e-06, + "loss": 0.0955, + "step": 3362 + }, + { + "epoch": 8.9, + "learning_rate": 5.589812332439678e-06, + "loss": 0.0706, + "step": 3363 + }, + { + "epoch": 8.9, + "learning_rate": 5.576407506702413e-06, + "loss": 0.0072, + "step": 3364 + }, + { + "epoch": 8.9, + "learning_rate": 5.563002680965148e-06, + "loss": 0.0002, + "step": 3365 + }, + { + "epoch": 8.9, + "learning_rate": 5.549597855227882e-06, + "loss": 0.0002, + "step": 3366 + }, + { + "epoch": 8.91, + "learning_rate": 5.536193029490617e-06, + "loss": 0.0018, + "step": 3367 + }, + { + "epoch": 8.91, + "learning_rate": 5.5227882037533515e-06, + "loss": 0.0002, + "step": 3368 + }, + { + "epoch": 8.91, + "learning_rate": 5.509383378016087e-06, + "loss": 0.0002, + "step": 3369 + }, + { + "epoch": 8.92, + "learning_rate": 5.495978552278821e-06, + "loss": 0.0001, + "step": 3370 + }, + { + "epoch": 8.92, + "learning_rate": 5.482573726541555e-06, + "loss": 0.0414, + "step": 3371 + }, + { + "epoch": 8.92, + "learning_rate": 5.4691689008042895e-06, + "loss": 0.0107, + "step": 3372 + }, + { + "epoch": 8.92, + "learning_rate": 5.455764075067024e-06, + "loss": 0.0252, + "step": 3373 + }, + { + "epoch": 8.93, + "learning_rate": 5.442359249329759e-06, + "loss": 0.0918, + "step": 3374 + }, + { + "epoch": 8.93, + "learning_rate": 5.428954423592494e-06, + "loss": 0.0016, + "step": 3375 + }, + { + "epoch": 8.93, + "learning_rate": 5.415549597855228e-06, + "loss": 0.0001, + "step": 3376 + }, + { + "epoch": 8.93, + "learning_rate": 5.402144772117963e-06, + "loss": 0.0002, + "step": 3377 + }, + { + "epoch": 8.94, + "learning_rate": 5.388739946380697e-06, + "loss": 0.0003, + "step": 3378 + }, + { + "epoch": 8.94, + "learning_rate": 5.375335120643432e-06, + "loss": 0.0002, + "step": 3379 + }, + { + "epoch": 8.94, + "learning_rate": 5.361930294906166e-06, + "loss": 0.0722, + "step": 3380 + }, + { + "epoch": 8.94, + "learning_rate": 5.348525469168901e-06, + "loss": 0.0002, + "step": 3381 + }, + { + "epoch": 8.95, + "learning_rate": 5.335120643431636e-06, + "loss": 0.0002, + "step": 3382 + }, + { + "epoch": 8.95, + "learning_rate": 5.32171581769437e-06, + "loss": 0.0001, + "step": 3383 + }, + { + "epoch": 8.95, + "learning_rate": 5.308310991957105e-06, + "loss": 0.0002, + "step": 3384 + }, + { + "epoch": 8.96, + "learning_rate": 5.294906166219839e-06, + "loss": 0.1, + "step": 3385 + }, + { + "epoch": 8.96, + "learning_rate": 5.281501340482574e-06, + "loss": 0.1151, + "step": 3386 + }, + { + "epoch": 8.96, + "learning_rate": 5.268096514745308e-06, + "loss": 0.0003, + "step": 3387 + }, + { + "epoch": 8.96, + "learning_rate": 5.254691689008043e-06, + "loss": 0.0001, + "step": 3388 + }, + { + "epoch": 8.97, + "learning_rate": 5.241286863270778e-06, + "loss": 0.0003, + "step": 3389 + }, + { + "epoch": 8.97, + "learning_rate": 5.2278820375335125e-06, + "loss": 0.0002, + "step": 3390 + }, + { + "epoch": 8.97, + "learning_rate": 5.214477211796247e-06, + "loss": 0.0896, + "step": 3391 + }, + { + "epoch": 8.97, + "learning_rate": 5.201072386058981e-06, + "loss": 0.0002, + "step": 3392 + }, + { + "epoch": 8.98, + "learning_rate": 5.187667560321716e-06, + "loss": 0.0003, + "step": 3393 + }, + { + "epoch": 8.98, + "learning_rate": 5.1742627345844505e-06, + "loss": 0.0003, + "step": 3394 + }, + { + "epoch": 8.98, + "learning_rate": 5.160857908847186e-06, + "loss": 0.0008, + "step": 3395 + }, + { + "epoch": 8.98, + "learning_rate": 5.14745308310992e-06, + "loss": 0.4041, + "step": 3396 + }, + { + "epoch": 8.99, + "learning_rate": 5.134048257372654e-06, + "loss": 0.0253, + "step": 3397 + }, + { + "epoch": 8.99, + "learning_rate": 5.120643431635389e-06, + "loss": 0.0355, + "step": 3398 + }, + { + "epoch": 8.99, + "learning_rate": 5.107238605898124e-06, + "loss": 0.0771, + "step": 3399 + }, + { + "epoch": 8.99, + "learning_rate": 5.093833780160858e-06, + "loss": 0.2133, + "step": 3400 + }, + { + "epoch": 9.0, + "learning_rate": 5.080428954423592e-06, + "loss": 0.0002, + "step": 3401 + }, + { + "epoch": 9.0, + "learning_rate": 5.067024128686327e-06, + "loss": 0.0022, + "step": 3402 + }, + { + "epoch": 9.0, + "eval_f1": 0.774885145482389, + "eval_loss": 1.3973581790924072, + "eval_runtime": 1.8849, + "eval_samples_per_second": 802.683, + "eval_steps_per_second": 50.4, + "step": 3402 + }, + { + "epoch": 9.0, + "learning_rate": 5.0536193029490624e-06, + "loss": 0.0002, + "step": 3403 + }, + { + "epoch": 9.01, + "learning_rate": 5.040214477211797e-06, + "loss": 0.1601, + "step": 3404 + }, + { + "epoch": 9.01, + "learning_rate": 5.026809651474531e-06, + "loss": 0.0013, + "step": 3405 + }, + { + "epoch": 9.01, + "learning_rate": 5.013404825737265e-06, + "loss": 0.0142, + "step": 3406 + }, + { + "epoch": 9.01, + "learning_rate": 5e-06, + "loss": 0.0003, + "step": 3407 + }, + { + "epoch": 9.02, + "learning_rate": 4.986595174262735e-06, + "loss": 0.0005, + "step": 3408 + }, + { + "epoch": 9.02, + "learning_rate": 4.973190348525469e-06, + "loss": 0.0002, + "step": 3409 + }, + { + "epoch": 9.02, + "learning_rate": 4.959785522788204e-06, + "loss": 0.0002, + "step": 3410 + }, + { + "epoch": 9.02, + "learning_rate": 4.946380697050938e-06, + "loss": 0.0004, + "step": 3411 + }, + { + "epoch": 9.03, + "learning_rate": 4.9329758713136735e-06, + "loss": 0.0002, + "step": 3412 + }, + { + "epoch": 9.03, + "learning_rate": 4.919571045576408e-06, + "loss": 0.0053, + "step": 3413 + }, + { + "epoch": 9.03, + "learning_rate": 4.906166219839142e-06, + "loss": 0.0005, + "step": 3414 + }, + { + "epoch": 9.03, + "learning_rate": 4.892761394101876e-06, + "loss": 0.0001, + "step": 3415 + }, + { + "epoch": 9.04, + "learning_rate": 4.8793565683646115e-06, + "loss": 0.0003, + "step": 3416 + }, + { + "epoch": 9.04, + "learning_rate": 4.865951742627347e-06, + "loss": 0.0002, + "step": 3417 + }, + { + "epoch": 9.04, + "learning_rate": 4.852546916890081e-06, + "loss": 0.0002, + "step": 3418 + }, + { + "epoch": 9.04, + "learning_rate": 4.839142091152815e-06, + "loss": 0.0004, + "step": 3419 + }, + { + "epoch": 9.05, + "learning_rate": 4.8257372654155495e-06, + "loss": 0.0002, + "step": 3420 + }, + { + "epoch": 9.05, + "learning_rate": 4.812332439678285e-06, + "loss": 0.0036, + "step": 3421 + }, + { + "epoch": 9.05, + "learning_rate": 4.798927613941019e-06, + "loss": 0.0002, + "step": 3422 + }, + { + "epoch": 9.06, + "learning_rate": 4.785522788203753e-06, + "loss": 0.0002, + "step": 3423 + }, + { + "epoch": 9.06, + "learning_rate": 4.772117962466488e-06, + "loss": 0.0002, + "step": 3424 + }, + { + "epoch": 9.06, + "learning_rate": 4.758713136729223e-06, + "loss": 0.0002, + "step": 3425 + }, + { + "epoch": 9.06, + "learning_rate": 4.745308310991958e-06, + "loss": 0.0002, + "step": 3426 + }, + { + "epoch": 9.07, + "learning_rate": 4.731903485254692e-06, + "loss": 0.0001, + "step": 3427 + }, + { + "epoch": 9.07, + "learning_rate": 4.718498659517426e-06, + "loss": 0.0004, + "step": 3428 + }, + { + "epoch": 9.07, + "learning_rate": 4.705093833780161e-06, + "loss": 0.0002, + "step": 3429 + }, + { + "epoch": 9.07, + "learning_rate": 4.691689008042896e-06, + "loss": 0.0002, + "step": 3430 + }, + { + "epoch": 9.08, + "learning_rate": 4.678284182305631e-06, + "loss": 0.0002, + "step": 3431 + }, + { + "epoch": 9.08, + "learning_rate": 4.664879356568365e-06, + "loss": 0.0568, + "step": 3432 + }, + { + "epoch": 9.08, + "learning_rate": 4.651474530831099e-06, + "loss": 0.0002, + "step": 3433 + }, + { + "epoch": 9.08, + "learning_rate": 4.638069705093834e-06, + "loss": 0.0002, + "step": 3434 + }, + { + "epoch": 9.09, + "learning_rate": 4.624664879356569e-06, + "loss": 0.0002, + "step": 3435 + }, + { + "epoch": 9.09, + "learning_rate": 4.611260053619303e-06, + "loss": 0.0019, + "step": 3436 + }, + { + "epoch": 9.09, + "learning_rate": 4.597855227882037e-06, + "loss": 0.0002, + "step": 3437 + }, + { + "epoch": 9.1, + "learning_rate": 4.5844504021447725e-06, + "loss": 0.0001, + "step": 3438 + }, + { + "epoch": 9.1, + "learning_rate": 4.571045576407507e-06, + "loss": 0.0002, + "step": 3439 + }, + { + "epoch": 9.1, + "learning_rate": 4.557640750670242e-06, + "loss": 0.03, + "step": 3440 + }, + { + "epoch": 9.1, + "learning_rate": 4.544235924932976e-06, + "loss": 0.042, + "step": 3441 + }, + { + "epoch": 9.11, + "learning_rate": 4.5308310991957105e-06, + "loss": 0.2888, + "step": 3442 + }, + { + "epoch": 9.11, + "learning_rate": 4.517426273458445e-06, + "loss": 0.0561, + "step": 3443 + }, + { + "epoch": 9.11, + "learning_rate": 4.50402144772118e-06, + "loss": 0.0227, + "step": 3444 + }, + { + "epoch": 9.11, + "learning_rate": 4.490616621983915e-06, + "loss": 0.0004, + "step": 3445 + }, + { + "epoch": 9.12, + "learning_rate": 4.477211796246649e-06, + "loss": 0.0833, + "step": 3446 + }, + { + "epoch": 9.12, + "learning_rate": 4.463806970509384e-06, + "loss": 0.0002, + "step": 3447 + }, + { + "epoch": 9.12, + "learning_rate": 4.450402144772118e-06, + "loss": 0.0268, + "step": 3448 + }, + { + "epoch": 9.12, + "learning_rate": 4.436997319034853e-06, + "loss": 0.0002, + "step": 3449 + }, + { + "epoch": 9.13, + "learning_rate": 4.423592493297587e-06, + "loss": 0.0246, + "step": 3450 + }, + { + "epoch": 9.13, + "learning_rate": 4.410187667560322e-06, + "loss": 0.0002, + "step": 3451 + }, + { + "epoch": 9.13, + "learning_rate": 4.396782841823057e-06, + "loss": 0.0002, + "step": 3452 + }, + { + "epoch": 9.13, + "learning_rate": 4.383378016085791e-06, + "loss": 0.002, + "step": 3453 + }, + { + "epoch": 9.14, + "learning_rate": 4.369973190348526e-06, + "loss": 0.0165, + "step": 3454 + }, + { + "epoch": 9.14, + "learning_rate": 4.35656836461126e-06, + "loss": 0.0398, + "step": 3455 + }, + { + "epoch": 9.14, + "learning_rate": 4.343163538873995e-06, + "loss": 0.023, + "step": 3456 + }, + { + "epoch": 9.15, + "learning_rate": 4.329758713136729e-06, + "loss": 0.0002, + "step": 3457 + }, + { + "epoch": 9.15, + "learning_rate": 4.316353887399464e-06, + "loss": 0.0024, + "step": 3458 + }, + { + "epoch": 9.15, + "learning_rate": 4.302949061662199e-06, + "loss": 0.0002, + "step": 3459 + }, + { + "epoch": 9.15, + "learning_rate": 4.2895442359249335e-06, + "loss": 0.0012, + "step": 3460 + }, + { + "epoch": 9.16, + "learning_rate": 4.276139410187668e-06, + "loss": 0.0016, + "step": 3461 + }, + { + "epoch": 9.16, + "learning_rate": 4.262734584450402e-06, + "loss": 0.0237, + "step": 3462 + }, + { + "epoch": 9.16, + "learning_rate": 4.249329758713137e-06, + "loss": 0.0002, + "step": 3463 + }, + { + "epoch": 9.16, + "learning_rate": 4.2359249329758715e-06, + "loss": 0.071, + "step": 3464 + }, + { + "epoch": 9.17, + "learning_rate": 4.222520107238606e-06, + "loss": 0.0006, + "step": 3465 + }, + { + "epoch": 9.17, + "learning_rate": 4.209115281501341e-06, + "loss": 0.0001, + "step": 3466 + }, + { + "epoch": 9.17, + "learning_rate": 4.195710455764075e-06, + "loss": 0.0002, + "step": 3467 + }, + { + "epoch": 9.17, + "learning_rate": 4.18230563002681e-06, + "loss": 0.0329, + "step": 3468 + }, + { + "epoch": 9.18, + "learning_rate": 4.168900804289545e-06, + "loss": 0.0796, + "step": 3469 + }, + { + "epoch": 9.18, + "learning_rate": 4.155495978552279e-06, + "loss": 0.0002, + "step": 3470 + }, + { + "epoch": 9.18, + "learning_rate": 4.142091152815013e-06, + "loss": 0.0724, + "step": 3471 + }, + { + "epoch": 9.19, + "learning_rate": 4.1286863270777475e-06, + "loss": 0.0002, + "step": 3472 + }, + { + "epoch": 9.19, + "learning_rate": 4.115281501340483e-06, + "loss": 0.0107, + "step": 3473 + }, + { + "epoch": 9.19, + "learning_rate": 4.101876675603218e-06, + "loss": 0.0004, + "step": 3474 + }, + { + "epoch": 9.19, + "learning_rate": 4.088471849865952e-06, + "loss": 0.0019, + "step": 3475 + }, + { + "epoch": 9.2, + "learning_rate": 4.075067024128686e-06, + "loss": 0.0002, + "step": 3476 + }, + { + "epoch": 9.2, + "learning_rate": 4.0616621983914214e-06, + "loss": 0.0002, + "step": 3477 + }, + { + "epoch": 9.2, + "learning_rate": 4.048257372654156e-06, + "loss": 0.0002, + "step": 3478 + }, + { + "epoch": 9.2, + "learning_rate": 4.03485254691689e-06, + "loss": 0.0197, + "step": 3479 + }, + { + "epoch": 9.21, + "learning_rate": 4.021447721179625e-06, + "loss": 0.0002, + "step": 3480 + }, + { + "epoch": 9.21, + "learning_rate": 4.008042895442359e-06, + "loss": 0.032, + "step": 3481 + }, + { + "epoch": 9.21, + "learning_rate": 3.9946380697050945e-06, + "loss": 0.0248, + "step": 3482 + }, + { + "epoch": 9.21, + "learning_rate": 3.981233243967829e-06, + "loss": 0.0002, + "step": 3483 + }, + { + "epoch": 9.22, + "learning_rate": 3.967828418230563e-06, + "loss": 0.0014, + "step": 3484 + }, + { + "epoch": 9.22, + "learning_rate": 3.954423592493297e-06, + "loss": 0.0305, + "step": 3485 + }, + { + "epoch": 9.22, + "learning_rate": 3.941018766756032e-06, + "loss": 0.0003, + "step": 3486 + }, + { + "epoch": 9.22, + "learning_rate": 3.927613941018767e-06, + "loss": 0.0054, + "step": 3487 + }, + { + "epoch": 9.23, + "learning_rate": 3.914209115281502e-06, + "loss": 0.0002, + "step": 3488 + }, + { + "epoch": 9.23, + "learning_rate": 3.900804289544236e-06, + "loss": 0.067, + "step": 3489 + }, + { + "epoch": 9.23, + "learning_rate": 3.8873994638069705e-06, + "loss": 0.0093, + "step": 3490 + }, + { + "epoch": 9.24, + "learning_rate": 3.873994638069705e-06, + "loss": 0.0125, + "step": 3491 + }, + { + "epoch": 9.24, + "learning_rate": 3.86058981233244e-06, + "loss": 0.0002, + "step": 3492 + }, + { + "epoch": 9.24, + "learning_rate": 3.847184986595174e-06, + "loss": 0.0001, + "step": 3493 + }, + { + "epoch": 9.24, + "learning_rate": 3.833780160857909e-06, + "loss": 0.266, + "step": 3494 + }, + { + "epoch": 9.25, + "learning_rate": 3.820375335120644e-06, + "loss": 0.2064, + "step": 3495 + }, + { + "epoch": 9.25, + "learning_rate": 3.8069705093833783e-06, + "loss": 0.021, + "step": 3496 + }, + { + "epoch": 9.25, + "learning_rate": 3.793565683646113e-06, + "loss": 0.0719, + "step": 3497 + }, + { + "epoch": 9.25, + "learning_rate": 3.7801608579088473e-06, + "loss": 0.0002, + "step": 3498 + }, + { + "epoch": 9.26, + "learning_rate": 3.766756032171582e-06, + "loss": 0.0061, + "step": 3499 + }, + { + "epoch": 9.26, + "learning_rate": 3.7533512064343163e-06, + "loss": 0.0481, + "step": 3500 + }, + { + "epoch": 9.26, + "learning_rate": 3.7399463806970514e-06, + "loss": 0.0007, + "step": 3501 + }, + { + "epoch": 9.26, + "learning_rate": 3.7265415549597857e-06, + "loss": 0.0123, + "step": 3502 + }, + { + "epoch": 9.27, + "learning_rate": 3.7131367292225204e-06, + "loss": 0.0002, + "step": 3503 + }, + { + "epoch": 9.27, + "learning_rate": 3.6997319034852547e-06, + "loss": 0.024, + "step": 3504 + }, + { + "epoch": 9.27, + "learning_rate": 3.686327077747989e-06, + "loss": 0.0002, + "step": 3505 + }, + { + "epoch": 9.28, + "learning_rate": 3.672922252010724e-06, + "loss": 0.0003, + "step": 3506 + }, + { + "epoch": 9.28, + "learning_rate": 3.659517426273459e-06, + "loss": 0.0579, + "step": 3507 + }, + { + "epoch": 9.28, + "learning_rate": 3.646112600536193e-06, + "loss": 0.0084, + "step": 3508 + }, + { + "epoch": 9.28, + "learning_rate": 3.6327077747989274e-06, + "loss": 0.0002, + "step": 3509 + }, + { + "epoch": 9.29, + "learning_rate": 3.6193029490616625e-06, + "loss": 0.0405, + "step": 3510 + }, + { + "epoch": 9.29, + "learning_rate": 3.6058981233243972e-06, + "loss": 0.0002, + "step": 3511 + }, + { + "epoch": 9.29, + "learning_rate": 3.5924932975871315e-06, + "loss": 0.0006, + "step": 3512 + }, + { + "epoch": 9.29, + "learning_rate": 3.579088471849866e-06, + "loss": 0.0002, + "step": 3513 + }, + { + "epoch": 9.3, + "learning_rate": 3.5656836461126005e-06, + "loss": 0.0002, + "step": 3514 + }, + { + "epoch": 9.3, + "learning_rate": 3.5522788203753356e-06, + "loss": 0.1122, + "step": 3515 + }, + { + "epoch": 9.3, + "learning_rate": 3.53887399463807e-06, + "loss": 0.2226, + "step": 3516 + }, + { + "epoch": 9.3, + "learning_rate": 3.5254691689008046e-06, + "loss": 0.0002, + "step": 3517 + }, + { + "epoch": 9.31, + "learning_rate": 3.512064343163539e-06, + "loss": 0.0001, + "step": 3518 + }, + { + "epoch": 9.31, + "learning_rate": 3.498659517426273e-06, + "loss": 0.0126, + "step": 3519 + }, + { + "epoch": 9.31, + "learning_rate": 3.4852546916890083e-06, + "loss": 0.0552, + "step": 3520 + }, + { + "epoch": 9.31, + "learning_rate": 3.471849865951743e-06, + "loss": 0.0002, + "step": 3521 + }, + { + "epoch": 9.32, + "learning_rate": 3.4584450402144773e-06, + "loss": 0.0001, + "step": 3522 + }, + { + "epoch": 9.32, + "learning_rate": 3.4450402144772116e-06, + "loss": 0.0002, + "step": 3523 + }, + { + "epoch": 9.32, + "learning_rate": 3.4316353887399467e-06, + "loss": 0.0001, + "step": 3524 + }, + { + "epoch": 9.33, + "learning_rate": 3.4182305630026814e-06, + "loss": 0.0027, + "step": 3525 + }, + { + "epoch": 9.33, + "learning_rate": 3.4048257372654157e-06, + "loss": 0.0275, + "step": 3526 + }, + { + "epoch": 9.33, + "learning_rate": 3.39142091152815e-06, + "loss": 0.0005, + "step": 3527 + }, + { + "epoch": 9.33, + "learning_rate": 3.3780160857908847e-06, + "loss": 0.0002, + "step": 3528 + }, + { + "epoch": 9.34, + "learning_rate": 3.36461126005362e-06, + "loss": 0.1674, + "step": 3529 + }, + { + "epoch": 9.34, + "learning_rate": 3.351206434316354e-06, + "loss": 0.0002, + "step": 3530 + }, + { + "epoch": 9.34, + "learning_rate": 3.337801608579089e-06, + "loss": 0.0004, + "step": 3531 + }, + { + "epoch": 9.34, + "learning_rate": 3.324396782841823e-06, + "loss": 0.0002, + "step": 3532 + }, + { + "epoch": 9.35, + "learning_rate": 3.3109919571045574e-06, + "loss": 0.0002, + "step": 3533 + }, + { + "epoch": 9.35, + "learning_rate": 3.2975871313672925e-06, + "loss": 0.0003, + "step": 3534 + }, + { + "epoch": 9.35, + "learning_rate": 3.2841823056300272e-06, + "loss": 0.0002, + "step": 3535 + }, + { + "epoch": 9.35, + "learning_rate": 3.2707774798927615e-06, + "loss": 0.0259, + "step": 3536 + }, + { + "epoch": 9.36, + "learning_rate": 3.257372654155496e-06, + "loss": 0.0276, + "step": 3537 + }, + { + "epoch": 9.36, + "learning_rate": 3.2439678284182305e-06, + "loss": 0.0004, + "step": 3538 + }, + { + "epoch": 9.36, + "learning_rate": 3.2305630026809657e-06, + "loss": 0.0423, + "step": 3539 + }, + { + "epoch": 9.37, + "learning_rate": 3.2171581769437e-06, + "loss": 0.0002, + "step": 3540 + }, + { + "epoch": 9.37, + "learning_rate": 3.2037533512064342e-06, + "loss": 0.0002, + "step": 3541 + }, + { + "epoch": 9.37, + "learning_rate": 3.190348525469169e-06, + "loss": 0.0004, + "step": 3542 + }, + { + "epoch": 9.37, + "learning_rate": 3.176943699731904e-06, + "loss": 0.0002, + "step": 3543 + }, + { + "epoch": 9.38, + "learning_rate": 3.1635388739946383e-06, + "loss": 0.2091, + "step": 3544 + }, + { + "epoch": 9.38, + "learning_rate": 3.1501340482573726e-06, + "loss": 0.0052, + "step": 3545 + }, + { + "epoch": 9.38, + "learning_rate": 3.1367292225201073e-06, + "loss": 0.0004, + "step": 3546 + }, + { + "epoch": 9.38, + "learning_rate": 3.123324396782842e-06, + "loss": 0.0002, + "step": 3547 + }, + { + "epoch": 9.39, + "learning_rate": 3.1099195710455763e-06, + "loss": 0.0002, + "step": 3548 + }, + { + "epoch": 9.39, + "learning_rate": 3.0965147453083115e-06, + "loss": 0.0002, + "step": 3549 + }, + { + "epoch": 9.39, + "learning_rate": 3.0831099195710457e-06, + "loss": 0.0002, + "step": 3550 + }, + { + "epoch": 9.39, + "learning_rate": 3.0697050938337804e-06, + "loss": 0.0179, + "step": 3551 + }, + { + "epoch": 9.4, + "learning_rate": 3.0563002680965147e-06, + "loss": 0.0002, + "step": 3552 + }, + { + "epoch": 9.4, + "learning_rate": 3.0428954423592494e-06, + "loss": 0.0001, + "step": 3553 + }, + { + "epoch": 9.4, + "learning_rate": 3.029490616621984e-06, + "loss": 0.0004, + "step": 3554 + }, + { + "epoch": 9.4, + "learning_rate": 3.0160857908847184e-06, + "loss": 0.0838, + "step": 3555 + }, + { + "epoch": 9.41, + "learning_rate": 3.0026809651474536e-06, + "loss": 0.0001, + "step": 3556 + }, + { + "epoch": 9.41, + "learning_rate": 2.989276139410188e-06, + "loss": 0.0004, + "step": 3557 + }, + { + "epoch": 9.41, + "learning_rate": 2.975871313672922e-06, + "loss": 0.0005, + "step": 3558 + }, + { + "epoch": 9.42, + "learning_rate": 2.962466487935657e-06, + "loss": 0.145, + "step": 3559 + }, + { + "epoch": 9.42, + "learning_rate": 2.9490616621983915e-06, + "loss": 0.0045, + "step": 3560 + }, + { + "epoch": 9.42, + "learning_rate": 2.9356568364611262e-06, + "loss": 0.2014, + "step": 3561 + }, + { + "epoch": 9.42, + "learning_rate": 2.9222520107238605e-06, + "loss": 0.0005, + "step": 3562 + }, + { + "epoch": 9.43, + "learning_rate": 2.9088471849865952e-06, + "loss": 0.0002, + "step": 3563 + }, + { + "epoch": 9.43, + "learning_rate": 2.89544235924933e-06, + "loss": 0.2962, + "step": 3564 + }, + { + "epoch": 9.43, + "learning_rate": 2.8820375335120642e-06, + "loss": 0.0253, + "step": 3565 + }, + { + "epoch": 9.43, + "learning_rate": 2.868632707774799e-06, + "loss": 0.0002, + "step": 3566 + }, + { + "epoch": 9.44, + "learning_rate": 2.8552278820375336e-06, + "loss": 0.0001, + "step": 3567 + }, + { + "epoch": 9.44, + "learning_rate": 2.8418230563002683e-06, + "loss": 0.0556, + "step": 3568 + }, + { + "epoch": 9.44, + "learning_rate": 2.8284182305630026e-06, + "loss": 0.0018, + "step": 3569 + }, + { + "epoch": 9.44, + "learning_rate": 2.8150134048257373e-06, + "loss": 0.0002, + "step": 3570 + }, + { + "epoch": 9.45, + "learning_rate": 2.801608579088472e-06, + "loss": 0.0001, + "step": 3571 + }, + { + "epoch": 9.45, + "learning_rate": 2.7882037533512063e-06, + "loss": 0.0001, + "step": 3572 + }, + { + "epoch": 9.45, + "learning_rate": 2.774798927613941e-06, + "loss": 0.0007, + "step": 3573 + }, + { + "epoch": 9.46, + "learning_rate": 2.7613941018766757e-06, + "loss": 0.0001, + "step": 3574 + }, + { + "epoch": 9.46, + "learning_rate": 2.7479892761394105e-06, + "loss": 0.0009, + "step": 3575 + }, + { + "epoch": 9.46, + "learning_rate": 2.7345844504021447e-06, + "loss": 0.0072, + "step": 3576 + }, + { + "epoch": 9.46, + "learning_rate": 2.7211796246648794e-06, + "loss": 0.0002, + "step": 3577 + }, + { + "epoch": 9.47, + "learning_rate": 2.707774798927614e-06, + "loss": 0.001, + "step": 3578 + }, + { + "epoch": 9.47, + "learning_rate": 2.6943699731903484e-06, + "loss": 0.0014, + "step": 3579 + }, + { + "epoch": 9.47, + "learning_rate": 2.680965147453083e-06, + "loss": 0.0001, + "step": 3580 + }, + { + "epoch": 9.47, + "learning_rate": 2.667560321715818e-06, + "loss": 0.0001, + "step": 3581 + }, + { + "epoch": 9.48, + "learning_rate": 2.6541554959785526e-06, + "loss": 0.0494, + "step": 3582 + }, + { + "epoch": 9.48, + "learning_rate": 2.640750670241287e-06, + "loss": 0.0002, + "step": 3583 + }, + { + "epoch": 9.48, + "learning_rate": 2.6273458445040215e-06, + "loss": 0.0002, + "step": 3584 + }, + { + "epoch": 9.48, + "learning_rate": 2.6139410187667563e-06, + "loss": 0.0002, + "step": 3585 + }, + { + "epoch": 9.49, + "learning_rate": 2.6005361930294905e-06, + "loss": 0.0019, + "step": 3586 + }, + { + "epoch": 9.49, + "learning_rate": 2.5871313672922252e-06, + "loss": 0.0006, + "step": 3587 + }, + { + "epoch": 9.49, + "learning_rate": 2.57372654155496e-06, + "loss": 0.0001, + "step": 3588 + }, + { + "epoch": 9.49, + "learning_rate": 2.5603217158176947e-06, + "loss": 0.0002, + "step": 3589 + }, + { + "epoch": 9.5, + "learning_rate": 2.546916890080429e-06, + "loss": 0.0001, + "step": 3590 + }, + { + "epoch": 9.5, + "learning_rate": 2.5335120643431636e-06, + "loss": 0.0044, + "step": 3591 + }, + { + "epoch": 9.5, + "learning_rate": 2.5201072386058984e-06, + "loss": 0.3189, + "step": 3592 + }, + { + "epoch": 9.51, + "learning_rate": 2.5067024128686326e-06, + "loss": 0.0001, + "step": 3593 + }, + { + "epoch": 9.51, + "learning_rate": 2.4932975871313673e-06, + "loss": 0.3545, + "step": 3594 + }, + { + "epoch": 9.51, + "learning_rate": 2.479892761394102e-06, + "loss": 0.0002, + "step": 3595 + }, + { + "epoch": 9.51, + "learning_rate": 2.4664879356568368e-06, + "loss": 0.0732, + "step": 3596 + }, + { + "epoch": 9.52, + "learning_rate": 2.453083109919571e-06, + "loss": 0.0001, + "step": 3597 + }, + { + "epoch": 9.52, + "learning_rate": 2.4396782841823058e-06, + "loss": 0.0002, + "step": 3598 + }, + { + "epoch": 9.52, + "learning_rate": 2.4262734584450405e-06, + "loss": 0.0864, + "step": 3599 + }, + { + "epoch": 9.52, + "learning_rate": 2.4128686327077747e-06, + "loss": 0.0695, + "step": 3600 + }, + { + "epoch": 9.53, + "learning_rate": 2.3994638069705094e-06, + "loss": 0.0002, + "step": 3601 + }, + { + "epoch": 9.53, + "learning_rate": 2.386058981233244e-06, + "loss": 0.1166, + "step": 3602 + }, + { + "epoch": 9.53, + "learning_rate": 2.372654155495979e-06, + "loss": 0.0008, + "step": 3603 + }, + { + "epoch": 9.53, + "learning_rate": 2.359249329758713e-06, + "loss": 0.0001, + "step": 3604 + }, + { + "epoch": 9.54, + "learning_rate": 2.345844504021448e-06, + "loss": 0.0125, + "step": 3605 + }, + { + "epoch": 9.54, + "learning_rate": 2.3324396782841826e-06, + "loss": 0.0001, + "step": 3606 + }, + { + "epoch": 9.54, + "learning_rate": 2.319034852546917e-06, + "loss": 0.0027, + "step": 3607 + }, + { + "epoch": 9.54, + "learning_rate": 2.3056300268096516e-06, + "loss": 0.1129, + "step": 3608 + }, + { + "epoch": 9.55, + "learning_rate": 2.2922252010723863e-06, + "loss": 0.0739, + "step": 3609 + }, + { + "epoch": 9.55, + "learning_rate": 2.278820375335121e-06, + "loss": 0.0164, + "step": 3610 + }, + { + "epoch": 9.55, + "learning_rate": 2.2654155495978552e-06, + "loss": 0.0068, + "step": 3611 + }, + { + "epoch": 9.56, + "learning_rate": 2.25201072386059e-06, + "loss": 0.0765, + "step": 3612 + }, + { + "epoch": 9.56, + "learning_rate": 2.2386058981233247e-06, + "loss": 0.0002, + "step": 3613 + }, + { + "epoch": 9.56, + "learning_rate": 2.225201072386059e-06, + "loss": 0.0002, + "step": 3614 + }, + { + "epoch": 9.56, + "learning_rate": 2.2117962466487937e-06, + "loss": 0.0003, + "step": 3615 + }, + { + "epoch": 9.57, + "learning_rate": 2.1983914209115284e-06, + "loss": 0.0252, + "step": 3616 + }, + { + "epoch": 9.57, + "learning_rate": 2.184986595174263e-06, + "loss": 0.0002, + "step": 3617 + }, + { + "epoch": 9.57, + "learning_rate": 2.1715817694369974e-06, + "loss": 0.0002, + "step": 3618 + }, + { + "epoch": 9.57, + "learning_rate": 2.158176943699732e-06, + "loss": 0.0003, + "step": 3619 + }, + { + "epoch": 9.58, + "learning_rate": 2.1447721179624668e-06, + "loss": 0.0101, + "step": 3620 + }, + { + "epoch": 9.58, + "learning_rate": 2.131367292225201e-06, + "loss": 0.0002, + "step": 3621 + }, + { + "epoch": 9.58, + "learning_rate": 2.1179624664879358e-06, + "loss": 0.0002, + "step": 3622 + }, + { + "epoch": 9.58, + "learning_rate": 2.1045576407506705e-06, + "loss": 0.126, + "step": 3623 + }, + { + "epoch": 9.59, + "learning_rate": 2.091152815013405e-06, + "loss": 0.0002, + "step": 3624 + }, + { + "epoch": 9.59, + "learning_rate": 2.0777479892761395e-06, + "loss": 0.0002, + "step": 3625 + }, + { + "epoch": 9.59, + "learning_rate": 2.0643431635388737e-06, + "loss": 0.1247, + "step": 3626 + }, + { + "epoch": 9.6, + "learning_rate": 2.050938337801609e-06, + "loss": 0.0002, + "step": 3627 + }, + { + "epoch": 9.6, + "learning_rate": 2.037533512064343e-06, + "loss": 0.0493, + "step": 3628 + }, + { + "epoch": 9.6, + "learning_rate": 2.024128686327078e-06, + "loss": 0.0526, + "step": 3629 + }, + { + "epoch": 9.6, + "learning_rate": 2.0107238605898126e-06, + "loss": 0.0672, + "step": 3630 + }, + { + "epoch": 9.61, + "learning_rate": 1.9973190348525473e-06, + "loss": 0.0141, + "step": 3631 + }, + { + "epoch": 9.61, + "learning_rate": 1.9839142091152816e-06, + "loss": 0.1962, + "step": 3632 + }, + { + "epoch": 9.61, + "learning_rate": 1.970509383378016e-06, + "loss": 0.0008, + "step": 3633 + }, + { + "epoch": 9.61, + "learning_rate": 1.957104557640751e-06, + "loss": 0.0002, + "step": 3634 + }, + { + "epoch": 9.62, + "learning_rate": 1.9436997319034853e-06, + "loss": 0.0003, + "step": 3635 + }, + { + "epoch": 9.62, + "learning_rate": 1.93029490616622e-06, + "loss": 0.0001, + "step": 3636 + }, + { + "epoch": 9.62, + "learning_rate": 1.9168900804289547e-06, + "loss": 0.0002, + "step": 3637 + }, + { + "epoch": 9.62, + "learning_rate": 1.9034852546916892e-06, + "loss": 0.0319, + "step": 3638 + }, + { + "epoch": 9.63, + "learning_rate": 1.8900804289544237e-06, + "loss": 0.0252, + "step": 3639 + }, + { + "epoch": 9.63, + "learning_rate": 1.8766756032171582e-06, + "loss": 0.0758, + "step": 3640 + }, + { + "epoch": 9.63, + "learning_rate": 1.8632707774798929e-06, + "loss": 0.0002, + "step": 3641 + }, + { + "epoch": 9.63, + "learning_rate": 1.8498659517426274e-06, + "loss": 0.0321, + "step": 3642 + }, + { + "epoch": 9.64, + "learning_rate": 1.836461126005362e-06, + "loss": 0.3423, + "step": 3643 + }, + { + "epoch": 9.64, + "learning_rate": 1.8230563002680966e-06, + "loss": 0.0007, + "step": 3644 + }, + { + "epoch": 9.64, + "learning_rate": 1.8096514745308313e-06, + "loss": 0.0163, + "step": 3645 + }, + { + "epoch": 9.65, + "learning_rate": 1.7962466487935658e-06, + "loss": 0.0001, + "step": 3646 + }, + { + "epoch": 9.65, + "learning_rate": 1.7828418230563003e-06, + "loss": 0.0001, + "step": 3647 + }, + { + "epoch": 9.65, + "learning_rate": 1.769436997319035e-06, + "loss": 0.0025, + "step": 3648 + }, + { + "epoch": 9.65, + "learning_rate": 1.7560321715817695e-06, + "loss": 0.0001, + "step": 3649 + }, + { + "epoch": 9.66, + "learning_rate": 1.7426273458445042e-06, + "loss": 0.1419, + "step": 3650 + }, + { + "epoch": 9.66, + "learning_rate": 1.7292225201072387e-06, + "loss": 0.0789, + "step": 3651 + }, + { + "epoch": 9.66, + "learning_rate": 1.7158176943699734e-06, + "loss": 0.0423, + "step": 3652 + }, + { + "epoch": 9.66, + "learning_rate": 1.7024128686327079e-06, + "loss": 0.0001, + "step": 3653 + }, + { + "epoch": 9.67, + "learning_rate": 1.6890080428954424e-06, + "loss": 0.0003, + "step": 3654 + }, + { + "epoch": 9.67, + "learning_rate": 1.675603217158177e-06, + "loss": 0.0002, + "step": 3655 + }, + { + "epoch": 9.67, + "learning_rate": 1.6621983914209116e-06, + "loss": 0.0002, + "step": 3656 + }, + { + "epoch": 9.67, + "learning_rate": 1.6487935656836463e-06, + "loss": 0.0001, + "step": 3657 + }, + { + "epoch": 9.68, + "learning_rate": 1.6353887399463808e-06, + "loss": 0.0002, + "step": 3658 + }, + { + "epoch": 9.68, + "learning_rate": 1.6219839142091153e-06, + "loss": 0.0002, + "step": 3659 + }, + { + "epoch": 9.68, + "learning_rate": 1.60857908847185e-06, + "loss": 0.0723, + "step": 3660 + }, + { + "epoch": 9.69, + "learning_rate": 1.5951742627345845e-06, + "loss": 0.0001, + "step": 3661 + }, + { + "epoch": 9.69, + "learning_rate": 1.5817694369973192e-06, + "loss": 0.0002, + "step": 3662 + }, + { + "epoch": 9.69, + "learning_rate": 1.5683646112600537e-06, + "loss": 0.1128, + "step": 3663 + }, + { + "epoch": 9.69, + "learning_rate": 1.5549597855227882e-06, + "loss": 0.0001, + "step": 3664 + }, + { + "epoch": 9.7, + "learning_rate": 1.5415549597855229e-06, + "loss": 0.0002, + "step": 3665 + }, + { + "epoch": 9.7, + "learning_rate": 1.5281501340482574e-06, + "loss": 0.0008, + "step": 3666 + }, + { + "epoch": 9.7, + "learning_rate": 1.514745308310992e-06, + "loss": 0.0846, + "step": 3667 + }, + { + "epoch": 9.7, + "learning_rate": 1.5013404825737268e-06, + "loss": 0.1065, + "step": 3668 + }, + { + "epoch": 9.71, + "learning_rate": 1.487935656836461e-06, + "loss": 0.0002, + "step": 3669 + }, + { + "epoch": 9.71, + "learning_rate": 1.4745308310991958e-06, + "loss": 0.0008, + "step": 3670 + }, + { + "epoch": 9.71, + "learning_rate": 1.4611260053619303e-06, + "loss": 0.0001, + "step": 3671 + }, + { + "epoch": 9.71, + "learning_rate": 1.447721179624665e-06, + "loss": 0.029, + "step": 3672 + }, + { + "epoch": 9.72, + "learning_rate": 1.4343163538873995e-06, + "loss": 0.0019, + "step": 3673 + }, + { + "epoch": 9.72, + "learning_rate": 1.4209115281501342e-06, + "loss": 0.0002, + "step": 3674 + }, + { + "epoch": 9.72, + "learning_rate": 1.4075067024128687e-06, + "loss": 0.0001, + "step": 3675 + }, + { + "epoch": 9.72, + "learning_rate": 1.3941018766756032e-06, + "loss": 0.0001, + "step": 3676 + }, + { + "epoch": 9.73, + "learning_rate": 1.3806970509383379e-06, + "loss": 0.0002, + "step": 3677 + }, + { + "epoch": 9.73, + "learning_rate": 1.3672922252010724e-06, + "loss": 0.0549, + "step": 3678 + }, + { + "epoch": 9.73, + "learning_rate": 1.353887399463807e-06, + "loss": 0.178, + "step": 3679 + }, + { + "epoch": 9.74, + "learning_rate": 1.3404825737265416e-06, + "loss": 0.0002, + "step": 3680 + }, + { + "epoch": 9.74, + "learning_rate": 1.3270777479892763e-06, + "loss": 0.0008, + "step": 3681 + }, + { + "epoch": 9.74, + "learning_rate": 1.3136729222520108e-06, + "loss": 0.0002, + "step": 3682 + }, + { + "epoch": 9.74, + "learning_rate": 1.3002680965147453e-06, + "loss": 0.0007, + "step": 3683 + }, + { + "epoch": 9.75, + "learning_rate": 1.28686327077748e-06, + "loss": 0.0158, + "step": 3684 + }, + { + "epoch": 9.75, + "learning_rate": 1.2734584450402145e-06, + "loss": 0.0002, + "step": 3685 + }, + { + "epoch": 9.75, + "learning_rate": 1.2600536193029492e-06, + "loss": 0.0045, + "step": 3686 + }, + { + "epoch": 9.75, + "learning_rate": 1.2466487935656837e-06, + "loss": 0.0001, + "step": 3687 + }, + { + "epoch": 9.76, + "learning_rate": 1.2332439678284184e-06, + "loss": 0.0004, + "step": 3688 + }, + { + "epoch": 9.76, + "learning_rate": 1.2198391420911529e-06, + "loss": 0.0462, + "step": 3689 + }, + { + "epoch": 9.76, + "learning_rate": 1.2064343163538874e-06, + "loss": 0.0002, + "step": 3690 + }, + { + "epoch": 9.76, + "learning_rate": 1.193029490616622e-06, + "loss": 0.4527, + "step": 3691 + }, + { + "epoch": 9.77, + "learning_rate": 1.1796246648793566e-06, + "loss": 0.0005, + "step": 3692 + }, + { + "epoch": 9.77, + "learning_rate": 1.1662198391420913e-06, + "loss": 0.0228, + "step": 3693 + }, + { + "epoch": 9.77, + "learning_rate": 1.1528150134048258e-06, + "loss": 0.1976, + "step": 3694 + }, + { + "epoch": 9.78, + "learning_rate": 1.1394101876675605e-06, + "loss": 0.0002, + "step": 3695 + }, + { + "epoch": 9.78, + "learning_rate": 1.126005361930295e-06, + "loss": 0.0122, + "step": 3696 + }, + { + "epoch": 9.78, + "learning_rate": 1.1126005361930295e-06, + "loss": 0.0215, + "step": 3697 + }, + { + "epoch": 9.78, + "learning_rate": 1.0991957104557642e-06, + "loss": 0.0072, + "step": 3698 + }, + { + "epoch": 9.79, + "learning_rate": 1.0857908847184987e-06, + "loss": 0.0002, + "step": 3699 + }, + { + "epoch": 9.79, + "learning_rate": 1.0723860589812334e-06, + "loss": 0.0002, + "step": 3700 + }, + { + "epoch": 9.79, + "learning_rate": 1.0589812332439679e-06, + "loss": 0.0002, + "step": 3701 + }, + { + "epoch": 9.79, + "learning_rate": 1.0455764075067026e-06, + "loss": 0.0472, + "step": 3702 + }, + { + "epoch": 9.8, + "learning_rate": 1.0321715817694369e-06, + "loss": 0.0002, + "step": 3703 + }, + { + "epoch": 9.8, + "learning_rate": 1.0187667560321716e-06, + "loss": 0.0002, + "step": 3704 + }, + { + "epoch": 9.8, + "learning_rate": 1.0053619302949063e-06, + "loss": 0.0783, + "step": 3705 + }, + { + "epoch": 9.8, + "learning_rate": 9.919571045576408e-07, + "loss": 0.3007, + "step": 3706 + }, + { + "epoch": 9.81, + "learning_rate": 9.785522788203755e-07, + "loss": 0.0002, + "step": 3707 + }, + { + "epoch": 9.81, + "learning_rate": 9.6514745308311e-07, + "loss": 0.0005, + "step": 3708 + }, + { + "epoch": 9.81, + "learning_rate": 9.517426273458446e-07, + "loss": 0.0002, + "step": 3709 + }, + { + "epoch": 9.81, + "learning_rate": 9.383378016085791e-07, + "loss": 0.0003, + "step": 3710 + }, + { + "epoch": 9.82, + "learning_rate": 9.249329758713137e-07, + "loss": 0.0001, + "step": 3711 + }, + { + "epoch": 9.82, + "learning_rate": 9.115281501340483e-07, + "loss": 0.0001, + "step": 3712 + }, + { + "epoch": 9.82, + "learning_rate": 8.981233243967829e-07, + "loss": 0.0002, + "step": 3713 + }, + { + "epoch": 9.83, + "learning_rate": 8.847184986595175e-07, + "loss": 0.0038, + "step": 3714 + }, + { + "epoch": 9.83, + "learning_rate": 8.713136729222521e-07, + "loss": 0.1268, + "step": 3715 + }, + { + "epoch": 9.83, + "learning_rate": 8.579088471849867e-07, + "loss": 0.1269, + "step": 3716 + }, + { + "epoch": 9.83, + "learning_rate": 8.445040214477212e-07, + "loss": 0.0004, + "step": 3717 + }, + { + "epoch": 9.84, + "learning_rate": 8.310991957104558e-07, + "loss": 0.0008, + "step": 3718 + }, + { + "epoch": 9.84, + "learning_rate": 8.176943699731904e-07, + "loss": 0.2391, + "step": 3719 + }, + { + "epoch": 9.84, + "learning_rate": 8.04289544235925e-07, + "loss": 0.1564, + "step": 3720 + }, + { + "epoch": 9.84, + "learning_rate": 7.908847184986596e-07, + "loss": 0.0001, + "step": 3721 + }, + { + "epoch": 9.85, + "learning_rate": 7.774798927613941e-07, + "loss": 0.0004, + "step": 3722 + }, + { + "epoch": 9.85, + "learning_rate": 7.640750670241287e-07, + "loss": 0.0002, + "step": 3723 + }, + { + "epoch": 9.85, + "learning_rate": 7.506702412868634e-07, + "loss": 0.0833, + "step": 3724 + }, + { + "epoch": 9.85, + "learning_rate": 7.372654155495979e-07, + "loss": 0.0002, + "step": 3725 + }, + { + "epoch": 9.86, + "learning_rate": 7.238605898123325e-07, + "loss": 0.0002, + "step": 3726 + }, + { + "epoch": 9.86, + "learning_rate": 7.104557640750671e-07, + "loss": 0.0015, + "step": 3727 + }, + { + "epoch": 9.86, + "learning_rate": 6.970509383378016e-07, + "loss": 0.0594, + "step": 3728 + }, + { + "epoch": 9.87, + "learning_rate": 6.836461126005362e-07, + "loss": 0.003, + "step": 3729 + }, + { + "epoch": 9.87, + "learning_rate": 6.702412868632708e-07, + "loss": 0.0236, + "step": 3730 + }, + { + "epoch": 9.87, + "learning_rate": 6.568364611260054e-07, + "loss": 0.0001, + "step": 3731 + }, + { + "epoch": 9.87, + "learning_rate": 6.4343163538874e-07, + "loss": 0.0209, + "step": 3732 + }, + { + "epoch": 9.88, + "learning_rate": 6.300268096514746e-07, + "loss": 0.0419, + "step": 3733 + }, + { + "epoch": 9.88, + "learning_rate": 6.166219839142092e-07, + "loss": 0.1611, + "step": 3734 + }, + { + "epoch": 9.88, + "learning_rate": 6.032171581769437e-07, + "loss": 0.0331, + "step": 3735 + }, + { + "epoch": 9.88, + "learning_rate": 5.898123324396783e-07, + "loss": 0.0015, + "step": 3736 + }, + { + "epoch": 9.89, + "learning_rate": 5.764075067024129e-07, + "loss": 0.0001, + "step": 3737 + }, + { + "epoch": 9.89, + "learning_rate": 5.630026809651475e-07, + "loss": 0.0002, + "step": 3738 + }, + { + "epoch": 9.89, + "learning_rate": 5.495978552278821e-07, + "loss": 0.0001, + "step": 3739 + }, + { + "epoch": 9.89, + "learning_rate": 5.361930294906167e-07, + "loss": 0.0695, + "step": 3740 + }, + { + "epoch": 9.9, + "learning_rate": 5.227882037533513e-07, + "loss": 0.0256, + "step": 3741 + }, + { + "epoch": 9.9, + "learning_rate": 5.093833780160858e-07, + "loss": 0.0002, + "step": 3742 + }, + { + "epoch": 9.9, + "learning_rate": 4.959785522788204e-07, + "loss": 0.0002, + "step": 3743 + }, + { + "epoch": 9.9, + "learning_rate": 4.82573726541555e-07, + "loss": 0.0001, + "step": 3744 + }, + { + "epoch": 9.91, + "learning_rate": 4.6916890080428954e-07, + "loss": 0.0002, + "step": 3745 + }, + { + "epoch": 9.91, + "learning_rate": 4.5576407506702414e-07, + "loss": 0.0002, + "step": 3746 + }, + { + "epoch": 9.91, + "learning_rate": 4.4235924932975874e-07, + "loss": 0.0055, + "step": 3747 + }, + { + "epoch": 9.92, + "learning_rate": 4.2895442359249334e-07, + "loss": 0.0002, + "step": 3748 + }, + { + "epoch": 9.92, + "learning_rate": 4.155495978552279e-07, + "loss": 0.0002, + "step": 3749 + }, + { + "epoch": 9.92, + "learning_rate": 4.021447721179625e-07, + "loss": 0.0108, + "step": 3750 + }, + { + "epoch": 9.92, + "learning_rate": 3.8873994638069704e-07, + "loss": 0.0002, + "step": 3751 + }, + { + "epoch": 9.93, + "learning_rate": 3.753351206434317e-07, + "loss": 0.0001, + "step": 3752 + }, + { + "epoch": 9.93, + "learning_rate": 3.6193029490616624e-07, + "loss": 0.0734, + "step": 3753 + }, + { + "epoch": 9.93, + "learning_rate": 3.485254691689008e-07, + "loss": 0.0001, + "step": 3754 + }, + { + "epoch": 9.93, + "learning_rate": 3.351206434316354e-07, + "loss": 0.0004, + "step": 3755 + }, + { + "epoch": 9.94, + "learning_rate": 3.2171581769437e-07, + "loss": 0.4773, + "step": 3756 + }, + { + "epoch": 9.94, + "learning_rate": 3.083109919571046e-07, + "loss": 0.0002, + "step": 3757 + }, + { + "epoch": 9.94, + "learning_rate": 2.9490616621983914e-07, + "loss": 0.0056, + "step": 3758 + }, + { + "epoch": 9.94, + "learning_rate": 2.8150134048257374e-07, + "loss": 0.0196, + "step": 3759 + }, + { + "epoch": 9.95, + "learning_rate": 2.6809651474530835e-07, + "loss": 0.0002, + "step": 3760 + }, + { + "epoch": 9.95, + "learning_rate": 2.546916890080429e-07, + "loss": 0.07, + "step": 3761 + }, + { + "epoch": 9.95, + "learning_rate": 2.412868632707775e-07, + "loss": 0.0003, + "step": 3762 + }, + { + "epoch": 9.96, + "learning_rate": 2.2788203753351207e-07, + "loss": 0.0217, + "step": 3763 + }, + { + "epoch": 9.96, + "learning_rate": 2.1447721179624667e-07, + "loss": 0.0001, + "step": 3764 + }, + { + "epoch": 9.96, + "learning_rate": 2.0107238605898125e-07, + "loss": 0.0004, + "step": 3765 + }, + { + "epoch": 9.96, + "learning_rate": 1.8766756032171585e-07, + "loss": 0.0001, + "step": 3766 + }, + { + "epoch": 9.97, + "learning_rate": 1.742627345844504e-07, + "loss": 0.0005, + "step": 3767 + }, + { + "epoch": 9.97, + "learning_rate": 1.60857908847185e-07, + "loss": 0.0255, + "step": 3768 + }, + { + "epoch": 9.97, + "learning_rate": 1.4745308310991957e-07, + "loss": 0.1316, + "step": 3769 + }, + { + "epoch": 9.97, + "learning_rate": 1.3404825737265417e-07, + "loss": 0.0002, + "step": 3770 + }, + { + "epoch": 9.98, + "learning_rate": 1.2064343163538875e-07, + "loss": 0.0057, + "step": 3771 + }, + { + "epoch": 9.98, + "learning_rate": 1.0723860589812334e-07, + "loss": 0.1064, + "step": 3772 + }, + { + "epoch": 9.98, + "learning_rate": 9.383378016085792e-08, + "loss": 0.0002, + "step": 3773 + }, + { + "epoch": 9.98, + "learning_rate": 8.04289544235925e-08, + "loss": 0.0678, + "step": 3774 + }, + { + "epoch": 9.99, + "learning_rate": 6.702412868632709e-08, + "loss": 0.0914, + "step": 3775 + }, + { + "epoch": 9.99, + "learning_rate": 5.361930294906167e-08, + "loss": 0.0001, + "step": 3776 + }, + { + "epoch": 9.99, + "learning_rate": 4.021447721179625e-08, + "loss": 0.448, + "step": 3777 + }, + { + "epoch": 9.99, + "learning_rate": 2.6809651474530834e-08, + "loss": 0.0002, + "step": 3778 + }, + { + "epoch": 10.0, + "learning_rate": 1.3404825737265417e-08, + "loss": 0.0027, + "step": 3779 + }, + { + "epoch": 10.0, + "learning_rate": 0.0, + "loss": 0.0051, + "step": 3780 + }, + { + "epoch": 10.0, + "eval_f1": 0.7801857585139318, + "eval_loss": 1.4196751117706299, + "eval_runtime": 1.9725, + "eval_samples_per_second": 767.032, + "eval_steps_per_second": 48.161, + "step": 3780 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 968262171263616.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3780/training_args.bin b/checkpoint-3780/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-3780/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/checkpoint-756/config.json b/checkpoint-756/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/checkpoint-756/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/checkpoint-756/optimizer.pt b/checkpoint-756/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ab03b01708364229fb5d9b3f4a093205bfb7a6b --- /dev/null +++ b/checkpoint-756/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb10dfe5299689219604d3ceb372df330c6967d53616523135abf5362e11545 +size 526325317 diff --git a/checkpoint-756/pytorch_model.bin b/checkpoint-756/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9082d4526ce2220494c21ca41df523faf1a2d11f --- /dev/null +++ b/checkpoint-756/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc9d288e32bb2c554a28d860a0e1375a9fdcb015fa51cd40a734d4e85f60243 +size 263167661 diff --git a/checkpoint-756/rng_state.pth b/checkpoint-756/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..89fcee674b501297548f07f207a1b9618d5c4f9e --- /dev/null +++ b/checkpoint-756/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8bd453b05b9ce290bca55689a8177f0cd66b36710d7c98b9d0a0aadab0fba3 +size 14575 diff --git a/checkpoint-756/scheduler.pt b/checkpoint-756/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..be345c4d7406ac8dd6ce3522464224cf84c9ed22 --- /dev/null +++ b/checkpoint-756/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47d8e09f616bdcb80a8dbdbfea8a723fbdfff60b864ffebf36d87a5b43372b0 +size 627 diff --git a/checkpoint-756/trainer_state.json b/checkpoint-756/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..00e01405b9a7c4561178327d50593cffa6dc0427 --- /dev/null +++ b/checkpoint-756/trainer_state.json @@ -0,0 +1,4570 @@ +{ + "best_metric": 0.40209120512008667, + "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", + "epoch": 2.0, + "global_step": 756, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.7503, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7789, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7344, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7709, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6884, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.7087, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6655, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6978, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.7435, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.719, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.7129, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.7249, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.7436, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.6886, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-05, + "loss": 0.702, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7105, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.6709, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 1.8e-05, + "loss": 0.6767, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.9e-05, + "loss": 0.6784, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.7016, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 2.1e-05, + "loss": 0.6308, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.645, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.6845, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 2.4e-05, + "loss": 0.6891, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-05, + "loss": 0.5914, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.6916, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5884, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.6587, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 2.9e-05, + "loss": 0.6008, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 3e-05, + "loss": 0.6717, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 3.1e-05, + "loss": 0.5795, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6358, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 3.3e-05, + "loss": 0.7508, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.6601, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 3.5e-05, + "loss": 0.6573, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 3.6e-05, + "loss": 0.5695, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 3.7e-05, + "loss": 0.5535, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 3.8e-05, + "loss": 0.5813, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.5224, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 4e-05, + "loss": 0.4757, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 4.1e-05, + "loss": 0.5529, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 4.2e-05, + "loss": 0.4964, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 4.3e-05, + "loss": 0.4565, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5848, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-05, + "loss": 0.7333, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5224, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 4.7e-05, + "loss": 0.4826, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 4.8e-05, + "loss": 0.4328, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 4.9e-05, + "loss": 0.2546, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 5e-05, + "loss": 0.2636, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 4.998659517426274e-05, + "loss": 0.4116, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 4.997319034852547e-05, + "loss": 0.3191, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 4.995978552278821e-05, + "loss": 0.2822, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 4.994638069705094e-05, + "loss": 0.8821, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 4.993297587131368e-05, + "loss": 0.483, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 4.9919571045576406e-05, + "loss": 0.7164, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 4.990616621983915e-05, + "loss": 0.4161, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 4.989276139410188e-05, + "loss": 0.2668, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 4.987935656836462e-05, + "loss": 0.5255, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 4.986595174262735e-05, + "loss": 0.3784, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 4.985254691689008e-05, + "loss": 0.5065, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 4.983914209115282e-05, + "loss": 0.1988, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 4.982573726541555e-05, + "loss": 0.4362, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 4.981233243967829e-05, + "loss": 0.6619, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 4.979892761394102e-05, + "loss": 0.3217, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 4.978552278820375e-05, + "loss": 0.2967, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 4.977211796246649e-05, + "loss": 0.2429, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 4.975871313672922e-05, + "loss": 0.6642, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 4.974530831099196e-05, + "loss": 0.56, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 4.973190348525469e-05, + "loss": 1.2979, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 4.9718498659517427e-05, + "loss": 0.5287, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 4.970509383378016e-05, + "loss": 0.4684, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 4.96916890080429e-05, + "loss": 0.283, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 4.967828418230563e-05, + "loss": 0.6818, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 4.966487935656837e-05, + "loss": 0.6141, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 4.96514745308311e-05, + "loss": 0.5046, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 4.963806970509384e-05, + "loss": 0.5266, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 4.962466487935657e-05, + "loss": 0.5944, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 4.961126005361931e-05, + "loss": 0.5631, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 4.959785522788204e-05, + "loss": 0.4791, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 4.958445040214477e-05, + "loss": 0.5645, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 4.957104557640751e-05, + "loss": 0.4349, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 4.955764075067024e-05, + "loss": 0.3865, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 4.954423592493298e-05, + "loss": 0.486, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 4.953083109919571e-05, + "loss": 0.2179, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 4.951742627345845e-05, + "loss": 0.3896, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 4.950402144772118e-05, + "loss": 0.4247, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 4.949061662198392e-05, + "loss": 0.4906, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 4.947721179624665e-05, + "loss": 0.4483, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 4.946380697050939e-05, + "loss": 0.557, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 4.9450402144772116e-05, + "loss": 0.7521, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 4.943699731903486e-05, + "loss": 0.3103, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 4.9423592493297586e-05, + "loss": 0.757, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 4.941018766756033e-05, + "loss": 0.8248, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 4.9396782841823056e-05, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 4.938337801608579e-05, + "loss": 0.3912, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 4.9369973190348526e-05, + "loss": 0.5289, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 4.935656836461126e-05, + "loss": 0.3264, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 4.9343163538874e-05, + "loss": 0.2947, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 4.932975871313673e-05, + "loss": 0.2647, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 4.931635388739946e-05, + "loss": 0.3691, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 4.93029490616622e-05, + "loss": 0.4796, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 4.928954423592493e-05, + "loss": 0.4827, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 4.927613941018767e-05, + "loss": 0.2672, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 4.92627345844504e-05, + "loss": 0.7456, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 4.9249329758713136e-05, + "loss": 0.5206, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 4.923592493297587e-05, + "loss": 0.3576, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 4.9222520107238606e-05, + "loss": 0.2596, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 4.920911528150134e-05, + "loss": 0.4115, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 4.9195710455764076e-05, + "loss": 0.3481, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 4.918230563002681e-05, + "loss": 0.4387, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 4.916890080428955e-05, + "loss": 0.5023, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 4.915549597855228e-05, + "loss": 0.5916, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 4.914209115281502e-05, + "loss": 0.5467, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 4.912868632707775e-05, + "loss": 0.5631, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 4.911528150134049e-05, + "loss": 0.5512, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 4.910187667560322e-05, + "loss": 0.5546, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 4.908847184986595e-05, + "loss": 0.4209, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 4.907506702412869e-05, + "loss": 0.6064, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 4.906166219839142e-05, + "loss": 0.5301, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 4.904825737265416e-05, + "loss": 0.436, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 4.903485254691689e-05, + "loss": 0.726, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 4.9021447721179626e-05, + "loss": 0.5288, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 4.900804289544236e-05, + "loss": 0.4887, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 4.8994638069705097e-05, + "loss": 0.3752, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 4.898123324396783e-05, + "loss": 0.4895, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 4.896782841823057e-05, + "loss": 0.5046, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 4.8954423592493295e-05, + "loss": 0.3953, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 4.894101876675604e-05, + "loss": 0.2015, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 4.8927613941018765e-05, + "loss": 0.5165, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 4.891420911528151e-05, + "loss": 0.4237, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 4.8900804289544236e-05, + "loss": 0.239, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 4.888739946380697e-05, + "loss": 0.5515, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 4.8873994638069706e-05, + "loss": 0.303, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 4.886058981233244e-05, + "loss": 0.2867, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 4.8847184986595176e-05, + "loss": 0.6756, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 4.883378016085791e-05, + "loss": 0.4996, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 4.8820375335120646e-05, + "loss": 0.2798, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 4.880697050938338e-05, + "loss": 0.8877, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 4.879356568364612e-05, + "loss": 0.5022, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 4.878016085790885e-05, + "loss": 0.2177, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 4.876675603217159e-05, + "loss": 0.8708, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 4.8753351206434315e-05, + "loss": 0.719, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 4.873994638069706e-05, + "loss": 0.562, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 4.8726541554959786e-05, + "loss": 0.492, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 4.871313672922253e-05, + "loss": 0.4637, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 4.8699731903485256e-05, + "loss": 0.4132, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 4.868632707774799e-05, + "loss": 0.2889, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 4.8672922252010726e-05, + "loss": 0.2213, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 4.865951742627346e-05, + "loss": 0.9268, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 4.8646112600536196e-05, + "loss": 0.2852, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 4.863270777479893e-05, + "loss": 0.4599, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 4.861930294906166e-05, + "loss": 0.1913, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 4.86058981233244e-05, + "loss": 0.4488, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 4.859249329758713e-05, + "loss": 0.9022, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 4.857908847184987e-05, + "loss": 0.5221, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 4.85656836461126e-05, + "loss": 0.2394, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 4.8552278820375336e-05, + "loss": 0.3332, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 4.853887399463807e-05, + "loss": 0.4015, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 4.8525469168900806e-05, + "loss": 0.4461, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 4.851206434316354e-05, + "loss": 0.337, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 4.8498659517426276e-05, + "loss": 0.4908, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 4.848525469168901e-05, + "loss": 0.526, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 4.8471849865951746e-05, + "loss": 0.5262, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 4.845844504021448e-05, + "loss": 0.6818, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 4.8445040214477217e-05, + "loss": 0.3154, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 4.843163538873995e-05, + "loss": 0.5963, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 4.841823056300268e-05, + "loss": 0.4451, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 4.840482573726542e-05, + "loss": 0.5969, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 4.839142091152815e-05, + "loss": 0.438, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 4.837801608579089e-05, + "loss": 0.4827, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 4.836461126005362e-05, + "loss": 0.2029, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 4.8351206434316356e-05, + "loss": 0.5195, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 4.833780160857909e-05, + "loss": 0.517, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 4.8324396782841826e-05, + "loss": 0.5532, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 4.831099195710456e-05, + "loss": 0.4198, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 4.8297587131367296e-05, + "loss": 0.8386, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 4.8284182305630025e-05, + "loss": 0.575, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 4.8270777479892766e-05, + "loss": 0.6156, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 4.8257372654155495e-05, + "loss": 0.7044, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 4.824396782841824e-05, + "loss": 0.5712, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 4.8230563002680965e-05, + "loss": 0.34, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 4.82171581769437e-05, + "loss": 0.5773, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 4.8203753351206435e-05, + "loss": 0.546, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 4.819034852546917e-05, + "loss": 0.3955, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 4.8176943699731906e-05, + "loss": 0.5921, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 4.816353887399464e-05, + "loss": 0.3108, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 4.8150134048257376e-05, + "loss": 0.5469, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 4.813672922252011e-05, + "loss": 0.64, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 4.8123324396782846e-05, + "loss": 0.5153, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 4.810991957104558e-05, + "loss": 0.4719, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 4.8096514745308316e-05, + "loss": 0.52, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 4.8083109919571045e-05, + "loss": 0.5114, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 4.806970509383379e-05, + "loss": 0.5469, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 4.8056300268096515e-05, + "loss": 0.3435, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 4.804289544235926e-05, + "loss": 0.6469, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 4.8029490616621985e-05, + "loss": 0.6595, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 4.801608579088472e-05, + "loss": 0.5503, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8002680965147456e-05, + "loss": 0.3799, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 4.798927613941019e-05, + "loss": 0.417, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 4.7975871313672926e-05, + "loss": 0.5281, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.796246648793566e-05, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 4.794906166219839e-05, + "loss": 0.5777, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 4.793565683646113e-05, + "loss": 0.5286, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 4.792225201072386e-05, + "loss": 0.4302, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.79088471849866e-05, + "loss": 0.5413, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 4.789544235924933e-05, + "loss": 0.3087, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 4.7882037533512065e-05, + "loss": 0.4385, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 4.78686327077748e-05, + "loss": 0.7137, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 4.7855227882037535e-05, + "loss": 0.6278, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 4.784182305630027e-05, + "loss": 0.2832, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 4.7828418230563005e-05, + "loss": 0.5899, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 4.7815013404825734e-05, + "loss": 0.4421, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 4.7801608579088476e-05, + "loss": 0.3411, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 4.7788203753351204e-05, + "loss": 0.4236, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 4.7774798927613946e-05, + "loss": 0.4444, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.7761394101876674e-05, + "loss": 0.4765, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 4.774798927613941e-05, + "loss": 0.2453, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 4.7734584450402145e-05, + "loss": 0.2765, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 4.772117962466488e-05, + "loss": 0.2075, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.7707774798927615e-05, + "loss": 0.5905, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 4.769436997319035e-05, + "loss": 0.3457, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 4.7680965147453085e-05, + "loss": 0.5986, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 4.766756032171582e-05, + "loss": 0.3881, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 4.7654155495978555e-05, + "loss": 0.5655, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 4.764075067024129e-05, + "loss": 0.6156, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 4.7627345844504026e-05, + "loss": 0.4784, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 4.7613941018766754e-05, + "loss": 0.5195, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 4.7600536193029496e-05, + "loss": 0.66, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 4.7587131367292224e-05, + "loss": 0.4104, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 4.7573726541554966e-05, + "loss": 0.3478, + "step": 231 + }, + { + "epoch": 0.61, + "learning_rate": 4.7560321715817695e-05, + "loss": 0.2037, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 4.754691689008043e-05, + "loss": 0.75, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 4.7533512064343165e-05, + "loss": 0.4237, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 4.75201072386059e-05, + "loss": 0.2372, + "step": 235 + }, + { + "epoch": 0.62, + "learning_rate": 4.7506702412868635e-05, + "loss": 0.7874, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 4.749329758713137e-05, + "loss": 0.5751, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 4.7479892761394105e-05, + "loss": 0.5801, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 4.746648793565684e-05, + "loss": 0.4983, + "step": 239 + }, + { + "epoch": 0.63, + "learning_rate": 4.745308310991957e-05, + "loss": 0.4215, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 4.743967828418231e-05, + "loss": 0.3655, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 4.742627345844504e-05, + "loss": 0.523, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 4.741286863270778e-05, + "loss": 0.5952, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 4.739946380697051e-05, + "loss": 0.4226, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 4.7386058981233244e-05, + "loss": 0.316, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 4.737265415549598e-05, + "loss": 0.5096, + "step": 246 + }, + { + "epoch": 0.65, + "learning_rate": 4.7359249329758715e-05, + "loss": 0.435, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 4.734584450402145e-05, + "loss": 0.7516, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 4.7332439678284185e-05, + "loss": 0.531, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 4.731903485254692e-05, + "loss": 0.4096, + "step": 250 + }, + { + "epoch": 0.66, + "learning_rate": 4.7305630026809655e-05, + "loss": 0.387, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 4.729222520107239e-05, + "loss": 0.5468, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 4.7278820375335125e-05, + "loss": 0.4613, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 4.726541554959786e-05, + "loss": 0.8437, + "step": 254 + }, + { + "epoch": 0.67, + "learning_rate": 4.725201072386059e-05, + "loss": 0.522, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 4.723860589812333e-05, + "loss": 0.3922, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 4.722520107238606e-05, + "loss": 0.5114, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 4.72117962466488e-05, + "loss": 0.6148, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 4.719839142091153e-05, + "loss": 0.4578, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 4.7184986595174265e-05, + "loss": 0.6286, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 4.7171581769437e-05, + "loss": 0.5883, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 4.7158176943699735e-05, + "loss": 0.5634, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 4.714477211796247e-05, + "loss": 0.4085, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 4.7131367292225205e-05, + "loss": 0.2988, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 4.7117962466487934e-05, + "loss": 0.6353, + "step": 265 + }, + { + "epoch": 0.7, + "learning_rate": 4.7104557640750675e-05, + "loss": 0.4598, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 4.7091152815013404e-05, + "loss": 0.5072, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077747989276146e-05, + "loss": 0.49, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 4.7064343163538874e-05, + "loss": 0.7225, + "step": 269 + }, + { + "epoch": 0.71, + "learning_rate": 4.705093833780161e-05, + "loss": 0.5332, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 4.7037533512064344e-05, + "loss": 0.6064, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 4.702412868632708e-05, + "loss": 0.3518, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 4.7010723860589815e-05, + "loss": 0.3673, + "step": 273 + }, + { + "epoch": 0.72, + "learning_rate": 4.699731903485255e-05, + "loss": 0.4688, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 4.6983914209115285e-05, + "loss": 0.5389, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 4.697050938337802e-05, + "loss": 0.374, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 4.6957104557640755e-05, + "loss": 0.475, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 4.694369973190349e-05, + "loss": 0.5397, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 4.6930294906166225e-05, + "loss": 0.3821, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 4.6916890080428954e-05, + "loss": 0.3372, + "step": 280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6903485254691696e-05, + "loss": 0.6652, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 4.6890080428954424e-05, + "loss": 0.2894, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 4.6876675603217166e-05, + "loss": 0.5639, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 4.6863270777479894e-05, + "loss": 0.353, + "step": 284 + }, + { + "epoch": 0.75, + "learning_rate": 4.684986595174263e-05, + "loss": 0.2932, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 4.6836461126005364e-05, + "loss": 0.467, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 4.68230563002681e-05, + "loss": 0.4732, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 4.6809651474530835e-05, + "loss": 0.1808, + "step": 288 + }, + { + "epoch": 0.76, + "learning_rate": 4.679624664879357e-05, + "loss": 0.6031, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 4.67828418230563e-05, + "loss": 0.2555, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 4.676943699731904e-05, + "loss": 0.4041, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 4.675603217158177e-05, + "loss": 0.7822, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 4.674262734584451e-05, + "loss": 0.138, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 4.672922252010724e-05, + "loss": 0.2746, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 4.6715817694369974e-05, + "loss": 0.5835, + "step": 295 + }, + { + "epoch": 0.78, + "learning_rate": 4.670241286863271e-05, + "loss": 0.2367, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 4.6689008042895444e-05, + "loss": 0.3247, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 4.667560321715818e-05, + "loss": 0.306, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 4.6662198391420914e-05, + "loss": 0.2825, + "step": 299 + }, + { + "epoch": 0.79, + "learning_rate": 4.664879356568364e-05, + "loss": 0.6102, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 4.6635388739946385e-05, + "loss": 0.3613, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 4.662198391420911e-05, + "loss": 0.6327, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 4.6608579088471855e-05, + "loss": 0.4059, + "step": 303 + }, + { + "epoch": 0.8, + "learning_rate": 4.659517426273458e-05, + "loss": 0.4027, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 4.658176943699732e-05, + "loss": 0.9133, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 4.6568364611260054e-05, + "loss": 0.1869, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 4.655495978552279e-05, + "loss": 0.3987, + "step": 307 + }, + { + "epoch": 0.81, + "learning_rate": 4.6541554959785524e-05, + "loss": 0.6114, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 4.652815013404826e-05, + "loss": 0.4406, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 4.6514745308310994e-05, + "loss": 0.2954, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 4.650134048257373e-05, + "loss": 0.5067, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 4.6487935656836464e-05, + "loss": 0.3985, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 4.64745308310992e-05, + "loss": 0.3756, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 4.6461126005361935e-05, + "loss": 0.2618, + "step": 314 + }, + { + "epoch": 0.83, + "learning_rate": 4.644772117962466e-05, + "loss": 0.4992, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434316353887405e-05, + "loss": 0.8224, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 4.642091152815013e-05, + "loss": 0.3425, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 4.6407506702412875e-05, + "loss": 0.4062, + "step": 318 + }, + { + "epoch": 0.84, + "learning_rate": 4.6394101876675603e-05, + "loss": 0.4748, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 4.638069705093834e-05, + "loss": 0.6857, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 4.6367292225201074e-05, + "loss": 0.5368, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 4.635388739946381e-05, + "loss": 0.5571, + "step": 322 + }, + { + "epoch": 0.85, + "learning_rate": 4.6340482573726544e-05, + "loss": 0.3045, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 4.632707774798928e-05, + "loss": 0.3189, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 4.631367292225201e-05, + "loss": 0.3033, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 4.630026809651475e-05, + "loss": 0.4659, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 4.628686327077748e-05, + "loss": 0.4058, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 4.627345844504022e-05, + "loss": 0.6745, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 4.626005361930295e-05, + "loss": 0.3259, + "step": 329 + }, + { + "epoch": 0.87, + "learning_rate": 4.624664879356568e-05, + "loss": 0.5126, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 4.623324396782842e-05, + "loss": 0.2759, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 4.621983914209115e-05, + "loss": 0.2512, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 4.620643431635389e-05, + "loss": 0.3046, + "step": 333 + }, + { + "epoch": 0.88, + "learning_rate": 4.6193029490616624e-05, + "loss": 0.3931, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 4.617962466487936e-05, + "loss": 0.4838, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 4.6166219839142094e-05, + "loss": 0.2925, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 4.615281501340483e-05, + "loss": 0.4481, + "step": 337 + }, + { + "epoch": 0.89, + "learning_rate": 4.6139410187667564e-05, + "loss": 0.4528, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 4.61260053619303e-05, + "loss": 0.2934, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 4.611260053619303e-05, + "loss": 0.609, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 4.609919571045577e-05, + "loss": 0.8988, + "step": 341 + }, + { + "epoch": 0.9, + "learning_rate": 4.60857908847185e-05, + "loss": 1.1222, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 4.607238605898124e-05, + "loss": 0.3265, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 4.605898123324397e-05, + "loss": 0.4722, + "step": 344 + }, + { + "epoch": 0.91, + "learning_rate": 4.60455764075067e-05, + "loss": 0.2791, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 4.603217158176944e-05, + "loss": 0.4183, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 4.6018766756032174e-05, + "loss": 0.5323, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 4.600536193029491e-05, + "loss": 0.6108, + "step": 348 + }, + { + "epoch": 0.92, + "learning_rate": 4.5991957104557644e-05, + "loss": 0.2875, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 4.597855227882037e-05, + "loss": 0.6642, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 4.5965147453083114e-05, + "loss": 0.5244, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 4.595174262734584e-05, + "loss": 0.5562, + "step": 352 + }, + { + "epoch": 0.93, + "learning_rate": 4.5938337801608584e-05, + "loss": 0.3972, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 4.592493297587131e-05, + "loss": 0.4532, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 4.591152815013405e-05, + "loss": 0.3368, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 4.589812332439678e-05, + "loss": 0.171, + "step": 356 + }, + { + "epoch": 0.94, + "learning_rate": 4.588471849865952e-05, + "loss": 0.4036, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 4.587131367292225e-05, + "loss": 0.4305, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 4.585790884718499e-05, + "loss": 0.2643, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 4.5844504021447723e-05, + "loss": 0.3782, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 4.583109919571046e-05, + "loss": 0.1673, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 4.5817694369973194e-05, + "loss": 0.3969, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 4.580428954423593e-05, + "loss": 0.3249, + "step": 363 + }, + { + "epoch": 0.96, + "learning_rate": 4.5790884718498664e-05, + "loss": 0.1656, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 4.57774798927614e-05, + "loss": 0.4551, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 4.5764075067024134e-05, + "loss": 0.6075, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 4.575067024128686e-05, + "loss": 0.4699, + "step": 367 + }, + { + "epoch": 0.97, + "learning_rate": 4.5737265415549605e-05, + "loss": 0.5752, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 4.572386058981233e-05, + "loss": 0.3114, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 4.5710455764075075e-05, + "loss": 0.7407, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 4.56970509383378e-05, + "loss": 0.6427, + "step": 371 + }, + { + "epoch": 0.98, + "learning_rate": 4.568364611260054e-05, + "loss": 0.5021, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 4.5670241286863273e-05, + "loss": 0.4209, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 4.565683646112601e-05, + "loss": 0.5957, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 4.5643431635388744e-05, + "loss": 0.495, + "step": 375 + }, + { + "epoch": 0.99, + "learning_rate": 4.563002680965148e-05, + "loss": 0.3101, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 4.561662198391421e-05, + "loss": 0.472, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 4.560321715817695e-05, + "loss": 0.4607, + "step": 378 + }, + { + "epoch": 1.0, + "eval_f1": 0.7914963205233032, + "eval_loss": 0.40209120512008667, + "eval_runtime": 2.1585, + "eval_samples_per_second": 700.949, + "eval_steps_per_second": 44.012, + "step": 378 + }, + { + "epoch": 1.0, + "learning_rate": 4.558981233243968e-05, + "loss": 0.2422, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 4.557640750670242e-05, + "loss": 0.2074, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 4.556300268096515e-05, + "loss": 0.4469, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 4.554959785522788e-05, + "loss": 0.3121, + "step": 382 + }, + { + "epoch": 1.01, + "learning_rate": 4.553619302949062e-05, + "loss": 0.3634, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 4.552278820375335e-05, + "loss": 0.2139, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 4.550938337801609e-05, + "loss": 0.5273, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 4.549597855227882e-05, + "loss": 0.1228, + "step": 386 + }, + { + "epoch": 1.02, + "learning_rate": 4.548257372654156e-05, + "loss": 0.2459, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 4.5469168900804294e-05, + "loss": 0.443, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 4.545576407506703e-05, + "loss": 0.364, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 4.5442359249329764e-05, + "loss": 0.4071, + "step": 390 + }, + { + "epoch": 1.03, + "learning_rate": 4.54289544235925e-05, + "loss": 0.3876, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 4.541554959785523e-05, + "loss": 0.5239, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 4.540214477211797e-05, + "loss": 0.4548, + "step": 393 + }, + { + "epoch": 1.04, + "learning_rate": 4.53887399463807e-05, + "loss": 0.3588, + "step": 394 + }, + { + "epoch": 1.04, + "learning_rate": 4.537533512064344e-05, + "loss": 0.2693, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 4.536193029490617e-05, + "loss": 0.3773, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 4.53485254691689e-05, + "loss": 0.3982, + "step": 397 + }, + { + "epoch": 1.05, + "learning_rate": 4.533512064343164e-05, + "loss": 0.2074, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 4.532171581769437e-05, + "loss": 0.3322, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 4.530831099195711e-05, + "loss": 0.118, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 4.5294906166219844e-05, + "loss": 0.6216, + "step": 401 + }, + { + "epoch": 1.06, + "learning_rate": 4.528150134048257e-05, + "loss": 0.4028, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 4.5268096514745314e-05, + "loss": 0.3179, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 4.525469168900804e-05, + "loss": 0.2815, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 4.5241286863270784e-05, + "loss": 0.2366, + "step": 405 + }, + { + "epoch": 1.07, + "learning_rate": 4.522788203753351e-05, + "loss": 0.14, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 4.521447721179625e-05, + "loss": 0.1255, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 4.520107238605898e-05, + "loss": 0.1482, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 4.518766756032172e-05, + "loss": 0.665, + "step": 409 + }, + { + "epoch": 1.08, + "learning_rate": 4.517426273458445e-05, + "loss": 0.2535, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 4.516085790884719e-05, + "loss": 0.1716, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 4.5147453083109916e-05, + "loss": 0.0765, + "step": 412 + }, + { + "epoch": 1.09, + "learning_rate": 4.513404825737266e-05, + "loss": 0.1238, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 4.512064343163539e-05, + "loss": 0.3877, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 4.510723860589813e-05, + "loss": 0.206, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 4.509383378016086e-05, + "loss": 0.1822, + "step": 416 + }, + { + "epoch": 1.1, + "learning_rate": 4.508042895442359e-05, + "loss": 0.2294, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 4.506702412868633e-05, + "loss": 0.7083, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 4.505361930294906e-05, + "loss": 0.325, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 4.50402144772118e-05, + "loss": 0.2378, + "step": 420 + }, + { + "epoch": 1.11, + "learning_rate": 4.502680965147453e-05, + "loss": 0.2494, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 4.501340482573727e-05, + "loss": 0.3812, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 4.5e-05, + "loss": 0.1491, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 4.498659517426274e-05, + "loss": 0.0833, + "step": 424 + }, + { + "epoch": 1.12, + "learning_rate": 4.497319034852547e-05, + "loss": 0.4637, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 4.495978552278821e-05, + "loss": 0.2594, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 4.4946380697050937e-05, + "loss": 0.4136, + "step": 427 + }, + { + "epoch": 1.13, + "learning_rate": 4.493297587131368e-05, + "loss": 0.4962, + "step": 428 + }, + { + "epoch": 1.13, + "learning_rate": 4.491957104557641e-05, + "loss": 0.0842, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 4.490616621983915e-05, + "loss": 0.4761, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 4.489276139410188e-05, + "loss": 0.4351, + "step": 431 + }, + { + "epoch": 1.14, + "learning_rate": 4.487935656836461e-05, + "loss": 0.4226, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 4.486595174262735e-05, + "loss": 0.5714, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 4.485254691689008e-05, + "loss": 0.2034, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 4.483914209115282e-05, + "loss": 0.3354, + "step": 435 + }, + { + "epoch": 1.15, + "learning_rate": 4.482573726541555e-05, + "loss": 0.7503, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 4.481233243967828e-05, + "loss": 0.1542, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 4.479892761394102e-05, + "loss": 0.3067, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 4.478552278820375e-05, + "loss": 0.4134, + "step": 439 + }, + { + "epoch": 1.16, + "learning_rate": 4.477211796246649e-05, + "loss": 0.1458, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 4.475871313672922e-05, + "loss": 0.2814, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 4.474530831099196e-05, + "loss": 0.1751, + "step": 442 + }, + { + "epoch": 1.17, + "learning_rate": 4.473190348525469e-05, + "loss": 0.4144, + "step": 443 + }, + { + "epoch": 1.17, + "learning_rate": 4.471849865951743e-05, + "loss": 0.3275, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 4.470509383378016e-05, + "loss": 0.3083, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 4.46916890080429e-05, + "loss": 0.4185, + "step": 446 + }, + { + "epoch": 1.18, + "learning_rate": 4.467828418230563e-05, + "loss": 0.3181, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 4.466487935656837e-05, + "loss": 0.438, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 4.46514745308311e-05, + "loss": 0.4509, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 4.463806970509384e-05, + "loss": 0.4597, + "step": 450 + }, + { + "epoch": 1.19, + "learning_rate": 4.462466487935657e-05, + "loss": 0.3123, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 4.46112600536193e-05, + "loss": 0.376, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 4.459785522788204e-05, + "loss": 0.2392, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 4.458445040214477e-05, + "loss": 0.4451, + "step": 454 + }, + { + "epoch": 1.2, + "learning_rate": 4.4571045576407513e-05, + "loss": 0.2436, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 4.455764075067024e-05, + "loss": 0.2944, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 4.454423592493298e-05, + "loss": 0.323, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 4.453083109919571e-05, + "loss": 0.1966, + "step": 458 + }, + { + "epoch": 1.21, + "learning_rate": 4.451742627345845e-05, + "loss": 0.273, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 4.450402144772118e-05, + "loss": 0.3498, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 4.449061662198392e-05, + "loss": 0.458, + "step": 461 + }, + { + "epoch": 1.22, + "learning_rate": 4.4477211796246646e-05, + "loss": 0.1607, + "step": 462 + }, + { + "epoch": 1.22, + "learning_rate": 4.446380697050939e-05, + "loss": 0.5712, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 4.4450402144772116e-05, + "loss": 0.0908, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 4.443699731903486e-05, + "loss": 0.2625, + "step": 465 + }, + { + "epoch": 1.23, + "learning_rate": 4.4423592493297586e-05, + "loss": 0.368, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 4.441018766756032e-05, + "loss": 0.5096, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 4.4396782841823057e-05, + "loss": 0.209, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 4.438337801608579e-05, + "loss": 0.5235, + "step": 469 + }, + { + "epoch": 1.24, + "learning_rate": 4.436997319034853e-05, + "loss": 0.5116, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 4.435656836461126e-05, + "loss": 0.49, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 4.4343163538874e-05, + "loss": 0.4409, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 4.432975871313673e-05, + "loss": 0.1324, + "step": 473 + }, + { + "epoch": 1.25, + "learning_rate": 4.431635388739947e-05, + "loss": 0.1257, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 4.43029490616622e-05, + "loss": 0.3844, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 4.428954423592494e-05, + "loss": 0.2942, + "step": 476 + }, + { + "epoch": 1.26, + "learning_rate": 4.4276139410187666e-05, + "loss": 0.316, + "step": 477 + }, + { + "epoch": 1.26, + "learning_rate": 4.426273458445041e-05, + "loss": 0.3724, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 4.4249329758713136e-05, + "loss": 0.4498, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 4.423592493297588e-05, + "loss": 0.2677, + "step": 480 + }, + { + "epoch": 1.27, + "learning_rate": 4.4222520107238607e-05, + "loss": 0.3331, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 4.420911528150134e-05, + "loss": 0.4375, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 4.419571045576408e-05, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 4.418230563002681e-05, + "loss": 0.4438, + "step": 484 + }, + { + "epoch": 1.28, + "learning_rate": 4.416890080428955e-05, + "loss": 0.3648, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 4.415549597855228e-05, + "loss": 0.3023, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 4.414209115281502e-05, + "loss": 0.2963, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 4.412868632707775e-05, + "loss": 0.1749, + "step": 488 + }, + { + "epoch": 1.29, + "learning_rate": 4.411528150134048e-05, + "loss": 0.2006, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 4.410187667560322e-05, + "loss": 0.0923, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 4.408847184986595e-05, + "loss": 0.2665, + "step": 491 + }, + { + "epoch": 1.3, + "learning_rate": 4.407506702412869e-05, + "loss": 0.3378, + "step": 492 + }, + { + "epoch": 1.3, + "learning_rate": 4.406166219839142e-05, + "loss": 0.2543, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 4.4048257372654156e-05, + "loss": 0.2521, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 4.403485254691689e-05, + "loss": 0.1616, + "step": 495 + }, + { + "epoch": 1.31, + "learning_rate": 4.402144772117963e-05, + "loss": 0.1445, + "step": 496 + }, + { + "epoch": 1.31, + "learning_rate": 4.400804289544236e-05, + "loss": 0.5352, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 4.39946380697051e-05, + "loss": 0.0654, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 4.398123324396783e-05, + "loss": 0.4354, + "step": 499 + }, + { + "epoch": 1.32, + "learning_rate": 4.396782841823057e-05, + "loss": 0.3893, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 4.39544235924933e-05, + "loss": 0.2779, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 4.394101876675604e-05, + "loss": 0.3702, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 4.392761394101877e-05, + "loss": 0.3899, + "step": 503 + }, + { + "epoch": 1.33, + "learning_rate": 4.39142091152815e-05, + "loss": 0.3091, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 4.390080428954424e-05, + "loss": 0.2143, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 4.388739946380697e-05, + "loss": 0.2551, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 4.387399463806971e-05, + "loss": 0.2227, + "step": 507 + }, + { + "epoch": 1.34, + "learning_rate": 4.386058981233244e-05, + "loss": 0.1383, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 4.384718498659518e-05, + "loss": 0.3463, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 4.383378016085791e-05, + "loss": 0.1104, + "step": 510 + }, + { + "epoch": 1.35, + "learning_rate": 4.382037533512065e-05, + "loss": 0.2439, + "step": 511 + }, + { + "epoch": 1.35, + "learning_rate": 4.380697050938338e-05, + "loss": 0.0725, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 4.379356568364612e-05, + "loss": 0.4928, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 4.3780160857908846e-05, + "loss": 0.0903, + "step": 514 + }, + { + "epoch": 1.36, + "learning_rate": 4.376675603217159e-05, + "loss": 0.1808, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 4.3753351206434316e-05, + "loss": 0.617, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 4.373994638069706e-05, + "loss": 0.0333, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 4.3726541554959786e-05, + "loss": 0.1113, + "step": 518 + }, + { + "epoch": 1.37, + "learning_rate": 4.371313672922252e-05, + "loss": 0.4604, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 4.3699731903485256e-05, + "loss": 0.4267, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 4.368632707774799e-05, + "loss": 0.2621, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 4.3672922252010727e-05, + "loss": 0.3236, + "step": 522 + }, + { + "epoch": 1.38, + "learning_rate": 4.365951742627346e-05, + "loss": 0.0559, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 4.364611260053619e-05, + "loss": 0.0801, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 4.363270777479893e-05, + "loss": 0.0518, + "step": 525 + }, + { + "epoch": 1.39, + "learning_rate": 4.361930294906166e-05, + "loss": 0.0618, + "step": 526 + }, + { + "epoch": 1.39, + "learning_rate": 4.36058981233244e-05, + "loss": 0.3043, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 4.359249329758713e-05, + "loss": 0.4693, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 4.3579088471849866e-05, + "loss": 0.2978, + "step": 529 + }, + { + "epoch": 1.4, + "learning_rate": 4.35656836461126e-05, + "loss": 0.0817, + "step": 530 + }, + { + "epoch": 1.4, + "learning_rate": 4.3552278820375336e-05, + "loss": 0.0784, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 4.353887399463807e-05, + "loss": 0.5389, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 4.3525469168900806e-05, + "loss": 0.2337, + "step": 533 + }, + { + "epoch": 1.41, + "learning_rate": 4.351206434316354e-05, + "loss": 0.3189, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 4.3498659517426276e-05, + "loss": 0.2503, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 4.348525469168901e-05, + "loss": 0.333, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 4.347184986595175e-05, + "loss": 0.2311, + "step": 537 + }, + { + "epoch": 1.42, + "learning_rate": 4.345844504021448e-05, + "loss": 0.5196, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 4.344504021447721e-05, + "loss": 0.5466, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 4.343163538873995e-05, + "loss": 0.4296, + "step": 540 + }, + { + "epoch": 1.43, + "learning_rate": 4.341823056300268e-05, + "loss": 0.6559, + "step": 541 + }, + { + "epoch": 1.43, + "learning_rate": 4.340482573726542e-05, + "loss": 0.6134, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 4.339142091152815e-05, + "loss": 0.5614, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 4.3378016085790886e-05, + "loss": 0.3143, + "step": 544 + }, + { + "epoch": 1.44, + "learning_rate": 4.336461126005362e-05, + "loss": 0.564, + "step": 545 + }, + { + "epoch": 1.44, + "learning_rate": 4.3351206434316356e-05, + "loss": 0.2141, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 4.333780160857909e-05, + "loss": 0.5886, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 4.3324396782841826e-05, + "loss": 0.2129, + "step": 548 + }, + { + "epoch": 1.45, + "learning_rate": 4.3310991957104555e-05, + "loss": 0.2491, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 4.32975871313673e-05, + "loss": 0.4023, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 4.3284182305630025e-05, + "loss": 0.2159, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 4.327077747989277e-05, + "loss": 0.2975, + "step": 552 + }, + { + "epoch": 1.46, + "learning_rate": 4.3257372654155495e-05, + "loss": 0.165, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 4.324396782841823e-05, + "loss": 0.063, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 4.3230563002680966e-05, + "loss": 0.4209, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 4.32171581769437e-05, + "loss": 0.2704, + "step": 556 + }, + { + "epoch": 1.47, + "learning_rate": 4.3203753351206436e-05, + "loss": 0.6455, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 4.319034852546917e-05, + "loss": 0.3354, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 4.3176943699731906e-05, + "loss": 0.4069, + "step": 559 + }, + { + "epoch": 1.48, + "learning_rate": 4.316353887399464e-05, + "loss": 0.2334, + "step": 560 + }, + { + "epoch": 1.48, + "learning_rate": 4.3150134048257376e-05, + "loss": 0.4192, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 4.313672922252011e-05, + "loss": 0.2942, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 4.3123324396782847e-05, + "loss": 0.2503, + "step": 563 + }, + { + "epoch": 1.49, + "learning_rate": 4.3109919571045575e-05, + "loss": 0.15, + "step": 564 + }, + { + "epoch": 1.49, + "learning_rate": 4.309651474530832e-05, + "loss": 0.6895, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 4.3083109919571045e-05, + "loss": 0.2462, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 4.306970509383379e-05, + "loss": 0.142, + "step": 567 + }, + { + "epoch": 1.5, + "learning_rate": 4.3056300268096515e-05, + "loss": 0.4838, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 4.304289544235925e-05, + "loss": 0.4714, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 4.3029490616621986e-05, + "loss": 0.4916, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 4.301608579088472e-05, + "loss": 0.2001, + "step": 571 + }, + { + "epoch": 1.51, + "learning_rate": 4.3002680965147456e-05, + "loss": 0.3897, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 4.298927613941019e-05, + "loss": 0.1723, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 4.297587131367292e-05, + "loss": 0.4095, + "step": 574 + }, + { + "epoch": 1.52, + "learning_rate": 4.296246648793566e-05, + "loss": 0.2615, + "step": 575 + }, + { + "epoch": 1.52, + "learning_rate": 4.294906166219839e-05, + "loss": 0.221, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 4.293565683646113e-05, + "loss": 0.1373, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 4.292225201072386e-05, + "loss": 0.313, + "step": 578 + }, + { + "epoch": 1.53, + "learning_rate": 4.2908847184986595e-05, + "loss": 0.4608, + "step": 579 + }, + { + "epoch": 1.53, + "learning_rate": 4.289544235924933e-05, + "loss": 0.2894, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 4.2882037533512065e-05, + "loss": 0.5123, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 4.28686327077748e-05, + "loss": 0.3287, + "step": 582 + }, + { + "epoch": 1.54, + "learning_rate": 4.2855227882037536e-05, + "loss": 0.4634, + "step": 583 + }, + { + "epoch": 1.54, + "learning_rate": 4.284182305630027e-05, + "loss": 0.0753, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.2828418230563006e-05, + "loss": 0.5287, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.281501340482574e-05, + "loss": 0.5483, + "step": 586 + }, + { + "epoch": 1.55, + "learning_rate": 4.2801608579088476e-05, + "loss": 0.1076, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.278820375335121e-05, + "loss": 0.232, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.277479892761394e-05, + "loss": 0.4706, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.276139410187668e-05, + "loss": 0.5791, + "step": 590 + }, + { + "epoch": 1.56, + "learning_rate": 4.274798927613941e-05, + "loss": 0.4995, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.273458445040215e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.272117962466488e-05, + "loss": 0.2728, + "step": 593 + }, + { + "epoch": 1.57, + "learning_rate": 4.2707774798927615e-05, + "loss": 0.3598, + "step": 594 + }, + { + "epoch": 1.57, + "learning_rate": 4.269436997319035e-05, + "loss": 0.2669, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.2680965147453086e-05, + "loss": 0.4091, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.266756032171582e-05, + "loss": 0.7653, + "step": 597 + }, + { + "epoch": 1.58, + "learning_rate": 4.2654155495978556e-05, + "loss": 0.4323, + "step": 598 + }, + { + "epoch": 1.58, + "learning_rate": 4.2640750670241284e-05, + "loss": 0.2147, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.2627345844504026e-05, + "loss": 0.2229, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2613941018766754e-05, + "loss": 0.1477, + "step": 601 + }, + { + "epoch": 1.59, + "learning_rate": 4.2600536193029496e-05, + "loss": 0.4813, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.2587131367292225e-05, + "loss": 0.2719, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.257372654155496e-05, + "loss": 0.1577, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.2560321715817695e-05, + "loss": 0.5718, + "step": 605 + }, + { + "epoch": 1.6, + "learning_rate": 4.254691689008043e-05, + "loss": 0.1481, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.2533512064343165e-05, + "loss": 0.3548, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.25201072386059e-05, + "loss": 0.1907, + "step": 608 + }, + { + "epoch": 1.61, + "learning_rate": 4.250670241286863e-05, + "loss": 0.4215, + "step": 609 + }, + { + "epoch": 1.61, + "learning_rate": 4.249329758713137e-05, + "loss": 0.0681, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.24798927613941e-05, + "loss": 0.514, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.246648793565684e-05, + "loss": 0.1242, + "step": 612 + }, + { + "epoch": 1.62, + "learning_rate": 4.245308310991957e-05, + "loss": 0.217, + "step": 613 + }, + { + "epoch": 1.62, + "learning_rate": 4.243967828418231e-05, + "loss": 0.1641, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.242627345844504e-05, + "loss": 0.7095, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.2412868632707775e-05, + "loss": 0.359, + "step": 616 + }, + { + "epoch": 1.63, + "learning_rate": 4.239946380697051e-05, + "loss": 0.94, + "step": 617 + }, + { + "epoch": 1.63, + "learning_rate": 4.2386058981233245e-05, + "loss": 0.5194, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.237265415549598e-05, + "loss": 0.3398, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.2359249329758715e-05, + "loss": 0.2305, + "step": 620 + }, + { + "epoch": 1.64, + "learning_rate": 4.234584450402145e-05, + "loss": 0.6045, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.2332439678284185e-05, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.231903485254692e-05, + "loss": 0.0753, + "step": 623 + }, + { + "epoch": 1.65, + "learning_rate": 4.2305630026809656e-05, + "loss": 0.2732, + "step": 624 + }, + { + "epoch": 1.65, + "learning_rate": 4.229222520107239e-05, + "loss": 0.4372, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.227882037533512e-05, + "loss": 0.1299, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.226541554959786e-05, + "loss": 0.2928, + "step": 627 + }, + { + "epoch": 1.66, + "learning_rate": 4.225201072386059e-05, + "loss": 0.2028, + "step": 628 + }, + { + "epoch": 1.66, + "learning_rate": 4.223860589812333e-05, + "loss": 0.2725, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.222520107238606e-05, + "loss": 0.0851, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.2211796246648795e-05, + "loss": 0.1471, + "step": 631 + }, + { + "epoch": 1.67, + "learning_rate": 4.219839142091153e-05, + "loss": 0.1986, + "step": 632 + }, + { + "epoch": 1.67, + "learning_rate": 4.2184986595174265e-05, + "loss": 0.5156, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.2171581769437e-05, + "loss": 0.5286, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.2158176943699735e-05, + "loss": 0.3635, + "step": 635 + }, + { + "epoch": 1.68, + "learning_rate": 4.2144772117962464e-05, + "loss": 0.1407, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.2131367292225206e-05, + "loss": 0.1042, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2117962466487934e-05, + "loss": 0.1553, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.2104557640750676e-05, + "loss": 0.1665, + "step": 639 + }, + { + "epoch": 1.69, + "learning_rate": 4.2091152815013404e-05, + "loss": 0.3706, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.207774798927614e-05, + "loss": 0.6195, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.2064343163538874e-05, + "loss": 0.1341, + "step": 642 + }, + { + "epoch": 1.7, + "learning_rate": 4.205093833780161e-05, + "loss": 0.5384, + "step": 643 + }, + { + "epoch": 1.7, + "learning_rate": 4.2037533512064345e-05, + "loss": 0.2802, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.202412868632708e-05, + "loss": 0.3812, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.2010723860589815e-05, + "loss": 0.2433, + "step": 646 + }, + { + "epoch": 1.71, + "learning_rate": 4.199731903485255e-05, + "loss": 0.0642, + "step": 647 + }, + { + "epoch": 1.71, + "learning_rate": 4.1983914209115285e-05, + "loss": 0.0547, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.197050938337802e-05, + "loss": 0.4388, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.1957104557640756e-05, + "loss": 0.8228, + "step": 650 + }, + { + "epoch": 1.72, + "learning_rate": 4.1943699731903484e-05, + "loss": 0.6453, + "step": 651 + }, + { + "epoch": 1.72, + "learning_rate": 4.1930294906166226e-05, + "loss": 0.3367, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 4.1916890080428954e-05, + "loss": 0.2139, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 4.1903485254691696e-05, + "loss": 0.2144, + "step": 654 + }, + { + "epoch": 1.73, + "learning_rate": 4.1890080428954424e-05, + "loss": 0.3894, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 4.187667560321716e-05, + "loss": 0.0891, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 4.1863270777479895e-05, + "loss": 0.1287, + "step": 657 + }, + { + "epoch": 1.74, + "learning_rate": 4.184986595174263e-05, + "loss": 0.2826, + "step": 658 + }, + { + "epoch": 1.74, + "learning_rate": 4.1836461126005365e-05, + "loss": 0.474, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 4.18230563002681e-05, + "loss": 0.4228, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 4.180965147453083e-05, + "loss": 0.4952, + "step": 661 + }, + { + "epoch": 1.75, + "learning_rate": 4.179624664879357e-05, + "loss": 0.173, + "step": 662 + }, + { + "epoch": 1.75, + "learning_rate": 4.17828418230563e-05, + "loss": 0.363, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 4.176943699731904e-05, + "loss": 0.4404, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 4.175603217158177e-05, + "loss": 0.486, + "step": 665 + }, + { + "epoch": 1.76, + "learning_rate": 4.1742627345844504e-05, + "loss": 0.4463, + "step": 666 + }, + { + "epoch": 1.76, + "learning_rate": 4.172922252010724e-05, + "loss": 0.2409, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 4.1715817694369974e-05, + "loss": 0.5291, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 4.170241286863271e-05, + "loss": 0.069, + "step": 669 + }, + { + "epoch": 1.77, + "learning_rate": 4.1689008042895445e-05, + "loss": 0.4162, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 4.167560321715818e-05, + "loss": 0.6171, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 4.1662198391420915e-05, + "loss": 0.3097, + "step": 672 + }, + { + "epoch": 1.78, + "learning_rate": 4.164879356568365e-05, + "loss": 0.5109, + "step": 673 + }, + { + "epoch": 1.78, + "learning_rate": 4.1635388739946385e-05, + "loss": 0.2169, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 4.162198391420912e-05, + "loss": 0.2406, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 4.160857908847185e-05, + "loss": 0.1853, + "step": 676 + }, + { + "epoch": 1.79, + "learning_rate": 4.159517426273459e-05, + "loss": 0.5743, + "step": 677 + }, + { + "epoch": 1.79, + "learning_rate": 4.158176943699732e-05, + "loss": 0.5432, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 4.156836461126006e-05, + "loss": 0.2033, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 4.155495978552279e-05, + "loss": 0.3848, + "step": 680 + }, + { + "epoch": 1.8, + "learning_rate": 4.1541554959785524e-05, + "loss": 0.1721, + "step": 681 + }, + { + "epoch": 1.8, + "learning_rate": 4.152815013404826e-05, + "loss": 0.3793, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 4.1514745308310994e-05, + "loss": 0.3848, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 4.150134048257373e-05, + "loss": 0.186, + "step": 684 + }, + { + "epoch": 1.81, + "learning_rate": 4.1487935656836465e-05, + "loss": 0.2692, + "step": 685 + }, + { + "epoch": 1.81, + "learning_rate": 4.147453083109919e-05, + "loss": 0.3839, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 4.1461126005361935e-05, + "loss": 0.3037, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 4.144772117962466e-05, + "loss": 0.6401, + "step": 688 + }, + { + "epoch": 1.82, + "learning_rate": 4.1434316353887405e-05, + "loss": 0.1173, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 4.1420911528150134e-05, + "loss": 0.3217, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 4.140750670241287e-05, + "loss": 0.2358, + "step": 691 + }, + { + "epoch": 1.83, + "learning_rate": 4.1394101876675604e-05, + "loss": 0.7696, + "step": 692 + }, + { + "epoch": 1.83, + "learning_rate": 4.138069705093834e-05, + "loss": 0.2288, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 4.1367292225201074e-05, + "loss": 0.2575, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 4.135388739946381e-05, + "loss": 0.1201, + "step": 695 + }, + { + "epoch": 1.84, + "learning_rate": 4.1340482573726544e-05, + "loss": 0.2034, + "step": 696 + }, + { + "epoch": 1.84, + "learning_rate": 4.132707774798928e-05, + "loss": 0.1142, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 4.1313672922252015e-05, + "loss": 0.5671, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 4.130026809651475e-05, + "loss": 0.3132, + "step": 699 + }, + { + "epoch": 1.85, + "learning_rate": 4.1286863270777485e-05, + "loss": 0.4266, + "step": 700 + }, + { + "epoch": 1.85, + "learning_rate": 4.127345844504021e-05, + "loss": 0.1354, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 4.1260053619302955e-05, + "loss": 0.2867, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 4.1246648793565684e-05, + "loss": 0.1839, + "step": 703 + }, + { + "epoch": 1.86, + "learning_rate": 4.1233243967828425e-05, + "loss": 0.4741, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 4.1219839142091154e-05, + "loss": 0.2909, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 4.120643431635389e-05, + "loss": 0.2705, + "step": 706 + }, + { + "epoch": 1.87, + "learning_rate": 4.1193029490616624e-05, + "loss": 0.1354, + "step": 707 + }, + { + "epoch": 1.87, + "learning_rate": 4.117962466487936e-05, + "loss": 0.4801, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 4.1166219839142094e-05, + "loss": 0.189, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 4.115281501340483e-05, + "loss": 0.3204, + "step": 710 + }, + { + "epoch": 1.88, + "learning_rate": 4.113941018766756e-05, + "loss": 0.4358, + "step": 711 + }, + { + "epoch": 1.88, + "learning_rate": 4.11260053619303e-05, + "loss": 0.9474, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 4.111260053619303e-05, + "loss": 0.2102, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 4.109919571045577e-05, + "loss": 0.3927, + "step": 714 + }, + { + "epoch": 1.89, + "learning_rate": 4.10857908847185e-05, + "loss": 0.139, + "step": 715 + }, + { + "epoch": 1.89, + "learning_rate": 4.1072386058981233e-05, + "loss": 0.3575, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 4.105898123324397e-05, + "loss": 0.7534, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 4.1045576407506704e-05, + "loss": 0.1134, + "step": 718 + }, + { + "epoch": 1.9, + "learning_rate": 4.103217158176944e-05, + "loss": 0.2136, + "step": 719 + }, + { + "epoch": 1.9, + "learning_rate": 4.1018766756032174e-05, + "loss": 0.4344, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 4.10053619302949e-05, + "loss": 0.0695, + "step": 721 + }, + { + "epoch": 1.91, + "learning_rate": 4.0991957104557644e-05, + "loss": 0.2286, + "step": 722 + }, + { + "epoch": 1.91, + "learning_rate": 4.097855227882037e-05, + "loss": 0.1189, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 4.0965147453083115e-05, + "loss": 0.2882, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 4.095174262734584e-05, + "loss": 0.2623, + "step": 725 + }, + { + "epoch": 1.92, + "learning_rate": 4.093833780160858e-05, + "loss": 0.2473, + "step": 726 + }, + { + "epoch": 1.92, + "learning_rate": 4.092493297587131e-05, + "loss": 0.4846, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 4.091152815013405e-05, + "loss": 0.1689, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 4.0898123324396783e-05, + "loss": 0.3481, + "step": 729 + }, + { + "epoch": 1.93, + "learning_rate": 4.088471849865952e-05, + "loss": 0.3447, + "step": 730 + }, + { + "epoch": 1.93, + "learning_rate": 4.0871313672922254e-05, + "loss": 0.2959, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 4.085790884718499e-05, + "loss": 0.3387, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 4.0844504021447724e-05, + "loss": 0.3742, + "step": 733 + }, + { + "epoch": 1.94, + "learning_rate": 4.083109919571046e-05, + "loss": 0.3245, + "step": 734 + }, + { + "epoch": 1.94, + "learning_rate": 4.0817694369973194e-05, + "loss": 0.4891, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 4.080428954423593e-05, + "loss": 0.1444, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 4.0790884718498664e-05, + "loss": 0.3678, + "step": 737 + }, + { + "epoch": 1.95, + "learning_rate": 4.077747989276139e-05, + "loss": 0.3772, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 4.0764075067024135e-05, + "loss": 0.43, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 4.075067024128686e-05, + "loss": 0.2463, + "step": 740 + }, + { + "epoch": 1.96, + "learning_rate": 4.0737265415549605e-05, + "loss": 0.2277, + "step": 741 + }, + { + "epoch": 1.96, + "learning_rate": 4.072386058981233e-05, + "loss": 0.2153, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 4.071045576407507e-05, + "loss": 0.1052, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 4.0697050938337804e-05, + "loss": 0.5657, + "step": 744 + }, + { + "epoch": 1.97, + "learning_rate": 4.068364611260054e-05, + "loss": 0.2664, + "step": 745 + }, + { + "epoch": 1.97, + "learning_rate": 4.0670241286863274e-05, + "loss": 0.1369, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 4.065683646112601e-05, + "loss": 0.4972, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 4.064343163538874e-05, + "loss": 0.4131, + "step": 748 + }, + { + "epoch": 1.98, + "learning_rate": 4.063002680965148e-05, + "loss": 0.3824, + "step": 749 + }, + { + "epoch": 1.98, + "learning_rate": 4.061662198391421e-05, + "loss": 0.323, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 4.060321715817695e-05, + "loss": 0.3698, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 4.058981233243968e-05, + "loss": 0.1708, + "step": 752 + }, + { + "epoch": 1.99, + "learning_rate": 4.057640750670241e-05, + "loss": 0.2941, + "step": 753 + }, + { + "epoch": 1.99, + "learning_rate": 4.056300268096515e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 4.054959785522788e-05, + "loss": 0.0851, + "step": 755 + }, + { + "epoch": 2.0, + "learning_rate": 4.053619302949062e-05, + "loss": 0.4694, + "step": 756 + }, + { + "epoch": 2.0, + "eval_f1": 0.7882736156351792, + "eval_loss": 0.4484867751598358, + "eval_runtime": 1.8734, + "eval_samples_per_second": 807.638, + "eval_steps_per_second": 50.711, + "step": 756 + } + ], + "max_steps": 3780, + "num_train_epochs": 10, + "total_flos": 194391767921472.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-756/training_args.bin b/checkpoint-756/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/checkpoint-756/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..364156e83c34ba8c6fcc66e875a05b1d1a9b4821 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "distilbert-base-cased", + "activation": "gelu", + "architectures": [ + "DistilBertForSequenceClassification" + ], + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "id2label": { + "0": "NO DISASTER", + "1": "DISASTER" + }, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "output_past": true, + "pad_token_id": 0, + "problem_type": "single_label_classification", + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "tie_weights_": true, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "vocab_size": 28996 +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..71ba10a609aae9899f7320e9ba7dbd89017b9e2d --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4189b8dfbd6d144d0ea9419afb0d3b4ae2d988dad19ef3e96026cfb2171324d6 +size 263167661 diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e04ed002938f760694506615e2c2b7be439a9c1 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c01d8e107b4a20c0ba0f3692dae4e25d8f1dffe1d23d6e4f4bdf92b87ab5ea +size 3899 diff --git a/vocab.txt b/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ea941cc79a6f3d7985ca6991ef4f67dad62af04 --- /dev/null +++ b/vocab.txt @@ -0,0 +1,28996 @@ +[PAD] +[unused1] +[unused2] +[unused3] +[unused4] +[unused5] +[unused6] +[unused7] +[unused8] +[unused9] +[unused10] +[unused11] +[unused12] +[unused13] +[unused14] +[unused15] +[unused16] +[unused17] +[unused18] +[unused19] +[unused20] +[unused21] +[unused22] +[unused23] +[unused24] +[unused25] +[unused26] +[unused27] +[unused28] +[unused29] +[unused30] +[unused31] +[unused32] +[unused33] +[unused34] +[unused35] +[unused36] +[unused37] +[unused38] +[unused39] +[unused40] +[unused41] +[unused42] +[unused43] +[unused44] +[unused45] +[unused46] +[unused47] +[unused48] +[unused49] +[unused50] +[unused51] +[unused52] +[unused53] +[unused54] +[unused55] +[unused56] +[unused57] +[unused58] +[unused59] +[unused60] +[unused61] +[unused62] +[unused63] +[unused64] +[unused65] +[unused66] +[unused67] +[unused68] +[unused69] +[unused70] +[unused71] +[unused72] +[unused73] +[unused74] +[unused75] +[unused76] +[unused77] +[unused78] +[unused79] +[unused80] +[unused81] +[unused82] +[unused83] +[unused84] +[unused85] +[unused86] +[unused87] +[unused88] +[unused89] +[unused90] +[unused91] +[unused92] +[unused93] +[unused94] +[unused95] +[unused96] +[unused97] +[unused98] +[unused99] +[UNK] +[CLS] +[SEP] +[MASK] +[unused100] +[unused101] +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +¡ +¢ +£ +¥ +§ +¨ +© +ª +« +¬ +® +° +± +² +³ +´ +µ +¶ +· +¹ +º +» +¼ +½ +¾ +¿ +À +Á + +Ä +Å +Æ +Ç +È +É +Í +Î +Ñ +Ó +Ö +× +Ø +Ú +Ü +Þ +ß +à +á +â +ã +ä +å +æ +ç +è +é +ê +ë +ì +í +î +ï +ð +ñ +ò +ó +ô +õ +ö +÷ +ø +ù +ú +û +ü +ý +þ +ÿ +Ā +ā +ă +ą +Ć +ć +Č +č +ď +Đ +đ +ē +ė +ę +ě +ğ +ġ +Ħ +ħ +ĩ +Ī +ī +İ +ı +ļ +Ľ +ľ +Ł +ł +ń +ņ +ň +ŋ +Ō +ō +ŏ +ő +Œ +œ +ř +Ś +ś +Ş +ş +Š +š +Ţ +ţ +ť +ũ +ū +ŭ +ů +ű +ų +ŵ +ŷ +ź +Ż +ż +Ž +ž +Ə +ƒ +ơ +ư +ǎ +ǐ +ǒ +ǔ +ǫ +Ș +ș +Ț +ț +ɐ +ɑ +ɔ +ɕ +ə +ɛ +ɡ +ɣ +ɨ +ɪ +ɲ +ɾ +ʀ +ʁ +ʂ +ʃ +ʊ +ʋ +ʌ +ʐ +ʑ +ʒ +ʔ +ʰ +ʲ +ʳ +ʷ +ʻ +ʼ +ʾ +ʿ +ˈ +ː +ˡ +ˢ +ˣ +́ +̃ +̍ +̯ +͡ +Α +Β +Γ +Δ +Ε +Η +Θ +Ι +Κ +Λ +Μ +Ν +Ο +Π +Σ +Τ +Φ +Χ +Ψ +Ω +ά +έ +ή +ί +α +β +γ +δ +ε +ζ +η +θ +ι +κ +λ +μ +ν +ξ +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +ό +ύ +ώ +І +Ј +А +Б +В +Г +Д +Е +Ж +З +И +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Э +Ю +Я +а +б +в +г +д +е +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +ё +і +ї +ј +њ +ћ +Ա +Հ +ա +ե +ի +կ +մ +յ +ն +ո +ս +տ +ր +ւ +ְ +ִ +ֵ +ֶ +ַ +ָ +ֹ +ּ +א +ב +ג +ד +ה +ו +ז +ח +ט +י +כ +ל +ם +מ +ן +נ +ס +ע +פ +צ +ק +ר +ש +ת +، +ء +آ +أ +إ +ئ +ا +ب +ة +ت +ث +ج +ح +خ +د +ذ +ر +ز +س +ش +ص +ض +ط +ظ +ع +غ +ف +ق +ك +ل +م +ن +ه +و +ى +ي +َ +ِ +ٹ +پ +چ +ک +گ +ہ +ی +ے +ं +आ +क +ग +च +ज +ण +त +द +ध +न +प +ब +भ +म +य +र +ल +व +श +ष +स +ह +ा +ि +ी +ु +े +ो +् +। +॥ +আ +ই +এ +ও +ক +খ +গ +চ +ছ +জ +ট +ত +থ +দ +ধ +ন +প +ব +ম +য +র +ল +শ +স +হ +় +া +ি +ী +ু +ে +ো +্ +য় +க +த +ப +ம +ய +ர +ல +வ +ா +ி +ு +் +ร +་ +ག +ང +ད +ན +བ +མ +ར +ལ +ས +ི +ུ +ེ +ོ +ა +ე +ი +ლ +ნ +ო +რ +ს +ᴬ +ᴵ +ᵀ +ᵃ +ᵇ +ᵈ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵖ +ᵗ +ᵘ +ᵢ +ᵣ +ᵤ +ᵥ +ᶜ +ᶠ +ḍ +Ḥ +ḥ +Ḩ +ḩ +ḳ +ṃ +ṅ +ṇ +ṛ +ṣ +ṭ +ạ +ả +ấ +ầ +ẩ +ậ +ắ +ế +ề +ể +ễ +ệ +ị +ọ +ố +ồ +ổ +ộ +ớ +ờ +ợ +ụ +ủ +ứ +ừ +ử +ữ +ự +ỳ +ỹ +ἀ +ἐ +ὁ +ὐ +ὰ +ὶ +ὸ +ῆ +ῖ +ῦ +ῶ +‐ +‑ +‒ +– +— +― +‖ +‘ +’ +‚ +“ +” +„ +† +‡ +• +… +‰ +′ +″ +⁄ +⁰ +ⁱ +⁴ +⁵ +⁶ +⁷ +⁸ +⁹ +⁺ +⁻ +ⁿ +₀ +₁ +₂ +₃ +₄ +₅ +₆ +₇ +₈ +₉ +₊ +₍ +₎ +ₐ +ₑ +ₒ +ₓ +ₕ +ₖ +ₘ +ₙ +ₚ +ₛ +ₜ +₤ +€ +₱ +₹ +ℓ +№ +ℝ +⅓ +← +↑ +→ +↔ +⇌ +⇒ +∂ +∈ +− +∗ +∘ +√ +∞ +∧ +∨ +∩ +∪ +≈ +≠ +≡ +≤ +≥ +⊂ +⊆ +⊕ +⋅ +─ +│ +■ +● +★ +☆ +☉ +♠ +♣ +♥ +♦ +♭ +♯ +⟨ +⟩ +ⱼ +、 +。 +《 +》 +「 +」 +『 +』 +〜 +い +う +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +つ +て +と +な +に +の +は +ひ +ま +み +む +め +も +や +ゆ +よ +ら +り +る +れ +ん +ア +ィ +イ +ウ +エ +オ +カ +ガ +キ +ク +グ +コ +サ +シ +ジ +ス +ズ +タ +ダ +ッ +テ +デ +ト +ド +ナ +ニ +ハ +バ +パ +フ +ブ +プ +マ +ミ +ム +ャ +ュ +ラ +リ +ル +レ +ロ +ン +・ +ー +一 +三 +上 +下 +中 +事 +二 +井 +京 +人 +亻 +仁 +佐 +侍 +光 +公 +力 +北 +十 +南 +原 +口 +史 +司 +吉 +同 +和 +囗 +国 +國 +土 +城 +士 +大 +天 +太 +夫 +女 +子 +宀 +安 +宮 +宿 +小 +尚 +山 +島 +川 +州 +平 +年 +心 +愛 +戸 +文 +新 +方 +日 +明 +星 +書 +月 +木 +本 +李 +村 +東 +松 +林 +正 +武 +氏 +水 +氵 +江 +河 +海 +版 +犬 +王 +生 +田 +白 +皇 +省 +真 +石 +社 +神 +竹 +美 +義 +花 +藤 +西 +谷 +車 +辶 +道 +郎 +郡 +部 +野 +金 +長 +門 +陽 +青 +食 +馬 +高 +龍 +龸 +사 +씨 +의 +이 +한 +fi +fl +! +( +) +, +- +/ +: +the +of +and +to +in +was +The +is +for +as +on +with +that +##s +his +by +he +at +from +it +her +He +had +an +were +you +be +In +she +are +but +which +It +not +or +have +my +him +one +this +me +has +also +up +their +first +out +who +been +they +She +into +all +would +its +##ing +time +two +##a +##e +said +about +when +over +more +other +can +after +back +them +then +##ed +there +like +so +only +##n +could +##d +##i +##y +what +no +##o +where +This +made +than +if +You +##ly +through +we +before +##r +just +some +##er +years +do +New +##t +down +between +new +now +will +three +most +On +around +year +used +such +being +well +during +They +know +against +under +later +did +part +known +off +while +His +re +... +##l +people +until +way +American +didn +University +your +both +many +get +United +became +head +There +second +As +work +any +But +still +again +born +even +eyes +After +including +de +took +And +long +team +season +family +see +right +same +called +name +because +film +don +10 +found +much +school +##es +going +won +place +away +We +day +left +John +000 +hand +since +World +these +how +make +number +each +life +area +man +four +go +No +here +very +National +##m +played +released +never +began +States +album +home +last +too +held +several +May +own +##on +take +end +School +##h +ll +series +What +want +use +another +city +When +2010 +side +At +may +That +came +face +June +think +game +those +high +March +early +September +##al +2011 +looked +July +state +small +thought +went +January +October +##u +based +August +##us +world +good +April +York +us +12 +2012 +2008 +For +2009 +group +along +few +South +little +##k +following +November +something +2013 +December +set +2007 +old +2006 +2014 +located +##an +music +County +City +former +##in +room +ve +next +All +##man +got +father +house +##g +body +15 +20 +18 +started +If +2015 +town +our +line +War +large +population +named +British +company +member +five +My +single +##en +age +State +moved +February +11 +Her +should +century +government +built +come +best +show +However +within +look +men +door +without +need +wasn +2016 +water +One +system +knew +every +died +League +turned +asked +North +St +wanted +building +received +song +served +though +felt +##ia +station +band +##ers +local +public +himself +different +death +say +##1 +30 +##2 +2005 +16 +night +behind +children +English +members +near +saw +together +son +14 +voice +village +13 +hands +help +##3 +due +French +London +top +told +open +published +third +2017 +play +across +During +put +final +often +include +25 +##le +main +having +2004 +once +ever +let +book +led +gave +late +front +find +club +##4 +German +included +species +College +form +opened +mother +women +enough +West +must +2000 +power +really +17 +making +half +##6 +order +might +##is +given +million +times +days +point +full +service +With +km +major +##7 +original +become +seen +II +north +six +##te +love +##0 +national +International +##5 +24 +So +District +lost +run +couldn +career +always +##9 +2003 +##th +country +##z +House +air +tell +south +worked +woman +player +##A +almost +war +River +##ic +married +continued +Then +James +close +black +short +##8 +##na +using +history +returned +light +car +##ra +sure +William +things +General +##ry +2002 +better +support +100 +among +From +feet +King +anything +21 +19 +established +district +2001 +feel +great +##ton +level +Cup +These +written +games +others +already +title +story +##p +law +thing +US +record +role +however +By +students +England +white +control +least +inside +land +##C +22 +give +community +hard +##ie +non +##c +produced +George +round +period +Park +business +various +##ne +does +present +wife +far +taken +per +reached +David +able +version +working +young +live +created +joined +East +living +appeared +case +High +done +23 +important +President +Award +France +position +office +looking +total +general +class +To +production +##S +football +party +brother +keep +mind +free +Street +hair +announced +development +either +nothing +moment +Church +followed +wrote +why +India +San +election +1999 +lead +How +##ch +##rs +words +European +course +considered +America +arms +Army +political +##la +28 +26 +west +east +ground +further +church +less +site +First +Not +Australia +toward +California +##ness +described +works +An +Council +heart +past +military +27 +##or +heard +field +human +soon +founded +1998 +playing +trying +##x +##ist +##ta +television +mouth +although +taking +win +fire +Division +##ity +Party +Royal +program +Some +Don +Association +According +tried +TV +Paul +outside +daughter +Best +While +someone +match +recorded +Canada +closed +region +Air +above +months +elected +##da +##ian +road +##ar +brought +move +1997 +leave +##um +Thomas +1996 +am +low +Robert +formed +person +services +points +Mr +miles +##b +stop +rest +doing +needed +international +release +floor +start +sound +call +killed +real +dark +research +finished +language +Michael +professional +change +sent +50 +upon +29 +track +hit +event +2018 +term +example +Germany +similar +return +##ism +fact +pulled +stood +says +ran +information +yet +result +developed +girl +##re +God +1995 +areas +signed +decided +##ment +Company +seemed +##el +co +turn +race +common +video +Charles +Indian +##ation +blood +art +red +##able +added +rather +1994 +met +director +addition +design +average +minutes +##ies +##ted +available +bed +coming +friend +idea +kind +Union +Road +remained +##ting +everything +##ma +running +care +finally +Chinese +appointed +1992 +Australian +##ley +popular +mean +teams +probably +##land +usually +project +social +Championship +possible +word +Russian +instead +mi +herself +##T +Peter +Hall +Center +seat +style +money +1993 +else +Department +table +Music +current +31 +features +special +events +character +Two +square +sold +debut +##v +process +Although +Since +##ka +40 +Central +currently +education +placed +lot +China +quickly +forward +seven +##ling +Europe +arm +performed +Japanese +1991 +Henry +Now +Dr +##ion +week +Group +myself +big +UK +Washington +ten +deep +1990 +Club +Japan +space +La +directed +smile +episode +hours +whole +##de +##less +Why +wouldn +designed +strong +training +changed +Society +stage +involved +hadn +towards +leading +police +eight +kept +Institute +study +largest +child +eventually +private +modern +Court +throughout +getting +originally +attack +##E +talk +Great +longer +songs +alone +##ine +wide +dead +walked +shot +##ri +Oh +force +##st +Art +today +friends +Island +Richard +1989 +center +construction +believe +size +White +ship +completed +##B +gone +Just +rock +sat +##R +radio +below +entire +families +league +includes +type +lived +official +range +hold +featured +Most +##ter +president +passed +means +##f +forces +lips +Mary +Do +guitar +##ce +food +wall +Of +spent +Its +performance +hear +##P +Western +reported +sister +##et +morning +##M +especially +##ive +Minister +itself +post +bit +groups +1988 +##tion +Black +##ng +Well +raised +sometimes +Canadian +Paris +Spanish +replaced +schools +Academy +leaving +central +female +Christian +Jack +whose +college +onto +provided +##D +##ville +players +actually +stopped +##son +Museum +doesn +##ts +books +fight +allowed +##ur +beginning +Records +awarded +parents +coach +##os +Red +saying +##ck +Smith +Yes +Lake +##L +aircraft +1987 +##ble +previous +ft +action +Italian +African +happened +vocals +Act +future +court +##ge +1986 +degree +phone +##ro +Is +countries +winning +breath +Love +river +matter +Lord +Other +list +self +parts +##ate +provide +cut +shows +plan +1st +interest +##ized +Africa +stated +Sir +fell +owned +earlier +ended +competition +attention +1985 +lower +nearly +bad +older +stay +Saint +##se +certain +1984 +fingers +blue +try +fourth +Grand +##as +king +##nt +makes +chest +movement +states +moving +data +introduced +model +date +section +Los +deal +##I +skin +entered +middle +success +Texas +##w +summer +island +##N +Republic +length +husband +1980 +##ey +reason +anyone +forced +via +base +500 +job +covered +Festival +Roman +successful +rights +cover +Man +writing +Ireland +##F +related +goal +takes +buildings +true +weeks +1983 +Because +opening +novel +ISBN +meet +gold +##ous +mid +km² +standing +Football +Chicago +shook +whom +##ki +1982 +Day +feeling +scored +boy +higher +Force +leader +heavy +fall +question +sense +army +Second +energy +meeting +themselves +kill +##am +board +census +##ya +##ns +mine +meant +market +required +battle +campaign +attended +approximately +Kingdom +runs +active +##ha +contract +clear +previously +health +1979 +Arts +complete +Catholic +couple +units +##ll +##ty +Committee +shoulder +sea +systems +listed +##O +caught +tournament +##G +northern +author +Film +Your +##men +holding +offered +personal +1981 +southern +artist +traditional +studio +200 +capital +##ful +regular +ask +giving +organization +month +news +Are +read +managed +helped +studied +student +defeated +natural +industry +Year +noted +decision +Government +quite +##id +smiled +1972 +Maybe +tracks +##ke +Mark +al +media +engine +hour +Their +relationship +plays +property +structure +1976 +ago +Hill +Martin +1978 +ready +Many +Like +Bay +immediately +generally +Italy +Greek +practice +caused +division +significant +Joseph +speed +Let +thinking +completely +1974 +primary +mostly +##field +##K +1975 +##to +Even +writer +##led +dropped +magazine +collection +understand +route +highest +particular +films +lines +network +Science +loss +carried +direction +green +1977 +location +producer +according +Women +Queen +neck +thus +independent +view +1970 +Angeles +Soviet +distance +problem +Board +tour +western +income +appearance +access +Mexico +nodded +street +surface +arrived +believed +Old +1968 +1973 +becoming +whether +1945 +figure +singer +stand +Following +issue +window +wrong +pain +everyone +lives +issues +park +slowly +la +act +##va +bring +Lee +operations +key +comes +fine +cold +famous +Navy +1971 +Me +additional +individual +##ner +Zealand +goals +county +contains +Service +minute +2nd +reach +talking +particularly +##ham +movie +Director +glass +paper +studies +##co +railway +standard +Education +45 +represented +Chief +Louis +launched +Star +terms +60 +1969 +experience +watched +Another +Press +Tom +staff +starting +subject +break +Virginia +nine +eye +##age +evidence +foot +##est +companies +Prince +##V +gun +create +Big +People +guy +Green +simply +numerous +##line +increased +twenty +##ga +##do +1967 +award +officer +stone +Before +material +Northern +grew +male +plant +Life +legs +step +Al +unit +35 +except +answer +##U +report +response +Edward +commercial +edition +trade +science +##ca +Irish +Law +shown +rate +failed +##ni +remains +changes +mm +limited +larger +Later +cause +waiting +Time +##wood +cost +Bill +manager +activities +likely +allow +operated +retired +##ping +65 +directly +Who +associated +effect +hell +Florida +straight +hot +Valley +management +girls +expected +eastern +Mike +chance +cast +centre +chair +hurt +problems +##li +walk +programs +Team +characters +Battle +edge +pay +maybe +corner +majority +medical +Joe +Summer +##io +attempt +Pacific +command +Radio +##by +names +municipality +1964 +train +economic +Brown +feature +sex +source +agreed +remember +Three +1966 +1965 +Pennsylvania +victory +senior +annual +III +Southern +results +Sam +serving +religious +Jones +appears +##der +despite +claimed +Both +musical +matches +fast +security +selected +Young +double +complex +hospital +chief +Times +##ve +Championships +filled +Public +Despite +beautiful +Research +plans +Province +##ally +Wales +##ko +artists +metal +nearby +Spain +##il +32 +houses +supported +piece +##no +stared +recording +nature +legal +Russia +##ization +remaining +looks +##sh +bridge +closer +cases +scene +marriage +Little +##é +uses +Earth +specific +Frank +theory +Good +discovered +referred +bass +culture +university +presented +Congress +##go +metres +continue +1960 +isn +Awards +meaning +cell +composed +separate +Series +forms +Blue +cross +##tor +increase +test +computer +slightly +Where +Jewish +Town +tree +status +1944 +variety +responsible +pretty +initially +##way +realized +pass +provides +Captain +Alexander +recent +score +broke +Scott +drive +financial +showed +Line +stories +ordered +soldiers +genus +operation +gaze +sitting +society +Only +hope +actor +follow +Empire +Yeah +technology +happy +focus +policy +spread +situation +##ford +##ba +Mrs +watch +Can +1963 +Commission +touch +earned +troops +Under +1962 +individuals +cannot +19th +##lin +mile +expression +exactly +suddenly +weight +dance +stepped +places +appear +difficult +Railway +anti +numbers +kilometres +star +##ier +department +ice +Britain +removed +Once +##lo +Boston +value +##ant +mission +trees +Order +sports +join +serve +Major +poor +Poland +mainly +Theatre +pushed +Station +##it +Lady +federal +silver +##ler +foreign +##ard +Eastern +##den +box +hall +subsequently +lies +acquired +1942 +ancient +CD +History +Jean +beyond +##ger +El +##les +growing +championship +native +Parliament +Williams +watching +direct +overall +offer +Also +80 +Secretary +spoke +Latin +ability +##ated +safe +presence +##ial +headed +regional +planned +1961 +Johnson +throat +consists +##W +extended +Or +bar +walls +Chris +stations +politician +Olympics +influence +share +fighting +speak +hundred +Carolina +die +stars +##tic +color +Chapter +##ish +fear +sleep +goes +Francisco +oil +Bank +sign +physical +##berg +Dutch +seasons +##rd +Games +Governor +sorry +lack +Centre +memory +baby +smaller +charge +Did +multiple +ships +shirt +Assembly +amount +leaves +3rd +Foundation +conditions +1943 +Rock +Democratic +Daniel +##at +winner +products +##ina +store +latter +Professor +civil +prior +host +1956 +soft +vote +needs +Each +rules +1958 +pressure +letter +normal +proposed +levels +records +1959 +paid +intended +Victoria +purpose +okay +historical +issued +1980s +broadcast +rule +simple +picked +firm +Sea +1941 +Elizabeth +1940 +serious +featuring +highly +graduated +mentioned +choice +1948 +replied +percent +Scotland +##hi +females +constructed +1957 +settled +Steve +recognized +cities +crew +glanced +kiss +competed +flight +knowledge +editor +More +Conference +##H +fifth +elements +##ee +##tes +function +newspaper +recently +Miss +cultural +brown +twice +Office +1939 +truth +Creek +1946 +households +USA +1950 +quality +##tt +border +seconds +destroyed +pre +wait +ahead +build +image +90 +cars +##mi +33 +promoted +professor +et +bank +medal +text +broken +Middle +revealed +sides +wing +seems +channel +1970s +Ben +loved +effort +officers +Will +##ff +70 +Israel +Jim +upper +fully +label +Jr +assistant +powerful +pair +positive +##ary +gives +1955 +20th +races +remain +kitchen +primarily +##ti +Sydney +easy +Tour +whispered +buried +300 +News +Polish +1952 +Duke +Columbia +produce +accepted +00 +approach +minor +1947 +Special +44 +Asian +basis +visit +Fort +Civil +finish +formerly +beside +leaned +##ite +median +rose +coast +effects +supposed +Cross +##hip +Corps +residents +Jackson +##ir +Bob +basketball +36 +Asia +seem +Bishop +Book +##ber +ring +##ze +owner +BBC +##ja +transferred +acting +De +appearances +walking +Le +press +grabbed +1954 +officially +1953 +##pe +risk +taught +review +##X +lay +##well +council +Avenue +seeing +losing +Ohio +Super +province +ones +travel +##sa +projects +equipment +spot +Berlin +administrative +heat +potential +shut +capacity +elections +growth +fought +Republican +mixed +Andrew +teacher +turning +strength +shoulders +beat +wind +1949 +Health +follows +camp +suggested +perhaps +Alex +mountain +contact +divided +candidate +fellow +34 +Show +necessary +workers +ball +horse +ways +questions +protect +gas +activity +younger +bottom +founder +Scottish +screen +treatment +easily +com +##house +dedicated +Master +warm +Night +Georgia +Long +von +##me +perfect +website +1960s +piano +efforts +##ide +Tony +sort +offers +Development +Simon +executive +##nd +save +Over +Senate +1951 +1990s +draw +master +Police +##ius +renamed +boys +initial +prominent +damage +Co +##ov +##za +online +begin +occurred +captured +youth +Top +account +tells +Justice +conducted +forest +##town +bought +teeth +Jersey +##di +purchased +agreement +Michigan +##ure +campus +prison +becomes +product +secret +guess +Route +huge +types +drums +64 +split +defeat +estate +housing +##ot +brothers +Coast +declared +happen +titled +therefore +sun +commonly +alongside +Stadium +library +Home +article +steps +telling +slow +assigned +refused +laughed +wants +Nick +wearing +Rome +Open +##ah +Hospital +pointed +Taylor +lifted +escape +participated +##j +drama +parish +Santa +##per +organized +mass +pick +Airport +gets +Library +unable +pull +Live +##ging +surrounding +##ries +focused +Adam +facilities +##ning +##ny +38 +##ring +notable +era +connected +gained +operating +laid +Regiment +branch +defined +Christmas +machine +Four +academic +Iran +adopted +concept +Men +compared +search +traffic +Max +Maria +greater +##ding +widely +##burg +serves +1938 +37 +Go +hotel +shared +typically +scale +1936 +leg +suffered +yards +pieces +Ministry +Wilson +episodes +empty +1918 +safety +continues +yellow +historic +settlement +400 +Come +Corporation +enemy +content +picture +evening +territory +method +trial +solo +driver +Here +##ls +entrance +Prize +spring +whatever +##ent +75 +##ji +reading +Arthur +##cy +Our +clothes +Prime +Illinois +Kong +code +##ria +sit +Harry +Federal +chosen +administration +bodies +begins +stomach +Though +seats +Hong +density +Sun +leaders +Field +museum +chart +platform +languages +##ron +birth +holds +Gold +##un +fish +combined +##ps +4th +1937 +largely +captain +trust +Game +van +boat +Oxford +basic +beneath +Islands +painting +nice +Toronto +path +males +sources +block +conference +parties +murder +clubs +crowd +calling +About +Business +peace +knows +lake +speaking +stayed +Brazil +allowing +Born +unique +thick +Technology +##que +receive +des +semi +alive +noticed +format +##ped +coffee +digital +##ned +handed +guard +tall +faced +setting +plants +partner +claim +reduced +temple +animals +determined +classes +##out +estimated +##ad +Olympic +providing +Massachusetts +learned +Inc +Philadelphia +Social +carry +42 +possibly +hosted +tonight +respectively +Today +shape +Mount +roles +designated +brain +etc +Korea +thoughts +Brian +Highway +doors +background +drew +models +footballer +tone +turns +1935 +quiet +tower +wood +bus +write +software +weapons +flat +marked +1920 +newly +tight +Eric +finger +Journal +FC +Van +rise +critical +Atlantic +granted +returning +communities +humans +quick +39 +48 +ranked +sight +pop +Swedish +Stephen +card +analysis +attacked +##wa +Sunday +identified +Jason +champion +situated +1930 +expanded +tears +##nce +reaching +Davis +protection +Emperor +positions +nominated +Bridge +tax +dress +allows +avoid +leadership +killing +actress +guest +steel +knowing +electric +cells +disease +grade +unknown +##ium +resulted +Pakistan +confirmed +##ged +tongue +covers +##Y +roof +entirely +applied +votes +drink +interview +exchange +Township +reasons +##ised +page +calls +dog +agent +nose +teaching +##ds +##ists +advanced +wish +Golden +existing +vehicle +del +1919 +develop +attacks +pressed +Sports +planning +resulting +facility +Sarah +notes +1933 +Class +Historic +winter +##mo +audience +Community +household +Netherlands +creation +##ize +keeping +1914 +claims +dry +guys +opposite +##ak +explained +Ontario +secondary +difference +Francis +actions +organizations +yard +animal +Up +Lewis +titles +Several +1934 +Ryan +55 +Supreme +rolled +1917 +distribution +figures +afraid +rural +yourself +##rt +sets +barely +Instead +passing +awards +41 +silence +authority +occupied +environment +windows +engineering +surprised +flying +crime +reports +Mountain +powers +driving +succeeded +reviews +1929 +Head +missing +Song +Jesus +opportunity +inspired +ends +albums +conversation +impact +injury +surprise +billion +learning +heavily +oldest +union +creating +##ky +festival +literature +letters +sexual +##tte +apartment +Final +comedy +nation +orders +##sen +contemporary +Power +drawn +existence +connection +##ating +Post +Junior +remembered +message +Medal +castle +note +engineer +sounds +Beach +crossed +##dy +ear +scientific +sales +##ai +theme +starts +clearly +##ut +trouble +##gan +bag +##han +BC +sons +1928 +silent +versions +daily +Studies +ending +Rose +guns +1932 +headquarters +reference +obtained +Squadron +concert +none +du +Among +##don +prevent +Member +answered +staring +Between +##lla +portion +drug +liked +association +performances +Nations +formation +Castle +lose +learn +scoring +relatively +quarter +47 +Premier +##ors +Sweden +baseball +attempted +trip +worth +perform +airport +fields +enter +honor +Medical +rear +commander +officials +condition +supply +materials +52 +Anna +volume +threw +Persian +43 +interested +Gallery +achieved +visited +laws +relief +Area +Matt +singles +Lieutenant +Country +fans +Cambridge +sky +Miller +effective +tradition +Port +##ana +minister +extra +entitled +System +sites +authorities +acres +committee +racing +1931 +desk +trains +ass +weren +Family +farm +##ance +industrial +##head +iron +49 +abandoned +Out +Holy +chairman +waited +frequently +display +Light +transport +starring +Patrick +Engineering +eat +FM +judge +reaction +centuries +price +##tive +Korean +defense +Get +arrested +1927 +send +urban +##ss +pilot +Okay +Media +reality +arts +soul +thirty +##be +catch +generation +##nes +apart +Anne +drop +See +##ving +sixth +trained +Management +magic +cm +height +Fox +Ian +resources +vampire +principal +Was +haven +##au +Walter +Albert +rich +1922 +causing +entry +##ell +shortly +46 +worry +doctor +composer +rank +Network +bright +showing +regions +1924 +wave +carrying +kissed +finding +missed +Earl +lying +target +vehicles +Military +controlled +dinner +##board +briefly +lyrics +motion +duty +strange +attempts +invited +kg +villages +5th +Land +##mer +Christ +prepared +twelve +check +thousand +earth +copies +en +transfer +citizens +Americans +politics +nor +theatre +Project +##bo +clean +rooms +laugh +##ran +application +contained +anyway +containing +Sciences +1925 +rare +speech +exist +1950s +falling +passenger +##im +stands +51 +##ol +##ow +phase +governor +kids +details +methods +Vice +employed +performing +counter +Jane +heads +Channel +wine +opposition +aged +1912 +Every +1926 +highway +##ura +1921 +aired +978 +permanent +Forest +finds +joint +approved +##pur +brief +doubt +acts +brand +wild +closely +Ford +Kevin +chose +shall +port +sweet +fun +asking +Be +##bury +sought +Dave +Mexican +mom +Right +Howard +Moscow +Charlie +Stone +##mann +admitted +##ver +wooden +1923 +Officer +relations +Hot +combat +publication +chain +shop +inhabitants +proved +ideas +address +1915 +Memorial +explain +increasing +conflict +Anthony +Melbourne +narrow +temperature +slid +1916 +worse +selling +documentary +Ali +Ray +opposed +vision +dad +extensive +Infantry +commissioned +Doctor +offices +programming +core +respect +storm +##pa +##ay +##om +promotion +der +struck +anymore +shit +Region +receiving +DVD +alternative +##ue +ride +maximum +1910 +##ious +Third +Affairs +cancer +Executive +##op +dream +18th +Due +##ker +##worth +economy +IV +Billboard +identity +subsequent +statement +skills +##back +funding +##ons +Round +Foreign +truck +Please +lights +wondered +##ms +frame +yes +Still +districts +fiction +Colonel +converted +150 +grown +accident +critics +fit +Information +architecture +Point +Five +armed +Billy +poet +functions +consisted +suit +Turkish +Band +object +desire +##ities +sounded +flow +Norwegian +articles +Marie +pulling +thin +singing +Hunter +Human +Battalion +Federation +Kim +origin +represent +dangerous +weather +fuel +ex +##sing +Last +bedroom +aid +knees +Alan +angry +assumed +plane +Something +founding +concerned +global +Fire +di +please +Portuguese +touched +Roger +nuclear +Register +Jeff +fixed +royal +lie +finals +NFL +Manchester +towns +handle +shaped +Chairman +Dean +launch +understanding +Children +violence +failure +sector +Brigade +wrapped +fired +sharp +tiny +developing +expansion +Free +institutions +technical +Nothing +otherwise +Main +inch +Saturday +wore +Senior +attached +cheek +representing +Kansas +##chi +##kin +actual +advantage +Dan +Austria +##dale +hoped +multi +squad +Norway +streets +1913 +Services +hired +grow +pp +wear +painted +Minnesota +stuff +Building +54 +Philippines +1900 +##ties +educational +Khan +Magazine +##port +Cape +signal +Gordon +sword +Anderson +cool +engaged +Commander +images +Upon +tied +Security +cup +rail +Vietnam +successfully +##red +Muslim +gain +bringing +Native +hers +occurs +negative +Philip +Kelly +Colorado +category +##lan +600 +Have +supporting +wet +56 +stairs +Grace +observed +##ung +funds +restaurant +1911 +Jews +##ments +##che +Jake +Back +53 +asks +journalist +accept +bands +bronze +helping +##ice +decades +mayor +survived +usual +influenced +Douglas +Hey +##izing +surrounded +retirement +Temple +derived +Pope +registered +producing +##ral +structures +Johnny +contributed +finishing +buy +specifically +##king +patients +Jordan +internal +regarding +Samuel +Clark +##q +afternoon +Finally +scenes +notice +refers +quietly +threat +Water +Those +Hamilton +promise +freedom +Turkey +breaking +maintained +device +lap +ultimately +Champion +Tim +Bureau +expressed +investigation +extremely +capable +qualified +recognition +items +##up +Indiana +adult +rain +greatest +architect +Morgan +dressed +equal +Antonio +collected +drove +occur +Grant +graduate +anger +Sri +worried +standards +##ore +injured +somewhere +damn +Singapore +Jimmy +pocket +homes +stock +religion +aware +regarded +Wisconsin +##tra +passes +fresh +##ea +argued +Ltd +EP +Diego +importance +Census +incident +Egypt +Missouri +domestic +leads +ceremony +Early +camera +Father +challenge +Switzerland +lands +familiar +hearing +spend +educated +Tennessee +Thank +##ram +Thus +concern +putting +inches +map +classical +Allen +crazy +valley +Space +softly +##my +pool +worldwide +climate +experienced +neighborhood +scheduled +neither +fleet +1908 +Girl +##J +Part +engines +locations +darkness +Revolution +establishment +lawyer +objects +apparently +Queensland +Entertainment +bill +mark +Television +##ong +pale +demand +Hotel +selection +##rn +##ino +Labour +Liberal +burned +Mom +merged +Arizona +request +##lia +##light +hole +employees +##ical +incorporated +95 +independence +Walker +covering +joining +##ica +task +papers +backing +sell +biggest +6th +strike +establish +##ō +gently +59 +Orchestra +Winter +protein +Juan +locked +dates +Boy +aren +shooting +Luke +solid +charged +Prior +resigned +interior +garden +spoken +improve +wonder +promote +hidden +##med +combination +Hollywood +Swiss +consider +##ks +Lincoln +literary +drawing +Marine +weapon +Victor +Trust +Maryland +properties +##ara +exhibition +understood +hung +Tell +installed +loud +fashion +affected +junior +landing +flowers +##he +Internet +beach +Heart +tries +Mayor +programme +800 +wins +noise +##ster +##ory +58 +contain +fair +delivered +##ul +wedding +Square +advance +behavior +Program +Oregon +##rk +residence +realize +certainly +hill +Houston +57 +indicated +##water +wounded +Village +massive +Moore +thousands +personnel +dating +opera +poetry +##her +causes +feelings +Frederick +applications +push +approached +foundation +pleasure +sale +fly +gotten +northeast +costs +raise +paintings +##ney +views +horses +formal +Arab +hockey +typical +representative +rising +##des +clock +stadium +shifted +Dad +peak +Fame +vice +disappeared +users +Way +Naval +prize +hoping +values +evil +Bell +consisting +##ón +Regional +##ics +improved +circle +carefully +broad +##ini +Fine +maintain +operate +offering +mention +Death +stupid +Through +Princess +attend +interests +ruled +somewhat +wings +roads +grounds +##ual +Greece +Champions +facing +hide +voted +require +Dark +Matthew +credit +sighed +separated +manner +##ile +Boys +1905 +committed +impossible +lip +candidates +7th +Bruce +arranged +Islamic +courses +criminal +##ened +smell +##bed +08 +consecutive +##ening +proper +purchase +weak +Prix +1906 +aside +introduction +Look +##ku +changing +budget +resistance +factory +Forces +agency +##tone +northwest +user +1907 +stating +##one +sport +Design +environmental +cards +concluded +Carl +250 +accused +##ology +Girls +sick +intelligence +Margaret +responsibility +Guard +##tus +17th +sq +goods +1909 +hate +##ek +capture +stores +Gray +comic +Modern +Silver +Andy +electronic +wheel +##ied +Deputy +##bs +Czech +zone +choose +constant +reserve +##lle +Tokyo +spirit +sub +degrees +flew +pattern +compete +Dance +##ik +secretary +Imperial +99 +reduce +Hungarian +confused +##rin +Pierre +describes +regularly +Rachel +85 +landed +passengers +##ise +##sis +historian +meters +Youth +##ud +participate +##cing +arrival +tired +Mother +##gy +jumped +Kentucky +faces +feed +Israeli +Ocean +##Q +##án +plus +snow +techniques +plate +sections +falls +jazz +##ris +tank +loan +repeated +opinion +##res +unless +rugby +journal +Lawrence +moments +shock +distributed +##ded +adjacent +Argentina +crossing +uncle +##ric +Detroit +communication +mental +tomorrow +session +Emma +Without +##gen +Miami +charges +Administration +hits +coat +protected +Cole +invasion +priest +09 +Gary +enjoyed +plot +measure +bound +friendly +throw +musician +##lon +##ins +Age +knife +damaged +birds +driven +lit +ears +breathing +Arabic +Jan +faster +Jonathan +##gate +Independent +starred +Harris +teachers +Alice +sequence +mph +file +translated +decide +determine +Review +documents +sudden +threatened +##ft +bear +distinct +decade +burning +##sky +1930s +replace +begun +extension +##time +1904 +equivalent +accompanied +Christopher +Danish +##ye +Besides +##more +persons +fallen +Rural +roughly +saved +willing +ensure +Belgium +05 +musicians +##ang +giant +Six +Retrieved +worst +purposes +##bly +mountains +seventh +slipped +brick +07 +##py +somehow +Carter +Iraq +cousin +favor +islands +journey +FIFA +contrast +planet +vs +calm +##ings +concrete +branches +gray +profit +Russell +##ae +##ux +##ens +philosophy +businesses +talked +parking +##ming +owners +Place +##tle +agricultural +Kate +06 +southeast +draft +Eddie +earliest +forget +Dallas +Commonwealth +edited +66 +inner +ed +operates +16th +Harvard +assistance +##si +designs +Take +bathroom +indicate +CEO +Command +Louisiana +1902 +Dublin +Books +1901 +tropical +1903 +##tors +Places +tie +progress +forming +solution +62 +letting +##ery +studying +##jo +duties +Baseball +taste +Reserve +##ru +Ann +##gh +visible +##vi +notably +link +NCAA +southwest +Never +storage +mobile +writers +favorite +Pro +pages +truly +count +##tta +string +kid +98 +Ross +row +##idae +Kennedy +##tan +Hockey +hip +waist +grandfather +listen +##ho +feels +busy +72 +stream +obvious +cycle +shaking +Knight +##ren +Carlos +painter +trail +web +linked +04 +Palace +existed +##ira +responded +closing +End +examples +Marshall +weekend +jaw +Denmark +lady +township +medium +chin +Story +option +fifteen +Moon +represents +makeup +investment +jump +childhood +Oklahoma +roll +normally +Ten +Operation +Graham +Seattle +Atlanta +paused +promised +rejected +treated +returns +flag +##ita +Hungary +danger +glad +movements +visual +subjects +credited +soldier +Norman +ill +translation +José +Quebec +medicine +warning +theater +praised +municipal +01 +commune +churches +acid +folk +8th +testing +add +survive +Sound +devices +residential +severe +presidential +Mississippi +Austin +Perhaps +Charlotte +hanging +Montreal +grin +##ten +racial +partnership +shoot +shift +##nie +Les +downtown +Brothers +Garden +matters +restored +mirror +forever +winners +rapidly +poverty +##ible +Until +DC +faith +hundreds +Real +Ukraine +Nelson +balance +Adams +contest +relative +ethnic +Edinburgh +composition +##nts +emergency +##van +marine +reputation +Down +pack +12th +Communist +Mountains +pro +stages +measures +##ld +ABC +Li +victims +benefit +Iowa +Broadway +gathered +rating +Defense +classic +##ily +ceiling +##ions +snapped +Everything +constituency +Franklin +Thompson +Stewart +entering +Judge +forth +##sk +wanting +smiling +moves +tunnel +premiered +grass +unusual +Ukrainian +bird +Friday +tail +Portugal +coal +element +Fred +guards +Senator +collaboration +beauty +Wood +chemical +beer +justice +signs +##Z +sees +##zi +Puerto +##zed +96 +smooth +Bowl +gift +limit +97 +heading +Source +wake +requires +Ed +Constitution +factor +Lane +factors +adding +Note +cleared +pictures +pink +##ola +Kent +Local +Singh +moth +Ty +##ture +courts +Seven +temporary +involving +Vienna +emerged +fishing +agree +defensive +stuck +secure +Tamil +##ick +bottle +03 +Player +instruments +Spring +patient +flesh +contributions +cry +Malaysia +120 +Global +da +Alabama +Within +##work +debuted +expect +Cleveland +concerns +retained +horror +10th +spending +Peace +Transport +grand +Crown +instance +institution +acted +Hills +mounted +Campbell +shouldn +1898 +##ably +chamber +soil +88 +Ethan +sand +cheeks +##gi +marry +61 +weekly +classification +DNA +Elementary +Roy +definitely +Soon +Rights +gate +suggests +aspects +imagine +golden +beating +Studios +Warren +differences +significantly +glance +occasionally +##od +clothing +Assistant +depth +sending +possibility +mode +prisoners +requirements +daughters +dated +Representatives +prove +guilty +interesting +smoke +cricket +93 +##ates +rescue +Connecticut +underground +Opera +13th +reign +##ski +thanks +leather +equipped +routes +fan +##ans +script +Wright +bishop +Welsh +jobs +faculty +eleven +Railroad +appearing +anniversary +Upper +##down +anywhere +Rugby +Metropolitan +Meanwhile +Nicholas +champions +forehead +mining +drinking +76 +Jerry +membership +Brazilian +Wild +Rio +scheme +Unlike +strongly +##bility +fill +##rian +easier +MP +Hell +##sha +Stanley +banks +Baron +##ique +Robinson +67 +Gabriel +Austrian +Wayne +exposed +##wan +Alfred +1899 +manage +mix +visitors +eating +##rate +Sean +commission +Cemetery +policies +Camp +parallel +traveled +guitarist +02 +supplies +couples +poem +blocks +Rick +Training +Energy +achieve +appointment +Wing +Jamie +63 +novels +##em +1890 +songwriter +Base +Jay +##gar +naval +scared +miss +labor +technique +crisis +Additionally +backed +destroy +seriously +tools +tennis +91 +god +##ington +continuing +steam +obviously +Bobby +adapted +fifty +enjoy +Jacob +publishing +column +##ular +Baltimore +Donald +Liverpool +92 +drugs +movies +##ock +Heritage +##je +##istic +vocal +strategy +gene +advice +##bi +Ottoman +riding +##side +Agency +Indonesia +11th +laughing +sleeping +und +muttered +listening +deck +tip +77 +ownership +grey +Claire +deeply +provincial +popularity +Cooper +##á +Emily +##sed +designer +Murray +describe +Danny +Around +Parker +##dae +68 +rates +suffering +considerable +78 +nervous +powered +tons +circumstances +wished +belonged +Pittsburgh +flows +9th +##use +belt +81 +useful +15th +context +List +Dead +Iron +seek +Season +worn +frequency +legislation +replacement +memories +Tournament +Again +Barry +organisation +copy +Gulf +waters +meets +struggle +Oliver +1895 +Susan +protest +kick +Alliance +components +1896 +Tower +Windows +demanded +regiment +sentence +Woman +Logan +Referee +hosts +debate +knee +Blood +##oo +universities +practices +Ward +ranking +correct +happening +Vincent +attracted +classified +##stic +processes +immediate +waste +increasingly +Helen +##po +Lucas +Phil +organ +1897 +tea +suicide +actors +lb +crash +approval +waves +##ered +hated +grip +700 +amongst +69 +74 +hunting +dying +lasted +illegal +##rum +stare +defeating +##gs +shrugged +°C +Jon +Count +Orleans +94 +affairs +formally +##and +##ves +criticized +Disney +Vol +successor +tests +scholars +palace +Would +celebrated +rounds +grant +Schools +Such +commanded +demon +Romania +##all +Karl +71 +##yn +84 +Daily +totally +Medicine +fruit +Die +upset +Lower +Conservative +14th +Mitchell +escaped +shoes +Morris +##tz +queen +harder +prime +Thanks +indeed +Sky +authors +rocks +definition +Nazi +accounts +printed +experiences +##ters +divisions +Cathedral +denied +depending +Express +##let +73 +appeal +loose +colors +filed +##isation +gender +##ew +throne +forests +Finland +domain +boats +Baker +squadron +shore +remove +##ification +careful +wound +railroad +82 +seeking +agents +##ved +Blues +##off +customers +ignored +net +##ction +hiding +Originally +declined +##ess +franchise +eliminated +NBA +merely +pure +appropriate +visiting +forty +markets +offensive +coverage +cave +##nia +spell +##lar +Benjamin +##ire +Convention +filmed +Trade +##sy +##ct +Having +palm +1889 +Evans +intense +plastic +Julia +document +jeans +vessel +SR +##fully +proposal +Birmingham +le +##ative +assembly +89 +fund +lock +1893 +AD +meetings +occupation +modified +Years +odd +aimed +reform +Mission +Works +shake +cat +exception +convinced +executed +pushing +dollars +replacing +soccer +manufacturing +##ros +expensive +kicked +minimum +Josh +coastal +Chase +ha +Thailand +publications +deputy +Sometimes +Angel +effectively +##illa +criticism +conduct +Serbian +landscape +NY +absence +passage +##ula +Blake +Indians +1892 +admit +Trophy +##ball +Next +##rated +##ians +charts +kW +orchestra +79 +heritage +1894 +rough +exists +boundary +Bible +Legislative +moon +medieval +##over +cutting +print +##ett +birthday +##hood +destruction +Julian +injuries +influential +sisters +raising +statue +colour +dancing +characteristics +orange +##ok +##aries +Ken +colonial +twin +Larry +surviving +##shi +Barbara +personality +entertainment +assault +##ering +talent +happens +license +86 +couch +Century +soundtrack +shower +swimming +cash +Staff +bent +1885 +bay +lunch +##lus +dozen +vessels +CBS +greatly +critic +Test +symbol +panel +shell +output +reaches +87 +Front +motor +ocean +##era +##ala +maintenance +violent +scent +Limited +Las +Hope +Theater +Which +survey +Robin +recordings +compilation +##ward +bomb +insurance +Authority +sponsored +satellite +Jazz +refer +stronger +blow +whilst +Wrestling +suggest +##rie +climbed +##els +voices +shopping +1891 +Neil +discovery +##vo +##ations +burst +Baby +peaked +Brooklyn +knocked +lift +##try +false +nations +Hugh +Catherine +preserved +distinguished +terminal +resolution +ratio +pants +cited +competitions +completion +DJ +bone +uniform +schedule +shouted +83 +1920s +rarely +Basketball +Taiwan +artistic +bare +vampires +arrest +Utah +Marcus +assist +gradually +qualifying +Victorian +vast +rival +Warner +Terry +Economic +##cia +losses +boss +versus +audio +runner +apply +surgery +Play +twisted +comfortable +##cs +Everyone +guests +##lt +Harrison +UEFA +lowered +occasions +##lly +##cher +chapter +youngest +eighth +Culture +##room +##stone +1888 +Songs +Seth +Digital +involvement +expedition +relationships +signing +1000 +fault +annually +circuit +afterwards +meat +creature +##ou +cable +Bush +##net +Hispanic +rapid +gonna +figured +extent +considering +cried +##tin +sigh +dynasty +##ration +cabinet +Richmond +stable +##zo +1864 +Admiral +Unit +occasion +shares +badly +longest +##ify +Connor +extreme +wondering +girlfriend +Studio +##tions +1865 +tribe +exact +muscles +hat +Luis +Orthodox +decisions +amateur +description +##lis +hips +kingdom +##ute +Portland +whereas +Bachelor +outer +discussion +partly +Arkansas +1880 +dreams +perfectly +Lloyd +##bridge +asleep +##tti +Greg +permission +trading +pitch +mill +Stage +liquid +Keith +##tal +wolf +processing +stick +Jerusalem +profile +rushed +spiritual +argument +Ice +Guy +till +Delhi +roots +Section +missions +Glasgow +penalty +NBC +encouraged +identify +keyboards +##zing +##ston +disc +plain +informed +Bernard +thinks +fled +Justin +##day +newspapers +##wick +Ralph +##zer +unlike +Stars +artillery +##ified +recovered +arrangement +searching +##pers +##tory +##rus +deaths +Egyptian +diameter +##í +marketing +corporate +teach +marks +Turner +staying +hallway +Sebastian +chapel +naked +mistake +possession +1887 +dominated +jacket +creative +Fellow +Falls +Defence +suspended +employment +##rry +Hebrew +Hudson +Week +Wars +recognize +Natural +controversial +Tommy +thank +Athletic +benefits +decline +intention +##ets +Lost +Wall +participation +elevation +supports +parliament +1861 +concentration +Movement +##IS +competing +stops +behalf +##mm +limits +funded +discuss +Collins +departure +obtain +woods +latest +universe +alcohol +Laura +rush +blade +funny +Dennis +forgotten +Amy +Symphony +apparent +graduating +1862 +Rob +Grey +collections +Mason +emotions +##ugh +literally +Any +counties +1863 +nomination +fighter +habitat +respond +external +Capital +exit +Video +carbon +sharing +Bad +opportunities +Perry +photo +##mus +Orange +posted +remainder +transportation +portrayed +Labor +recommended +percussion +rated +Grade +rivers +partially +suspected +strip +adults +button +struggled +intersection +Canal +##ability +poems +claiming +Madrid +1886 +Together +##our +Much +Vancouver +instrument +instrumental +1870 +mad +angle +Control +Phoenix +Leo +Communications +mail +##ette +##ev +preferred +adaptation +alleged +discussed +deeper +##ane +Yet +Monday +volumes +thrown +Zane +##logy +displayed +rolling +dogs +Along +Todd +##ivity +withdrew +representation +belief +##sia +crown +Late +Short +hardly +grinned +romantic +Pete +##ken +networks +enemies +Colin +Eventually +Side +donated +##su +steady +grab +guide +Finnish +Milan +pregnant +controversy +reminded +1884 +Stuart +##bach +##ade +Race +Belgian +LP +Production +Zone +lieutenant +infantry +Child +confusion +sang +resident +##ez +victim +1881 +channels +Ron +businessman +##gle +Dick +colony +pace +producers +##ese +agencies +Craig +Lucy +Very +centers +Yorkshire +photography +##ched +Album +championships +Metro +substantial +Standard +terrible +directors +contribution +advertising +emotional +##its +layer +segment +sir +folded +Roberts +ceased +Hampshire +##ray +detailed +partners +m² +##pt +Beth +genre +commented +generated +remote +aim +Hans +credits +concerts +periods +breakfast +gay +shadow +defence +Too +Had +transition +Afghanistan +##book +eggs +defend +##lli +writes +Systems +bones +mess +seed +scientists +Shortly +Romanian +##zy +Freedom +muscle +hero +parent +agriculture +checked +Islam +Bristol +Freyja +Arena +cabin +Germans +electricity +ranks +viewed +medals +Wolf +associate +Madison +Sorry +fort +Chile +detail +widespread +attorney +boyfriend +##nan +Students +Spencer +##ig +bite +Maine +demolished +Lisa +erected +Someone +operational +Commissioner +NHL +Coach +Bar +forcing +Dream +Rico +cargo +Murphy +##fish +##ase +distant +##master +##ora +Organization +doorway +Steven +traded +electrical +frequent +##wn +Branch +Sure +1882 +placing +Manhattan +attending +attributed +excellent +pounds +ruling +principles +component +Mediterranean +Vegas +machines +percentage +infrastructure +throwing +affiliated +Kings +secured +Caribbean +Track +Ted +honour +opponent +Virgin +Construction +grave +produces +Challenge +stretched +paying +murmured +##ata +integrated +waved +Nathan +##ator +transmission +videos +##yan +##hu +Nova +descent +AM +Harold +conservative +Therefore +venue +competitive +##ui +conclusion +funeral +confidence +releases +scholar +##sson +Treaty +stress +mood +##sm +Mac +residing +Action +Fund +##ship +animated +fitted +##kar +defending +voting +tend +##berry +answers +believes +##ci +helps +Aaron +##tis +themes +##lay +populations +Players +stroke +Trinity +electoral +paint +abroad +charity +keys +Fair +##pes +interrupted +participants +murdered +Days +supporters +##ab +expert +borders +mate +##llo +solar +architectural +tension +##bling +Parish +tape +operator +Cultural +Clinton +indicates +publisher +ordinary +sugar +arrive +rifle +acoustic +##uring +assets +##shire +SS +sufficient +options +HMS +Classic +bars +rebuilt +governments +Beijing +reporter +screamed +Abbey +crying +mechanical +instantly +communications +Political +cemetery +Cameron +Stop +representatives +USS +texts +mathematics +innings +civilian +Serbia +##hill +practical +patterns +dust +Faculty +debt +##end +##cus +junction +suppose +experimental +Computer +Food +wrist +abuse +dealing +bigger +cap +principle +##pin +Muhammad +Fleet +Collection +attempting +dismissed +##burn +regime +Herbert +##ua +shadows +1883 +Eve +Lanka +1878 +Performance +fictional +##lock +Noah +Run +Voivodeship +exercise +broadcasting +##fer +RAF +Magic +Bangladesh +suitable +##low +##del +styles +toured +Code +identical +links +insisted +110 +flash +Model +slave +Derek +Rev +fairly +Greater +sole +##lands +connecting +zero +bench +##ome +switched +Fall +Owen +yours +Electric +shocked +convention +##bra +climb +memorial +swept +Racing +decides +belong +##nk +parliamentary +##und +ages +proof +##dan +delivery +1860 +##ów +sad +publicly +leaning +Archbishop +dirt +##ose +categories +1876 +burn +##bing +requested +Guinea +Historical +rhythm +relation +##heim +ye +pursue +merchant +##mes +lists +continuous +frowned +colored +tool +gods +involves +Duncan +photographs +Cricket +slight +Gregory +atmosphere +wider +Cook +##tar +essential +Being +FA +emperor +wealthy +nights +##bar +licensed +Hawaii +viewers +Language +load +nearest +milk +kilometers +platforms +##ys +territories +Rogers +sheet +Rangers +contested +##lation +isolated +assisted +swallowed +Small +Contemporary +Technical +Edwards +express +Volume +endemic +##ei +tightly +Whatever +indigenous +Colombia +##ulation +hp +characterized +##ida +Nigeria +Professional +duo +Soccer +slaves +Farm +smart +Attorney +Attendance +Common +salt +##vin +tribes +nod +sentenced +bid +sample +Drive +switch +instant +21st +Cuba +drunk +Alaska +proud +awareness +hitting +sessions +Thai +locally +elsewhere +Dragon +gentle +touching +##lee +Springs +Universal +Latino +spin +1871 +Chart +recalled +Type +pointing +##ii +lowest +##ser +grandmother +Adelaide +Jacques +spotted +Buffalo +restoration +Son +Joan +farmers +Lily +1879 +lucky +##dal +luck +eldest +##rant +Market +drummer +deployed +warned +prince +sing +amazing +sailed +##oon +1875 +Primary +traveling +Masters +Sara +cattle +Trail +gang +Further +desert +relocated +##tch +##ord +Flight +illness +Munich +ninth +repair +Singles +##lated +Tyler +tossed +boots +Work +sized +earning +shoved +magazines +housed +dam +researchers +Former +spun +premiere +spaces +organised +wealth +crimes +devoted +stones +Urban +automatic +hop +affect +outstanding +tanks +mechanism +Muslims +Ms +shots +argue +Jeremy +connections +Armenian +increases +rubbed +1867 +retail +gear +Pan +bonus +jurisdiction +weird +concerning +whisper +##gal +Microsoft +tenure +hills +www +Gmina +porch +files +reportedly +venture +Storm +##ence +Nature +killer +panic +fate +Secret +Wang +scream +drivers +belongs +Chamber +clan +monument +mixing +Peru +bet +Riley +Friends +Isaac +submarine +1877 +130 +judges +harm +ranging +affair +prepare +pupils +householder +Policy +decorated +Nation +slammed +activist +implemented +Room +qualify +Publishing +establishing +Baptist +touring +subsidiary +##nal +legend +1872 +laughter +PC +Athens +settlers +ties +dual +dear +Draft +strategic +Ivan +reveal +closest +dominant +Ah +##ult +Denver +bond +boundaries +drafted +tables +##TV +eyed +Edition +##ena +1868 +belonging +1874 +Industrial +cream +Ridge +Hindu +scholarship +Ma +opens +initiated +##ith +yelled +compound +random +Throughout +grades +physics +sank +grows +exclusively +settle +Saints +brings +Amsterdam +Make +Hart +walks +battery +violin +##born +explanation +##ware +1873 +##har +provinces +thrust +exclusive +sculpture +shops +##fire +VI +constitution +Barcelona +monster +Devon +Jefferson +Sullivan +bow +##din +desperate +##ć +Julie +##mon +##ising +terminus +Jesse +abilities +golf +##ple +##via +##away +Raymond +measured +jury +firing +revenue +suburb +Bulgarian +1866 +##cha +timber +Things +##weight +Morning +spots +Alberta +Data +explains +Kyle +friendship +raw +tube +demonstrated +aboard +immigrants +reply +breathe +Manager +ease +##ban +##dia +Diocese +##vy +##ía +pit +ongoing +##lie +Gilbert +Costa +1940s +Report +voters +cloud +traditions +##MS +gallery +Jennifer +swung +Broadcasting +Does +diverse +reveals +arriving +initiative +##ani +Give +Allied +Pat +Outstanding +monastery +blind +Currently +##war +bloody +stopping +focuses +managing +Florence +Harvey +creatures +900 +breast +internet +Artillery +purple +##mate +alliance +excited +fee +Brisbane +lifetime +Private +##aw +##nis +##gue +##ika +phrase +regulations +reflected +manufactured +conventional +pleased +client +##ix +##ncy +Pedro +reduction +##con +welcome +jail +comfort +Iranian +Norfolk +Dakota +##tein +evolution +everywhere +Initially +sensitive +Olivia +Oscar +implementation +sits +stolen +demands +slide +grandson +##ich +merger +##mic +Spirit +##° +ticket +root +difficulty +Nevada +##als +lined +Dylan +Original +Call +biological +EU +dramatic +##hn +Operations +treaty +gap +##list +Am +Romanized +moral +Butler +perspective +Furthermore +Manuel +absolutely +unsuccessful +disaster +dispute +preparation +tested +discover +##ach +shield +squeezed +brushed +battalion +Arnold +##ras +superior +treat +clinical +##so +Apple +Syria +Cincinnati +package +flights +editions +Leader +minority +wonderful +hang +Pop +Philippine +telephone +bell +honorary +##mar +balls +Democrat +dirty +thereafter +collapsed +Inside +slip +wrestling +##ín +listened +regard +bowl +None +Sport +completing +trapped +##view +copper +Wallace +Honor +blame +Peninsula +##ert +##oy +Anglo +bearing +simultaneously +honest +##ias +Mix +Got +speaker +voiced +impressed +prices +error +1869 +##feld +trials +Nine +Industry +substitute +Municipal +departed +slept +##ama +Junction +Socialist +flower +dropping +comment +fantasy +##ress +arrangements +travelled +furniture +fist +relieved +##tics +Leonard +linear +earn +expand +Soul +Plan +Leeds +Sierra +accessible +innocent +Winner +Fighter +Range +winds +vertical +Pictures +101 +charter +cooperation +prisoner +interviews +recognised +sung +manufacturer +exposure +submitted +Mars +leaf +gauge +screaming +likes +eligible +##ac +gathering +columns +##dra +belly +UN +maps +messages +speakers +##ants +garage +unincorporated +Number +Watson +sixteen +lots +beaten +Could +Municipality +##ano +Horse +talks +Drake +scores +Venice +genetic +##mal +##ère +Cold +Jose +nurse +traditionally +##bus +Territory +Key +Nancy +##win +thumb +São +index +dependent +carries +controls +Comics +coalition +physician +referring +Ruth +Based +restricted +inherited +internationally +stretch +THE +plates +margin +Holland +knock +significance +valuable +Kenya +carved +emotion +conservation +municipalities +overseas +resumed +Finance +graduation +blinked +temperatures +constantly +productions +scientist +ghost +cuts +permitted +##ches +firmly +##bert +patrol +##yo +Croatian +attacking +1850 +portrait +promoting +sink +conversion +##kov +locomotives +Guide +##val +nephew +relevant +Marc +drum +originated +Chair +visits +dragged +Price +favour +corridor +properly +respective +Caroline +reporting +inaugural +1848 +industries +##ching +edges +Christianity +Maurice +Trent +Economics +carrier +Reed +##gon +tribute +Pradesh +##ale +extend +attitude +Yale +##lu +settlements +glasses +taxes +targets +##ids +quarters +##ological +connect +hence +metre +collapse +underneath +banned +Future +clients +alternate +explosion +kinds +Commons +hungry +dragon +Chapel +Buddhist +lover +depression +pulls +##ges +##uk +origins +computers +crosses +kissing +assume +emphasis +lighting +##ites +personally +crashed +beam +touchdown +lane +comparison +##mont +Hitler +##las +execution +##ene +acre +sum +Pearl +ray +##point +essentially +worker +convicted +tear +Clay +recovery +Literature +Unfortunately +##row +partial +Petersburg +Bulgaria +coaching +evolved +reception +enters +narrowed +elevator +therapy +defended +pairs +##lam +breaks +Bennett +Uncle +cylinder +##ison +passion +bases +Actor +cancelled +battles +extensively +oxygen +Ancient +specialized +negotiations +##rat +acquisition +convince +interpretation +##00 +photos +aspect +colleges +Artist +keeps +##wing +Croatia +##ona +Hughes +Otto +comments +##du +Ph +Sweet +adventure +describing +Student +Shakespeare +scattered +objective +Aviation +Phillips +Fourth +athletes +##hal +##tered +Guitar +intensity +née +dining +curve +Obama +topics +legislative +Mill +Cruz +##ars +Members +recipient +Derby +inspiration +corresponding +fed +YouTube +coins +pressing +intent +Karen +cinema +Delta +destination +shorter +Christians +imagined +canal +Newcastle +Shah +Adrian +super +Males +160 +liberal +lord +bat +supplied +Claude +meal +worship +##atic +Han +wire +°F +##tha +punishment +thirteen +fighters +##ibility +1859 +Ball +gardens +##ari +Ottawa +pole +indicating +Twenty +Higher +Bass +Ivy +farming +##urs +certified +Saudi +plenty +##ces +restaurants +Representative +Miles +payment +##inger +##rit +Confederate +festivals +references +##ić +Mario +PhD +playoffs +witness +rice +mask +saving +opponents +enforcement +automatically +relegated +##oe +radar +whenever +Financial +imperial +uncredited +influences +Abraham +skull +Guardian +Haven +Bengal +impressive +input +mixture +Warsaw +altitude +distinction +1857 +collective +Annie +##ean +##bal +directions +Flying +##nic +faded +##ella +contributing +##ó +employee +##lum +##yl +ruler +oriented +conductor +focusing +##die +Giants +Mills +mines +Deep +curled +Jessica +guitars +Louise +procedure +Machine +failing +attendance +Nepal +Brad +Liam +tourist +exhibited +Sophie +depicted +Shaw +Chuck +##can +expecting +challenges +##nda +equally +resignation +##logical +Tigers +loop +pitched +outdoor +reviewed +hopes +True +temporarily +Borough +torn +jerked +collect +Berkeley +Independence +cotton +retreat +campaigns +participating +Intelligence +Heaven +##ked +situations +borough +Democrats +Harbor +##len +Liga +serial +circles +fourteen +##lot +seized +filling +departments +finance +absolute +Roland +Nate +floors +raced +struggling +deliver +protests +##tel +Exchange +efficient +experiments +##dar +faint +3D +binding +Lions +lightly +skill +proteins +difficulties +##cal +monthly +camps +flood +loves +Amanda +Commerce +##oid +##lies +elementary +##tre +organic +##stein +##ph +receives +Tech +enormous +distinctive +Joint +experiment +Circuit +citizen +##hy +shelter +ideal +practically +formula +addressed +Foster +Productions +##ax +variable +punk +Voice +fastest +concentrated +##oma +##yer +stored +surrender +vary +Sergeant +Wells +ward +Wait +##ven +playoff +reducing +cavalry +##dle +Venezuela +tissue +amounts +sweat +##we +Non +##nik +beetle +##bu +##tu +Jared +Hunt +##₂ +fat +Sultan +Living +Circle +Secondary +Suddenly +reverse +##min +Travel +##bin +Lebanon +##mas +virus +Wind +dissolved +enrolled +holiday +Keep +helicopter +Clarke +constitutional +technologies +doubles +instructions +##ace +Azerbaijan +##ill +occasional +frozen +trick +wiped +writings +Shanghai +preparing +challenged +mainstream +summit +180 +##arian +##rating +designation +##ada +revenge +filming +tightened +Miguel +Montana +reflect +celebration +bitch +flashed +signals +rounded +peoples +##tation +renowned +Google +characteristic +Campaign +sliding +##rman +usage +Record +Using +woke +solutions +holes +theories +logo +Protestant +relaxed +brow +nickname +Reading +marble +##tro +symptoms +Overall +capita +##ila +outbreak +revolution +deemed +Principal +Hannah +approaches +inducted +Wellington +vulnerable +Environmental +Drama +incumbent +Dame +1854 +travels +samples +accurate +physically +Sony +Nashville +##sville +##lic +##og +Producer +Lucky +tough +Stanford +resort +repeatedly +eyebrows +Far +choir +commenced +##ep +##ridge +rage +swing +sequel +heir +buses +ad +Grove +##late +##rick +updated +##SA +Delaware +##fa +Athletics +warmth +Off +excitement +verse +Protection +Villa +corruption +intellectual +Jenny +##lyn +mystery +prayer +healthy +##ologist +Bear +lab +Ernest +Remix +register +basement +Montgomery +consistent +tier +1855 +Preston +Brooks +##maker +vocalist +laboratory +delayed +wheels +rope +bachelor +pitcher +Block +Nevertheless +suspect +efficiency +Nebraska +siege +FBI +planted +##AC +Newton +breeding +##ain +eighteen +Argentine +encounter +servant +1858 +elder +Shadow +Episode +fabric +doctors +survival +removal +chemistry +volunteers +Kane +variant +arrives +Eagle +Left +##fe +Jo +divorce +##ret +yesterday +Bryan +handling +diseases +customer +Sheriff +Tiger +Harper +##oi +resting +Linda +Sheffield +gasped +sexy +economics +alien +tale +footage +Liberty +yeah +fundamental +Ground +flames +Actress +photographer +Maggie +Additional +joke +custom +Survey +Abu +silk +consumption +Ellis +bread +##uous +engagement +puts +Dog +##hr +poured +guilt +CDP +boxes +hardware +clenched +##cio +stem +arena +extending +##com +examination +Steel +encountered +revised +140 +picking +Car +hasn +Minor +pride +Roosevelt +boards +##mia +blocked +curious +drag +narrative +brigade +Prefecture +mysterious +namely +connects +Devil +historians +CHAPTER +quit +installation +Golf +empire +elevated +##eo +releasing +Bond +##uri +harsh +ban +##BA +contracts +cloth +presents +stake +chorus +##eau +swear +##mp +allies +generations +Motor +meter +pen +warrior +veteran +##EC +comprehensive +missile +interaction +instruction +Renaissance +rested +Dale +fix +fluid +les +investigate +loaded +widow +exhibit +artificial +select +rushing +tasks +signature +nowhere +Engineer +feared +Prague +bother +extinct +gates +Bird +climbing +heels +striking +artwork +hunt +awake +##hin +Formula +thereby +commitment +imprisoned +Beyond +##MA +transformed +Agriculture +Low +Movie +radical +complicated +Yellow +Auckland +mansion +tenth +Trevor +predecessor +##eer +disbanded +sucked +circular +witch +gaining +lean +Behind +illustrated +rang +celebrate +bike +consist +framework +##cent +Shane +owns +350 +comprises +collaborated +colleagues +##cast +engage +fewer +##ave +1856 +observation +diplomatic +legislature +improvements +Interstate +craft +MTV +martial +administered +jet +approaching +permanently +attraction +manuscript +numbered +Happy +Andrea +shallow +Gothic +Anti +##bad +improvement +trace +preserve +regardless +rode +dies +achievement +maintaining +Hamburg +spine +##air +flowing +encourage +widened +posts +##bound +125 +Southeast +Santiago +##bles +impression +receiver +Single +closure +##unt +communist +honors +Northwest +105 +##ulated +cared +un +hug +magnetic +seeds +topic +perceived +prey +prevented +Marvel +Eight +Michel +Transportation +rings +Gate +##gne +Byzantine +accommodate +floating +##dor +equation +ministry +##ito +##gled +Rules +earthquake +revealing +Brother +Celtic +blew +chairs +Panama +Leon +attractive +descendants +Care +Ambassador +tours +breathed +threatening +##cho +smiles +Lt +Beginning +##iness +fake +assists +fame +strings +Mobile +Liu +parks +http +1852 +brush +Aunt +bullet +consciousness +##sta +##ther +consequences +gather +dug +1851 +bridges +Doug +##sion +Artists +ignore +Carol +brilliant +radiation +temples +basin +clouds +##cted +Stevens +spite +soap +consumer +Damn +Snow +recruited +##craft +Advanced +tournaments +Quinn +undergraduate +questioned +Palmer +Annual +Others +feeding +Spider +printing +##orn +cameras +functional +Chester +readers +Alpha +universal +Faith +Brandon +François +authored +Ring +el +aims +athletic +possessed +Vermont +programmes +##uck +bore +Fisher +statements +shed +saxophone +neighboring +pronounced +barrel +bags +##dge +organisations +pilots +casualties +Kenneth +##brook +silently +Malcolm +span +Essex +anchor +##hl +virtual +lessons +Henri +Trump +Page +pile +locomotive +wounds +uncomfortable +sustained +Diana +Eagles +##pi +2000s +documented +##bel +Cassie +delay +kisses +##ines +variation +##ag +growled +##mark +##ways +Leslie +studios +Friedrich +aunt +actively +armor +eaten +historically +Better +purse +honey +ratings +##ée +naturally +1840 +peer +Kenny +Cardinal +database +Looking +runners +handsome +Double +PA +##boat +##sted +protecting +##jan +Diamond +concepts +interface +##aki +Watch +Article +Columbus +dialogue +pause +##rio +extends +blanket +pulse +1853 +affiliate +ladies +Ronald +counted +kills +demons +##zation +Airlines +Marco +Cat +companion +mere +Yugoslavia +Forum +Allan +pioneer +Competition +Methodist +patent +nobody +Stockholm +##ien +regulation +##ois +accomplished +##itive +washed +sake +Vladimir +crops +prestigious +humor +Sally +labour +tributary +trap +altered +examined +Mumbai +bombing +Ash +noble +suspension +ruins +##bank +spare +displays +guided +dimensional +Iraqi +##hon +sciences +Franz +relating +fence +followers +Palestine +invented +proceeded +Batman +Bradley +##yard +##ova +crystal +Kerala +##ima +shipping +handled +Want +abolished +Drew +##tter +Powell +Half +##table +##cker +exhibitions +Were +assignment +assured +##rine +Indonesian +Grammy +acknowledged +Kylie +coaches +structural +clearing +stationed +Say +Total +Rail +besides +glow +threats +afford +Tree +Musical +##pp +elite +centered +explore +Engineers +Stakes +Hello +tourism +severely +assessment +##tly +crack +politicians +##rrow +sheets +volunteer +##borough +##hold +announcement +recover +contribute +lungs +##ille +mainland +presentation +Johann +Writing +1849 +##bird +Study +Boulevard +coached +fail +airline +Congo +Plus +Syrian +introduce +ridge +Casey +manages +##fi +searched +Support +succession +progressive +coup +cultures +##lessly +sensation +Cork +Elena +Sofia +Philosophy +mini +trunk +academy +Mass +Liz +practiced +Reid +##ule +satisfied +experts +Wilhelm +Woods +invitation +Angels +calendar +joy +Sr +Dam +packed +##uan +bastard +Workers +broadcasts +logic +cooking +backward +##ack +Chen +creates +enzyme +##xi +Davies +aviation +VII +Conservation +fucking +Knights +##kan +requiring +hectares +wars +ate +##box +Mind +desired +oak +absorbed +Really +Vietnamese +Paulo +athlete +##car +##eth +Talk +Wu +##cks +survivors +Yang +Joel +Almost +Holmes +Armed +Joshua +priests +discontinued +##sey +blond +Rolling +suggesting +CA +clay +exterior +Scientific +##sive +Giovanni +Hi +farther +contents +Winners +animation +neutral +mall +Notes +layers +professionals +Armstrong +Against +Piano +involve +monitor +angel +parked +bears +seated +feat +beliefs +##kers +Version +suffer +##ceae +guidance +##eur +honored +raid +alarm +Glen +Ellen +Jamaica +trio +enabled +##ils +procedures +##hus +moderate +upstairs +##ses +torture +Georgian +rebellion +Fernando +Nice +##are +Aires +Campus +beast +##hing +1847 +##FA +Isle +##logist +Princeton +cathedral +Oakland +Solomon +##tto +Milwaukee +upcoming +midfielder +Neither +sacred +Eyes +appreciate +Brunswick +secrets +Rice +Somerset +Chancellor +Curtis +##gel +Rich +separation +grid +##los +##bon +urge +##ees +##ree +freight +towers +psychology +requirement +dollar +##fall +##sman +exile +tomb +Salt +Stefan +Buenos +Revival +Porter +tender +diesel +chocolate +Eugene +Legion +Laboratory +sheep +arched +hospitals +orbit +Full +##hall +drinks +ripped +##RS +tense +Hank +leagues +##nberg +PlayStation +fool +Punjab +relatives +Comedy +sur +1846 +Tonight +Sox +##if +Rabbi +org +speaks +institute +defender +painful +wishes +Weekly +literacy +portions +snake +item +deals +##tum +autumn +sharply +reforms +thighs +prototype +##ition +argues +disorder +Physics +terror +provisions +refugees +predominantly +independently +march +##graphy +Arabia +Andrews +Bus +Money +drops +##zar +pistol +matrix +revolutionary +##ust +Starting +##ptic +Oak +Monica +##ides +servants +##hed +archaeological +divorced +rocket +enjoying +fires +##nel +assembled +qualification +retiring +##fied +Distinguished +handful +infection +Durham +##itz +fortune +renewed +Chelsea +##sley +curved +gesture +retain +exhausted +##ifying +Perth +jumping +Palestinian +Simpson +colonies +steal +##chy +corners +Finn +arguing +Martha +##var +Betty +emerging +Heights +Hindi +Manila +pianist +founders +regret +Napoleon +elbow +overhead +bold +praise +humanity +##ori +Revolutionary +##ere +fur +##ole +Ashley +Official +##rm +lovely +Architecture +##sch +Baronet +virtually +##OS +descended +immigration +##das +##kes +Holly +Wednesday +maintains +theatrical +Evan +Gardens +citing +##gia +segments +Bailey +Ghost +##city +governing +graphics +##ined +privately +potentially +transformation +Crystal +Cabinet +sacrifice +hesitated +mud +Apollo +Desert +bin +victories +Editor +Railways +Web +Case +tourists +Brussels +Franco +compiled +topped +Gene +engineers +commentary +egg +escort +nerve +arch +necessarily +frustration +Michelle +democracy +genes +Facebook +halfway +##ient +102 +flipped +Won +##mit +NASA +Lynn +Provincial +ambassador +Inspector +glared +Change +McDonald +developments +tucked +noting +Gibson +circulation +dubbed +armies +resource +Headquarters +##iest +Mia +Albanian +Oil +Albums +excuse +intervention +Grande +Hugo +integration +civilians +depends +reserves +Dee +compositions +identification +restrictions +quarterback +Miranda +Universe +favourite +ranges +hint +loyal +Op +entity +Manual +quoted +dealt +specialist +Zhang +download +Westminster +Rebecca +streams +Anglican +variations +Mine +detective +Films +reserved +##oke +##key +sailing +##gger +expanding +recall +discovers +particles +behaviour +Gavin +blank +permit +Java +Fraser +Pass +##non +##TA +panels +statistics +notion +courage +dare +venues +##roy +Box +Newport +travelling +Thursday +warriors +Glenn +criteria +360 +mutual +restore +varied +bitter +Katherine +##lant +ritual +bits +##à +Henderson +trips +Richardson +Detective +curse +psychological +Il +midnight +streak +facts +Dawn +Indies +Edmund +roster +Gen +##nation +1830 +congregation +shaft +##ically +##mination +Indianapolis +Sussex +loving +##bit +sounding +horrible +Continental +Griffin +advised +magical +millions +##date +1845 +Safety +lifting +determination +valid +dialect +Penn +Know +triple +avoided +dancer +judgment +sixty +farmer +lakes +blast +aggressive +Abby +tag +chains +inscription +##nn +conducting +Scout +buying +##wich +spreading +##OC +array +hurried +Environment +improving +prompted +fierce +Taking +Away +tune +pissed +Bull +catching +##ying +eyebrow +metropolitan +terrain +##rel +Lodge +manufacturers +creator +##etic +happiness +ports +##ners +Relations +fortress +targeted +##ST +allegedly +blues +##osa +Bosnia +##dom +burial +similarly +stranger +pursued +symbols +rebels +reflection +routine +traced +indoor +eventual +##ska +##ão +##una +MD +##phone +oh +grants +Reynolds +rid +operators +##nus +Joey +vital +siblings +keyboard +br +removing +societies +drives +solely +princess +lighter +Various +Cavalry +believing +SC +underwent +relay +smelled +syndrome +welfare +authorized +seemingly +Hard +chicken +##rina +Ages +Bo +democratic +barn +Eye +shorts +##coming +##hand +disappointed +unexpected +centres +Exhibition +Stories +Site +banking +accidentally +Agent +conjunction +André +Chloe +resist +width +Queens +provision +##art +Melissa +Honorary +Del +prefer +abruptly +duration +##vis +Glass +enlisted +##ado +discipline +Sisters +carriage +##ctor +##sburg +Lancashire +log +fuck +##iz +closet +collecting +holy +rape +trusted +cleaning +inhabited +Rocky +104 +editorial +##yu +##ju +succeed +strict +Cuban +##iya +Bronze +outcome +##ifies +##set +corps +Hero +barrier +Kumar +groaned +Nina +Burton +enable +stability +Milton +knots +##ination +slavery +##borg +curriculum +trailer +warfare +Dante +Edgar +revival +Copenhagen +define +advocate +Garrett +Luther +overcome +pipe +750 +construct +Scotia +kings +flooding +##hard +Ferdinand +Felix +forgot +Fish +Kurt +elaborate +##BC +graphic +gripped +colonel +Sophia +Advisory +Self +##uff +##lio +monitoring +seal +senses +rises +peaceful +journals +1837 +checking +legendary +Ghana +##power +ammunition +Rosa +Richards +nineteenth +ferry +aggregate +Troy +inter +##wall +Triple +steep +tent +Cyprus +1844 +##woman +commanding +farms +doi +navy +specified +na +cricketer +transported +Think +comprising +grateful +solve +##core +beings +clerk +grain +vector +discrimination +##TC +Katie +reasonable +drawings +veins +consideration +Monroe +repeat +breed +dried +witnessed +ordained +Current +spirits +remarkable +consultant +urged +Remember +anime +singers +phenomenon +Rhode +Carlo +demanding +findings +manual +varying +Fellowship +generate +safely +heated +withdrawn +##ao +headquartered +##zon +##lav +##ency +Col +Memphis +imposed +rivals +Planet +healing +##hs +ensemble +Warriors +##bone +cult +Frankfurt +##HL +diversity +Gerald +intermediate +##izes +reactions +Sister +##ously +##lica +quantum +awkward +mentions +pursuit +##ography +varies +profession +molecular +consequence +lectures +cracked +103 +slowed +##tsu +cheese +upgraded +suite +substance +Kingston +1800 +Idaho +Theory +##een +ain +Carson +Molly +##OR +configuration +Whitney +reads +audiences +##tie +Geneva +Outside +##nen +##had +transit +volleyball +Randy +Chad +rubber +motorcycle +respected +eager +Level +coin +##lets +neighbouring +##wski +confident +##cious +poll +uncertain +punch +thesis +Tucker +IATA +Alec +##ographic +##law +1841 +desperately +1812 +Lithuania +accent +Cox +lightning +skirt +##load +Burns +Dynasty +##ug +chapters +Working +dense +Morocco +##kins +casting +Set +activated +oral +Brien +horn +HIV +dawn +stumbled +altar +tore +considerably +Nicole +interchange +registration +biography +Hull +Stan +bulk +consent +Pierce +##ER +Fifth +marched +terrorist +##piece +##itt +Presidential +Heather +staged +Plant +relegation +sporting +joins +##ced +Pakistani +dynamic +Heat +##lf +ourselves +Except +Elliott +nationally +goddess +investors +Burke +Jackie +##ā +##RA +Tristan +Associate +Tuesday +scope +Near +bunch +##abad +##ben +sunlight +##aire +manga +Willie +trucks +boarding +Lion +lawsuit +Learning +Der +pounding +awful +##mine +IT +Legend +romance +Serie +AC +gut +precious +Robertson +hometown +realm +Guards +Tag +batting +##vre +halt +conscious +1838 +acquire +collar +##gg +##ops +Herald +nationwide +citizenship +Aircraft +decrease +em +Fiction +Female +corporation +Located +##ip +fights +unconscious +Tampa +Poetry +lobby +Malta +##sar +##bie +layout +Tate +reader +stained +##bre +##rst +##ulate +loudly +Eva +Cohen +exploded +Merit +Maya +##rable +Rovers +##IC +Morrison +Should +vinyl +##mie +onwards +##gie +vicinity +Wildlife +probability +Mar +Barnes +##ook +spinning +Moses +##vie +Surrey +Planning +conferences +protective +Plaza +deny +Canterbury +manor +Estate +tilted +comics +IBM +destroying +server +Dorothy +##horn +Oslo +lesser +heaven +Marshal +scales +strikes +##ath +firms +attract +##BS +controlling +Bradford +southeastern +Amazon +Travis +Janet +governed +1842 +Train +Holden +bleeding +gifts +rent +1839 +palms +##ū +judicial +Ho +Finals +conflicts +unlikely +draws +##cies +compensation +adds +elderly +Anton +lasting +Nintendo +codes +ministers +pot +associations +capabilities +##cht +libraries +##sie +chances +performers +runway +##af +##nder +Mid +Vocals +##uch +##eon +interpreted +priority +Uganda +ruined +Mathematics +cook +AFL +Lutheran +AIDS +Capitol +chase +axis +Moreover +María +Saxon +storyline +##ffed +Tears +Kid +cent +colours +Sex +##long +pm +blonde +Edwin +CE +diocese +##ents +##boy +Inn +##ller +Saskatchewan +##kh +stepping +Windsor +##oka +##eri +Xavier +Resources +1843 +##top +##rad +##lls +Testament +poorly +1836 +drifted +slope +CIA +remix +Lords +mature +hosting +diamond +beds +##ncies +luxury +trigger +##lier +preliminary +hybrid +journalists +Enterprise +proven +expelled +insects +Beautiful +lifestyle +vanished +##ake +##ander +matching +surfaces +Dominican +Kids +referendum +Orlando +Truth +Sandy +privacy +Calgary +Speaker +sts +Nobody +shifting +##gers +Roll +Armenia +Hand +##ES +106 +##ont +Guild +larvae +Stock +flame +gravity +enhanced +Marion +surely +##tering +Tales +algorithm +Emmy +darker +VIII +##lash +hamlet +deliberately +occurring +choices +Gage +fees +settling +ridiculous +##ela +Sons +cop +custody +##ID +proclaimed +Cardinals +##pm +Metal +Ana +1835 +clue +Cardiff +riders +observations +MA +sometime +##och +performer +intact +Points +allegations +rotation +Tennis +tenor +Directors +##ats +Transit +thigh +Complex +##works +twentieth +Factory +doctrine +Daddy +##ished +pretend +Winston +cigarette +##IA +specimens +hydrogen +smoking +mathematical +arguments +openly +developer +##iro +fists +somebody +##san +Standing +Caleb +intelligent +Stay +Interior +echoed +Valentine +varieties +Brady +cluster +Ever +voyage +##of +deposits +ultimate +Hayes +horizontal +proximity +##ás +estates +exploration +NATO +Classical +##most +bills +condemned +1832 +hunger +##ato +planes +deserve +offense +sequences +rendered +acceptance +##ony +manufacture +Plymouth +innovative +predicted +##RC +Fantasy +##une +supporter +absent +Picture +bassist +rescued +##MC +Ahmed +Monte +##sts +##rius +insane +novelist +##és +agrees +Antarctic +Lancaster +Hopkins +calculated +startled +##star +tribal +Amendment +##hoe +invisible +patron +deer +Walk +tracking +Lyon +tickets +##ED +philosopher +compounds +chuckled +##wi +pound +loyalty +Academic +petition +refuses +marking +Mercury +northeastern +dimensions +scandal +Canyon +patch +publish +##oning +Peak +minds +##boro +Presbyterian +Hardy +theoretical +magnitude +bombs +cage +##ders +##kai +measuring +explaining +avoiding +touchdowns +Card +theology +##ured +Popular +export +suspicious +Probably +photograph +Lou +Parks +Arms +compact +Apparently +excess +Banks +lied +stunned +territorial +Filipino +spectrum +learns +wash +imprisonment +ugly +##rose +Albany +Erik +sends +##hara +##rid +consumed +##gling +Belgrade +Da +opposing +Magnus +footsteps +glowing +delicate +Alexandria +Ludwig +gorgeous +Bros +Index +##PA +customs +preservation +bonds +##mond +environments +##nto +instructed +parted +adoption +locality +workshops +goalkeeper +##rik +##uma +Brighton +Slovenia +##ulating +##tical +towel +hugged +stripped +Bears +upright +Wagner +##aux +secretly +Adventures +nest +Course +Lauren +Boeing +Abdul +Lakes +450 +##cu +USSR +caps +Chan +##nna +conceived +Actually +Belfast +Lithuanian +concentrate +possess +militia +pine +protagonist +Helena +##PS +##band +Belle +Clara +Reform +currency +pregnancy +1500 +##rim +Isabella +hull +Name +trend +journalism +diet +##mel +Recording +acclaimed +Tang +Jace +steering +vacant +suggestion +costume +laser +##š +##ink +##pan +##vić +integral +achievements +wise +classroom +unions +southwestern +##uer +Garcia +toss +Tara +Large +##tate +evident +responsibilities +populated +satisfaction +##bia +casual +Ecuador +##ght +arose +##ović +Cornwall +embrace +refuse +Heavyweight +XI +Eden +activists +##uation +biology +##shan +fraud +Fuck +matched +legacy +Rivers +missionary +extraordinary +Didn +holder +wickets +crucial +Writers +Hurricane +Iceland +gross +trumpet +accordance +hurry +flooded +doctorate +Albania +##yi +united +deceased +jealous +grief +flute +portraits +##а +pleasant +Founded +Face +crowned +Raja +advisor +Salem +##ec +Achievement +admission +freely +minimal +Sudan +developers +estimate +disabled +##lane +downstairs +Bruno +##pus +pinyin +##ude +lecture +deadly +underlying +optical +witnesses +Combat +Julius +tapped +variants +##like +Colonial +Critics +Similarly +mouse +voltage +sculptor +Concert +salary +Frances +##ground +hook +premises +Software +instructor +nominee +##ited +fog +slopes +##zu +vegetation +sail +##rch +Body +Apart +atop +View +utility +ribs +cab +migration +##wyn +bounded +2019 +pillow +trails +##ub +Halifax +shade +Rush +##lah +##dian +Notre +interviewed +Alexandra +Springfield +Indeed +rubbing +dozens +amusement +legally +##lers +Jill +Cinema +ignoring +Choice +##ures +pockets +##nell +laying +Blair +tackles +separately +##teen +Criminal +performs +theorem +Communication +suburbs +##iel +competitors +rows +##hai +Manitoba +Eleanor +interactions +nominations +assassination +##dis +Edmonton +diving +##dine +essay +##tas +AFC +Edge +directing +imagination +sunk +implement +Theodore +trembling +sealed +##rock +Nobel +##ancy +##dorf +##chen +genuine +apartments +Nicolas +AA +Bach +Globe +Store +220 +##10 +Rochester +##ño +alert +107 +Beck +##nin +Naples +Basin +Crawford +fears +Tracy +##hen +disk +##pped +seventeen +Lead +backup +reconstruction +##lines +terrified +sleeve +nicknamed +popped +##making +##ern +Holiday +Gospel +ibn +##ime +convert +divine +resolved +##quet +ski +realizing +##RT +Legislature +reservoir +Rain +sinking +rainfall +elimination +challenging +tobacco +##outs +Given +smallest +Commercial +pin +rebel +comedian +exchanged +airing +dish +Salvador +promising +##wl +relax +presenter +toll +aerial +##eh +Fletcher +brass +disappear +zones +adjusted +contacts +##lk +sensed +Walt +mild +toes +flies +shame +considers +wildlife +Hanna +Arsenal +Ladies +naming +##ishing +anxiety +discussions +cute +undertaken +Cash +strain +Wyoming +dishes +precise +Angela +##ided +hostile +twins +115 +Built +##pel +Online +tactics +Newman +##bourne +unclear +repairs +embarrassed +listing +tugged +Vale +##gin +Meredith +bout +##cle +velocity +tips +froze +evaluation +demonstrate +##card +criticised +Nash +lineup +Rao +monks +bacteria +lease +##lish +frightened +den +revived +finale +##rance +flee +Letters +decreased +##oh +Sounds +wrap +Sharon +incidents +renovated +everybody +stole +Bath +boxing +1815 +withdraw +backs +interim +react +murders +Rhodes +Copa +framed +flown +Estonia +Heavy +explored +##rra +##GA +##ali +Istanbul +1834 +##rite +##aging +##ues +Episcopal +arc +orientation +Maxwell +infected +##rot +BCE +Brook +grasp +Roberto +Excellence +108 +withdrawal +Marines +rider +Lo +##sin +##run +Subsequently +garrison +hurricane +facade +Prussia +crushed +enterprise +##mber +Twitter +Generation +Physical +Sugar +editing +communicate +Ellie +##hurst +Ernst +wagon +promotional +conquest +Parliamentary +courtyard +lawyers +Superman +email +Prussian +lately +lecturer +Singer +Majesty +Paradise +sooner +Heath +slot +curves +convoy +##vian +induced +synonym +breeze +##plane +##ox +peered +Coalition +##hia +odds +##esh +##lina +Tomorrow +Nadu +##ico +##rah +damp +autonomous +console +Victory +counts +Luxembourg +intimate +Archived +Carroll +spy +Zero +habit +Always +faction +teenager +Johnston +chaos +ruin +commerce +blog +##shed +##the +reliable +Word +Yu +Norton +parade +Catholics +damned +##iling +surgeon +##tia +Allison +Jonas +remarked +##ès +idiot +Making +proposals +Industries +strategies +artifacts +batteries +reward +##vers +Agricultural +distinguish +lengths +Jeffrey +Progressive +kicking +Patricia +##gio +ballot +##ios +skilled +##gation +Colt +limestone +##AS +peninsula +##itis +LA +hotels +shapes +Crime +depicting +northwestern +HD +silly +Das +##² +##ws +##ash +##matic +thermal +Has +forgive +surrendered +Palm +Nacional +drank +haired +Mercedes +##foot +loading +Timothy +##roll +mechanisms +traces +digging +discussing +Natalie +##zhou +Forbes +landmark +Anyway +Manor +conspiracy +gym +knocking +viewing +Formation +Pink +Beauty +limbs +Phillip +sponsor +Joy +granite +Harbour +##ero +payments +Ballet +conviction +##dam +Hood +estimates +lacked +Mad +Jorge +##wen +refuge +##LA +invaded +Kat +suburban +##fold +investigated +Ari +complained +creek +Georges +##uts +powder +accepting +deserved +carpet +Thunder +molecules +Legal +cliff +strictly +enrollment +ranch +##rg +##mba +proportion +renovation +crop +grabbing +##liga +finest +entries +receptor +helmet +blown +Listen +flagship +workshop +resolve +nails +Shannon +portal +jointly +shining +Violet +overwhelming +upward +Mick +proceedings +##dies +##aring +Laurence +Churchill +##rice +commit +170 +inclusion +Examples +##verse +##rma +fury +paths +##SC +ankle +nerves +Chemistry +rectangular +sworn +screenplay +cake +Mann +Seoul +Animal +sizes +Speed +vol +Population +Southwest +Hold +continuously +Qualified +wishing +Fighting +Made +disappointment +Portsmouth +Thirty +##beck +Ahmad +teammate +MLB +graph +Charleston +realizes +##dium +exhibits +preventing +##int +fever +rivalry +Male +mentally +dull +##lor +##rich +consistently +##igan +Madame +certificate +suited +Krishna +accuracy +Webb +Budapest +Rex +1831 +Cornell +OK +surveillance +##gated +habitats +Adventure +Conrad +Superior +Gay +sofa +aka +boot +Statistics +Jessie +Liberation +##lip +##rier +brands +saint +Heinrich +Christine +bath +Rhine +ballet +Jin +consensus +chess +Arctic +stack +furious +cheap +toy +##yre +##face +##gging +gastropod +##nne +Romans +membrane +answering +25th +architects +sustainable +##yne +Hon +1814 +Baldwin +dome +##awa +##zen +celebrity +enclosed +##uit +##mmer +Electronic +locals +##CE +supervision +mineral +Chemical +Slovakia +alley +hub +##az +heroes +Creative +##AM +incredible +politically +ESPN +yanked +halls +Aboriginal +Greatest +yield +##20 +congressional +robot +Kiss +welcomed +MS +speeds +proceed +Sherman +eased +Greene +Walsh +Geoffrey +variables +rocky +##print +acclaim +Reverend +Wonder +tonnes +recurring +Dawson +continent +finite +AP +continental +ID +facilitate +essays +Rafael +Neal +1833 +ancestors +##met +##gic +Especially +teenage +frustrated +Jules +cock +expense +##oli +##old +blocking +Notable +prohibited +ca +dock +organize +##wald +Burma +Gloria +dimension +aftermath +choosing +Mickey +torpedo +pub +##used +manuscripts +laps +Ulster +staircase +sphere +Insurance +Contest +lens +risks +investigations +ERA +glare +##play +Graduate +auction +Chronicle +##tric +##50 +Coming +seating +Wade +seeks +inland +Thames +Rather +butterfly +contracted +positioned +consumers +contestants +fragments +Yankees +Santos +administrator +hypothesis +retire +Denis +agreements +Winnipeg +##rill +1820 +trophy +crap +shakes +Jenkins +##rium +ya +twist +labels +Maritime +##lings +##iv +111 +##ensis +Cairo +Anything +##fort +opinions +crowded +##nian +abandon +##iff +drained +imported +##rr +tended +##rain +Going +introducing +sculptures +bankruptcy +danced +demonstration +stance +settings +gazed +abstract +pet +Calvin +stiff +strongest +wrestler +##dre +Republicans +grace +allocated +cursed +snail +advancing +Return +errors +Mall +presenting +eliminate +Amateur +Institution +counting +##wind +warehouse +##nde +Ethiopia +trailed +hollow +##press +Literary +capability +nursing +preceding +lamp +Thomson +Morton +##ctic +Crew +Close +composers +boom +Clare +missiles +112 +hunter +snap +##oni +##tail +Us +declaration +##cock +rally +huh +lion +straightened +Philippe +Sutton +alpha +valued +maker +navigation +detected +favorable +perception +Charter +##ña +Ricky +rebounds +tunnels +slapped +Emergency +supposedly +##act +deployment +socialist +tubes +anybody +corn +##NA +Seminary +heating +pump +##AA +achieving +souls +##ass +Link +##ele +##smith +greeted +Bates +Americas +Elder +cure +contestant +240 +fold +Runner +Uh +licked +Politics +committees +neighbors +fairy +Silva +Leipzig +tipped +correctly +exciting +electronics +foundations +cottage +governmental +##hat +allied +claws +presidency +cruel +Agreement +slender +accompanying +precisely +##pass +driveway +swim +Stand +crews +##mission +rely +everyday +Wings +demo +##hic +recreational +min +nationality +##duction +Easter +##hole +canvas +Kay +Leicester +talented +Discovery +shells +##ech +Kerry +Ferguson +Leave +##place +altogether +adopt +butt +wolves +##nsis +##ania +modest +soprano +Boris +##ught +electron +depicts +hid +cruise +differ +treasure +##nch +Gun +Mama +Bengali +trainer +merchants +innovation +presumably +Shirley +bottles +proceeds +Fear +invested +Pirates +particle +Dominic +blamed +Fight +Daisy +##pper +##graphic +nods +knight +Doyle +tales +Carnegie +Evil +Inter +Shore +Nixon +transform +Savannah +##gas +Baltic +stretching +worlds +protocol +Percy +Toby +Heroes +brave +dancers +##aria +backwards +responses +Chi +Gaelic +Berry +crush +embarked +promises +Madonna +researcher +realised +inaugurated +Cherry +Mikhail +Nottingham +reinforced +subspecies +rapper +##kie +Dreams +Re +Damon +Minneapolis +monsters +suspicion +Tel +surroundings +afterward +complaints +OF +sectors +Algeria +lanes +Sabha +objectives +Donna +bothered +distracted +deciding +##ives +##CA +##onia +bishops +Strange +machinery +Voiced +synthesis +reflects +interference +##TS +##ury +keen +##ign +frown +freestyle +ton +Dixon +Sacred +Ruby +Prison +##ión +1825 +outfit +##tain +curiosity +##ight +frames +steadily +emigrated +horizon +##erly +Doc +philosophical +Table +UTC +Marina +##DA +secular +##eed +Zimbabwe +cops +Mack +sheriff +Sanskrit +Francesco +catches +questioning +streaming +Kill +testimony +hissed +tackle +countryside +copyright +##IP +Buddhism +##rator +ladder +##ON +Past +rookie +depths +##yama +##ister +##HS +Samantha +Dana +Educational +brows +Hammond +raids +envelope +##sco +##hart +##ulus +epic +detection +Streets +Potter +statistical +für +ni +accounting +##pot +employer +Sidney +Depression +commands +Tracks +averaged +lets +Ram +longtime +suits +branded +chip +Shield +loans +ought +Said +sip +##rome +requests +Vernon +bordered +veterans +##ament +Marsh +Herzegovina +Pine +##igo +mills +anticipation +reconnaissance +##ef +expectations +protested +arrow +guessed +depot +maternal +weakness +##ap +projected +pour +Carmen +provider +newer +remind +freed +##rily +##wal +##tones +intentions +Fiji +timing +Match +managers +Kosovo +Herman +Wesley +Chang +135 +semifinals +shouting +Indo +Janeiro +Chess +Macedonia +Buck +##onies +rulers +Mail +##vas +##sel +MHz +Programme +Task +commercially +subtle +propaganda +spelled +bowling +basically +Raven +1828 +Colony +109 +##ingham +##wara +anticipated +1829 +##iers +graduates +##rton +##fication +endangered +ISO +diagnosed +##tage +exercises +Battery +bolt +poison +cartoon +##ción +hood +bowed +heal +Meyer +Reagan +##wed +subfamily +##gent +momentum +infant +detect +##sse +Chapman +Darwin +mechanics +NSW +Cancer +Brooke +Nuclear +comprised +hire +sanctuary +wingspan +contrary +remembering +surprising +Basic +stealing +OS +hatred +##lled +masters +violation +Rule +##nger +assuming +conquered +louder +robe +Beatles +legitimate +##vation +massacre +Rica +unsuccessfully +poets +##enberg +careers +doubled +premier +battalions +Dubai +Paper +Louisville +gestured +dressing +successive +mumbled +Vic +referee +pupil +##cated +##rre +ceremonies +picks +##IN +diplomat +alike +geographical +rays +##HA +##read +harbour +factories +pastor +playwright +Ultimate +nationalist +uniforms +obtaining +kit +Amber +##pling +screenwriter +ancestry +##cott +Fields +PR +Coleman +rat +Bavaria +squeeze +highlighted +Adult +reflecting +Mel +1824 +bicycle +organizing +sided +Previously +Underground +Prof +athletics +coupled +mortal +Hampton +worthy +immune +Ava +##gun +encouraging +simplified +##ssa +##nte +##ann +Providence +entities +Pablo +Strong +Housing +##ista +##ators +kidnapped +mosque +Kirk +whispers +fruits +shattered +fossil +Empress +Johns +Webster +Thing +refusing +differently +specimen +Ha +##EN +##tina +##elle +##night +Horn +neighbourhood +Bolivia +##rth +genres +Pre +##vich +Amelia +swallow +Tribune +Forever +Psychology +Use +##bers +Gazette +ash +##usa +Monster +##cular +delegation +blowing +Oblast +retreated +automobile +##ex +profits +shirts +devil +Treasury +##backs +Drums +Ronnie +gameplay +expertise +Evening +resides +Caesar +unity +Crazy +linking +Vision +donations +Isabel +valve +Sue +WWE +logical +availability +fitting +revolt +##mill +Linux +taxi +Access +pollution +statues +Augustus +##pen +cello +##some +lacking +##ati +Gwen +##aka +##ovich +1821 +Wow +initiatives +Uruguay +Cain +stroked +examine +##ī +mentor +moist +disorders +buttons +##tica +##anna +Species +Lynch +museums +scorer +Poor +eligibility +op +unveiled +cats +Title +wheat +critically +Syracuse +##osis +marketed +enhance +Ryder +##NG +##ull +##rna +embedded +throws +foods +happily +##ami +lesson +formats +punched +##rno +expressions +qualities +##sal +Gods +##lity +elect +wives +##lling +jungle +Toyota +reversed +Grammar +Cloud +Agnes +##ules +disputed +verses +Lucien +threshold +##rea +scanned +##bled +##dley +##lice +Kazakhstan +Gardner +Freeman +##rz +inspection +Rita +accommodation +advances +chill +Elliot +thriller +Constantinople +##mos +debris +whoever +1810 +Santo +Carey +remnants +Guatemala +##irs +carriers +equations +mandatory +##WA +anxious +measurement +Summit +Terminal +Erin +##zes +LLC +##uo +glancing +sin +##₃ +Downtown +flowering +Euro +Leigh +Lance +warn +decent +recommendations +##ote +Quartet +##rrell +Clarence +colleague +guarantee +230 +Clayton +Beast +addresses +prospect +destroyer +vegetables +Leadership +fatal +prints +190 +##makers +Hyde +persuaded +illustrations +Southampton +Joyce +beats +editors +mount +##grave +Malaysian +Bombay +endorsed +##sian +##bee +applying +Religion +nautical +bomber +Na +airfield +gravel +##rew +Cave +bye +dig +decree +burden +Election +Hawk +Fe +##iled +reunited +##tland +liver +Teams +Put +delegates +Ella +##fect +Cal +invention +Castro +bored +##kawa +##ail +Trinidad +NASCAR +pond +develops +##pton +expenses +Zoe +Released +##rf +organs +beta +parameters +Neill +##lene +lateral +Beat +blades +Either +##hale +Mitch +##ET +##vous +Rod +burnt +phones +Rising +##front +investigating +##dent +Stephanie +##keeper +screening +##uro +Swan +Sinclair +modes +bullets +Nigerian +melody +##ques +Rifle +##12 +128 +##jin +charm +Venus +##tian +fusion +advocated +visitor +pinned +genera +3000 +Ferry +Solo +quantity +regained +platinum +shoots +narrowly +preceded +update +##ichi +equality +unaware +regiments +ally +##tos +transmitter +locks +Seeing +outlets +feast +reopened +##ows +struggles +Buddy +1826 +bark +elegant +amused +Pretty +themed +schemes +Lisbon +Te +patted +terrorism +Mystery +##croft +##imo +Madagascar +Journey +dealer +contacted +##quez +ITV +vacation +Wong +Sacramento +organisms +##pts +balcony +coloured +sheer +defines +MC +abortion +forbidden +accredited +Newfoundland +tendency +entrepreneur +Benny +Tanzania +needing +finalist +mythology +weakened +gown +sentences +Guest +websites +Tibetan +UFC +voluntary +annoyed +Welcome +honestly +correspondence +geometry +Deutsche +Biology +Help +##aya +Lines +Hector +##ael +reluctant +##ages +wears +inquiry +##dell +Holocaust +Tourism +Wei +volcanic +##mates +Visual +sorts +neighborhoods +Running +apple +shy +Laws +bend +Northeast +feminist +Speedway +Murder +visa +stuffed +fangs +transmitted +fiscal +Ain +enlarged +##ndi +Cecil +Peterson +Benson +Bedford +acceptable +##CC +##wer +purely +triangle +foster +Alberto +educator +Highland +acute +LGBT +Tina +Mi +adventures +Davidson +Honda +translator +monk +enacted +summoned +##ional +collector +Genesis +Un +liner +Di +Statistical +##CS +filter +Knox +Religious +Stella +Estonian +Turn +##ots +primitive +parishes +##lles +complexity +autobiography +rigid +cannon +pursuing +exploring +##gram +##mme +freshman +caves +Expedition +Traditional +iTunes +certification +cooling +##ort +##gna +##IT +##lman +##VA +Motion +explosive +licence +boxer +shrine +loosely +Brigadier +Savage +Brett +MVP +heavier +##elli +##gged +Buddha +Easy +spells +fails +incredibly +Georg +stern +compatible +Perfect +applies +cognitive +excessive +nightmare +neighbor +Sicily +appealed +static +##₁ +Aberdeen +##leigh +slipping +bride +##guard +Um +Clyde +1818 +##gible +Hal +Frost +Sanders +interactive +Hour +##vor +hurting +bull +termed +shelf +capturing +##pace +rolls +113 +##bor +Chilean +teaches +##rey +exam +shipped +Twin +borrowed +##lift +Shit +##hot +Lindsay +Below +Kiev +Lin +leased +##sto +Eli +Diane +Val +subtropical +shoe +Bolton +Dragons +##rification +Vatican +##pathy +Crisis +dramatically +talents +babies +##ores +surname +##AP +##cology +cubic +opted +Archer +sweep +tends +Karnataka +Judy +stint +Similar +##nut +explicitly +##nga +interact +Mae +portfolio +clinic +abbreviated +Counties +##iko +hearts +##ı +providers +screams +Individual +##etti +Monument +##iana +accessed +encounters +gasp +##rge +defunct +Avery +##rne +nobility +useless +Phase +Vince +senator +##FL +1813 +surprisingly +##illo +##chin +Boyd +rumors +equity +Gone +Hearts +chassis +overnight +Trek +wrists +submit +civic +designers +##rity +prominence +decorative +derives +starter +##AF +wisdom +Powers +reluctantly +measurements +doctoral +Noel +Gideon +Baden +Cologne +lawn +Hawaiian +anthology +##rov +Raiders +embassy +Sterling +##pal +Telugu +troubled +##FC +##bian +fountain +observe +ore +##uru +##gence +spelling +Border +grinning +sketch +Benedict +Xbox +dialects +readily +immigrant +Constitutional +aided +nevertheless +SE +tragedy +##ager +##rden +Flash +##MP +Europa +emissions +##ield +panties +Beverly +Homer +curtain +##oto +toilet +Isn +Jerome +Chiefs +Hermann +supernatural +juice +integrity +Scots +auto +Patriots +Strategic +engaging +prosecution +cleaned +Byron +investments +adequate +vacuum +laughs +##inus +##nge +Usually +Roth +Cities +Brand +corpse +##ffy +Gas +rifles +Plains +sponsorship +Levi +tray +owed +della +commanders +##ead +tactical +##rion +García +harbor +discharge +##hausen +gentleman +endless +highways +##itarian +pleaded +##eta +archive +Midnight +exceptions +instances +Gibraltar +cart +##NS +Darren +Bonnie +##yle +##iva +OCLC +bra +Jess +##EA +consulting +Archives +Chance +distances +commissioner +##AR +LL +sailors +##sters +enthusiasm +Lang +##zia +Yugoslav +confirm +possibilities +Suffolk +##eman +banner +1822 +Supporting +fingertips +civilization +##gos +technically +1827 +Hastings +sidewalk +strained +monuments +Floyd +Chennai +Elvis +villagers +Cumberland +strode +albeit +Believe +planets +combining +Mohammad +container +##mouth +##tures +verb +BA +Tank +Midland +screened +Gang +Democracy +Helsinki +screens +thread +charitable +##version +swiftly +ma +rational +combine +##SS +##antly +dragging +Cliff +Tasmania +quest +professionally +##aj +rap +##lion +livestock +##hua +informal +specially +lonely +Matthews +Dictionary +1816 +Observatory +correspondent +constitute +homeless +waving +appreciated +Analysis +Meeting +dagger +##AL +Gandhi +flank +Giant +Choir +##not +glimpse +toe +Writer +teasing +springs +##dt +Glory +healthcare +regulated +complaint +math +Publications +makers +##hips +cement +Need +apologize +disputes +finishes +Partners +boring +ups +gains +1793 +Congressional +clergy +Folk +##made +##nza +Waters +stays +encoded +spider +betrayed +Applied +inception +##urt +##zzo +wards +bells +UCLA +Worth +bombers +Mo +trademark +Piper +##vel +incorporates +1801 +##cial +dim +Twelve +##word +Appeals +tighter +spacecraft +##tine +coordinates +##iac +mistakes +Zach +laptop +Teresa +##llar +##yr +favored +Nora +sophisticated +Irving +hammer +División +corporations +niece +##rley +Patterson +UNESCO +trafficking +Ming +balanced +plaque +Latvia +broader +##owed +Save +confined +##vable +Dalton +tide +##right +##ural +##num +swords +caring +##eg +IX +Acting +paved +##moto +launching +Antoine +substantially +Pride +Philharmonic +grammar +Indoor +Ensemble +enabling +114 +resided +Angelo +publicity +chaired +crawled +Maharashtra +Telegraph +lengthy +preference +differential +anonymous +Honey +##itation +wage +##iki +consecrated +Bryant +regulatory +Carr +##én +functioning +watches +##ú +shifts +diagnosis +Search +app +Peters +##SE +##cat +Andreas +honours +temper +counsel +Urdu +Anniversary +maritime +##uka +harmony +##unk +essence +Lorenzo +choked +Quarter +indie +##oll +loses +##prints +amendment +Adolf +scenario +similarities +##rade +##LC +technological +metric +Russians +thoroughly +##tead +cruiser +1806 +##nier +1823 +Teddy +##psy +au +progressed +exceptional +broadcaster +partnered +fitness +irregular +placement +mothers +unofficial +Garion +Johannes +1817 +regain +Solar +publishes +Gates +Broken +thirds +conversations +dive +Raj +contributor +quantities +Worcester +governance +##flow +generating +pretending +Belarus +##voy +radius +skating +Marathon +1819 +affection +undertook +##wright +los +##bro +locate +PS +excluded +recreation +tortured +jewelry +moaned +##logue +##cut +Complete +##rop +117 +##II +plantation +whipped +slower +crater +##drome +Volunteer +attributes +celebrations +regards +Publishers +oath +utilized +Robbie +Giuseppe +fiber +indication +melted +archives +Damien +storey +affecting +identifying +dances +alumni +comparable +upgrade +rented +sprint +##kle +Marty +##lous +treating +railways +Lebanese +erupted +occupy +sympathy +Jude +Darling +Qatar +drainage +McCarthy +heel +Klein +computing +wireless +flip +Du +Bella +##ast +##ssen +narrator +mist +sings +alignment +121 +2020 +securing +##rail +Progress +missionaries +brutal +mercy +##shing +Hip +##ache +##olo +switching +##here +Malay +##ob +constituted +Mohammed +Often +standings +surge +teachings +ink +detached +systematic +Trial +Myanmar +##wo +offs +Reyes +decoration +translations +wherever +reviewer +speculation +Bangkok +terminated +##ester +beard +RCA +Aidan +Associated +Emerson +Charity +1803 +generous +Dudley +ATP +##haven +prizes +toxic +gloves +##iles +##dos +Turning +myth +Parade +##building +Hits +##eva +teamed +Above +Duchess +Holt +##oth +Sub +Ace +atomic +inform +Ship +depend +Jun +##bes +Norwich +globe +Baroque +Christina +Cotton +Tunnel +kidding +Concerto +Brittany +tasted +phases +stems +angles +##TE +##nam +##40 +charted +Alison +intensive +Willis +glory +##lit +Bergen +est +taller +##dicate +labeled +##ido +commentator +Warrior +Viscount +shortened +aisle +Aria +Spike +spectators +goodbye +overlooking +mammals +##lude +wholly +Barrett +##gus +accompany +seventy +employ +##mb +ambitious +beloved +basket +##mma +##lding +halted +descendant +pad +exclaimed +cloak +##pet +Strait +Bang +Aviv +sadness +##ffer +Donovan +1880s +agenda +swinging +##quin +jerk +Boat +##rist +nervously +Silence +Echo +shout +implies +##iser +##cking +Shiva +Weston +damages +##tist +effectiveness +Horace +cycling +Rey +ache +Photography +PDF +Dear +leans +Lea +##vision +booth +attained +disbelief +##eus +##ution +Hop +pension +toys +Eurovision +faithful +##heads +Andre +owe +default +Atlas +Megan +highlights +lovers +Constantine +Sixth +masses +##garh +emerge +Auto +Slovak +##oa +##vert +Superintendent +flicked +inventor +Chambers +Frankie +Romeo +pottery +companions +Rudolf +##liers +diary +Unless +tap +alter +Randall +##ddle +##eal +limitations +##boards +utterly +knelt +guaranteed +Cowboys +Islander +horns +##ike +Wendy +sexually +Smart +breasts +##cian +compromise +Duchy +AT +Galaxy +analog +Style +##aking +weighed +Nigel +optional +Czechoslovakia +practicing +Ham +##0s +feedback +batted +uprising +operative +applicable +criminals +classrooms +Somehow +##ode +##OM +Naomi +Winchester +##pping +Bart +Regina +competitor +Recorded +Yuan +Vera +lust +Confederation +##test +suck +1809 +Lambert +175 +Friend +##ppa +Slowly +##⁺ +Wake +Dec +##aneous +chambers +Color +Gus +##site +Alternative +##world +Exeter +Omaha +celebrities +striker +210 +dwarf +meals +Oriental +Pearson +financing +revenues +underwater +Steele +screw +Feeling +Mt +acids +badge +swore +theaters +Moving +admired +lung +knot +penalties +116 +fork +##cribed +Afghan +outskirts +Cambodia +oval +wool +fossils +Ned +Countess +Darkness +delicious +##nica +Evelyn +Recordings +guidelines +##CP +Sandra +meantime +Antarctica +modeling +granddaughter +##rial +Roma +Seventh +Sunshine +Gabe +##nton +Shop +Turks +prolific +soup +parody +##nta +Judith +disciplines +resign +Companies +Libya +Jets +inserted +Mile +retrieve +filmmaker +##rand +realistic +unhappy +##30 +sandstone +##nas +##lent +##ush +##rous +Brent +trash +Rescue +##unted +Autumn +disgust +flexible +infinite +sideways +##oss +##vik +trailing +disturbed +50th +Newark +posthumously +##rol +Schmidt +Josef +##eous +determining +menu +Pole +Anita +Luc +peaks +118 +Yard +warrant +generic +deserted +Walking +stamp +tracked +##berger +paired +surveyed +sued +Rainbow +##isk +Carpenter +submarines +realization +touches +sweeping +Fritz +module +Whether +resembles +##form +##lop +unsure +hunters +Zagreb +unemployment +Senators +Georgetown +##onic +Barker +foul +commercials +Dresden +Words +collision +Carlton +Fashion +doubted +##ril +precision +MIT +Jacobs +mob +Monk +retaining +gotta +##rod +remake +Fast +chips +##pled +sufficiently +##lights +delivering +##enburg +Dancing +Barton +Officers +metals +##lake +religions +##ré +motivated +differs +dorsal +##birds +##rts +Priest +polished +##aling +Saxony +Wyatt +knockout +##hor +Lopez +RNA +##link +metallic +##kas +daylight +Montenegro +##lining +wrapping +resemble +Jam +Viking +uncertainty +angels +enables +##fy +Stuttgart +tricks +tattoo +127 +wicked +asset +breach +##yman +MW +breaths +Jung +im +1798 +noon +vowel +##qua +calmly +seasonal +chat +ingredients +cooled +Randolph +ensuring +##ib +##idal +flashing +1808 +Macedonian +Cool +councils +##lick +advantages +Immediately +Madras +##cked +Pain +fancy +chronic +Malayalam +begged +##nese +Inner +feathers +##vey +Names +dedication +Sing +pan +Fischer +nurses +Sharp +inning +stamps +Meg +##ello +edged +motioned +Jacksonville +##ffle +##dic +##US +divide +garnered +Ranking +chasing +modifications +##oc +clever +midst +flushed +##DP +void +##sby +ambulance +beaches +groan +isolation +strengthen +prevention +##ffs +Scouts +reformed +geographic +squadrons +Fiona +Kai +Consequently +##uss +overtime +##yas +Fr +##BL +Papua +Mixed +glances +Haiti +Sporting +sandy +confronted +René +Tanner +1811 +##IM +advisory +trim +##ibe +González +gambling +Jupiter +##ility +##owski +##nar +122 +apology +teased +Pool +feminine +wicket +eagle +shiny +##lator +blend +peaking +nasty +nodding +fraction +tech +Noble +Kuwait +brushing +Italia +Canberra +duet +Johan +1805 +Written +cameo +Stalin +pig +cord +##zio +Surely +SA +owing +holidays +123 +Ranger +lighthouse +##ige +miners +1804 +##ë +##gren +##ried +crashing +##atory +wartime +highlight +inclined +Torres +Tax +##zel +##oud +Own +##corn +Divine +EMI +Relief +Northwestern +ethics +BMW +click +plasma +Christie +coordinator +Shepherd +washing +cooked +##dio +##eat +Cerambycidae +algebra +Engine +costumes +Vampire +vault +submission +virtue +assumption +##rell +Toledo +##oting +##rva +crept +emphasized +##lton +##ood +Greeks +surgical +crest +Patrol +Beta +Tessa +##GS +pizza +traits +rats +Iris +spray +##GC +Lightning +binary +escapes +##take +Clary +crowds +##zong +hauled +maid +##fen +Manning +##yang +Nielsen +aesthetic +sympathetic +affiliation +soaked +Mozart +personalities +begging +##iga +clip +Raphael +yearly +Lima +abundant +##lm +1794 +strips +Initiative +reporters +##vsky +consolidated +##itated +Civic +rankings +mandate +symbolic +##ively +1807 +rental +duck +nave +complications +##nor +Irene +Nazis +haunted +scholarly +Pratt +Gran +Embassy +Wave +pity +genius +bats +canton +Tropical +marker +##cos +escorted +Climate +##posed +appreciation +freezing +puzzle +Internal +pools +Shawn +pathway +Daniels +Fitzgerald +extant +olive +Vanessa +marriages +cocked +##dging +prone +chemicals +doll +drawer +##HF +Stark +Property +##tai +flowed +Sheridan +##uated +Less +Omar +remarks +catalogue +Seymour +wreck +Carrie +##bby +Mercer +displaced +sovereignty +rip +Flynn +Archie +Quarterfinals +Hassan +##ards +vein +Osaka +pouring +wages +Romance +##cript +##phere +550 +##eil +##stown +Documentary +ancestor +CNN +Panthers +publishers +Rise +##mu +biting +Bright +String +succeeding +119 +loaned +Warwick +Sheikh +Von +Afterwards +Jax +Camden +helicopters +Hence +Laurel +##ddy +transaction +Corp +clause +##owing +##kel +Investment +cups +Lucia +Moss +Giles +chef +López +decisive +30th +distress +linguistic +surveys +Ready +maiden +Touch +frontier +incorporate +exotic +mollusk +Leopold +Ride +##wain +##ndo +teammates +tones +drift +ordering +Feb +Penny +Normandy +Present +Flag +pipes +##rro +delight +motto +Tibet +leap +Eliza +Produced +teenagers +sitcom +Try +Hansen +Cody +wandered +terrestrial +frog +scare +resisted +employers +coined +##DS +resistant +Fly +captive +dissolution +judged +associates +defining +##court +Hale +##mbo +raises +clusters +twelfth +##metric +Roads +##itude +satisfy +Android +Reds +Gloucester +Category +Valencia +Daemon +stabbed +Luna +Churches +Canton +##eller +Attack +Kashmir +annexed +grabs +asteroid +Hartford +recommendation +Rodriguez +handing +stressed +frequencies +delegate +Bones +Erie +Weber +Hands +Acts +millimetres +24th +Fat +Howe +casually +##SL +convent +1790 +IF +##sity +1795 +yelling +##ises +drain +addressing +amino +Marcel +Sylvia +Paramount +Gerard +Volleyball +butter +124 +Albion +##GB +triggered +1792 +folding +accepts +##ße +preparations +Wimbledon +dose +##grass +escaping +##tling +import +charging +##dation +280 +Nolan +##fried +Calcutta +##pool +Cove +examining +minded +heartbeat +twisting +domains +bush +Tunisia +Purple +Leone +##code +evacuated +battlefield +tiger +Electrical +##ared +chased +##cre +cultivated +Jet +solved +shrug +ringing +Impact +##iant +kilometre +##log +commemorate +migrated +singular +designing +promptly +Higgins +##own +##aves +freshwater +Marketing +Payne +beg +locker +pray +implied +AAA +corrected +Trans +Europeans +Ashe +acknowledge +Introduction +##writer +##llen +Munster +auxiliary +growl +Hours +Poems +##AT +reduces +Plain +plague +canceled +detention +polite +necklace +Gustav +##gu +##lance +En +Angola +##bb +dwelling +##hea +5000 +Qing +Dodgers +rim +##ored +##haus +spilled +Elisabeth +Viktor +backpack +1802 +amended +##worthy +Phantom +##ctive +keeper +##loom +Vikings +##gua +employs +Tehran +specialty +##bate +Marx +Mirror +Jenna +rides +needle +prayers +clarinet +forewings +##walk +Midlands +convincing +advocacy +Cao +Birds +cycles +Clement +Gil +bubble +Maximum +humanitarian +Tan +cries +##SI +Parsons +Trio +offshore +Innovation +clutched +260 +##mund +##duct +Prairie +relied +Falcon +##ste +Kolkata +Gill +Swift +Negro +Zoo +valleys +##OL +Opening +beams +MPs +outline +Bermuda +Personal +exceed +productive +##MT +republic +forum +##sty +tornado +Known +dipped +Edith +folks +mathematician +watershed +Ricardo +synthetic +##dication +deity +##₄ +gaming +subjected +suspects +Foot +swollen +Motors +##tty +##ý +aloud +ceremonial +es +nuts +intend +Carlisle +tasked +hesitation +sponsors +unified +inmates +##ctions +##stan +tiles +jokes +whereby +outcomes +Lights +scary +Stoke +Portrait +Blind +sergeant +violations +cultivation +fuselage +Mister +Alfonso +candy +sticks +teen +agony +Enough +invite +Perkins +Appeal +mapping +undergo +Glacier +Melanie +affects +incomplete +##dd +Colombian +##nate +CBC +purchasing +bypass +Drug +Electronics +Frontier +Coventry +##aan +autonomy +scrambled +Recent +bounced +cow +experiencing +Rouge +cuisine +Elite +disability +Ji +inheritance +wildly +Into +##wig +confrontation +Wheeler +shiver +Performing +aligned +consequently +Alexis +Sin +woodland +executives +Stevenson +Ferrari +inevitable +##cist +##dha +##base +Corner +comeback +León +##eck +##urus +MacDonald +pioneering +breakdown +landscapes +Veterans +Rican +Theological +stirred +participant +Credit +Hyderabad +snails +Claudia +##ocene +compliance +##MI +Flags +Middlesex +storms +winding +asserted +er +##ault +##kal +waking +##rates +abbey +Augusta +tooth +trustees +Commodore +##uded +Cunningham +NC +Witch +marching +Sword +Same +spiral +Harley +##ahan +Zack +Audio +1890s +##fit +Simmons +Kara +Veronica +negotiated +Speaking +FIBA +Conservatory +formations +constituencies +explicit +facial +eleventh +##ilt +villain +##dog +##case +##hol +armored +tin +hairs +##umi +##rai +mattress +Angus +cease +verbal +Recreation +savings +Aurora +peers +Monastery +Airways +drowned +additions +downstream +sticking +Shi +mice +skiing +##CD +Raw +Riverside +warming +hooked +boost +memorable +posed +treatments +320 +##dai +celebrating +blink +helpless +circa +Flowers +PM +uncommon +Oct +Hawks +overwhelmed +Sparhawk +repaired +Mercy +pose +counterpart +compare +survives +##½ +##eum +coordinate +Lil +grandchildren +notorious +Yi +Judaism +Juliet +accusations +1789 +floated +marathon +roar +fortified +reunion +145 +Nov +Paula +##fare +##toria +tearing +Cedar +disappearance +Si +gifted +scar +270 +PBS +Technologies +Marvin +650 +roller +cupped +negotiate +##erman +passport +tram +miracle +styled +##tier +necessity +Des +rehabilitation +Lara +USD +psychic +wipe +##lem +mistaken +##lov +charming +Rider +pageant +dynamics +Cassidy +##icus +defenses +##tadt +##vant +aging +##inal +declare +mistress +supervised +##alis +##rest +Ashton +submerged +sack +Dodge +grocery +ramp +Teacher +lineage +imagery +arrange +inscriptions +Organisation +Siege +combines +pounded +Fleming +legends +columnist +Apostolic +prose +insight +Arabian +expired +##uses +##nos +Alone +elbows +##asis +##adi +##combe +Step +Waterloo +Alternate +interval +Sonny +plains +Goals +incorporating +recruit +adjoining +Cheshire +excluding +marrying +ducked +Cherokee +par +##inate +hiking +Coal +##bow +natives +ribbon +Allies +con +descriptions +positively +##lal +defendant +22nd +Vivian +##beat +Weather +possessions +Date +sweetheart +inability +Salisbury +adviser +ideology +Nordic +##eu +Cubs +IP +Administrative +##nick +facto +liberation +Burnett +Javier +fashioned +Electoral +Turin +theft +unanimous +Per +1799 +Clan +Hawkins +Teachers +##wes +Cameroon +Parkway +##gment +demolition +atoms +nucleus +##thi +recovering +##yte +##vice +lifts +Must +deposit +Hancock +Semi +darkened +Declaration +moan +muscular +Myers +attractions +sauce +simulation +##weed +Alps +barriers +##baum +Barack +galleries +Min +holders +Greenwich +donation +Everybody +Wolfgang +sandwich +Kendra +Collegiate +casino +Slavic +ensuing +Porto +##grapher +Jesuit +suppressed +tires +Ibrahim +protesters +Ibn +Amos +1796 +phenomena +Hayden +Paraguay +Squad +Reilly +complement +aluminum +##eers +doubts +decay +demise +Practice +patience +fireplace +transparent +monarchy +##person +Rodney +mattered +rotating +Clifford +disposal +Standards +paced +##llie +arise +tallest +tug +documentation +node +freeway +Nikolai +##cite +clicked +imaging +Lorraine +Tactical +Different +Regular +Holding +165 +Pilot +guarded +##polis +Classics +Mongolia +Brock +monarch +cellular +receptors +Mini +Chandler +financed +financially +Lives +erection +Fuller +unnamed +Kannada +cc +passive +plateau +##arity +freak +##rde +retrieved +transactions +##sus +23rd +swimmer +beef +fulfill +Arlington +offspring +reasoning +Rhys +saves +pseudonym +centimetres +shivered +shuddered +##ME +Feel +##otic +professors +Blackburn +##eng +##life +##haw +interred +lodge +fragile +Della +guardian +##bbled +catalog +clad +observer +tract +declaring +##headed +Lok +dean +Isabelle +1776 +irrigation +spectacular +shuttle +mastering +##aro +Nathaniel +Retired +##lves +Brennan +##kha +dick +##dated +##hler +Rookie +leapt +televised +weekends +Baghdad +Yemen +##fo +factions +ion +Lab +mortality +passionate +Hammer +encompasses +confluence +demonstrations +Ki +derivative +soils +##unch +Ranch +Universities +conventions +outright +aiming +hierarchy +reside +illusion +graves +rituals +126 +Antwerp +Dover +##ema +campuses +Hobart +lifelong +aliens +##vity +Memory +coordination +alphabet +##mina +Titans +pushes +Flanders +##holder +Normal +excellence +capped +profound +Taipei +portrayal +sparked +scratch +se +##eas +##hir +Mackenzie +##cation +Neo +Shin +##lined +magnificent +poster +batsman +##rgent +persuade +##ement +Icelandic +miserable +collegiate +Feature +geography +##mura +Comic +Circus +processor +barracks +Tale +##11 +Bulls +##rap +strengthened +##bell +injection +miniature +broadly +Letter +fare +hostage +traders +##nium +##mere +Fortune +Rivera +Lu +triumph +Browns +Bangalore +cooperative +Basel +announcing +Sawyer +##him +##cco +##kara +darted +##AD +##nova +sucking +##position +perimeter +flung +Holdings +##NP +Basque +sketches +Augustine +Silk +Elijah +analyst +armour +riots +acquiring +ghosts +##ems +132 +Pioneer +Colleges +Simone +Economy +Author +semester +Soldier +il +##unting +##bid +freaking +Vista +tumor +##bat +murderer +##eda +unreleased +##grove +##sser +##té +edit +statute +sovereign +##gawa +Killer +stares +Fury +comply +##lord +##nant +barrels +Andhra +Maple +generator +mascot +unusually +eds +##ante +##runner +rod +##tles +Historically +Jennings +dumped +Established +resemblance +##lium +##cise +##body +##voke +Lydia +##hou +##iring +nonetheless +1797 +corrupt +patrons +physicist +sneak +Livingston +Citizens +Architects +Werner +trends +Melody +eighty +markings +brakes +##titled +oversaw +processed +mock +Midwest +intervals +##EF +stretches +werewolf +##MG +Pack +controller +##dition +Honours +cane +Griffith +vague +repertoire +Courtney +orgasm +Abdullah +dominance +occupies +Ya +introduces +Lester +instinct +collaborative +Indigenous +refusal +##rank +outlet +debts +spear +155 +##keeping +##ulu +Catalan +##osh +tensions +##OT +bred +crude +Dunn +abdomen +accurately +##fu +##lough +accidents +Row +Audrey +rude +Getting +promotes +replies +Paolo +merge +##nock +trans +Evangelical +automated +Canon +##wear +##ggy +##gma +Broncos +foolish +icy +Voices +knives +Aside +dreamed +generals +molecule +AG +rejection +insufficient +##nagar +deposited +sacked +Landing +arches +helpful +devotion +intake +Flower +PGA +dragons +evolutionary +##mail +330 +GM +tissues +##tree +arcade +composite +lid +Across +implications +lacks +theological +assessed +concentrations +Den +##mans +##ulous +Fu +homeland +##stream +Harriet +ecclesiastical +troop +ecological +winked +##xed +eighteenth +Casino +specializing +##sworth +unlocked +supreme +devastated +snatched +trauma +GDP +Nord +saddle +Wes +convenient +competes +##nu +##iss +Marian +subway +##rri +successes +umbrella +##far +##ually +Dundee +##cence +spark +##rix +##я +Quality +Geological +cockpit +rpm +Cam +Bucharest +riot +##PM +Leah +##dad +##pose +Ka +m³ +Bundesliga +Wolfe +grim +textile +quartet +expressing +fantastic +destroyers +eternal +picnic +##oro +contractor +1775 +spanning +declining +##cating +Lowe +Sutherland +Emirates +downward +nineteen +violently +scout +viral +melting +enterprises +##cer +Crosby +Jubilee +antenna +urgent +Rory +##uin +##sure +wandering +##gler +##vent +Suzuki +Lifetime +Dirty +occupying +##quent +Disc +Guru +mound +Lennon +Humanities +listeners +Walton +uh +Braves +Bologna +##bis +##gra +Dwight +crawl +flags +memoir +Thorne +Archdiocese +dairy +##uz +##tery +roared +adjust +patches +inn +Knowing +##bbed +##zan +scan +Papa +precipitation +angrily +passages +postal +Phi +embraced +blacks +economist +triangular +Sen +shooter +punished +Millennium +Swimming +confessed +Aston +defeats +Era +cousins +Williamson +##rer +daytime +dumb +##rek +underway +specification +Buchanan +prayed +concealed +activation +##issa +canon +awesome +Starr +plural +summers +##fields +Slam +unnecessary +1791 +resume +trilogy +compression +##rough +selective +dignity +Yan +##xton +immense +##yun +lone +seeded +hiatus +lightweight +summary +Yo +approve +Galway +rejoined +Elise +garbage +burns +speeches +129 +Honduras +##liness +inventory +jersey +FK +assure +slumped +Lionel +Suite +##sbury +Lena +continuation +##AN +brightly +##nti +GT +Knowledge +##park +##lius +lethal +##tribution +##sions +Certificate +Mara +##lby +algorithms +Jade +blows +pirates +fleeing +wheelchair +Stein +sophomore +Alt +Territorial +diploma +snakes +##olic +##tham +Tiffany +Pius +flush +urging +Hanover +Reich +##olate +Unity +Pike +collectively +Theme +ballad +kindergarten +rocked +zoo +##page +whip +Rodríguez +strokes +checks +Becky +Stern +upstream +##uta +Silent +volunteered +Sigma +##ingen +##tract +##ede +Gujarat +screwed +entertaining +##action +##ryn +defenders +innocence +lesbian +que +Richie +nodes +Lie +juvenile +Jakarta +safer +confront +Bert +breakthrough +gospel +Cable +##zie +institutional +Archive +brake +liquor +feeds +##iate +chancellor +Encyclopedia +Animation +scanning +teens +##mother +Core +Rear +Wine +##flower +reactor +Ave +cardinal +sodium +strands +Olivier +crouched +Vaughan +Sammy +Image +scars +Emmanuel +flour +bias +nipple +revelation +##ucci +Denny +##ssy +Form +Runners +admits +Rama +violated +Burmese +feud +underwear +Mohamed +Named +swift +statewide +Door +Recently +comparing +Hundred +##idge +##nity +##rds +Rally +Reginald +Auburn +solving +waitress +Treasurer +##ilization +Halloween +Ministers +Boss +Shut +##listic +Rahman +demonstrating +##pies +Gaza +Yuri +installations +Math +schooling +##bble +Bronx +exiled +gasoline +133 +bundle +humid +FCC +proportional +relate +VFL +##dez +continuity +##cene +syndicated +atmospheric +arrows +Wanderers +reinforcements +Willow +Lexington +Rotten +##yon +discovering +Serena +portable +##lysis +targeting +£1 +Goodman +Steam +sensors +detachment +Malik +##erie +attitudes +Goes +Kendall +Read +Sleep +beans +Nikki +modification +Jeanne +knuckles +Eleven +##iously +Gross +Jaime +dioxide +moisture +Stones +UCI +displacement +Metacritic +Jury +lace +rendering +elephant +Sergei +##quire +GP +Abbott +##type +projection +Mouse +Bishops +whispering +Kathleen +Rams +##jar +whites +##oran +assess +dispatched +##hire +kin +##mir +Nursing +advocates +tremendous +sweater +assisting +##bil +Farmer +prominently +reddish +Hague +cyclone +##SD +Sage +Lawson +Sanctuary +discharged +retains +##ube +shotgun +wilderness +Reformed +similarity +Entry +Watts +Bahá +Quest +Looks +visions +Reservoir +Arabs +curls +Blu +dripping +accomplish +Verlag +drill +sensor +Dillon +physicians +smashed +##dir +painters +Renault +straw +fading +Directorate +lounge +commissions +Brain +##graph +neo +##urg +plug +coordinated +##houses +Critical +lamps +illustrator +Returning +erosion +Crow +##ciation +blessing +Thought +Wife +medalist +synthesizer +Pam +Thornton +Esther +HBO +fond +Associates +##raz +pirate +permits +Wide +tire +##PC +Ernie +Nassau +transferring +RFC +##ntly +um +spit +AS +##mps +Mining +polar +villa +anchored +##zzi +embarrassment +relates +##ă +Rupert +counterparts +131 +Baxter +##18 +Igor +recognizes +Clive +##hane +##eries +##ibly +occurrence +##scope +fin +colorful +Rapids +banker +tile +##rative +##dus +delays +destinations +##llis +Pond +Dane +grandparents +rewarded +socially +motorway +##hof +##lying +##human +modeled +Dayton +Forward +conscience +Sharma +whistle +Mayer +Sasha +##pical +circuits +Zhou +##ça +Latvian +finalists +predators +Lafayette +closes +obligations +Resolution +##vier +Trustees +reminiscent +##hos +Highlands +Protected +asylum +evacuation +##acy +Chevrolet +confession +Somalia +emergence +separating +##rica +alright +calcium +Laurent +Welfare +Leonardo +ashes +dental +Deal +minerals +##lump +##mount +accounted +staggered +slogan +photographic +builder +##imes +##raft +tragic +144 +SEC +Hit +tailed +##ples +##rring +##rson +ethical +wrestlers +concludes +lunar +##ept +nitrogen +Aid +cyclist +quarterfinals +##ه +harvest +##hem +Pasha +IL +##mis +continually +##forth +Intel +bucket +##ended +witches +pretended +dresses +viewer +peculiar +lowering +volcano +Marilyn +Qualifier +clung +##sher +Cut +modules +Bowie +##lded +onset +transcription +residences +##pie +##itor +scrapped +##bic +Monaco +Mayo +eternity +Strike +uncovered +skeleton +##wicz +Isles +bug +Promoted +##rush +Mechanical +XII +##ivo +gripping +stubborn +velvet +TD +decommissioned +operas +spatial +unstable +Congressman +wasted +##aga +##ume +advertisements +##nya +obliged +Cannes +Conway +bricks +##gnant +##mity +##uise +jumps +Clear +##cine +##sche +chord +utter +Su +podium +spokesman +Royce +assassin +confirmation +licensing +liberty +##rata +Geographic +individually +detained +##ffe +Saturn +crushing +airplane +bushes +knights +##PD +Lilly +hurts +unexpectedly +Conservatives +pumping +Forty +candle +Pérez +peasants +supplement +Sundays +##ggs +##rries +risen +enthusiastic +corresponds +pending +##IF +Owens +floods +Painter +inflation +presumed +inscribed +Chamberlain +bizarre +1200 +liability +reacted +tub +Legacy +##eds +##pted +shone +##litz +##NC +Tiny +genome +bays +Eduardo +robbery +stall +hatch +Depot +Variety +Flora +reprinted +trembled +outlined +CR +Theresa +spans +##plication +Jensen +##eering +posting +##rky +pays +##ost +Marcos +fortifications +inferior +##ential +Devi +despair +Talbot +##chus +updates +ego +Booth +Darius +tops +##lau +Scene +##DC +Harlem +Trey +Generally +candles +##α +Neville +Admiralty +##hong +iconic +victorious +1600 +Rowan +abundance +miniseries +clutching +sanctioned +##words +obscure +##ision +##rle +##EM +disappearing +Resort +Obviously +##eb +exceeded +1870s +Adults +##cts +Cry +Kerr +ragged +selfish +##lson +circled +pillars +galaxy +##asco +##mental +rebuild +caution +Resistance +Start +bind +splitting +Baba +Hogan +ps +partnerships +slam +Peggy +courthouse +##OD +organizational +packages +Angie +##nds +possesses +##rp +Expressway +Gould +Terror +Him +Geoff +nobles +##ope +shark +##nh +identifies +##oor +testified +Playing +##ump +##isa +stool +Idol +##pice +##tana +Byrne +Gerry +grunted +26th +observing +habits +privilege +immortal +wagons +##thy +dot +Bring +##lian +##witz +newest +##uga +constraints +Screen +Issue +##RNA +##vil +reminder +##gles +addiction +piercing +stunning +var +##rita +Signal +accumulated +##wide +float +devastating +viable +cartoons +Uttar +flared +##encies +Theology +patents +##bahn +privileges +##ava +##CO +137 +##oped +##NT +orchestral +medication +225 +erect +Nadia +École +fried +Sales +scripts +##rease +airs +Cage +inadequate +structured +countless +Avengers +Kathy +disguise +mirrors +Investigation +reservation +##nson +Legends +humorous +Mona +decorations +attachment +Via +motivation +Browne +strangers +##ński +Shadows +Twins +##pressed +Alma +Nominated +##ott +Sergio +canopy +152 +Semifinals +devised +##irk +upwards +Traffic +Goddess +Move +beetles +138 +spat +##anne +holdings +##SP +tangled +Whilst +Fowler +anthem +##ING +##ogy +snarled +moonlight +songwriting +tolerance +Worlds +exams +##pia +notices +sensitivity +poetic +Stephens +Boone +insect +reconstructed +Fresh +27th +balloon +##ables +Brendan +mug +##gee +1780 +apex +exports +slides +Lahore +hiring +Shell +electorate +sexuality +poker +nonprofit +##imate +cone +##uce +Okinawa +superintendent +##HC +referenced +turret +Sprint +Citizen +equilibrium +Stafford +curb +Driver +Valerie +##rona +aching +impacts +##bol +observers +Downs +Shri +##uth +airports +##uda +assignments +curtains +solitary +icon +patrols +substances +Jasper +mountainous +Published +ached +##ingly +announce +dove +damaging +##tism +Primera +Dexter +limiting +batch +##uli +undergoing +refugee +Ye +admiral +pavement +##WR +##reed +pipeline +desires +Ramsey +Sheila +thickness +Brotherhood +Tea +instituted +Belt +Break +plots +##ais +masculine +##where +Theo +##aged +##mined +Experience +scratched +Ethiopian +Teaching +##nov +Aiden +Abe +Samoa +conditioning +##mous +Otherwise +fade +Jenks +##encing +Nat +##lain +Anyone +##kis +smirk +Riding +##nny +Bavarian +blessed +potatoes +Hook +##wise +likewise +hardened +Merry +amid +persecution +##sten +Elections +Hoffman +Pitt +##vering +distraction +exploitation +infamous +quote +averaging +healed +Rhythm +Germanic +Mormon +illuminated +guides +##ische +interfere +##ilized +rector +perennial +##ival +Everett +courtesy +##nham +Kirby +Mk +##vic +Medieval +##tale +Luigi +limp +##diction +Alive +greeting +shove +##force +##fly +Jasmine +Bend +Capt +Suzanne +ditch +134 +##nning +Host +fathers +rebuilding +Vocal +wires +##manship +tan +Factor +fixture +##LS +Māori +Plate +pyramid +##umble +slap +Schneider +yell +##ulture +##tional +Goodbye +sore +##pher +depressed +##dox +pitching +Find +Lotus +##wang +strand +Teen +debates +prevalent +##bilities +exposing +hears +billed +##rse +reorganized +compelled +disturbing +displaying +##tock +Clinical +emotionally +##iah +Derbyshire +grouped +##quel +Bahrain +Journalism +IN +persistent +blankets +Crane +camping +Direct +proving +Lola +##dding +Corporate +birthplace +##boats +##ender +Figure +dared +Assam +precursor +##nched +Tribe +Restoration +slate +Meyrick +hunted +stroking +Earlier +Kind +polls +appeals +monetary +##reate +Kira +Langdon +explores +GPS +extensions +squares +Results +draped +announcer +merit +##ennial +##tral +##roved +##cion +robots +supervisor +snorted +##group +Cannon +procession +monkey +freeze +sleeves +Nile +verdict +ropes +firearms +extraction +tensed +EC +Saunders +##tches +diamonds +Marriage +##amble +curling +Amazing +##haling +unrelated +##roads +Daughter +cum +discarded +kidney +cliffs +forested +Candy +##lap +authentic +tablet +notation +##nburg +Bulldogs +Callum +Meet +mouths +coated +##xe +Truman +combinations +##mation +Steelers +Fan +Than +paternal +##father +##uti +Rebellion +inviting +Fun +theatres +##ي +##rom +curator +##cision +networking +Oz +drought +##ssel +granting +MBA +Shelby +Elaine +jealousy +Kyoto +shores +signaling +tenants +debated +Intermediate +Wise +##hes +##pu +Havana +duke +vicious +exited +servers +Nonetheless +Reports +explode +##beth +Nationals +offerings +Oval +conferred +eponymous +folklore +##NR +Shire +planting +1783 +Zeus +accelerated +Constable +consuming +troubles +McCartney +texture +bust +Immigration +excavated +hopefully +##cession +##coe +##name +##ully +lining +Einstein +Venezuelan +reissued +minorities +Beatrice +crystals +##nies +circus +lava +Beirut +extinction +##shu +Becker +##uke +issuing +Zurich +extract +##esta +##rred +regulate +progression +hut +alcoholic +plea +AB +Norse +Hubert +Mansfield +ashamed +##put +Bombardment +stripes +electrons +Denise +horrified +Nor +arranger +Hay +Koch +##ddling +##iner +Birthday +Josie +deliberate +explorer +##jiang +##signed +Arrow +wiping +satellites +baritone +mobility +##rals +Dorset +turbine +Coffee +185 +##lder +Cara +Colts +pits +Crossing +coral +##birth +Tai +zombie +smoothly +##hp +mates +##ady +Marguerite +##tary +puzzled +tapes +overly +Sonic +Prayer +Thinking +##uf +IEEE +obligation +##cliffe +Basil +redesignated +##mmy +nostrils +Barney +XIII +##phones +vacated +unused +Berg +##roid +Towards +viola +136 +Event +subdivided +rabbit +recruiting +##nery +Namibia +##16 +##ilation +recruits +Famous +Francesca +##hari +Goa +##lat +Karachi +haul +biblical +##cible +MGM +##rta +horsepower +profitable +Grandma +importantly +Martinez +incoming +##kill +beneficial +nominal +praying +##isch +gable +nail +noises +##ttle +Polytechnic +rub +##cope +Thor +audition +erotic +##ending +##iano +Ultimately +armoured +##mum +presently +pedestrian +##tled +Ipswich +offence +##ffin +##borne +Flemish +##hman +echo +##cting +auditorium +gentlemen +winged +##tched +Nicaragua +Unknown +prosperity +exhaust +pie +Peruvian +compartment +heights +disabilities +##pole +Harding +Humphrey +postponed +moths +Mathematical +Mets +posters +axe +##nett +Nights +Typically +chuckle +councillors +alternating +141 +Norris +##ately +##etus +deficit +dreaming +cooler +oppose +Beethoven +##esis +Marquis +flashlight +headache +investor +responding +appointments +##shore +Elias +ideals +shades +torch +lingering +##real +pier +fertile +Diploma +currents +Snake +##horse +##15 +Briggs +##ota +##hima +##romatic +Coastal +Kuala +ankles +Rae +slice +Hilton +locking +Approximately +Workshop +Niagara +strangely +##scence +functionality +advertisement +Rapid +Anders +ho +Soviets +packing +basal +Sunderland +Permanent +##fting +rack +tying +Lowell +##ncing +Wizard +mighty +tertiary +pencil +dismissal +torso +grasped +##yev +Sand +gossip +##nae +Beer +implementing +##19 +##riya +Fork +Bee +##eria +Win +##cid +sailor +pressures +##oping +speculated +Freddie +originating +##DF +##SR +##outh +28th +melt +Brenda +lump +Burlington +USC +marginal +##bine +Dogs +swamp +cu +Ex +uranium +metro +spill +Pietro +seize +Chorus +partition +##dock +##media +engineered +##oria +conclusions +subdivision +##uid +Illustrated +Leading +##hora +Berkshire +definite +##books +##cin +##suke +noun +winced +Doris +dissertation +Wilderness +##quest +braced +arbitrary +kidnapping +Kurdish +##but +clearance +excavations +wanna +Allmusic +insult +presided +yacht +##SM +Honour +Tin +attracting +explosives +Gore +Bride +##ience +Packers +Devils +Observer +##course +Loser +##erry +##hardt +##mble +Cyrillic +undefeated +##stra +subordinate +##ame +Wigan +compulsory +Pauline +Cruise +Opposition +##ods +Period +dispersed +expose +##60 +##has +Certain +Clerk +Wolves +##hibition +apparatus +allegiance +orbital +justified +thanked +##ević +Biblical +Carolyn +Graves +##tton +Hercules +backgrounds +replica +1788 +aquatic +Mega +Stirling +obstacles +filing +Founder +vowels +Deborah +Rotterdam +surpassed +Belarusian +##ologists +Zambia +Ren +Olga +Alpine +bi +councillor +Oaks +Animals +eliminating +digit +Managing +##GE +laundry +##rdo +presses +slamming +Tudor +thief +posterior +##bas +Rodgers +smells +##ining +Hole +SUV +trombone +numbering +representations +Domingo +Paralympics +cartridge +##rash +Combined +shelves +Kraków +revision +##frame +Sánchez +##tracted +##bler +Alain +townships +sic +trousers +Gibbs +anterior +symmetry +vaguely +Castile +IRA +resembling +Penguin +##ulent +infections +##stant +raped +##pressive +worrying +brains +bending +JR +Evidence +Venetian +complexes +Jonah +850 +exported +Ambrose +Gap +philanthropist +##atus +Marxist +weighing +##KO +##nath +Soldiers +chiefs +reject +repeating +shaky +Zürich +preserving +##xin +cigarettes +##break +mortar +##fin +Already +reproduction +socks +Waiting +amazed +##aca +dash +##path +Airborne +##harf +##get +descending +OBE +Sant +Tess +Lucius +enjoys +##ttered +##ivation +##ete +Leinster +Phillies +execute +geological +unfinished +Courts +SP +Beaver +Duck +motions +Platinum +friction +##aud +##bet +Parts +Stade +entirety +sprang +Smithsonian +coffin +prolonged +Borneo +##vise +unanimously +##uchi +Cars +Cassandra +Australians +##CT +##rgen +Louisa +spur +Constance +##lities +Patent +racism +tempo +##ssion +##chard +##nology +##claim +Million +Nichols +##dah +Numerous +ing +Pure +plantations +donor +##EP +##rip +convenience +##plate +dots +indirect +##written +Dong +failures +adapt +wizard +unfortunately +##gion +practitioners +economically +Enrique +unchanged +kingdoms +refined +definitions +lazy +worries +railing +##nay +Kaiser +##lug +cracks +sells +ninety +##WC +Directed +denotes +developmental +papal +unfortunate +disappointing +sixteenth +Jen +##urier +NWA +drifting +Horror +##chemical +behaviors +bury +surfaced +foreigners +slick +AND +##rene +##ditions +##teral +scrap +kicks +comprise +buddy +##anda +Mental +##ype +Dom +wines +Limerick +Luca +Rand +##won +Tomatoes +homage +geometric +##nted +telescope +Shelley +poles +##fan +shareholders +Autonomous +cope +intensified +Genoa +Reformation +grazing +##tern +Zhao +provisional +##bies +Con +##riel +Cynthia +Raleigh +vivid +threaten +Length +subscription +roses +Müller +##isms +robin +##tial +Laos +Stanton +nationalism +##clave +##ND +##17 +##zz +staging +Busch +Cindy +relieve +##spective +packs +neglected +CBE +alpine +Evolution +uneasy +coastline +Destiny +Barber +Julio +##tted +informs +unprecedented +Pavilion +##bei +##ference +betrayal +awaiting +leaked +V8 +puppet +adverse +Bourne +Sunset +collectors +##glass +##sque +copied +Demon +conceded +resembled +Rafe +Levy +prosecutor +##ject +flora +manned +deaf +Mosque +reminds +Lizzie +Products +Funny +cassette +congress +##rong +Rover +tossing +prompting +chooses +Satellite +cautiously +Reese +##UT +Huang +Gloucestershire +giggled +Kitty +##å +Pleasant +Aye +##ond +judging +1860s +intentionally +Hurling +aggression +##xy +transfers +employing +##fies +##oda +Archibald +Blessed +Ski +flavor +Rosie +##burgh +sunset +Scholarship +WC +surround +ranged +##jay +Degree +Houses +squeezing +limb +premium +Leningrad +steals +##inated +##ssie +madness +vacancy +hydraulic +Northampton +##prise +Marks +Boxing +##fying +academics +##lich +##TY +CDs +##lma +hardcore +monitors +paperback +cables +Dimitri +upside +advent +Ra +##clusive +Aug +Christchurch +objected +stalked +Simple +colonists +##laid +CT +discusses +fellowship +Carnival +cares +Miracle +pastoral +rooted +shortage +borne +Quentin +meditation +tapping +Novel +##ades +Alicia +Burn +famed +residency +Fernández +Johannesburg +Zhu +offended +Mao +outward +##inas +XV +denial +noticing +##ís +quarry +##hound +##amo +Bernie +Bentley +Joanna +mortgage +##rdi +##sumption +lenses +extracted +depiction +##RE +Networks +Broad +Revenue +flickered +virgin +flanked +##о +Enterprises +probable +Liberals +Falcons +drowning +phrases +loads +assumes +inhaled +awe +logs +slightest +spiders +waterfall +##pate +rocking +shrub +##uil +roofs +##gard +prehistoric +wary +##rak +TO +clips +sustain +treason +microphone +voter +Lamb +psychologist +wrinkled +##ères +mating +Carrier +340 +##lbert +sensing +##rino +destiny +distract +weaker +UC +Nearly +neurons +spends +Apache +##rem +genuinely +wells +##lanted +stereo +##girl +Lois +Leaving +consul +fungi +Pier +Cyril +80s +Jungle +##tani +illustration +Split +##hana +Abigail +##patrick +1787 +diminished +Selected +packaging +##EG +Martínez +communal +Manufacturing +sentiment +143 +unwilling +praising +Citation +pills +##iti +##rax +muffled +neatly +workforce +Yep +leisure +Tu +##nding +Wakefield +ancestral +##uki +destructive +seas +Passion +showcase +##ceptive +heroic +142 +exhaustion +Customs +##aker +Scholar +sliced +##inian +Direction +##OW +Swansea +aluminium +##eep +ceramic +McCoy +Career +Sector +chartered +Damascus +pictured +Interest +stiffened +Plateau +obsolete +##tant +irritated +inappropriate +overs +##nko +bail +Talent +Sur +ours +##nah +barred +legged +sociology +Bud +dictionary +##luk +Cover +obey +##oring +annoying +##dong +apprentice +Cyrus +Role +##GP +##uns +##bag +Greenland +Porsche +Rocket +##32 +organism +##ntary +reliability +##vocation +##й +Found +##hine +motors +promoter +unfair +##oms +##note +distribute +eminent +rails +appealing +chiefly +meaningful +Stephan +##rehension +Consumer +psychiatric +bowler +saints +##iful +##н +1777 +Pol +Dorian +Townsend +hastily +##jima +Quincy +Sol +fascinated +Scarlet +alto +Avon +certainty +##eding +Keys +##chu +Chu +##VE +ions +tributaries +Thanksgiving +##fusion +astronomer +oxide +pavilion +Supply +Casa +Bollywood +sadly +mutations +Keller +##wave +nationals +##rgo +##ym +predict +Catholicism +Vega +##eration +##ums +Mali +tuned +Lankan +Plans +radial +Bosnian +Lexi +##14 +##ü +sacks +unpleasant +Empty +handles +##taking +Bon +switches +intently +tuition +antique +##jk +fraternity +notebook +Desmond +##sei +prostitution +##how +deed +##OP +501 +Somewhere +Rocks +##mons +campaigned +frigate +gases +suppress +##hang +Merlin +Northumberland +dominate +expeditions +thunder +##ups +##rical +Cap +thorough +Ariel +##kind +renewable +constructing +pacing +terrorists +Bowen +documentaries +westward +##lass +##nage +Merchant +##ued +Beaumont +Din +##hian +Danube +peasant +Garrison +encourages +gratitude +reminding +stormed +##ouse +pronunciation +##ailed +Weekend +suggestions +##ffing +##DI +Active +Colombo +##logists +Merrill +##cens +Archaeological +Medina +captained +##yk +duel +cracking +Wilkinson +Guam +pickup +renovations +##ël +##izer +delighted +##iri +Weaver +##ctional +tens +##hab +Clint +##usion +##each +petals +Farrell +##sable +caste +##will +Ezra +##qi +##standing +thrilled +ambush +exhaled +##SU +Resource +blur +forearm +specifications +contingent +cafe +##iology +Antony +fundraising +grape +##rgy +turnout +##udi +Clifton +laboratories +Irvine +##opus +##lid +Monthly +Bihar +statutory +Roses +Emil +##rig +lumber +optimal +##DR +pumps +plaster +Mozambique +##aco +nightclub +propelled +##hun +ked +surplus +wax +##urai +pioneered +Sunny +imprint +Forget +Eliot +approximate +patronage +##bek +##ely +##mbe +Partnership +curl +snapping +29th +Patriarch +##jord +seldom +##ature +astronomy +Bremen +XIV +airborne +205 +1778 +recognizing +stranded +arrogant +bombardment +destined +ensured +146 +robust +Davenport +Interactive +Offensive +Fi +prevents +probe +propeller +sorrow +Blade +mounting +automotive +##dged +wallet +201 +lashes +Forrest +##ift +Cell +Younger +shouts +##cki +folds +##chet +Epic +yields +homosexual +tunes +##minate +##text +Manny +chemist +hindwings +##urn +pilgrimage +##sfield +##riff +MLS +##rive +Huntington +translates +Path +slim +##ndra +##oz +climax +commuter +desperation +##reet +denying +##rious +daring +seminary +polo +##clamation +Teatro +Torah +Cats +identities +Poles +photographed +fiery +popularly +##cross +winters +Hesse +##vio +Nurse +Senegal +Salon +prescribed +justify +##gues +##и +##orted +HQ +##hiro +evaluated +momentarily +##unts +Debbie +##licity +##TP +Mighty +Rabbit +##chal +Events +Savoy +##ht +Brandenburg +Bordeaux +##laus +Release +##IE +##kowski +1900s +SK +Strauss +##aly +Sonia +Updated +synagogue +McKay +flattened +370 +clutch +contests +toast +evaluate +pope +heirs +jam +tutor +reverted +##ading +nonsense +hesitate +Lars +Ceylon +Laurie +##guchi +accordingly +customary +148 +Ethics +Multiple +instincts +IGN +##ä +bullshit +##hit +##par +desirable +##ducing +##yam +alias +ashore +licenses +##lification +misery +147 +Cola +assassinated +fiercely +##aft +las +goat +substrate +lords +Cass +Bridges +ICC +lasts +sights +reproductive +##asi +Ivory +Clean +fixing +##lace +seeming +aide +1850s +harassment +##FF +##LE +reasonably +##coat +##cano +NYC +1784 +Fifty +immunity +Canadians +Cheng +comforting +meanwhile +##tera +##blin +breeds +glowed +##vour +Aden +##verted +##aded +##oral +neat +enforced +poisoning +##ews +##hone +enforce +predecessors +survivor +Month +unfamiliar +pierced +waived +dump +responds +Mai +Declan +angular +Doesn +interpretations +##yar +invest +Dhaka +policeman +Congregation +Eighth +painfully +##este +##vior +Württemberg +##cles +blockade +encouragement +##fie +Caucasus +Malone +Universidad +utilize +Nissan +inherent +151 +agreeing +syllable +determines +Protocol +conclude +##gara +40th +Xu +Taiwanese +##ather +boiler +printer +Lacey +titular +Klaus +Fallon +Wembley +fox +Chandra +Governorate +obsessed +##Ps +micro +##25 +Cooke +gymnasium +weaving +Shall +Hussein +glaring +softball +Reader +Dominion +Trouble +varsity +Cooperation +Chaos +Kang +Kramer +Eisenhower +proves +Connie +consortium +governors +Bethany +opener +Normally +Willy +linebacker +Regent +Used +AllMusic +Twilight +##shaw +Companion +Tribunal +simpler +##gam +Experimental +Slovenian +cellar +deadline +trout +Hubbard +ads +idol +##hetto +Granada +clues +salmon +1700 +Omega +Caldwell +softened +Bills +Honolulu +##gn +Terrace +suitcase +##IL +frantic +##oons +Abbot +Sitting +Fortress +Riders +sickness +enzymes +trustee +Bern +forged +##13 +##ruff +##rl +##versity +inspector +champagne +##held +##FI +hereditary +Taliban +handball +##wine +Sioux +##dicated +honoured +139 +##tude +Skye +meanings +##rkin +cardiac +analyzed +vegetable +##FS +Royals +dial +freelance +##fest +partisan +petroleum +ridden +Lincolnshire +panting +##comb +presidents +Haley +##chs +contributes +Jew +discoveries +panicked +Woody +eyelids +Fate +Tulsa +mg +whiskey +zombies +Wii +##udge +investigators +##bull +centred +##screen +Bone +Lana +##oise +forts +##ske +Conan +Lyons +##writing +SH +##ride +rhythmic +154 +##llah +pioneers +##bright +captivity +Sanchez +Oman +##mith +Flint +Platform +##ioned +emission +packet +Persia +##formed +takeover +tempted +Vance +Few +Toni +receptions +##ن +exchanges +Camille +whale +Chronicles +##rent +##ushing +##rift +Alto +Genus +##asing +onward +foremost +longing +Rockefeller +containers +##cribe +intercepted +##olt +pleading +Bye +bee +##umbling +153 +undertake +Izzy +cheaper +Ultra +validity +##pse +Sa +hovering +##pert +vintage +engraved +##rise +farmland +##ever +##ifier +Atlantis +propose +Catalonia +plunged +##edly +demonstrates +gig +##cover +156 +Osborne +cowboy +herd +investigator +loops +Burning +rests +Instrumental +embarrassing +focal +install +readings +swirling +Chatham +parameter +##zin +##holders +Mandarin +Moody +converting +Escape +warnings +##chester +incarnation +##ophone +adopting +##lins +Cromwell +##laws +Axis +Verde +Kappa +Schwartz +Serbs +caliber +Wanna +Chung +##ality +nursery +principally +Bulletin +likelihood +logging +##erty +Boyle +supportive +twitched +##usive +builds +Marseille +omitted +motif +Lands +##lusion +##ssed +Barrow +Airfield +Harmony +WWF +endured +merging +convey +branding +examinations +167 +Italians +##dh +dude +1781 +##teau +crawling +thoughtful +clasped +concluding +brewery +Moldova +Wan +Towers +Heidelberg +202 +##ict +Lagos +imposing +##eval +##serve +Bacon +frowning +thirteenth +conception +calculations +##ович +##mile +##ivated +mutation +strap +##lund +demographic +nude +perfection +stocks +##renched +##dit +Alejandro +bites +fragment +##hack +##rchy +GB +Surgery +Berger +punish +boiling +consume +Elle +Sid +Dome +relies +Crescent +treasurer +Bloody +1758 +upheld +Guess +Restaurant +signatures +font +millennium +mural +stakes +Abel +hailed +insists +Alumni +Breton +##jun +digits +##FM +##thal +Talking +motive +reigning +babe +masks +##ø +Shaun +potato +sour +whitish +Somali +##derman +##rab +##wy +chancel +telecommunications +Noise +messenger +tidal +grinding +##ogenic +Rebel +constituent +peripheral +recruitment +##ograph +##tler +pumped +Ravi +poked +##gley +Olive +diabetes +discs +liking +sting +fits +stir +Mari +Sega +creativity +weights +Macau +mandated +Bohemia +disastrous +Katrina +Baku +Rajasthan +waiter +##psis +Siberia +verbs +##truction +patented +1782 +##ndon +Relegated +Hunters +Greenwood +Shock +accusing +skipped +Sessions +markers +subset +monumental +Viola +comparative +Alright +Barbados +setup +Session +standardized +##ík +##sket +appoint +AFB +Nationalist +##WS +Troop +leaped +Treasure +goodness +weary +originates +100th +compassion +expresses +recommend +168 +composing +seventeenth +Tex +Atlético +bald +Finding +Presidency +Sharks +favoured +inactive +##lter +suffix +princes +brighter +##ctus +classics +defendants +culminated +terribly +Strategy +evenings +##ção +##iver +##urance +absorb +##rner +Territories +RBI +soothing +Martín +concurrently +##tr +Nicholson +fibers +swam +##oney +Allie +Algerian +Dartmouth +Mafia +##bos +##tts +Councillor +vocabulary +##bla +##lé +intending +##dler +Guerrero +sunshine +pedal +##TO +administrators +periodic +scholarships +Loop +Madeline +exaggerated +##ressed +Regan +##cellular +Explorer +##oids +Alexandre +vows +Reporter +Unable +Average +absorption +##bedience +Fortunately +Auxiliary +Grandpa +##HP +##ovo +potent +temporal +adrenaline +##udo +confusing +guiding +Dry +qualifications +joking +wherein +heavyweight +##ices +nightmares +pharmaceutical +Commanding +##aled +##ove +Gregor +##UP +censorship +degradation +glorious +Austro +##rench +380 +Miriam +sped +##orous +offset +##KA +fined +specialists +Pune +João +##dina +propped +fungus +##ς +frantically +Gabrielle +Hare +committing +##plied +Ask +Wilmington +stunt +numb +warmer +preacher +earnings +##lating +integer +##ija +federation +homosexuality +##cademia +epidemic +grumbled +shoving +Milk +Satan +Tobias +innovations +##dington +geology +memoirs +##IR +spared +culminating +Daphne +Focus +severed +stricken +Paige +Mans +flats +Russo +communes +litigation +strengthening +##powered +Staffordshire +Wiltshire +Painting +Watkins +##د +specializes +Select +##rane +##aver +Fulton +playable +##VN +openings +sampling +##coon +##21 +Allah +travelers +allocation +##arily +Loch +##hm +commentators +fulfilled +##troke +Emeritus +Vanderbilt +Vijay +pledged +##tative +diagram +drilling +##MD +##plain +Edison +productivity +31st +##rying +##ption +##gano +##oration +##bara +posture +bothering +platoon +politely +##inating +redevelopment +Job +##vale +stark +incorrect +Mansion +renewal +threatens +Bahamas +fridge +##tata +Uzbekistan +##edia +Sainte +##mio +gaps +neural +##storm +overturned +Preservation +shields +##ngo +##physics +ah +gradual +killings +##anza +consultation +premiership +Felipe +coincidence +##ène +##any +Handbook +##loaded +Edit +Guns +arguably +##ş +compressed +depict +seller +##qui +Kilkenny +##kling +Olympia +librarian +##acles +dramas +JP +Kit +Maj +##lists +proprietary +##nged +##ettes +##tok +exceeding +Lock +induction +numerical +##vist +Straight +foyer +imaginary +##pop +violinist +Carla +bouncing +##ashi +abolition +##uction +restoring +scenic +##č +Doom +overthrow +para +##vid +##ughty +Concord +HC +cocaine +deputies +##aul +visibility +##wart +Kapoor +Hutchinson +##agan +flashes +kn +decreasing +##ronology +quotes +vain +satisfying +##iam +##linger +310 +Hanson +fauna +##zawa +##rrel +Trenton +##VB +Employment +vocational +Exactly +bartender +butterflies +tow +##chers +##ocks +pigs +merchandise +##game +##pine +Shea +##gration +Connell +Josephine +monopoly +##dled +Cobb +warships +cancellation +someday +stove +##Cs +candidacy +superhero +unrest +Toulouse +admiration +undergone +whirled +Reconnaissance +costly +##ships +290 +Cafe +amber +Tory +##mpt +definitive +##dress +proposes +redesigned +acceleration +##asa +##raphy +Presley +exits +Languages +##cel +Mode +spokesperson +##tius +Ban +forthcoming +grounded +ACC +compelling +logistics +retailers +abused +##gating +soda +##yland +##lution +Landmark +XVI +blush +##tem +hurling +dread +Tobago +Foley +##uad +scenarios +##mentation +##rks +Score +fatigue +hairy +correspond +##iard +defences +confiscated +##rudence +1785 +Formerly +Shot +advertised +460 +Text +ridges +Promise +Dev +exclusion +NHS +tuberculosis +rockets +##offs +sparkling +256 +disappears +mankind +##hore +HP +##omo +taxation +Multi +DS +Virgil +##ams +Dell +stacked +guessing +Jump +Nope +cheer +hates +ballots +overlooked +analyses +Prevention +maturity +dos +##cards +##lect +Mare +##yssa +Petty +##wning +differing +iOS +##ior +Joachim +Sentinel +##nstein +90s +Pamela +480 +Asher +##lary +Vicente +landings +portray +##rda +##xley +Virtual +##uary +finances +Jain +Somebody +Tri +behave +Michele +##ider +dwellings +FAA +Gallagher +##lide +Monkey +195 +aforementioned +##rism +##bey +##kim +##puted +Mesa +hopped +unopposed +recipients +Reality +Been +gritted +149 +playground +pillar +##rone +Guinness +##tad +Théâtre +depended +Tipperary +Reuben +frightening +wooded +Target +globally +##uted +Morales +Baptiste +drunken +Institut +characterised +##chemistry +Strip +discrete +Premiership +##zzling +gazing +Outer +##quisition +Sikh +Booker +##yal +contemporaries +Jericho +##chan +##physical +##witch +Militia +##rez +##zard +dangers +##utter +##₀ +Programs +darling +participates +railroads +##ienne +behavioral +bureau +##rook +161 +Hicks +##rises +Comes +inflicted +bees +kindness +norm +##ković +generators +##pard +##omy +##ili +methodology +Alvin +façade +latitude +##plified +DE +Morse +##mered +educate +intersects +##MF +##cz +##vated +AL +##graded +##fill +constitutes +artery +feudal +avant +cautious +##ogue +immigrated +##chenko +Saul +Clinic +Fang +choke +Cornelius +flexibility +temperate +pins +##erson +oddly +inequality +157 +Natasha +Sal +##uter +215 +aft +blinking +##ntino +northward +Exposition +cookies +Wedding +impulse +Overseas +terrifying +##ough +Mortimer +##see +440 +https +og +imagining +##cars +Nicola +exceptionally +threads +##cup +Oswald +Provisional +dismantled +deserves +1786 +Fairy +discourse +Counsel +departing +Arc +guarding +##orse +420 +alterations +vibrant +Em +squinted +terrace +rowing +Led +accessories +SF +Sgt +cheating +Atomic +##raj +Blackpool +##iary +boarded +substituted +bestowed +lime +kernel +##jah +Belmont +shaken +sticky +retrospective +Louie +migrants +weigh +sunglasses +thumbs +##hoff +excavation +##nks +Extra +Polo +motives +Drum +infrared +tastes +berth +verge +##stand +programmed +warmed +Shankar +Titan +chromosome +cafeteria +dividing +pepper +CPU +Stevie +satirical +Nagar +scowled +Died +backyard +##gata +##reath +##bir +Governors +portraying +##yah +Revenge +##acing +1772 +margins +Bahn +OH +lowland +##razed +catcher +replay +##yoshi +Seriously +##licit +Aristotle +##ald +Habsburg +weekday +Secretariat +CO +##dly +##joy +##stad +litre +ultra +##cke +Mongol +Tucson +correlation +compose +traps +Groups +Hai +Salvatore +##dea +cents +##eese +concession +clash +Trip +Panzer +Moroccan +cruisers +torque +Ba +grossed +##arate +restriction +concentrating +FDA +##Leod +##ones +Scholars +##esi +throbbing +specialised +##heses +Chicken +##fia +##ificant +Erich +Residence +##trate +manipulation +namesake +##tom +Hoover +cue +Lindsey +Lonely +275 +##HT +combustion +subscribers +Punjabi +respects +Jeremiah +penned +##gor +##rilla +suppression +##tration +Crimson +piston +Derry +crimson +lyrical +oversee +portrays +CF +Districts +Lenin +Cora +searches +clans +VHS +##hel +Jacqueline +Redskins +Clubs +desktop +indirectly +alternatives +marijuana +suffrage +##smos +Irwin +##liff +Process +##hawks +Sloane +##bson +Sonata +yielded +Flores +##ares +armament +adaptations +integrate +neighbours +shelters +##tour +Skinner +##jet +##tations +1774 +Peterborough +##elles +ripping +Liang +Dickinson +charities +Rwanda +monasteries +crossover +racist +barked +guerrilla +##ivate +Grayson +##iques +##vious +##got +Rolls +denominations +atom +affinity +##delity +Wish +##inted +##inae +interrogation +##cey +##erina +##lifting +192 +Sands +1779 +mast +Likewise +##hyl +##oft +contempt +##por +assaulted +fills +establishments +Mal +consulted +##omi +##sight +greet +##roma +##egan +Pulitzer +##rried +##dius +##ractical +##voked +Hasan +CB +##zzy +Romanesque +Panic +wheeled +recorder +##tters +##warm +##gly +botanist +Balkan +Lockheed +Polly +farewell +suffers +purchases +Eaton +##80 +Quick +commenting +Saga +beasts +hides +motifs +##icks +Alonso +Springer +Wikipedia +circulated +encoding +jurisdictions +snout +UAE +Integrated +unmarried +Heinz +##lein +##figured +deleted +##tley +Zen +Cycling +Fuel +Scandinavian +##rants +Conner +reef +Marino +curiously +lingered +Gina +manners +activism +Mines +Expo +Micah +promotions +Server +booked +derivatives +eastward +detailing +reelection +##chase +182 +Campeonato +Po +158 +Peel +winger +##itch +canyon +##pit +LDS +A1 +##shin +Giorgio +pathetic +##rga +##mist +Aren +##lag +confronts +motel +textbook +shine +turbines +1770 +Darcy +##cot +Southeastern +##lessness +Banner +recognise +stray +Kitchen +paperwork +realism +Chrysler +filmmakers +fishermen +##hetic +variously +Vishnu +fiddle +Eddy +Origin +##tec +##ulin +Flames +Rs +bankrupt +Extreme +Pomeranian +##emption +ratified +##iu +jockey +Stratford +##ivating +##oire +Babylon +pardon +AI +affordable +deities +disturbance +Trying +##sai +Ida +Papers +advancement +70s +archbishop +Luftwaffe +announces +tugging +##lphin +##sistence +##eel +##ishes +ambition +aura +##fled +##lected +##vue +Prasad +boiled +clarity +Violin +investigative +routing +Yankee +##uckle +McMahon +bugs +eruption +##rooms +Minutes +relics +##ckle +##nse +sipped +valves +weakly +##ital +Middleton +collided +##quer +bamboo +insignia +Tyne +exercised +Ninth +echoing +polynomial +considerations +lunged +##bius +objections +complain +disguised +plaza +##VC +institutes +Judicial +ascent +imminent +Waterford +hello +Lumpur +Niger +Goldman +vendors +Kensington +Wren +browser +##bner +##tri +##mize +##pis +##lea +Cheyenne +Bold +Settlement +Hollow +Paralympic +axle +##toire +##actic +impose +perched +utilizing +slips +Benz +Michaels +manipulate +Chiang +##mian +Dolphins +prohibition +attacker +ecology +Estadio +##SB +##uild +attracts +recalls +glacier +lad +##rima +Barlow +kHz +melodic +##aby +##iracy +assumptions +Cornish +##aru +DOS +Maddie +##mers +lyric +Luton +nm +##tron +Reno +Fin +YOU +Broadcast +Finch +sensory +##bent +Jeep +##uman +additionally +Buildings +businessmen +treaties +235 +Stranger +gateway +Charlton +accomplishments +Diary +apologized +zinc +histories +supplier +##tting +162 +asphalt +Treatment +Abbas +##pating +##yres +Bloom +sedan +soloist +##cum +antagonist +denounced +Fairfax +##aving +##enko +noticeable +Budget +Buckingham +Snyder +retreating +Jai +spoon +invading +giggle +woven +gunfire +arrests +##vered +##come +respiratory +violet +##aws +Byrd +shocking +tenant +Jamaican +Ottomans +Seal +theirs +##isse +##48 +cooperate +peering +##nius +163 +Composer +organist +Mongolian +Bauer +Spy +collects +prophecy +congregations +##moor +Brick +calculation +fixtures +exempt +##dden +Ada +Thousand +##lue +tracing +##achi +bodyguard +vicar +supplying +Łódź +interception +monitored +##heart +Paso +overlap +annoyance +##dice +yellowish +stables +elders +illegally +honesty +##oar +skinny +spinal +##puram +Bourbon +##cor +flourished +Medium +##stics +##aba +Follow +##ckey +stationary +##scription +dresser +scrutiny +Buckley +Clearly +##SF +Lyrics +##heimer +drying +Oracle +internally +rains +##last +Enemy +##oes +McLean +Ole +phosphate +Rosario +Rifles +##mium +battered +Pepper +Presidents +conquer +Château +castles +##aldo +##ulf +Depending +Lesser +Boom +trades +Peyton +164 +emphasize +accustomed +SM +Ai +Classification +##mins +##35 +##rons +leak +piled +deeds +lush +##self +beginnings +breathless +1660 +McGill +##ago +##chaft +##gies +humour +Bomb +securities +Might +##zone +##eves +Matthias +Movies +Levine +vengeance +##ads +Challenger +Misty +Traditionally +constellation +##rass +deepest +workplace +##oof +##vina +impatient +##ML +Mughal +Alessandro +scenery +Slater +postseason +troupe +##ń +Volunteers +Facility +militants +Reggie +sanctions +Expeditionary +Nam +countered +interpret +Basilica +coding +expectation +Duffy +def +Tong +wakes +Bowling +Vehicle +Adler +salad +intricate +stronghold +medley +##uries +##bur +joints +##rac +##yx +##IO +Ordnance +Welch +distributor +Ark +cavern +trench +Weiss +Mauritius +decreases +docks +eagerly +irritation +Matilda +biographer +Visiting +##marked +##iter +##ear +##gong +Moreno +attendant +Bury +instrumentation +theologian +clit +nuns +symphony +translate +375 +loser +##user +##VR +##meter +##orious +harmful +##yuki +Commissioners +Mendoza +sniffed +Hulk +##dded +##ulator +##nz +Donnell +##eka +deported +Met +SD +Aerospace +##cultural +##odes +Fantastic +cavity +remark +emblem +fearing +##iance +ICAO +Liberia +stab +##yd +Pac +Gymnasium +IS +Everton +##vanna +mantle +##ief +Ramon +##genic +Shooting +Smoke +Random +Africans +MB +tavern +bargain +voluntarily +Ion +Peoples +Rusty +attackers +Patton +sins +##cake +Hat +moderately +##hala +##alia +requesting +mechanic +##eae +Seine +Robbins +##ulum +susceptible +Bravo +Slade +Strasbourg +rubble +entrusted +Creation +##amp +smoothed +##uintet +evenly +reviewers +skip +Sculpture +177 +Rough +##rrie +Reeves +##cede +Administrator +garde +minus +carriages +grenade +Ninja +fuscous +##kley +Punk +contributors +Aragon +Tottenham +##cca +##sir +VA +laced +dealers +##sonic +crisp +harmonica +Artistic +Butch +Andes +Farmers +corridors +unseen +##tium +Countries +Lone +envisioned +Katy +##lang +##cc +Quarterly +##neck +consort +##aceae +bidding +Corey +concurrent +##acts +##gum +Highness +##lient +##rators +arising +##unta +pathways +49ers +bolted +complaining +ecosystem +libretto +Ser +narrated +212 +Soft +influx +##dder +incorporation +plagued +tents +##ddled +1750 +Risk +citation +Tomas +hostilities +seals +Bruins +Dominique +attic +competent +##UR +##cci +hugging +Breuning +bacterial +Shrewsbury +vowed +eh +elongated +hangs +render +centimeters +##ficient +Mu +turtle +besieged +##gaard +grapes +bravery +collaborations +deprived +##amine +##using +##gins +arid +##uve +coats +hanged +##sting +Pa +prefix +##ranged +Exit +Chain +Flood +Materials +suspicions +##ö +hovered +Hidden +##state +Malawi +##24 +Mandy +norms +fascinating +airlines +delivers +##rust +Cretaceous +spanned +pillows +##onomy +jar +##kka +regent +fireworks +morality +discomfort +lure +uneven +##jack +Lucian +171 +archaeology +##til +mornings +Billie +Marquess +impending +spilling +tombs +##volved +Celia +Coke +underside +##bation +Vaughn +Daytona +Godfrey +Pascal +Alien +##sign +172 +##lage +iPhone +Gonna +genocide +##rber +oven +endure +dashed +simultaneous +##phism +Wally +##rō +ants +predator +reissue +##aper +Speech +funk +Rudy +claw +Hindus +Numbers +Bing +lantern +##aurus +scattering +poisoned +##active +Andrei +algebraic +baseman +##ritz +Gregg +##cola +selections +##putation +lick +Laguna +##IX +Sumatra +Warning +turf +buyers +Burgess +Oldham +exploit +worm +initiate +strapped +tuning +filters +haze +##е +##ledge +##ydro +##culture +amendments +Promotion +##union +Clair +##uria +petty +shutting +##eveloped +Phoebe +Zeke +conducts +grains +clashes +##latter +illegitimate +willingly +Deer +Lakers +Reference +chaplain +commitments +interrupt +salvation +Panther +Qualifying +Assessment +cancel +efficiently +attorneys +Dynamo +impress +accession +clinging +randomly +reviewing +Romero +Cathy +charting +clapped +rebranded +Azerbaijani +coma +indicator +punches +##tons +Sami +monastic +prospects +Pastor +##rville +electrified +##CI +##utical +tumbled +Chef +muzzle +selecting +UP +Wheel +protocols +##tat +Extended +beautifully +nests +##stal +Andersen +##anu +##³ +##rini +kneeling +##reis +##xia +anatomy +dusty +Safe +turmoil +Bianca +##elo +analyze +##ر +##eran +podcast +Slovene +Locke +Rue +##retta +##uni +Person +Prophet +crooked +disagreed +Versailles +Sarajevo +Utrecht +##ogen +chewing +##ception +##iidae +Missile +attribute +majors +Arch +intellectuals +##andra +ideological +Cory +Salzburg +##fair +Lot +electromagnetic +Distribution +##oper +##pered +Russ +Terra +repeats +fluttered +Riga +##ific +##gt +cows +Hair +labelled +protects +Gale +Personnel +Düsseldorf +Moran +rematch +##OE +Slow +forgiveness +##ssi +proudly +Macmillan +insist +undoubtedly +Québec +Violence +##yuan +##aine +mourning +linen +accidental +##iol +##arium +grossing +lattice +maneuver +##marine +prestige +petrol +gradient +invasive +militant +Galerie +widening +##aman +##quist +disagreement +##ales +creepy +remembers +buzz +##erial +Exempt +Dirk +mon +Addison +##inen +deposed +##agon +fifteenth +Hang +ornate +slab +##lades +Fountain +contractors +das +Warwickshire +1763 +##rc +Carly +Essays +Indy +Ligue +greenhouse +slit +##sea +chewed +wink +##azi +Playhouse +##kon +Gram +Ko +Samson +creators +revive +##rians +spawned +seminars +Craft +Tall +diverted +assistants +computational +enclosure +##acity +Coca +##eve +databases +Drop +##loading +##hage +Greco +Privy +entrances +pork +prospective +Memories +robes +##market +transporting +##lik +Rudolph +Horton +visually +##uay +##nja +Centro +Tor +Howell +##rsey +admitting +postgraduate +herbs +##att +Chin +Rutherford +##bot +##etta +Seasons +explanations +##bery +Friedman +heap +##ryl +##sberg +jaws +##agh +Choi +Killing +Fanny +##suming +##hawk +hopeful +##aid +Monty +gum +remarkably +Secrets +disco +harp +advise +##avia +Marathi +##cycle +Truck +abbot +sincere +urine +##mology +masked +bathing +##tun +Fellows +##TM +##gnetic +owl +##jon +hymn +##leton +208 +hostility +##cée +baked +Bottom +##AB +shudder +##ater +##von +##hee +reorganization +Cycle +##phs +Lex +##style +##rms +Translation +##erick +##imeter +##ière +attested +Hillary +##DM +gal +wander +Salle +##laming +Perez +Pit +##LP +USAF +contexts +Disease +blazing +aroused +razor +walled +Danielle +Mont +Funk +royalty +thee +203 +donors +##erton +famously +processors +reassigned +welcoming +Goldberg +##quities +undisclosed +Orient +Patty +vaccine +refrigerator +Cypriot +consonant +##waters +176 +sober +##lement +Racecourse +##uate +Luckily +Selection +conceptual +vines +Breaking +wa +lions +oversight +sheltered +Dancer +ponds +borrow +##BB +##pulsion +Daly +##eek +fertility +spontaneous +Worldwide +gasping +##tino +169 +ABS +Vickers +ambient +energetic +prisons +##eson +Stacy +##roach +GmbH +Afro +Marin +farmhouse +pinched +##cursion +##sp +Sabine +##pire +181 +nak +swelling +humble +perfume +##balls +Rai +cannons +##taker +Married +Maltese +canals +interceptions +hats +lever +slowing +##ppy +Nike +Silas +Scarborough +skirts +166 +inauguration +Shuttle +alloy +beads +belts +Compton +Cause +battling +critique +surf +Dock +roommate +##ulet +invade +Garland +##slow +nutrition +persona +##zam +Wichita +acquaintance +coincided +##cate +Dracula +clamped +##gau +overhaul +##broken +##rrier +melodies +ventures +Paz +convex +Roots +##holding +Tribute +transgender +##ò +chimney +##riad +Ajax +Thereafter +messed +nowadays +pH +##100 +##alog +Pomerania +##yra +Rossi +glove +##TL +Races +##asily +tablets +Jase +##ttes +diner +##rns +Hu +Mohan +anytime +weighted +remixes +Dove +cherry +imports +##urity +GA +##TT +##iated +##sford +Clarkson +evidently +rugged +Dust +siding +##ometer +acquitted +choral +##mite +infants +Domenico +gallons +Atkinson +gestures +slated +##xa +Archaeology +unwanted +##ibes +##duced +premise +Colby +Geelong +disqualified +##pf +##voking +simplicity +Walkover +Qaeda +Warden +##bourg +##ān +Invasion +Babe +harness +183 +##tated +maze +Burt +bedrooms +##nsley +Horizon +##oast +minimize +peeked +MLA +Trains +tractor +nudged +##iform +Growth +Benton +separates +##about +##kari +buffer +anthropology +brigades +foil +##wu +Domain +licking +whore +##rage +##sham +Initial +Courthouse +Rutgers +dams +villains +supermarket +##brush +Brunei +Palermo +arises +Passenger +outreach +##gill +Labrador +McLaren +##uy +Lori +##fires +Heads +magistrate +¹⁄₂ +Weapons +##wai +##roke +projecting +##ulates +bordering +McKenzie +Pavel +midway +Guangzhou +streamed +racer +##lished +eccentric +spectral +206 +##mism +Wilde +Grange +preparatory +lent +##tam +starving +Gertrude +##cea +##ricted +Breakfast +Mira +blurted +derive +##lair +blunt +sob +Cheltenham +Henrik +reinstated +intends +##istan +unite +##ector +playful +sparks +mapped +Cadet +luggage +prosperous +##ein +salon +##utes +Biological +##rland +Tyrone +buyer +##lose +amounted +Saw +smirked +Ronan +Reviews +Adele +trait +##proof +Bhutan +Ginger +##junct +digitally +stirring +##isted +coconut +Hamlet +Dinner +Scale +pledge +##RP +Wrong +Goal +Panel +therapeutic +elevations +infectious +priesthood +##inda +Guyana +diagnostic +##mbre +Blackwell +sails +##arm +literal +periodically +gleaming +Robot +Rector +##abulous +##tres +Reaching +Romantic +CP +Wonderful +##tur +ornamental +##nges +traitor +##zilla +genetics +mentioning +##eim +resonance +Areas +Shopping +##nard +Gail +Solid +##rito +##mara +Willem +Chip +Matches +Volkswagen +obstacle +Organ +invites +Coral +attain +##anus +##dates +Midway +shuffled +Cecilia +dessert +Gateway +Ch +Napoleonic +Petroleum +jets +goose +striped +bowls +vibration +Sims +nickel +Thirteen +problematic +intervene +##grading +##unds +Mum +semifinal +Radical +##izations +refurbished +##sation +##harine +Maximilian +cites +Advocate +Potomac +surged +preserves +Curry +angled +ordination +##pad +Cade +##DE +##sko +researched +torpedoes +Resident +wetlands +hay +applicants +depart +Bernstein +##pic +##ario +##rae +favourable +##wari +##р +metabolism +nobleman +Defaulted +calculate +ignition +Celebrity +Belize +sulfur +Flat +Sc +USB +flicker +Hertfordshire +Sept +CFL +Pasadena +Saturdays +Titus +##nir +Canary +Computing +Isaiah +##mler +formidable +pulp +orchid +Called +Solutions +kilograms +steamer +##hil +Doncaster +successors +Stokes +Holstein +##sius +sperm +API +Rogue +instability +Acoustic +##rag +159 +undercover +Wouldn +##pra +##medical +Eliminated +honorable +##chel +denomination +abrupt +Buffy +blouse +fi +Regardless +Subsequent +##rdes +Lover +##tford +bacon +##emia +carving +##cripts +Massacre +Ramos +Latter +##ulp +ballroom +##gement +richest +bruises +Rest +Wiley +##aster +explosions +##lastic +Edo +##LD +Mir +choking +disgusted +faintly +Barracks +blasted +headlights +Tours +ensued +presentations +##cale +wrought +##oat +##coa +Quaker +##sdale +recipe +##gny +corpses +##liance +comfortably +##wat +Landscape +niche +catalyst +##leader +Securities +messy +##RL +Rodrigo +backdrop +##opping +treats +Emilio +Anand +bilateral +meadow +VC +socialism +##grad +clinics +##itating +##ppe +##ymphonic +seniors +Advisor +Armoured +Method +Alley +##orio +Sad +fueled +raided +Axel +NH +rushes +Dixie +Otis +wrecked +##22 +capitalism +café +##bbe +##pion +##forcing +Aubrey +Lublin +Whenever +Sears +Scheme +##lana +Meadows +treatise +##RI +##ustic +sacrifices +sustainability +Biography +mystical +Wanted +multiplayer +Applications +disliked +##tisfied +impaired +empirical +forgetting +Fairfield +Sunni +blurred +Growing +Avalon +coil +Camera +Skin +bruised +terminals +##fted +##roving +Commando +##hya +##sper +reservations +needles +dangling +##rsch +##rsten +##spect +##mbs +yoga +regretted +Bliss +Orion +Rufus +glucose +Olsen +autobiographical +##dened +222 +humidity +Shan +##ifiable +supper +##rou +flare +##MO +campaigning +descend +socio +declares +Mounted +Gracie +Arte +endurance +##ety +Copper +costa +airplay +##MB +Proceedings +dislike +grimaced +occupants +births +glacial +oblivious +cans +installment +muddy +##ł +captains +pneumonia +Quiet +Sloan +Excuse +##nine +Geography +gymnastics +multimedia +drains +Anthology +Gear +cylindrical +Fry +undertaking +##pler +##tility +Nan +##recht +Dub +philosophers +piss +Atari +##pha +Galicia +México +##nking +Continuing +bump +graveyard +persisted +Shrine +##erapy +defects +Advance +Bomber +##oil +##ffling +cheerful +##lix +scrub +##eto +awkwardly +collaborator +fencing +##alo +prophet +Croix +coughed +##lication +roadway +slaughter +elephants +##erated +Simpsons +vulnerability +ivory +Birth +lizard +scarce +cylinders +fortunes +##NL +Hate +Priory +##lai +McBride +##copy +Lenny +liaison +Triangle +coronation +sampled +savage +amidst +Grady +whatsoever +instinctively +Reconstruction +insides +seizure +Drawing +##rlin +Antioch +Gao +Díaz +1760 +Sparks +##tien +##bidae +rehearsal +##bbs +botanical +##hers +compensate +wholesale +Seville +shareholder +prediction +astronomical +Reddy +hardest +circling +whereabouts +termination +Rep +Assistance +Dramatic +Herb +##ghter +climbs +188 +Poole +301 +##pable +wit +##istice +Walters +relying +Jakob +##redo +proceeding +Langley +affiliates +ou +##allo +##holm +Samsung +##ishi +Missing +Xi +vertices +Claus +foam +restless +##uating +##sso +##ttering +Philips +delta +bombed +Catalogue +coaster +Ling +Willard +satire +410 +Composition +Net +Orioles +##ldon +fins +Palatinate +Woodward +tease +tilt +brightness +##70 +##bbling +##loss +##dhi +##uilt +Whoever +##yers +hitter +Elton +Extension +ace +Affair +restructuring +##loping +Paterson +hi +##rya +spouse +Shay +Himself +piles +preaching +##gical +bikes +Brave +expulsion +Mirza +stride +Trees +commemorated +famine +masonry +Selena +Watt +Banking +Rancho +Stockton +dip +tattoos +Vlad +acquainted +Flyers +ruthless +fourteenth +illustrate +##akes +EPA +##rows +##uiz +bumped +Designed +Leaders +mastered +Manfred +swirled +McCain +##rout +Artemis +rabbi +flinched +upgrades +penetrate +shipyard +transforming +caretaker +##eiro +Maureen +tightening +##founded +RAM +##icular +##mper +##rung +Fifteen +exploited +consistency +interstate +##ynn +Bridget +contamination +Mistress +##rup +coating +##FP +##jective +Libyan +211 +Gemma +dependence +shrubs +##ggled +Germain +retaliation +traction +##PP +Dangerous +terminology +psychiatrist +##garten +hurdles +Natal +wasting +Weir +revolves +stripe +##reased +preferences +##entation +##lde +##áil +##otherapy +Flame +##ologies +viruses +Label +Pandora +veil +##ogical +Coliseum +Cottage +creeping +Jong +lectured +##çaise +shoreline +##fference +##hra +Shade +Clock +Faye +bilingual +Humboldt +Operating +##fter +##was +algae +towed +amphibious +Parma +impacted +smacked +Piedmont +Monsters +##omb +Moor +##lberg +sinister +Postal +178 +Drummond +Sign +textbooks +hazardous +Brass +Rosemary +Pick +Sit +Architect +transverse +Centennial +confess +polling +##aia +Julien +##mand +consolidation +Ethel +##ulse +severity +Yorker +choreographer +1840s +##ltry +softer +versa +##geny +##quila +##jō +Caledonia +Friendship +Visa +rogue +##zzle +bait +feather +incidence +Foods +Ships +##uto +##stead +arousal +##rote +Hazel +##bolic +Swing +##ej +##cule +##jana +##metry +##uity +Valuable +##ₙ +Shropshire +##nect +365 +Ones +realise +Café +Albuquerque +##grown +##stadt +209 +##ᵢ +prefers +withstand +Lillian +MacArthur +Hara +##fulness +domination +##VO +##school +Freddy +ethnicity +##while +adorned +hormone +Calder +Domestic +Freud +Shields +##phus +##rgan +BP +Segunda +Mustang +##GI +Bonn +patiently +remarried +##umbria +Crete +Elephant +Nuremberg +tolerate +Tyson +##evich +Programming +##lander +Bethlehem +segregation +Constituency +quarterly +blushed +photographers +Sheldon +porcelain +Blanche +goddamn +lively +##fused +bumps +##eli +curated +coherent +provoked +##vet +Madeleine +##isco +rainy +Bethel +accusation +ponytail +gag +##lington +quicker +scroll +##vate +Bow +Gender +Ira +crashes +ACT +Maintenance +##aton +##ieu +bitterly +strains +rattled +vectors +##arina +##ishly +173 +parole +##nx +amusing +Gonzalez +##erative +Caucus +sensual +Penelope +coefficient +Mateo +##mani +proposition +Duty +lacrosse +proportions +Plato +profiles +Botswana +Brandt +reins +mandolin +encompassing +##gens +Kahn +prop +summon +##MR +##yrian +##zaki +Falling +conditional +thy +##bao +##ych +radioactive +##nics +Newspaper +##people +##nded +Gaming +sunny +##look +Sherwood +crafted +NJ +awoke +187 +timeline +giants +possessing +##ycle +Cheryl +ng +Ruiz +polymer +potassium +Ramsay +relocation +##leen +Sociology +##bana +Franciscan +propulsion +denote +##erjee +registers +headline +Tests +emerges +Articles +Mint +livery +breakup +kits +Rap +Browning +Bunny +##mington +##watch +Anastasia +Zachary +arranging +biographical +Erica +Nippon +##membrance +Carmel +##sport +##xes +Paddy +##holes +Issues +Spears +compliment +##stro +##graphs +Castillo +##MU +##space +Corporal +##nent +174 +Gentlemen +##ilize +##vage +convinces +Carmine +Crash +##hashi +Files +Doctors +brownish +sweating +goats +##conductor +rendition +##bt +NL +##spiration +generates +##cans +obsession +##noy +Danger +Diaz +heats +Realm +priorities +##phon +1300 +initiation +pagan +bursts +archipelago +chloride +Screenplay +Hewitt +Khmer +bang +judgement +negotiating +##ait +Mabel +densely +Boulder +knob +430 +Alfredo +##kt +pitches +##ées +##ان +Macdonald +##llum +imply +##mot +Smile +spherical +##tura +Derrick +Kelley +Nico +cortex +launches +differed +parallels +Navigation +##child +##rming +canoe +forestry +reinforce +##mote +confirming +tasting +scaled +##resh +##eting +Understanding +prevailing +Pearce +CW +earnest +Gaius +asserts +denoted +landmarks +Chargers +warns +##flies +Judges +jagged +##dain +tails +Historian +Millie +##sler +221 +##uard +absurd +Dion +##ially +makeshift +Specifically +ignorance +Eat +##ieri +comparisons +forensic +186 +Giro +skeptical +disciplinary +battleship +##45 +Libby +520 +Odyssey +ledge +##post +Eternal +Missionary +deficiency +settler +wonders +##gai +raging +##cis +Romney +Ulrich +annexation +boxers +sect +204 +ARIA +dei +Hitchcock +te +Varsity +##fic +CC +lending +##nial +##tag +##rdy +##obe +Defensive +##dson +##pore +stellar +Lam +Trials +contention +Sung +##uminous +Poe +superiority +##plicate +325 +bitten +conspicuous +##olly +Lila +Pub +Petit +distorted +ISIL +distinctly +##family +Cowboy +mutant +##cats +##week +Changes +Sinatra +epithet +neglect +Innocent +gamma +thrill +reggae +##adia +##ational +##due +landlord +##leaf +visibly +##ì +Darlington +Gomez +##iting +scarf +##lade +Hinduism +Fever +scouts +##roi +convened +##oki +184 +Lao +boycott +unemployed +##lore +##ß +##hammer +Curran +disciples +odor +##ygiene +Lighthouse +Played +whales +discretion +Yves +##ceived +pauses +coincide +##nji +dizzy +##scopic +routed +Guardians +Kellan +carnival +nasal +224 +##awed +Mitsubishi +640 +Cast +silky +Projects +joked +Huddersfield +Rothschild +zu +##olar +Divisions +mildly +##eni +##lge +Appalachian +Sahara +pinch +##roon +wardrobe +##dham +##etal +Bubba +##lini +##rumbling +Communities +Poznań +unification +Beau +Kris +SV +Rowing +Minh +reconciliation +##saki +##sor +taped +##reck +certificates +gubernatorial +rainbow +##uing +litter +##lique +##oted +Butterfly +benefited +Images +induce +Balkans +Velvet +##90 +##xon +Bowman +##breaker +penis +##nitz +##oint +##otive +crust +##pps +organizers +Outdoor +nominees +##rika +TX +##ucks +Protestants +##imation +appetite +Baja +awaited +##points +windshield +##igh +##zled +Brody +Buster +stylized +Bryce +##sz +Dollar +vest +mold +ounce +ok +receivers +##uza +Purdue +Harrington +Hodges +captures +##ggio +Reservation +##ssin +##tman +cosmic +straightforward +flipping +remixed +##athed +Gómez +Lim +motorcycles +economies +owning +Dani +##rosis +myths +sire +kindly +1768 +Bean +graphs +##mee +##RO +##geon +puppy +Stephenson +notified +##jer +Watching +##rama +Sino +urgency +Islanders +##mash +Plata +fumble +##chev +##stance +##rack +##she +facilitated +swings +akin +enduring +payload +##phine +Deputies +murals +##tooth +610 +Jays +eyeing +##quito +transparency +##cote +Timor +negatively +##isan +battled +##fected +thankful +Rage +hospitality +incorrectly +207 +entrepreneurs +##cula +##wley +hedge +##cratic +Corpus +Odessa +Whereas +##ln +fetch +happier +Amherst +bullying +graceful +Height +Bartholomew +willingness +qualifier +191 +Syed +Wesleyan +Layla +##rrence +Webber +##hum +Rat +##cket +##herence +Monterey +contaminated +Beside +Mustafa +Nana +213 +##pruce +Reason +##spense +spike +##gé +AU +disciple +charcoal +##lean +formulated +Diesel +Mariners +accreditation +glossy +1800s +##ih +Mainz +unison +Marianne +shear +overseeing +vernacular +bowled +##lett +unpopular +##ckoned +##monia +Gaston +##TI +##oters +Cups +##bones +##ports +Museo +minors +1773 +Dickens +##EL +##NBC +Presents +ambitions +axes +Río +Yukon +bedside +Ribbon +Units +faults +conceal +##lani +prevailed +214 +Goodwin +Jaguar +crumpled +Cullen +Wireless +ceded +remotely +Bin +mocking +straps +ceramics +##avi +##uding +##ader +Taft +twenties +##aked +Problem +quasi +Lamar +##ntes +##avan +Barr +##eral +hooks +sa +##ône +194 +##ross +Nero +Caine +trance +Homeland +benches +Guthrie +dismiss +##lex +César +foliage +##oot +##alty +Assyrian +Ahead +Murdoch +dictatorship +wraps +##ntal +Corridor +Mackay +respectable +jewels +understands +##pathic +Bryn +##tep +ON +capsule +intrigued +Sleeping +communists +##chayat +##current +##vez +doubling +booklet +##uche +Creed +##NU +spies +##sef +adjusting +197 +Imam +heaved +Tanya +canonical +restraint +senators +stainless +##gnate +Matter +cache +restrained +conflicting +stung +##ool +Sustainable +antiquity +193 +heavens +inclusive +##ador +fluent +303 +911 +archaeologist +superseded +##plex +Tammy +inspire +##passing +##lub +Lama +Mixing +##activated +##yote +parlor +tactic +198 +Stefano +prostitute +recycling +sorted +banana +Stacey +Musée +aristocratic +cough +##rting +authorised +gangs +runoff +thoughtfully +##nish +Fisheries +Provence +detector +hum +##zhen +pill +##árez +Map +Leaves +Peabody +skater +vent +##color +390 +cerebral +hostages +mare +Jurassic +swell +##isans +Knoxville +Naked +Malaya +scowl +Cobra +##anga +Sexual +##dron +##iae +196 +##drick +Ravens +Blaine +##throp +Ismail +symmetric +##lossom +Leicestershire +Sylvester +glazed +##tended +Radar +fused +Families +Blacks +Sale +Zion +foothills +microwave +slain +Collingwood +##pants +##dling +killers +routinely +Janice +hearings +##chanted +##ltration +continents +##iving +##yster +##shot +##yna +injected +Guillaume +##ibi +kinda +Confederacy +Barnett +disasters +incapable +##grating +rhythms +betting +draining +##hak +Callie +Glover +##iliated +Sherlock +hearted +punching +Wolverhampton +Leaf +Pi +builders +furnished +knighted +Photo +##zle +Touring +fumbled +pads +##ий +Bartlett +Gunner +eerie +Marius +Bonus +pots +##hino +##pta +Bray +Frey +Ortiz +stalls +belongings +Subway +fascination +metaphor +Bat +Boer +Colchester +sway +##gro +rhetoric +##dheim +Fool +PMID +admire +##hsil +Strand +TNA +##roth +Nottinghamshire +##mat +##yler +Oxfordshire +##nacle +##roner +BS +##nces +stimulus +transports +Sabbath +##postle +Richter +4000 +##grim +##shima +##lette +deteriorated +analogous +##ratic +UHF +energies +inspiring +Yiddish +Activities +##quential +##boe +Melville +##ilton +Judd +consonants +labs +smuggling +##fari +avid +##uc +truce +undead +##raith +Mostly +bracelet +Connection +Hussain +awhile +##UC +##vention +liable +genetically +##phic +Important +Wildcats +daddy +transmit +##cas +conserved +Yesterday +##lite +Nicky +Guys +Wilder +Lay +skinned +Communists +Garfield +Nearby +organizer +Loss +crafts +walkway +Chocolate +Sundance +Synod +##enham +modify +swayed +Surface +analysts +brackets +drone +parachute +smelling +Andrés +filthy +frogs +vertically +##OK +localities +marries +AHL +35th +##pian +Palazzo +cube +dismay +relocate +##на +Hear +##digo +##oxide +prefecture +converts +hangar +##oya +##ucking +Spectrum +deepened +spoiled +Keeping +##phobic +Verona +outrage +Improvement +##UI +masterpiece +slung +Calling +chant +Haute +mediated +manipulated +affirmed +##hesis +Hangul +skies +##llan +Worcestershire +##kos +mosaic +##bage +##wned +Putnam +folder +##LM +guts +noteworthy +##rada +AJ +sculpted +##iselle +##rang +recognizable +##pent +dolls +lobbying +impatiently +Se +staple +Serb +tandem +Hiroshima +thieves +##ynx +faculties +Norte +##alle +##trusion +chords +##ylon +Gareth +##lops +##escu +FIA +Levin +auspices +groin +Hui +nun +Listed +Honourable +Larsen +rigorous +##erer +Tonga +##pment +##rave +##track +##aa +##enary +540 +clone +sediment +esteem +sighted +cruelty +##boa +inverse +violating +Amtrak +Status +amalgamated +vertex +AR +harmless +Amir +mounts +Coronation +counseling +Audi +CO₂ +splits +##eyer +Humans +Salmon +##have +##rado +##čić +216 +takeoff +classmates +psychedelic +##gni +Gypsy +231 +Anger +GAA +ME +##nist +##tals +Lissa +Odd +baptized +Fiat +fringe +##hren +179 +elevators +perspectives +##TF +##ngle +Question +frontal +950 +thicker +Molecular +##nological +Sixteen +Baton +Hearing +commemorative +dorm +Architectural +purity +##erse +risky +Georgie +relaxing +##ugs +downed +##rar +Slim +##phy +IUCN +##thorpe +Parkinson +217 +Marley +Shipping +sweaty +Jesuits +Sindh +Janata +implying +Armenians +intercept +Ankara +commissioners +ascended +sniper +Grass +Walls +salvage +Dewey +generalized +learnt +PT +##fighter +##tech +DR +##itrus +##zza +mercenaries +slots +##burst +##finger +##nsky +Princes +Rhodesia +##munication +##strom +Fremantle +homework +ins +##Os +##hao +##uffed +Thorpe +Xiao +exquisite +firstly +liberated +technician +Oilers +Phyllis +herb +sharks +MBE +##stock +Product +banjo +##morandum +##than +Visitors +unavailable +unpublished +oxidation +Vogue +##copic +##etics +Yates +##ppard +Leiden +Trading +cottages +Principles +##Millan +##wife +##hiva +Vicar +nouns +strolled +##eorological +##eton +##science +precedent +Armand +Guido +rewards +##ilis +##tise +clipped +chick +##endra +averages +tentatively +1830s +##vos +Certainly +305 +Société +Commandant +##crats +##dified +##nka +marsh +angered +ventilation +Hutton +Ritchie +##having +Eclipse +flick +motionless +Amor +Fest +Loire +lays +##icit +##sband +Guggenheim +Luck +disrupted +##ncia +Disco +##vigator +criticisms +grins +##lons +##vial +##ody +salute +Coaches +junk +saxophonist +##eology +Uprising +Diet +##marks +chronicles +robbed +##iet +##ahi +Bohemian +magician +wavelength +Kenyan +augmented +fashionable +##ogies +Luce +F1 +Monmouth +##jos +##loop +enjoyment +exemption +Centers +##visor +Soundtrack +blinding +practitioner +solidarity +sacrificed +##oso +##cture +##riated +blended +Abd +Copyright +##nob +34th +##reak +Claudio +hectare +rotor +testify +##ends +##iably +##sume +landowner +##cess +##ckman +Eduard +Silesian +backseat +mutually +##abe +Mallory +bounds +Collective +Poet +Winkler +pertaining +scraped +Phelps +crane +flickering +Proto +bubbles +popularized +removes +##86 +Cadillac +Warfare +audible +rites +shivering +##sist +##nst +##biotic +Mon +fascist +Bali +Kathryn +ambiguous +furiously +morale +patio +Sang +inconsistent +topology +Greens +monkeys +Köppen +189 +Toy +vow +##ías +bombings +##culus +improvised +lodged +subsidiaries +garment +startling +practised +Hume +Thorn +categorized +Till +Eileen +wedge +##64 +Federico +patriotic +unlock +##oshi +badminton +Compared +Vilnius +##KE +Crimean +Kemp +decks +spaced +resolutions +sighs +##mind +Imagine +Cartoon +huddled +policemen +forwards +##rouch +equals +##nter +inspected +Charley +MG +##rte +pamphlet +Arturo +dans +scarcely +##ulton +##rvin +parental +unconstitutional +watts +Susannah +Dare +##sitive +Rowland +Valle +invalid +##ué +Detachment +acronym +Yokohama +verified +##lsson +groove +Liza +clarified +compromised +265 +##rgon +##orf +hesitant +Fruit +Application +Mathias +icons +##cell +Qin +interventions +##uron +punt +remnant +##rien +Ames +manifold +spines +floral +##zable +comrades +Fallen +orbits +Annals +hobby +Auditorium +implicated +researching +Pueblo +Ta +terminate +##pella +Rings +approximation +fuzzy +##ús +thriving +##ket +Conor +alarmed +etched +Cary +##rdon +Ally +##rington +Pay +mint +##hasa +##unity +##dman +##itate +Oceania +furrowed +trams +##aq +Wentworth +ventured +choreography +prototypes +Patel +mouthed +trenches +##licing +##yya +Lies +deception +##erve +##vations +Bertrand +earthquakes +##tography +Southwestern +##aja +token +Gupta +##yō +Beckett +initials +ironic +Tsar +subdued +shootout +sobbing +liar +Scandinavia +Souls +ch +therapist +trader +Regulation +Kali +busiest +##pation +32nd +Telephone +Vargas +##moky +##nose +##uge +Favorite +abducted +bonding +219 +255 +correction +mat +drown +fl +unbeaten +Pocket +Summers +Quite +rods +Percussion +##ndy +buzzing +cadet +Wilkes +attire +directory +utilities +naive +populous +Hendrix +##actor +disadvantage +1400 +Landon +Underworld +##ense +Occasionally +mercury +Davey +Morley +spa +wrestled +##vender +eclipse +Sienna +supplemented +thou +Stream +liturgical +##gall +##berries +##piration +1769 +Bucks +abandoning +##jutant +##nac +232 +venom +##31 +Roche +dotted +Currie +Córdoba +Milo +Sharif +divides +justification +prejudice +fortunate +##vide +##ābād +Rowe +inflammatory +##eld +avenue +Sources +##rimal +Messenger +Blanco +advocating +formulation +##pute +emphasizes +nut +Armored +##ented +nutrients +##tment +insistence +Martins +landowners +##RB +comparatively +headlines +snaps +##qing +Celebration +##mad +republican +##NE +Trace +##500 +1771 +proclamation +NRL +Rubin +Buzz +Weimar +##AG +199 +posthumous +##ental +##deacon +Distance +intensely +overheard +Arcade +diagonal +hazard +Giving +weekdays +##ù +Verdi +actresses +##hare +Pulling +##erries +##pores +catering +shortest +##ctors +##cure +##restle +##reta +##runch +##brecht +##uddin +Moments +senate +Feng +Prescott +##thest +218 +divisional +Bertie +sparse +surrounds +coupling +gravitational +werewolves +##lax +Rankings +##mated +##tries +Shia +##mart +##23 +##vocative +interfaces +morphology +newscast +##bide +inputs +solicitor +Olaf +cabinets +puzzles +##tains +Unified +##firmed +WA +solemn +##opy +Tito +Jaenelle +Neolithic +horseback +##ires +pharmacy +prevalence +##lint +Swami +##bush +##tudes +Philipp +mythical +divers +Scouting +aperture +progressively +##bay +##nio +bounce +Floor +##elf +Lucan +adulthood +helm +Bluff +Passage +Salvation +lemon +napkin +scheduling +##gets +Elements +Mina +Novak +stalled +##llister +Infrastructure +##nky +##tania +##uished +Katz +Norma +sucks +trusting +1765 +boilers +Accordingly +##hered +223 +Crowley +##fight +##ulo +Henrietta +##hani +pounder +surprises +##chor +##glia +Dukes +##cracy +##zier +##fs +Patriot +silicon +##VP +simulcast +telegraph +Mysore +cardboard +Len +##QL +Auguste +accordion +analytical +specify +ineffective +hunched +abnormal +Transylvania +##dn +##tending +Emilia +glittering +Maddy +##wana +1762 +External +Lecture +endorsement +Hernández +Anaheim +Ware +offences +##phorus +Plantation +popping +Bonaparte +disgusting +neared +##notes +Identity +heroin +nicely +##raverse +apron +congestion +##PR +padded +##fts +invaders +##came +freshly +Halle +endowed +fracture +ROM +##max +sediments +diffusion +dryly +##tara +Tam +Draw +Spin +Talon +Anthropology +##lify +nausea +##shirt +insert +Fresno +capitalist +indefinitely +apples +Gift +scooped +60s +Cooperative +mistakenly +##lover +murmur +##iger +Equipment +abusive +orphanage +##9th +##lterweight +##unda +Baird +ant +saloon +33rd +Chesapeake +##chair +##sound +##tend +chaotic +pornography +brace +##aret +heiress +SSR +resentment +Arbor +headmaster +##uren +unlimited +##with +##jn +Bram +Ely +Pokémon +pivotal +##guous +Database +Marta +Shine +stumbling +##ovsky +##skin +Henley +Polk +functioned +##layer +##pas +##udd +##MX +blackness +cadets +feral +Damian +##actions +2D +##yla +Apocalypse +##aic +inactivated +##china +##kovic +##bres +destroys +nap +Macy +sums +Madhya +Wisdom +rejects +##amel +60th +Cho +bandwidth +##sons +##obbing +##orama +Mutual +shafts +##estone +##rsen +accord +replaces +waterfront +##gonal +##rida +convictions +##ays +calmed +suppliers +Cummings +GMA +fearful +Scientist +Sinai +examines +experimented +Netflix +Enforcement +Scarlett +##lasia +Healthcare +##onte +Dude +inverted +##36 +##regation +##lidae +Munro +##angay +Airbus +overlapping +Drivers +lawsuits +bodily +##udder +Wanda +Effects +Fathers +##finery +##islav +Ridley +observatory +pod +##utrition +Electricity +landslide +##mable +##zoic +##imator +##uration +Estates +sleepy +Nickelodeon +steaming +irony +schedules +snack +spikes +Hmm +##nesia +##bella +##hibit +Greenville +plucked +Harald +##ono +Gamma +infringement +roaring +deposition +##pol +##orum +660 +seminal +passports +engagements +Akbar +rotated +##bina +##gart +Hartley +##lown +##truct +uttered +traumatic +Dex +##ôme +Holloway +MV +apartheid +##nee +Counter +Colton +OR +245 +Spaniards +Regency +Schedule +scratching +squads +verify +##alk +keyboardist +rotten +Forestry +aids +commemorating +##yed +##érie +Sting +##elly +Dai +##fers +##berley +##ducted +Melvin +cannabis +glider +##enbach +##rban +Costello +Skating +cartoonist +AN +audit +##pectator +distributing +226 +312 +interpreter +header +Alternatively +##ases +smug +##kumar +cabins +remastered +Connolly +Kelsey +LED +tentative +Check +Sichuan +shaved +##42 +Gerhard +Harvest +inward +##rque +Hopefully +hem +##34 +Typical +binds +wrath +Woodstock +forcibly +Fergus +##charged +##tured +prepares +amenities +penetration +##ghan +coarse +##oned +enthusiasts +##av +##twined +fielded +##cky +Kiel +##obia +470 +beers +tremble +youths +attendees +##cademies +##sex +Macon +communism +dir +##abi +Lennox +Wen +differentiate +jewel +##SO +activate +assert +laden +unto +Gillespie +Guillermo +accumulation +##GM +NGO +Rosenberg +calculating +drastically +##omorphic +peeled +Liège +insurgents +outdoors +##enia +Aspen +Sep +awakened +##eye +Consul +Maiden +insanity +##brian +furnace +Colours +distributions +longitudinal +syllables +##scent +Martian +accountant +Atkins +husbands +sewage +zur +collaborate +highlighting +##rites +##PI +colonization +nearer +##XT +dunes +positioning +Ku +multitude +luxurious +Volvo +linguistics +plotting +squared +##inder +outstretched +##uds +Fuji +ji +##feit +##ahu +##loat +##gado +##luster +##oku +América +##iza +Residents +vine +Pieces +DD +Vampires +##ová +smoked +harshly +spreads +##turn +##zhi +betray +electors +##settled +Considering +exploits +stamped +Dusty +enraged +Nairobi +##38 +intervened +##luck +orchestras +##lda +Hereford +Jarvis +calf +##itzer +##CH +salesman +Lovers +cigar +Angelica +doomed +heroine +##tible +Sanford +offenders +##ulously +articulated +##oam +Emanuel +Gardiner +Edna +Shu +gigantic +##stable +Tallinn +coasts +Maker +ale +stalking +##oga +##smus +lucrative +southbound +##changing +Reg +##lants +Schleswig +discount +grouping +physiological +##OH +##sun +Galen +assurance +reconcile +rib +scarlet +Thatcher +anarchist +##oom +Turnpike +##ceding +cocktail +Sweeney +Allegheny +concessions +oppression +reassuring +##poli +##ticus +##TR +##VI +##uca +##zione +directional +strikeouts +Beneath +Couldn +Kabul +##national +hydroelectric +##jit +Desire +##riot +enhancing +northbound +##PO +Ok +Routledge +volatile +Bernardo +Python +333 +ample +chestnut +automobiles +##innamon +##care +##hering +BWF +salaries +Turbo +acquisitions +##stituting +strengths +pilgrims +Ponce +Pig +Actors +Beard +sanitation +##RD +##mett +Telecommunications +worms +##idas +Juno +Larson +Ventura +Northeastern +weighs +Houghton +collaborating +lottery +##rano +Wonderland +gigs +##lmer +##zano +##edd +##nife +mixtape +predominant +tripped +##ruly +Alexei +investing +Belgarath +Brasil +hiss +##crat +##xham +Côte +560 +kilometer +##cological +analyzing +##As +engined +listener +##cakes +negotiation +##hisky +Santana +##lemma +IAAF +Seneca +skeletal +Covenant +Steiner +##lev +##uen +Neptune +retention +##upon +Closing +Czechoslovak +chalk +Navarre +NZ +##IG +##hop +##oly +##quatorial +##sad +Brewery +Conflict +Them +renew +turrets +disagree +Petra +Slave +##reole +adjustment +##dela +##regard +##sner +framing +stature +##rca +##sies +##46 +##mata +Logic +inadvertently +naturalist +spheres +towering +heightened +Dodd +rink +##fle +Keyboards +bulb +diver +ul +##tsk +Exodus +Deacon +España +Canadiens +oblique +thud +reigned +rug +Whitman +Dash +##iens +Haifa +pets +##arland +manually +dart +##bial +Sven +textiles +subgroup +Napier +graffiti +revolver +humming +Babu +protector +typed +Provinces +Sparta +Wills +subjective +##rella +temptation +##liest +FL +Sadie +manifest +Guangdong +Transfer +entertain +eve +recipes +##33 +Benedictine +retailer +##dence +establishes +##cluded +##rked +Ursula +##ltz +##lars +##rena +qualifiers +##curement +colt +depictions +##oit +Spiritual +differentiation +staffed +transitional +##lew +1761 +fatalities +##oan +Bayern +Northamptonshire +Weeks +##CU +Fife +capacities +hoarse +##latt +##ة +evidenced +##HD +##ographer +assessing +evolve +hints +42nd +streaked +##lve +Yahoo +##estive +##rned +##zas +baggage +Elected +secrecy +##champ +Character +Pen +Decca +cape +Bernardino +vapor +Dolly +counselor +##isers +Benin +##khar +##CR +notch +##thus +##racy +bounty +lend +grassland +##chtenstein +##dating +pseudo +golfer +simplest +##ceive +Lucivar +Triumph +dinosaur +dinosaurs +##šić +Seahawks +##nco +resorts +reelected +1766 +reproduce +universally +##OA +ER +tendencies +Consolidated +Massey +Tasmanian +reckless +##icz +##ricks +1755 +questionable +Audience +##lates +preseason +Quran +trivial +Haitian +Freeway +dialed +Appointed +Heard +ecosystems +##bula +hormones +Carbon +Rd +##arney +##working +Christoph +presiding +pu +##athy +Morrow +Dar +ensures +posing +remedy +EA +disclosed +##hui +##rten +rumours +surveying +##ficiency +Aziz +Jewel +Plays +##smatic +Bernhard +Christi +##eanut +##friend +jailed +##dr +govern +neighbour +butler +Acheron +murdering +oils +mac +Editorial +detectives +bolts +##ulon +Guitars +malaria +36th +Pembroke +Opened +##hium +harmonic +serum +##sio +Franks +fingernails +##gli +culturally +evolving +scalp +VP +deploy +uploaded +mater +##evo +Jammu +Spa +##icker +flirting +##cursions +Heidi +Majority +sprawled +##alytic +Zheng +bunker +##lena +ST +##tile +Jiang +ceilings +##ently +##ols +Recovery +dire +##good +Manson +Honestly +Montréal +1764 +227 +quota +Lakshmi +incentive +Accounting +##cilla +Eureka +Reaper +buzzed +##uh +courtroom +dub +##mberg +KC +Gong +Theodor +Académie +NPR +criticizing +protesting +##pired +##yric +abuses +fisheries +##minated +1767 +yd +Gemini +Subcommittee +##fuse +Duff +Wasn +Wight +cleaner +##tite +planetary +Survivor +Zionist +mounds +##rary +landfall +disruption +yielding +##yana +bids +unidentified +Garry +Ellison +Elmer +Fishing +Hayward +demos +modelling +##anche +##stick +caressed +entertained +##hesion +piers +Crimea +##mass +WHO +boulder +trunks +1640 +Biennale +Palestinians +Pursuit +##udes +Dora +contender +##dridge +Nanjing +##ezer +##former +##ibel +Whole +proliferation +##tide +##weiler +fuels +predictions +##ente +##onium +Filming +absorbing +Ramón +strangled +conveyed +inhabit +prostitutes +recession +bonded +clinched +##eak +##iji +##edar +Pleasure +Rite +Christy +Therapy +sarcasm +##collegiate +hilt +probation +Sarawak +coefficients +underworld +biodiversity +SBS +groom +brewing +dungeon +##claiming +Hari +turnover +##ntina +##omer +##opped +orthodox +styling +##tars +##ulata +priced +Marjorie +##eley +##abar +Yong +##tically +Crambidae +Hernandez +##ego +##rricular +##ark +##lamour +##llin +##augh +##tens +Advancement +Loyola +##4th +##hh +goin +marshes +Sardinia +##ša +Ljubljana +Singing +suspiciously +##hesive +Félix +Regarding +flap +stimulation +##raught +Apr +Yin +gaping +tighten +skier +##itas +##lad +##rani +264 +Ashes +Olson +Problems +Tabitha +##rading +balancing +sunrise +##ease +##iture +##ritic +Fringe +##iciency +Inspired +Linnaeus +PBA +disapproval +##kles +##rka +##tails +##urger +Disaster +Laboratories +apps +paradise +Aero +Came +sneaking +Gee +Beacon +ODI +commodity +Ellington +graphical +Gretchen +spire +##skaya +##trine +RTÉ +efficacy +plc +tribunal +##ytic +downhill +flu +medications +##kaya +widen +Sunrise +##nous +distinguishing +pawn +##BO +##irn +##ssing +##ν +Easton +##vila +Rhineland +##aque +defect +##saurus +Goose +Ju +##classified +Middlesbrough +shaping +preached +1759 +##erland +Ein +Hailey +musicals +##altered +Galileo +Hilda +Fighters +Lac +##ometric +295 +Leafs +Milano +##lta +##VD +##ivist +penetrated +Mask +Orchard +plaintiff +##icorn +Yvonne +##fred +outfielder +peek +Collier +Caracas +repealed +Bois +dell +restrict +Dolores +Hadley +peacefully +##LL +condom +Granny +Orders +sabotage +##toon +##rings +compass +marshal +gears +brigadier +dye +Yunnan +communicating +donate +emerald +vitamin +administer +Fulham +##classical +##llas +Buckinghamshire +Held +layered +disclosure +Akira +programmer +shrimp +Crusade +##ximal +Luzon +bakery +##cute +Garth +Citadel +uniquely +Curling +info +mum +Para +##ști +sleek +##ione +hey +Lantern +mesh +##lacing +##lizzard +##gade +prosecuted +Alba +Gilles +greedy +twists +##ogged +Viper +##kata +Appearances +Skyla +hymns +##pelled +curving +predictable +Grave +Watford +##dford +##liptic +##vary +Westwood +fluids +Models +statutes +##ynamite +1740 +##culate +Framework +Johanna +##gression +Vuelta +imp +##otion +##raga +##thouse +Ciudad +festivities +##love +Beyoncé +italics +##vance +DB +##haman +outs +Singers +##ueva +##urning +##51 +##ntiary +##mobile +285 +Mimi +emeritus +nesting +Keeper +Ways +##onal +##oux +Edmond +MMA +##bark +##oop +Hampson +##ñez +##rets +Gladstone +wreckage +Pont +Playboy +reluctance +##ná +apprenticeship +preferring +Value +originate +##wei +##olio +Alexia +##rog +Parachute +jammed +stud +Eton +vols +##ganized +1745 +straining +creep +indicators +##mán +humiliation +hinted +alma +tanker +##egation +Haynes +Penang +amazement +branched +rumble +##ddington +archaeologists +paranoid +expenditure +Absolutely +Musicians +banished +##fining +baptism +Joker +Persons +hemisphere +##tieth +##ück +flock +##xing +lbs +Kung +crab +##dak +##tinent +Regulations +barrage +parcel +##ós +Tanaka +##rsa +Natalia +Voyage +flaws +stepfather +##aven +##eological +Botanical +Minsk +##ckers +Cinderella +Feast +Loving +Previous +Shark +##took +barrister +collaborators +##nnes +Croydon +Graeme +Juniors +##7th +##formation +##ulos +##ák +£2 +##hwa +##rove +##ș +Whig +demeanor +Otago +##TH +##ooster +Faber +instructors +##ahl +##bha +emptied +##schen +saga +##lora +exploding +##rges +Crusaders +##caster +##uations +streaks +CBN +bows +insights +ka +1650 +diversion +LSU +Wingspan +##liva +Response +sanity +Producers +imitation +##fine +Lange +Spokane +splash +weed +Siberian +magnet +##rocodile +capitals +##rgus +swelled +Rani +Bells +Silesia +arithmetic +rumor +##hampton +favors +Weird +marketplace +##orm +tsunami +unpredictable +##citation +##ferno +Tradition +postwar +stench +succeeds +##roup +Anya +Users +oversized +totaling +pouch +##nat +Tripoli +leverage +satin +##cline +Bathurst +Lund +Niall +thereof +##quid +Bangor +barge +Animated +##53 +##alan +Ballard +utilizes +Done +ballistic +NDP +gatherings +##elin +##vening +Rockets +Sabrina +Tamara +Tribal +WTA +##citing +blinded +flux +Khalid +Una +prescription +##jee +Parents +##otics +##food +Silicon +cured +electro +perpendicular +intimacy +##rified +Lots +##ceiving +##powder +incentives +McKenna +##arma +##ounced +##rinkled +Alzheimer +##tarian +262 +Seas +##cam +Novi +##hout +##morphic +##hazar +##hul +##nington +Huron +Bahadur +Pirate +pursed +Griffiths +indicted +swap +refrain +##mulating +Lal +stomped +##Pad +##mamoto +Reef +disposed +plastered +weeping +##rato +Minas +hourly +tumors +##ruising +Lyle +##yper +##sol +Odisha +credibility +##Dowell +Braun +Graphic +lurched +muster +##nex +##ührer +##connected +##iek +##ruba +Carthage +Peck +maple +bursting +##lava +Enrico +rite +##jak +Moment +##skar +Styx +poking +Spartan +##urney +Hepburn +Mart +Titanic +newsletter +waits +Mecklenburg +agitated +eats +##dious +Chow +matrices +Maud +##sexual +sermon +234 +##sible +##lung +Qi +cemeteries +mined +sprinter +##ckett +coward +##gable +##hell +##thin +##FB +Contact +##hay +rainforest +238 +Hemisphere +boasts +##nders +##verance +##kat +Convent +Dunedin +Lecturer +lyricist +##bject +Iberian +comune +##pphire +chunk +##boo +thrusting +fore +informing +pistols +echoes +Tier +battleships +substitution +##belt +moniker +##charya +##lland +Thoroughbred +38th +##01 +##tah +parting +tongues +Cale +##seau +Unionist +modular +celebrates +preview +steamed +Bismarck +302 +737 +vamp +##finity +##nbridge +weaknesses +husky +##berman +absently +##icide +Craven +tailored +Tokugawa +VIP +syntax +Kazan +captives +doses +filtered +overview +Cleopatra +Conversely +stallion +Burger +Suez +Raoul +th +##reaves +Dickson +Nell +Rate +anal +colder +##sław +Arm +Semitic +##green +reflective +1100 +episcopal +journeys +##ours +##pository +##dering +residue +Gunn +##27 +##ntial +##crates +##zig +Astros +Renee +Emerald +##vili +connectivity +undrafted +Sampson +treasures +##kura +##theon +##vern +Destroyer +##iable +##ener +Frederic +briefcase +confinement +Bree +##WD +Athena +233 +Padres +Thom +speeding +##hali +Dental +ducks +Putin +##rcle +##lou +Asylum +##usk +dusk +pasture +Institutes +ONE +jack +##named +diplomacy +Intercontinental +Leagues +Towns +comedic +premature +##edic +##mona +##ories +trimmed +Charge +Cream +guarantees +Dmitry +splashed +Philosophical +tramway +##cape +Maynard +predatory +redundant +##gratory +##wry +sobs +Burgundy +edible +outfits +Handel +dazed +dangerously +idle +Operational +organizes +##sional +blackish +broker +weddings +##halt +Becca +McGee +##gman +protagonists +##pelling +Keynes +aux +stumble +##ordination +Nokia +reel +sexes +##woods +##pheric +##quished +##voc +##oir +##pathian +##ptus +##sma +##tating +##ê +fulfilling +sheath +##ayne +Mei +Ordinary +Collin +Sharpe +grasses +interdisciplinary +##OX +Background +##ignment +Assault +transforms +Hamas +Serge +ratios +##sik +swaying +##rcia +Rosen +##gant +##versible +cinematographer +curly +penny +Kamal +Mellon +Sailor +Spence +phased +Brewers +amassed +Societies +##ropriations +##buted +mythological +##SN +##byss +##ired +Sovereign +preface +Parry +##ife +altitudes +crossings +##28 +Crewe +southernmost +taut +McKinley +##owa +##tore +254 +##ckney +compiling +Shelton +##hiko +228 +Poll +Shepard +Labs +Pace +Carlson +grasping +##ов +Delaney +Winning +robotic +intentional +shattering +##boarding +##git +##grade +Editions +Reserves +ignorant +proposing +##hanna +cutter +Mongols +NW +##eux +Codex +Cristina +Daughters +Rees +forecast +##hita +NGOs +Stations +Beaux +Erwin +##jected +##EX +##trom +Schumacher +##hrill +##rophe +Maharaja +Oricon +##sul +##dynamic +##fighting +Ce +Ingrid +rumbled +Prospect +stairwell +Barnard +applause +complementary +##uba +grunt +##mented +Bloc +Carleton +loft +noisy +##hey +490 +contrasted +##inator +##rief +##centric +##fica +Cantonese +Blanc +Lausanne +License +artifact +##ddin +rot +Amongst +Prakash +RF +##topia +milestone +##vard +Winters +Mead +churchyard +Lulu +estuary +##ind +Cha +Infinity +Meadow +subsidies +##valent +CONCACAF +Ching +medicinal +navigate +Carver +Twice +abdominal +regulating +RB +toilets +Brewer +weakening +ambushed +##aut +##vignon +Lansing +unacceptable +reliance +stabbing +##mpo +##naire +Interview +##ested +##imed +bearings +##lts +Rashid +##iation +authenticity +vigorous +##frey +##uel +biologist +NFC +##rmaid +##wash +Makes +##aunt +##steries +withdrawing +##qa +Buccaneers +bleed +inclination +stain +##ilo +##ppel +Torre +privileged +cereal +trailers +alumnus +neon +Cochrane +Mariana +caress +##47 +##ients +experimentation +Window +convict +signaled +##YP +rower +Pharmacy +interacting +241 +Strings +dominating +kinase +Dinamo +Wire +pains +sensations +##suse +Twenty20 +##39 +spotlight +##hend +elemental +##pura +Jameson +Swindon +honoring +pained +##ediatric +##lux +Psychological +assemblies +ingredient +Martial +Penguins +beverage +Monitor +mysteries +##ION +emigration +mused +##sique +crore +AMC +Funding +Chinatown +Establishment +Finalist +enjoyable +1756 +##mada +##rams +NO +newborn +CS +comprehend +Invisible +Siemens +##acon +246 +contraction +##volving +##moration +##rok +montane +##ntation +Galloway +##llow +Verity +directorial +pearl +Leaning +##rase +Fernandez +swallowing +Automatic +Madness +haunting +paddle +##UE +##rrows +##vies +##zuki +##bolt +##iber +Fender +emails +paste +##lancing +hind +homestead +hopeless +##dles +Rockies +garlic +fatty +shrieked +##ismic +Gillian +Inquiry +Schultz +XML +##cius +##uld +Domesday +grenades +northernmost +##igi +Tbilisi +optimistic +##poon +Refuge +stacks +Bose +smash +surreal +Nah +Straits +Conquest +##roo +##weet +##kell +Gladys +CH +##lim +##vitation +Doctorate +NRHP +knocks +Bey +Romano +##pile +242 +Diamonds +strides +eclectic +Betsy +clade +##hady +##leashed +dissolve +moss +Suburban +silvery +##bria +tally +turtles +##uctive +finely +industrialist +##nary +Ernesto +oz +pact +loneliness +##hov +Tomb +multinational +risked +Layne +USL +ne +##quiries +Ad +Message +Kamen +Kristen +reefs +implements +##itative +educators +garments +gunshot +##essed +##rve +Montevideo +vigorously +Stamford +assemble +packaged +##same +état +Viva +paragraph +##eter +##wire +Stick +Navajo +MCA +##pressing +ensembles +ABA +##zor +##llus +Partner +raked +##BI +Iona +thump +Celeste +Kiran +##iscovered +##rith +inflammation +##arel +Features +loosened +##yclic +Deluxe +Speak +economical +Frankenstein +Picasso +showcased +##zad +##eira +##planes +##linear +##overs +monsoon +prosecutors +slack +Horses +##urers +Angry +coughing +##truder +Questions +##tō +##zak +challenger +clocks +##ieving +Newmarket +##acle +cursing +stimuli +##mming +##qualified +slapping +##vasive +narration +##kini +Advertising +CSI +alliances +mixes +##yes +covert +amalgamation +reproduced +##ardt +##gis +1648 +id +Annette +Boots +Champagne +Brest +Daryl +##emon +##jou +##llers +Mean +adaptive +technicians +##pair +##usal +Yoga +fronts +leaping +Jul +harvesting +keel +##44 +petitioned +##lved +yells +Endowment +proponent +##spur +##tised +##zal +Homes +Includes +##ifer +##oodoo +##rvette +awarding +mirrored +ransom +Flute +outlook +##ganj +DVDs +Sufi +frontman +Goddard +barren +##astic +Suicide +hillside +Harlow +Lau +notions +Amnesty +Homestead +##irt +GE +hooded +umpire +mustered +Catch +Masonic +##erd +Dynamics +Equity +Oro +Charts +Mussolini +populace +muted +accompaniment +##lour +##ndes +ignited +##iferous +##laced +##atch +anguish +registry +##tub +##hards +##neer +251 +Hooker +uncomfortably +##6th +##ivers +Catalina +MiG +giggling +1754 +Dietrich +Kaladin +pricing +##quence +Sabah +##lving +##nical +Gettysburg +Vita +Telecom +Worst +Palais +Pentagon +##brand +##chichte +Graf +unnatural +1715 +bio +##26 +Radcliffe +##utt +chatting +spices +##aus +untouched +##eper +Doll +turkey +Syndicate +##rlene +##JP +##roots +Como +clashed +modernization +1757 +fantasies +##iating +dissipated +Sicilian +inspect +sensible +reputed +##final +Milford +poised +RC +metabolic +Tobacco +Mecca +optimization +##heat +lobe +rabbits +NAS +geologist +##liner +Kilda +carpenter +nationalists +##brae +summarized +##venge +Designer +misleading +beamed +##meyer +Matrix +excuses +##aines +##biology +401 +Moose +drafting +Sai +##ggle +Comprehensive +dripped +skate +##WI +##enan +##ruk +narrower +outgoing +##enter +##nounce +overseen +##structure +travellers +banging +scarred +##thing +##arra +Ebert +Sometime +##nated +BAFTA +Hurricanes +configurations +##MLL +immortality +##heus +gothic +##mpest +clergyman +viewpoint +Maxim +Instituto +emitted +quantitative +1689 +Consortium +##rsk +Meat +Tao +swimmers +Shaking +Terence +mainline +##linity +Quantum +##rogate +Nair +banquet +39th +reprised +lagoon +subdivisions +synonymous +incurred +password +sprung +##vere +Credits +Petersen +Faces +##vu +statesman +Zombie +gesturing +##going +Sergey +dormant +possessive +totals +southward +Ángel +##odies +HM +Mariano +Ramirez +Wicked +impressions +##Net +##cap +##ème +Transformers +Poker +RIAA +Redesignated +##chuk +Harcourt +Peña +spacious +tinged +alternatively +narrowing +Brigham +authorization +Membership +Zeppelin +##amed +Handball +steer +##orium +##rnal +##rops +Committees +endings +##MM +##yung +ejected +grams +##relli +Birch +Hilary +Stadion +orphan +clawed +##kner +Motown +Wilkins +ballads +outspoken +##ancipation +##bankment +##cheng +Advances +harvested +novelty +ineligible +oversees +##´s +obeyed +inevitably +Kingdoms +burying +Fabian +relevance +Tatiana +##MCA +sarcastic +##onda +Akron +229 +sandwiches +Adobe +Maddox +##azar +Hunting +##onized +Smiling +##tology +Juventus +Leroy +Poets +attach +lo +##rly +##film +Structure +##igate +olds +projections +SMS +outnumbered +##tase +judiciary +paramilitary +playfully +##rsing +##tras +Chico +Vin +informally +abandonment +##russ +Baroness +injuring +octagonal +deciduous +##nea +##olm +Hz +Norwood +poses +Marissa +alerted +willed +##KS +Dino +##ddler +##vani +Barbie +Thankfully +625 +bicycles +shimmering +##tinuum +##wolf +Chesterfield +##idy +##urgency +Knowles +sweetly +Ventures +##ponents +##valence +Darryl +Powerplant +RAAF +##pec +Kingsley +Parramatta +penetrating +spectacle +##inia +Marlborough +residual +compatibility +hike +Underwood +depleted +ministries +##odus +##ropriation +rotting +Faso +##inn +Happiness +Lille +Suns +cookie +rift +warmly +##lvin +Bugs +Gotham +Gothenburg +Properties +##seller +##ubi +Created +MAC +Noelle +Requiem +Ulysses +##ails +franchises +##icious +##rwick +celestial +kinetic +720 +STS +transmissions +amplitude +forums +freeing +reptiles +tumbling +##continent +##rising +##tropy +physiology +##uster +Loves +bodied +neutrality +Neumann +assessments +Vicky +##hom +hampered +##uku +Custom +timed +##eville +##xious +elastic +##section +rig +stilled +shipment +243 +artworks +boulders +Bournemouth +##hly +##LF +##linary +rumored +##bino +##drum +Chun +Freiburg +##dges +Equality +252 +Guadalajara +##sors +##taire +Roach +cramped +##ultural +Logistics +Punch +fines +Lai +caravan +##55 +lame +Collector +pausing +315 +migrant +hawk +signalling +##erham +##oughs +Demons +surfing +Rana +insisting +Wien +adolescent +##jong +##rera +##umba +Regis +brushes +##iman +residues +storytelling +Consider +contrasting +regeneration +##elling +##hlete +afforded +reactors +costing +##biotics +##gat +##евич +chanting +secondly +confesses +##ikos +##uang +##ronological +##− +Giacomo +##eca +vaudeville +weeds +rejecting +revoked +affluent +fullback +progresses +geologic +proprietor +replication +gliding +recounted +##bah +##igma +Flow +ii +newcomer +##lasp +##miya +Candace +fractured +interiors +confidential +Inverness +footing +##robe +Coordinator +Westphalia +jumper +##chism +dormitory +##gno +281 +acknowledging +leveled +##éra +Algiers +migrate +Frog +Rare +##iovascular +##urous +DSO +nomadic +##iera +woken +lifeless +##graphical +##ifications +Dot +Sachs +crow +nmi +Tacoma +Weight +mushroom +RS +conditioned +##zine +Tunisian +altering +##mizing +Handicap +Patti +Monsieur +clicking +gorge +interrupting +##powerment +drawers +Serra +##icides +Specialist +##itte +connector +worshipped +##ask +consoles +tags +##iler +glued +##zac +fences +Bratislava +honeymoon +313 +A2 +disposition +Gentleman +Gilmore +glaciers +##scribed +Calhoun +convergence +Aleppo +shortages +##43 +##orax +##worm +##codes +##rmal +neutron +##ossa +Bloomberg +Salford +periodicals +##ryan +Slayer +##ynasties +credentials +##tista +surveyor +File +stinging +unnoticed +Medici +ecstasy +espionage +Jett +Leary +circulating +bargaining +concerto +serviced +37th +HK +##fueling +Delilah +Marcia +graded +##join +Kaplan +feasible +##nale +##yt +Burnley +dreadful +ministerial +Brewster +Judah +##ngled +##rrey +recycled +Iroquois +backstage +parchment +##numbered +Kern +Motorsports +Organizations +##mini +Seems +Warrington +Dunbar +Ezio +##eor +paralyzed +Ara +yeast +##olis +cheated +reappeared +banged +##ymph +##dick +Lyndon +glide +Mat +##natch +Hotels +Household +parasite +irrelevant +youthful +##smic +##tero +##anti +2d +Ignacio +squash +##nets +shale +##اد +Abrams +##oese +assaults +##dier +##otte +Swamp +287 +Spurs +##economic +Fargo +auditioned +##mé +Haas +une +abbreviation +Turkic +##tisfaction +favorites +specials +##lial +Enlightenment +Burkina +##vir +Comparative +Lacrosse +elves +##lerical +##pear +Borders +controllers +##villa +excelled +##acher +##varo +camouflage +perpetual +##ffles +devoid +schooner +##bered +##oris +Gibbons +Lia +discouraged +sue +##gnition +Excellent +Layton +noir +smack +##ivable +##evity +##lone +Myra +weaken +weaponry +##azza +Shake +backbone +Certified +clown +occupational +caller +enslaved +soaking +Wexford +perceive +shortlisted +##pid +feminism +Bari +Indie +##avelin +##ldo +Hellenic +Hundreds +Savings +comedies +Honors +Mohawk +Told +coded +Incorporated +hideous +trusts +hose +Calais +Forster +Gabon +Internationale +AK +Colour +##UM +##heist +McGregor +localized +##tronomy +Darrell +##iara +squirrel +freaked +##eking +##manned +##ungen +radiated +##dua +commence +Donaldson +##iddle +MR +SAS +Tavern +Teenage +admissions +Instruments +##ilizer +Konrad +contemplated +##ductor +Jing +Reacher +recalling +Dhabi +emphasizing +illumination +##tony +legitimacy +Goethe +Ritter +McDonnell +Polar +Seconds +aspiring +derby +tunic +##rmed +outlines +Changing +distortion +##cter +Mechanics +##urly +##vana +Egg +Wolverine +Stupid +centralized +knit +##Ms +Saratoga +Ogden +storylines +##vres +lavish +beverages +##grarian +Kyrgyzstan +forcefully +superb +Elm +Thessaloniki +follower +Plants +slang +trajectory +Nowadays +Bengals +Ingram +perch +coloring +carvings +doubtful +##aph +##gratulations +##41 +Curse +253 +nightstand +Campo +Meiji +decomposition +##giri +McCormick +Yours +##amon +##bang +Texans +injunction +organise +periodical +##peculative +oceans +##aley +Success +Lehigh +##guin +1730 +Davy +allowance +obituary +##tov +treasury +##wayne +euros +readiness +systematically +##stered +##igor +##xen +##cliff +##lya +Send +##umatic +Celtics +Judiciary +425 +propagation +rebellious +##ims +##lut +Dal +##ayman +##cloth +Boise +pairing +Waltz +torment +Hatch +aspirations +diaspora +##hame +Rank +237 +Including +Muir +chained +toxicity +Université +##aroo +Mathews +meadows +##bio +Editing +Khorasan +##them +##ahn +##bari +##umes +evacuate +##sium +gram +kidnap +pinning +##diation +##orms +beacon +organising +McGrath +##ogist +Qur +Tango +##ceptor +##rud +##cend +##cie +##jas +##sided +Tuscany +Venture +creations +exhibiting +##rcerer +##tten +Butcher +Divinity +Pet +Whitehead +falsely +perished +handy +Moines +cyclists +synthesizers +Mortal +notoriety +##ronic +Dialogue +expressive +uk +Nightingale +grimly +vineyards +Driving +relentless +compiler +##district +##tuated +Hades +medicines +objection +Answer +Soap +Chattanooga +##gogue +Haryana +Parties +Turtle +##ferred +explorers +stakeholders +##aar +##rbonne +tempered +conjecture +##tee +##hur +Reeve +bumper +stew +##church +##generate +##ilitating +##chanized +##elier +##enne +translucent +##lows +Publisher +evangelical +inherit +##rted +247 +SmackDown +bitterness +lesions +##worked +mosques +wed +##lashes +Ng +Rebels +booking +##nail +Incident +Sailing +yo +confirms +Chaplin +baths +##kled +modernist +pulsing +Cicero +slaughtered +boasted +##losure +zipper +##hales +aristocracy +halftime +jolt +unlawful +Marching +sustaining +Yerevan +bracket +ram +Markus +##zef +butcher +massage +##quisite +Leisure +Pizza +collapsing +##lante +commentaries +scripted +##disciplinary +##sused +eroded +alleging +vase +Chichester +Peacock +commencement +dice +hotter +poisonous +executions +##occo +frost +fielding +vendor +Counts +Troops +maize +Divisional +analogue +shadowy +Nuevo +Ville +radiating +worthless +Adriatic +Buy +blaze +brutally +horizontally +longed +##matical +federally +Rolf +Root +exclude +rag +agitation +Lounge +astonished +##wirl +Impossible +transformations +##IVE +##ceded +##slav +downloaded +fucked +Egyptians +Welles +##ffington +U2 +befriended +radios +##jid +archaic +compares +##ccelerator +##imated +##tosis +Hung +Scientists +Thousands +geographically +##LR +Macintosh +fluorescent +##ipur +Wehrmacht +##BR +##firmary +Chao +##ague +Boyer +##grounds +##hism +##mento +##taining +infancy +##cton +510 +Boca +##loy +1644 +ben +dong +stresses +Sweat +expressway +graders +ochreous +nets +Lawn +thirst +Uruguayan +satisfactory +##tracts +baroque +rusty +##ław +Shen +Gdańsk +chickens +##graving +Hodge +Papal +SAT +bearer +##ogo +##rger +merits +Calendar +Highest +Skills +##ortex +Roberta +paradigm +recounts +frigates +swamps +unitary +##oker +balloons +Hawthorne +Muse +spurred +advisors +reclaimed +stimulate +fibre +pat +repeal +##dgson +##iar +##rana +anthropologist +descends +flinch +reared +##chang +##eric +##lithic +commissioning +##cumenical +##lume +##rchen +Wolff +##tsky +Eurasian +Nepali +Nightmare +ZIP +playback +##latz +##vington +Warm +##75 +Martina +Rollins +Saetan +Variations +sorting +##م +530 +Joaquin +Ptolemy +thinner +##iator +##pticism +Cebu +Highlanders +Linden +Vanguard +##SV +##mor +##ulge +ISSN +cartridges +repression +Étienne +311 +Lauderdale +commodities +null +##rb +1720 +gearbox +##reator +Ang +Forgotten +dubious +##rls +##dicative +##phate +Groove +Herrera +##çais +Collections +Maximus +##published +Fell +Qualification +filtering +##tized +Roe +hazards +##37 +##lative +##tröm +Guadalupe +Tajikistan +Preliminary +fronted +glands +##paper +##iche +##iding +Cairns +rallies +Location +seduce +##mple +BYU +##itic +##FT +Carmichael +Prentice +songwriters +forefront +Physicians +##rille +##zee +Preparatory +##cherous +UV +##dized +Navarro +misses +##nney +Inland +resisting +##sect +Hurt +##lino +galaxies +##raze +Institutions +devote +##lamp +##ciating +baron +##bracing +Hess +operatic +##CL +##ος +Chevalier +Guiana +##lattered +Fed +##cuted +##smo +Skull +denies +236 +Waller +##mah +Sakura +mole +nominate +sermons +##bering +widowed +##röm +Cavendish +##struction +Nehru +Revelation +doom +Gala +baking +Nr +Yourself +banning +Individuals +Sykes +orchestrated +630 +Phone +steered +620 +specialising +starvation +##AV +##alet +##upation +seductive +##jects +##zure +Tolkien +Benito +Wizards +Submarine +dictator +Duo +Caden +approx +basins +##nc +shrink +##icles +##sponsible +249 +mit +outpost +##bayashi +##rouse +##tl +Jana +Lombard +RBIs +finalized +humanities +##function +Honorable +tomato +##iot +Pie +tee +##pect +Beaufort +Ferris +bucks +##graduate +##ocytes +Directory +anxiously +##nating +flanks +##Ds +virtues +##believable +Grades +criterion +manufactures +sourced +##balt +##dance +##tano +Ying +##BF +##sett +adequately +blacksmith +totaled +trapping +expanse +Historia +Worker +Sense +ascending +housekeeper +##oos +Crafts +Resurrection +##verty +encryption +##aris +##vat +##pox +##runk +##iability +gazes +spying +##ths +helmets +wired +##zophrenia +Cheung +WR +downloads +stereotypes +239 +Lucknow +bleak +Bragg +hauling +##haft +prohibit +##ermined +##castle +barony +##hta +Typhoon +antibodies +##ascism +Hawthorn +Kurdistan +Minority +Gorge +Herr +appliances +disrupt +Drugs +Lazarus +##ilia +##ryo +##tany +Gotta +Masovian +Roxy +choreographed +##rissa +turbulent +##listed +Anatomy +exiting +##det +##isław +580 +Kaufman +sage +##apa +Symposium +##rolls +Kaye +##ptera +##rocław +jerking +##menclature +Guo +M1 +resurrected +trophies +##lard +Gathering +nestled +serpent +Dow +reservoirs +Claremont +arbitration +chronicle +eki +##arded +##zers +##mmoth +Congregational +Astronomical +NE +RA +Robson +Scotch +modelled +slashed +##imus +exceeds +##roper +##utile +Laughing +vascular +superficial +##arians +Barclay +Caucasian +classmate +sibling +Kimberly +Shreveport +##ilde +##liche +Cheney +Deportivo +Veracruz +berries +##lase +Bed +MI +Anatolia +Mindanao +broadband +##olia +##arte +##wab +darts +##immer +##uze +believers +ordinance +violate +##wheel +##ynth +Alongside +Coupe +Hobbs +arrondissement +earl +townland +##dote +##lihood +##sla +Ghosts +midfield +pulmonary +##eno +cues +##gol +##zda +322 +Siena +Sultanate +Bradshaw +Pieter +##thical +Raceway +bared +competence +##ssent +Bet +##urer +##ła +Alistair +Göttingen +appropriately +forge +##osterone +##ugen +DL +345 +convoys +inventions +##resses +##cturnal +Fay +Integration +slash +##roats +Widow +barking +##fant +1A +Hooper +##cona +##runched +unreliable +##emont +##esign +##stabulary +##stop +Journalists +bony +##iba +##trata +##ège +horrific +##bish +Jocelyn +##rmon +##apon +##cier +trainers +##ulatory +1753 +BR +corpus +synthesized +##bidden +##rafford +Elgin +##entry +Doherty +clockwise +##played +spins +##ample +##bley +Cope +constructions +seater +warlord +Voyager +documenting +fairies +##viator +Lviv +jewellery +suites +##gold +Maia +NME +##eavor +##kus +Eugène +furnishings +##risto +MCC +Metropolis +Older +Telangana +##mpus +amplifier +supervising +1710 +buffalo +cushion +terminating +##powering +steak +Quickly +contracting +dem +sarcastically +Elsa +##hein +bastards +narratives +Takes +304 +composure +typing +variance +##ifice +Softball +##rations +McLaughlin +gaped +shrines +##hogany +Glamorgan +##icle +##nai +##ntin +Fleetwood +Woodland +##uxe +fictitious +shrugs +##iper +BWV +conform +##uckled +Launch +##ductory +##mized +Tad +##stituted +##free +Bel +Chávez +messing +quartz +##iculate +##folia +##lynn +ushered +##29 +##ailing +dictated +Pony +##opsis +precinct +802 +Plastic +##ughter +##uno +##porated +Denton +Matters +SPD +hating +##rogen +Essential +Deck +Dortmund +obscured +##maging +Earle +##bred +##ittle +##ropolis +saturated +##fiction +##ression +Pereira +Vinci +mute +warehouses +##ún +biographies +##icking +sealing +##dered +executing +pendant +##wives +murmurs +##oko +substrates +symmetrical +Susie +##mare +Yusuf +analogy +##urage +Lesley +limitation +##rby +##ío +disagreements +##mise +embroidered +nape +unarmed +Sumner +Stores +dwell +Wilcox +creditors +##rivatization +##shes +##amia +directs +recaptured +scouting +McGuire +cradle +##onnell +Sato +insulin +mercenary +tolerant +Macquarie +transitions +cradled +##berto +##ivism +##yotes +FF +Ke +Reach +##dbury +680 +##bill +##oja +##sui +prairie +##ogan +reactive +##icient +##rits +Cyclone +Sirius +Survival +Pak +##coach +##trar +halves +Agatha +Opus +contrasts +##jection +ominous +##iden +Baylor +Woodrow +duct +fortification +intercourse +##rois +Colbert +envy +##isi +Afterward +geared +##flections +accelerate +##lenching +Witness +##rrer +Angelina +Material +assertion +misconduct +Nix +cringed +tingling +##eti +##gned +Everest +disturb +sturdy +##keepers +##vied +Profile +heavenly +##kova +##victed +translating +##sses +316 +Invitational +Mention +martyr +##uristic +Barron +hardness +Nakamura +405 +Genevieve +reflections +##falls +jurist +##LT +Pyramid +##yme +Shoot +heck +linguist +##tower +Ives +superiors +##leo +Achilles +##phological +Christophe +Padma +precedence +grassy +Oral +resurrection +##itting +clumsy +##lten +##rue +huts +##stars +Equal +##queduct +Devin +Gaga +diocesan +##plating +##upe +##graphers +Patch +Scream +hail +moaning +tracts +##hdi +Examination +outsider +##ergic +##oter +Archipelago +Havilland +greenish +tilting +Aleksandr +Konstantin +warship +##emann +##gelist +##ought +billionaire +##blivion +321 +Hungarians +transplant +##jured +##fters +Corbin +autism +pitchers +Garner +thence +Scientology +transitioned +integrating +repetitive +##dant +Rene +vomit +##burne +1661 +Researchers +Wallis +insulted +wavy +##wati +Ewing +excitedly +##kor +frescoes +injustice +##achal +##lumber +##úl +novella +##sca +Liv +##enstein +##river +monstrous +topping +downfall +looming +sinks +trillion +##pont +Effect +##phi +##urley +Sites +catchment +##H1 +Hopper +##raiser +1642 +Maccabi +lance +##chia +##sboro +NSA +branching +retorted +tensor +Immaculate +drumming +feeder +##mony +Dyer +homicide +Temeraire +fishes +protruding +skins +orchards +##nso +inlet +ventral +##finder +Asiatic +Sul +1688 +Melinda +assigns +paranormal +gardening +Tau +calming +##inge +##crow +regimental +Nik +fastened +correlated +##gene +##rieve +Sick +##minster +##politan +hardwood +hurled +##ssler +Cinematography +rhyme +Montenegrin +Packard +debating +##itution +Helens +Trick +Museums +defiance +encompassed +##EE +##TU +##nees +##uben +##ünster +##nosis +435 +Hagen +cinemas +Corbett +commended +##fines +##oman +bosses +ripe +scraping +##loc +filly +Saddam +pointless +Faust +Orléans +Syriac +##♭ +longitude +##ropic +Alfa +bliss +gangster +##ckling +SL +blending +##eptide +##nner +bends +escorting +##bloid +##quis +burials +##sle +##è +Ambulance +insults +##gth +Antrim +unfolded +##missible +splendid +Cure +warily +Saigon +Waste +astonishment +boroughs +##VS +##dalgo +##reshing +##usage +rue +marital +versatile +unpaid +allotted +bacterium +##coil +##cue +Dorothea +IDF +##location +##yke +RPG +##tropical +devotees +liter +##pree +Johnstone +astronaut +attends +pollen +periphery +doctrines +meta +showered +##tyn +GO +Huh +laude +244 +Amar +Christensen +Ping +Pontifical +Austen +raiding +realities +##dric +urges +##dek +Cambridgeshire +##otype +Cascade +Greenberg +Pact +##cognition +##aran +##urion +Riot +mimic +Eastwood +##imating +reversal +##blast +##henian +Pitchfork +##sunderstanding +Staten +WCW +lieu +##bard +##sang +experimenting +Aquino +##lums +TNT +Hannibal +catastrophic +##lsive +272 +308 +##otypic +41st +Highways +aggregator +##fluenza +Featured +Reece +dispatch +simulated +##BE +Communion +Vinnie +hardcover +inexpensive +til +##adores +groundwater +kicker +blogs +frenzy +##wala +dealings +erase +Anglia +##umour +Hapoel +Marquette +##raphic +##tives +consult +atrocities +concussion +##érard +Decree +ethanol +##aen +Rooney +##chemist +##hoot +1620 +menacing +Schuster +##bearable +laborers +sultan +Juliana +erased +onstage +##ync +Eastman +##tick +hushed +##yrinth +Lexie +Wharton +Lev +##PL +Testing +Bangladeshi +##bba +##usions +communicated +integers +internship +societal +##odles +Loki +ET +Ghent +broadcasters +Unix +##auer +Kildare +Yamaha +##quencing +##zman +chilled +##rapped +##uant +Duval +sentiments +Oliveira +packets +Horne +##rient +Harlan +Mirage +invariant +##anger +##tensive +flexed +sweetness +##wson +alleviate +insulting +limo +Hahn +##llars +##hesia +##lapping +buys +##oaming +mocked +pursuits +scooted +##conscious +##ilian +Ballad +jackets +##kra +hilly +##cane +Scenic +McGraw +silhouette +whipping +##roduced +##wark +##chess +##rump +Lemon +calculus +demonic +##latine +Bharatiya +Govt +Que +Trilogy +Ducks +Suit +stairway +##ceipt +Isa +regulator +Automobile +flatly +##buster +##lank +Spartans +topography +Tavi +usable +Chartered +Fairchild +##sance +##vyn +Digest +nuclei +typhoon +##llon +Alvarez +DJs +Grimm +authoritative +firearm +##chschule +Origins +lair +unmistakable +##xial +##cribing +Mouth +##genesis +##shū +##gaon +##ulter +Jaya +Neck +##UN +##oing +##static +relativity +##mott +##utive +##esan +##uveau +BT +salts +##roa +Dustin +preoccupied +Novgorod +##asus +Magnum +tempting +##histling +##ilated +Musa +##ghty +Ashland +pubs +routines +##etto +Soto +257 +Featuring +Augsburg +##alaya +Bit +loomed +expects +##abby +##ooby +Auschwitz +Pendleton +vodka +##sent +rescuing +systemic +##inet +##leg +Yun +applicant +revered +##nacht +##ndas +Muller +characterization +##patient +##roft +Carole +##asperated +Amiga +disconnected +gel +##cologist +Patriotic +rallied +assign +veterinary +installing +##cedural +258 +Jang +Parisian +incarcerated +stalk +##iment +Jamal +McPherson +Palma +##oken +##viation +512 +Rourke +irrational +##rippled +Devlin +erratic +##NI +##payers +Ni +engages +Portal +aesthetics +##rrogance +Milne +assassins +##rots +335 +385 +Cambodian +Females +fellows +si +##block +##otes +Jayne +Toro +flutter +##eera +Burr +##lanche +relaxation +##fra +Fitzroy +##undy +1751 +261 +comb +conglomerate +ribbons +veto +##Es +casts +##ege +1748 +Ares +spears +spirituality +comet +##nado +##yeh +Veterinary +aquarium +yer +Councils +##oked +##ynamic +Malmö +remorse +auditions +drilled +Hoffmann +Moe +Nagoya +Yacht +##hakti +##race +##rrick +Talmud +coordinating +##EI +##bul +##his +##itors +##ligent +##uerra +Narayan +goaltender +taxa +##asures +Det +##mage +Infinite +Maid +bean +intriguing +##cription +gasps +socket +##mentary +##reus +sewing +transmitting +##different +##furbishment +##traction +Grimsby +sprawling +Shipyard +##destine +##hropic +##icked +trolley +##agi +##lesh +Josiah +invasions +Content +firefighters +intro +Lucifer +subunit +Sahib +Myrtle +inhibitor +maneuvers +##teca +Wrath +slippery +##versing +Shoes +##dial +##illiers +##luded +##mmal +##pack +handkerchief +##edestal +##stones +Fusion +cumulative +##mell +##cacia +##rudge +##utz +foe +storing +swiped +##meister +##orra +batter +strung +##venting +##kker +Doo +Taste +immensely +Fairbanks +Jarrett +Boogie +1746 +mage +Kick +legislators +medial +##ilon +##logies +##ranton +Hybrid +##uters +Tide +deportation +Metz +##secration +##virus +UFO +##fell +##orage +##raction +##rrigan +1747 +fabricated +##BM +##GR +##rter +muttering +theorist +##tamine +BMG +Kincaid +solvent +##azed +Thin +adorable +Wendell +ta +##viour +pulses +##pologies +counters +exposition +sewer +Luciano +Clancy +##angelo +##riars +Showtime +observes +frankly +##oppy +Bergman +lobes +timetable +##bri +##uest +FX +##dust +##genus +Glad +Helmut +Meridian +##besity +##ontaine +Revue +miracles +##titis +PP +bluff +syrup +307 +Messiah +##erne +interfering +picturesque +unconventional +dipping +hurriedly +Kerman +248 +Ethnic +Toward +acidic +Harrisburg +##65 +intimidating +##aal +Jed +Pontiac +munitions +##nchen +growling +mausoleum +##ération +##wami +Cy +aerospace +caucus +Doing +##around +##miring +Cuthbert +##poradic +##rovisation +##wth +evaluating +##scraper +Belinda +owes +##sitic +##thermal +##fast +economists +##lishing +##uerre +##ân +credible +##koto +Fourteen +cones +##ebrates +bookstore +towels +##phony +Appearance +newscasts +##olin +Karin +Bingham +##elves +1680 +306 +disks +##lston +##secutor +Levant +##vout +Micro +snuck +##ogel +##racker +Exploration +drastic +##kening +Elsie +endowment +##utnant +Blaze +##rrosion +leaking +45th +##rug +##uernsey +760 +Shapiro +cakes +##ehan +##mei +##ité +##kla +repetition +successively +Friendly +Île +Koreans +Au +Tirana +flourish +Spirits +Yao +reasoned +##leam +Consort +cater +marred +ordeal +supremacy +##ritable +Paisley +euro +healer +portico +wetland +##kman +restart +##habilitation +##zuka +##Script +emptiness +communion +##CF +##inhabited +##wamy +Casablanca +pulsed +##rrible +##safe +395 +Dual +Terrorism +##urge +##found +##gnolia +Courage +patriarch +segregated +intrinsic +##liography +##phe +PD +convection +##icidal +Dharma +Jimmie +texted +constituents +twitch +##calated +##mitage +##ringing +415 +milling +##geons +Armagh +Geometridae +evergreen +needy +reflex +template +##pina +Schubert +##bruck +##icted +##scher +##wildered +1749 +Joanne +clearer +##narl +278 +Print +automation +consciously +flashback +occupations +##ests +Casimir +differentiated +policing +repay +##aks +##gnesium +Evaluation +commotion +##CM +##smopolitan +Clapton +mitochondrial +Kobe +1752 +Ignoring +Vincenzo +Wet +bandage +##rassed +##unate +Maris +##eted +##hetical +figuring +##eit +##nap +leopard +strategically +##reer +Fen +Iain +##ggins +##pipe +Matteo +McIntyre +##chord +##feng +Romani +asshole +flopped +reassure +Founding +Styles +Torino +patrolling +##erging +##ibrating +##ructural +sincerity +##ät +##teacher +Juliette +##cé +##hog +##idated +##span +Winfield +##fender +##nast +##pliant +1690 +Bai +Je +Saharan +expands +Bolshevik +rotate +##root +Britannia +Severn +##cini +##gering +##say +sly +Steps +insertion +rooftop +Piece +cuffs +plausible +##zai +Provost +semantic +##data +##vade +##cimal +IPA +indictment +Libraries +flaming +highlands +liberties +##pio +Elders +aggressively +##pecific +Decision +pigeon +nominally +descriptive +adjustments +equestrian +heaving +##mour +##dives +##fty +##yton +intermittent +##naming +##sets +Calvert +Casper +Tarzan +##kot +Ramírez +##IB +##erus +Gustavo +Roller +vaulted +##solation +##formatics +##tip +Hunger +colloquially +handwriting +hearth +launcher +##idian +##ilities +##lind +##locating +Magdalena +Soo +clubhouse +##kushima +##ruit +Bogotá +Organic +Worship +##Vs +##wold +upbringing +##kick +groundbreaking +##urable +##ván +repulsed +##dira +##ditional +##ici +melancholy +##bodied +##cchi +404 +concurrency +H₂O +bouts +##gami +288 +Leto +troll +##lak +advising +bundled +##nden +lipstick +littered +##leading +##mogeneous +Experiment +Nikola +grove +##ogram +Mace +##jure +cheat +Annabelle +Tori +lurking +Emery +Walden +##riz +paints +Markets +brutality +overrun +##agu +##sat +din +ostensibly +Fielding +flees +##eron +Pound +ornaments +tornadoes +##nikov +##organisation +##reen +##Works +##ldred +##olten +##stillery +soluble +Mata +Grimes +Léon +##NF +coldly +permitting +##inga +##reaked +Agents +hostess +##dl +Dyke +Kota +avail +orderly +##saur +##sities +Arroyo +##ceps +##egro +Hawke +Noctuidae +html +seminar +##ggles +##wasaki +Clube +recited +##sace +Ascension +Fitness +dough +##ixel +Nationale +##solidate +pulpit +vassal +570 +Annapolis +bladder +phylogenetic +##iname +convertible +##ppan +Comet +paler +##definite +Spot +##dices +frequented +Apostles +slalom +##ivision +##mana +##runcated +Trojan +##agger +##iq +##league +Concept +Controller +##barian +##curate +##spersed +##tring +engulfed +inquired +##hmann +286 +##dict +##osy +##raw +MacKenzie +su +##ienced +##iggs +##quitaine +bisexual +##noon +runways +subsp +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##` +##{ +##| +##} +##~ +##¡ +##¢ +##£ +##¥ +##§ +##¨ +##© +##ª +##« +##¬ +##® +##± +##´ +##µ +##¶ +##· +##¹ +##º +##» +##¼ +##¾ +##¿ +##À +##Á +## +##Ä +##Å +##Æ +##Ç +##È +##É +##Í +##Î +##Ñ +##Ó +##Ö +##× +##Ø +##Ú +##Ü +##Þ +##â +##ã +##æ +##ç +##î +##ï +##ð +##ñ +##ô +##õ +##÷ +##û +##þ +##ÿ +##Ā +##ą +##Ć +##Č +##ď +##Đ +##đ +##ē +##ė +##ę +##ě +##ğ +##ġ +##Ħ +##ħ +##ĩ +##Ī +##İ +##ļ +##Ľ +##ľ +##Ł +##ņ +##ň +##ŋ +##Ō +##ŏ +##ő +##Œ +##œ +##ř +##Ś +##ś +##Ş +##Š +##Ţ +##ţ +##ť +##ũ +##ŭ +##ů +##ű +##ų +##ŵ +##ŷ +##ź +##Ż +##ż +##Ž +##ž +##Ə +##ƒ +##ơ +##ư +##ǎ +##ǐ +##ǒ +##ǔ +##ǫ +##Ș +##Ț +##ț +##ɐ +##ɑ +##ɔ +##ɕ +##ə +##ɛ +##ɡ +##ɣ +##ɨ +##ɪ +##ɲ +##ɾ +##ʀ +##ʁ +##ʂ +##ʃ +##ʊ +##ʋ +##ʌ +##ʐ +##ʑ +##ʒ +##ʔ +##ʰ +##ʲ +##ʳ +##ʷ +##ʻ +##ʼ +##ʾ +##ʿ +##ˈ +##ː +##ˡ +##ˢ +##ˣ +##́ +##̃ +##̍ +##̯ +##͡ +##Α +##Β +##Γ +##Δ +##Ε +##Η +##Θ +##Ι +##Κ +##Λ +##Μ +##Ν +##Ο +##Π +##Σ +##Τ +##Φ +##Χ +##Ψ +##Ω +##ά +##έ +##ή +##ί +##β +##γ +##δ +##ε +##ζ +##η +##θ +##ι +##κ +##λ +##μ +##ξ +##ο +##π +##ρ +##σ +##τ +##υ +##φ +##χ +##ψ +##ω +##ό +##ύ +##ώ +##І +##Ј +##А +##Б +##В +##Г +##Д +##Е +##Ж +##З +##И +##К +##Л +##М +##Н +##О +##П +##Р +##С +##Т +##У +##Ф +##Х +##Ц +##Ч +##Ш +##Э +##Ю +##Я +##б +##в +##г +##д +##ж +##з +##к +##л +##м +##п +##с +##т +##у +##ф +##х +##ц +##ч +##ш +##щ +##ъ +##ы +##ь +##э +##ю +##ё +##і +##ї +##ј +##њ +##ћ +##Ա +##Հ +##ա +##ե +##ի +##կ +##մ +##յ +##ն +##ո +##ս +##տ +##ր +##ւ +##ְ +##ִ +##ֵ +##ֶ +##ַ +##ָ +##ֹ +##ּ +##א +##ב +##ג +##ד +##ה +##ו +##ז +##ח +##ט +##י +##כ +##ל +##ם +##מ +##ן +##נ +##ס +##ע +##פ +##צ +##ק +##ר +##ש +##ת +##، +##ء +##آ +##أ +##إ +##ئ +##ا +##ب +##ت +##ث +##ج +##ح +##خ +##ذ +##ز +##س +##ش +##ص +##ض +##ط +##ظ +##ع +##غ +##ف +##ق +##ك +##ل +##و +##ى +##َ +##ِ +##ٹ +##پ +##چ +##ک +##گ +##ہ +##ی +##ے +##ं +##आ +##क +##ग +##च +##ज +##ण +##त +##द +##ध +##न +##प +##ब +##भ +##म +##य +##र +##ल +##व +##श +##ष +##स +##ह +##ा +##ि +##ी +##ु +##े +##ो +##् +##। +##॥ +##আ +##ই +##এ +##ও +##ক +##খ +##গ +##চ +##ছ +##জ +##ট +##ত +##থ +##দ +##ধ +##ন +##প +##ব +##ম +##য +##র +##ল +##শ +##স +##হ +##় +##া +##ি +##ী +##ু +##ে +##ো +##্ +##য় +##க +##த +##ப +##ம +##ய +##ர +##ல +##வ +##ா +##ி +##ு +##் +##ร +##་ +##ག +##ང +##ད +##ན +##བ +##མ +##ར +##ལ +##ས +##ི +##ུ +##ེ +##ོ +##ა +##ე +##ი +##ლ +##ნ +##ო +##რ +##ს +##ᴬ +##ᴵ +##ᵀ +##ᵃ +##ᵇ +##ᵈ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵖ +##ᵗ +##ᵘ +##ᵣ +##ᵤ +##ᵥ +##ᶜ +##ᶠ +##ḍ +##Ḥ +##ḥ +##Ḩ +##ḩ +##ḳ +##ṃ +##ṅ +##ṇ +##ṛ +##ṣ +##ṭ +##ạ +##ả +##ấ +##ầ +##ẩ +##ậ +##ắ +##ế +##ề +##ể +##ễ +##ệ +##ị +##ọ +##ố +##ồ +##ổ +##ộ +##ớ +##ờ +##ợ +##ụ +##ủ +##ứ +##ừ +##ử +##ữ +##ự +##ỳ +##ỹ +##ἀ +##ἐ +##ὁ +##ὐ +##ὰ +##ὶ +##ὸ +##ῆ +##ῖ +##ῦ +##ῶ +##‐ +##‑ +##‒ +##– +##— +##― +##‖ +##‘ +##’ +##‚ +##“ +##” +##„ +##† +##‡ +##• +##… +##‰ +##′ +##″ +##⁄ +##⁰ +##ⁱ +##⁴ +##⁵ +##⁶ +##⁷ +##⁸ +##⁹ +##⁻ +##ⁿ +##₅ +##₆ +##₇ +##₈ +##₉ +##₊ +##₍ +##₎ +##ₐ +##ₑ +##ₒ +##ₓ +##ₕ +##ₖ +##ₘ +##ₚ +##ₛ +##ₜ +##₤ +##€ +##₱ +##₹ +##ℓ +##№ +##ℝ +##⅓ +##← +##↑ +##→ +##↔ +##⇌ +##⇒ +##∂ +##∈ +##∗ +##∘ +##√ +##∞ +##∧ +##∨ +##∩ +##∪ +##≈ +##≠ +##≡ +##≤ +##≥ +##⊂ +##⊆ +##⊕ +##⋅ +##─ +##│ +##■ +##● +##★ +##☆ +##☉ +##♠ +##♣ +##♥ +##♦ +##♯ +##⟨ +##⟩ +##ⱼ +##、 +##。 +##《 +##》 +##「 +##」 +##『 +##』 +##〜 +##い +##う +##え +##お +##か +##き +##く +##け +##こ +##さ +##し +##す +##せ +##そ +##た +##ち +##つ +##て +##と +##な +##に +##の +##は +##ひ +##ま +##み +##む +##め +##も +##や +##ゆ +##よ +##ら +##り +##る +##れ +##ん +##ア +##ィ +##イ +##ウ +##エ +##オ +##カ +##ガ +##キ +##ク +##グ +##コ +##サ +##シ +##ジ +##ス +##ズ +##タ +##ダ +##ッ +##テ +##デ +##ト +##ド +##ナ +##ニ +##ハ +##バ +##パ +##フ +##ブ +##プ +##マ +##ミ +##ム +##ャ +##ュ +##ラ +##リ +##ル +##レ +##ロ +##ン +##・ +##ー +##一 +##三 +##上 +##下 +##中 +##事 +##二 +##井 +##京 +##人 +##亻 +##仁 +##佐 +##侍 +##光 +##公 +##力 +##北 +##十 +##南 +##原 +##口 +##史 +##司 +##吉 +##同 +##和 +##囗 +##国 +##國 +##土 +##城 +##士 +##大 +##天 +##太 +##夫 +##女 +##子 +##宀 +##安 +##宮 +##宿 +##小 +##尚 +##山 +##島 +##川 +##州 +##平 +##年 +##心 +##愛 +##戸 +##文 +##新 +##方 +##日 +##明 +##星 +##書 +##月 +##木 +##本 +##李 +##村 +##東 +##松 +##林 +##正 +##武 +##氏 +##水 +##氵 +##江 +##河 +##海 +##版 +##犬 +##王 +##生 +##田 +##白 +##皇 +##省 +##真 +##石 +##社 +##神 +##竹 +##美 +##義 +##花 +##藤 +##西 +##谷 +##車 +##辶 +##道 +##郎 +##郡 +##部 +##野 +##金 +##長 +##門 +##陽 +##青 +##食 +##馬 +##高 +##龍 +##龸 +##사 +##씨 +##의 +##이 +##한 +##fi +##fl +##! +##( +##) +##, +##- +##/ +##: