{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9763241396143519, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 10.1562, "doc_norm": 8.1484, "encoder_q-embeddings": 5173.7646, "encoder_q-layer.0": 5455.7549, "encoder_q-layer.1": 5545.5479, "encoder_q-layer.10": 13934.4404, "encoder_q-layer.11": 11743.667, "encoder_q-layer.2": 6507.3579, "encoder_q-layer.3": 6540.8105, "encoder_q-layer.4": 7023.1445, "encoder_q-layer.5": 7397.0806, "encoder_q-layer.6": 9387.0137, "encoder_q-layer.7": 10704.7969, "encoder_q-layer.8": 12827.5693, "encoder_q-layer.9": 10034.2002, "epoch": 0.0, "inbatch_neg_score": 35.0212, "inbatch_pos_score": 40.625, "learning_rate": 5.000000000000001e-07, "loss": 186.6891, "norm_diff": 0.3703, "num_tokens_overlap": 5.5895, "num_tokens_union": 55.0922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12421.0105, "preclip_grad_norm_avg": 0.0001, "query_norm": 8.5187, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7317, "sent_len_1": 66.9177, "sent_len_max_0": 18.9062, "sent_len_max_1": 190.8313, "stdk": 0.166, "stdq": 0.2206, "stdqueue_k": 0.0, "step": 100 }, { "accuracy": 14.0625, "doc_norm": 7.55, "encoder_q-embeddings": 1599.3016, "encoder_q-layer.0": 1677.9259, "encoder_q-layer.1": 1673.3319, "encoder_q-layer.10": 3894.1646, "encoder_q-layer.11": 3164.7385, "encoder_q-layer.2": 2100.9629, "encoder_q-layer.3": 2172.873, "encoder_q-layer.4": 2395.0671, "encoder_q-layer.5": 2444.3872, "encoder_q-layer.6": 2940.0808, "encoder_q-layer.7": 3446.8748, "encoder_q-layer.8": 4405.5498, "encoder_q-layer.9": 3474.2102, "epoch": 0.0, "inbatch_neg_score": 37.0427, "inbatch_pos_score": 40.1562, "learning_rate": 1.0000000000000002e-06, "loss": 101.3843, "norm_diff": 0.1674, "num_tokens_overlap": 5.5802, "num_tokens_union": 54.8779, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3835.0756, "preclip_grad_norm_avg": 0.0, "query_norm": 7.7174, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7243, "sent_len_1": 66.5886, "sent_len_max_0": 18.8863, "sent_len_max_1": 189.88, "stdk": 0.1234, "stdq": 0.1803, "stdqueue_k": 0.0, "step": 200 }, { "accuracy": 14.2578, "doc_norm": 7.3346, "encoder_q-embeddings": 1656.3473, "encoder_q-layer.0": 1556.8461, "encoder_q-layer.1": 1657.7114, "encoder_q-layer.10": 1541.0334, "encoder_q-layer.11": 1587.3755, "encoder_q-layer.2": 1867.3296, "encoder_q-layer.3": 1987.1874, "encoder_q-layer.4": 1944.4489, "encoder_q-layer.5": 1899.4171, "encoder_q-layer.6": 1806.5869, "encoder_q-layer.7": 1714.1158, "encoder_q-layer.8": 1768.2036, "encoder_q-layer.9": 1202.881, "epoch": 0.0, "inbatch_neg_score": 41.6417, "inbatch_pos_score": 42.9062, "learning_rate": 1.5e-06, "loss": 41.6766, "norm_diff": 0.1652, "num_tokens_overlap": 5.5678, "num_tokens_union": 55.0786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2426.1267, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1694, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7129, "sent_len_1": 66.8803, "sent_len_max_0": 18.86, "sent_len_max_1": 188.54, "stdk": 0.0767, "stdq": 0.1228, "stdqueue_k": 0.0, "step": 300 }, { "accuracy": 13.9648, "doc_norm": 7.4573, "encoder_q-embeddings": 281.4982, "encoder_q-layer.0": 253.37, "encoder_q-layer.1": 230.4062, "encoder_q-layer.10": 765.3887, "encoder_q-layer.11": 712.4937, "encoder_q-layer.2": 283.7398, "encoder_q-layer.3": 284.0697, "encoder_q-layer.4": 310.7588, "encoder_q-layer.5": 286.6927, "encoder_q-layer.6": 300.8003, "encoder_q-layer.7": 305.4727, "encoder_q-layer.8": 377.4149, "encoder_q-layer.9": 363.2403, "epoch": 0.0, "inbatch_neg_score": 47.6715, "inbatch_pos_score": 48.1875, "learning_rate": 2.0000000000000003e-06, "loss": 19.9781, "norm_diff": 0.2547, "num_tokens_overlap": 5.5646, "num_tokens_union": 54.8273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 562.4151, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2026, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6813, "sent_len_1": 66.5269, "sent_len_max_0": 18.8087, "sent_len_max_1": 189.37, "stdk": 0.05, "stdq": 0.0933, "stdqueue_k": 0.0, "step": 400 }, { "accuracy": 16.0156, "doc_norm": 7.567, "encoder_q-embeddings": 75.5583, "encoder_q-layer.0": 87.3054, "encoder_q-layer.1": 69.0592, "encoder_q-layer.10": 326.3888, "encoder_q-layer.11": 296.4033, "encoder_q-layer.2": 81.0401, "encoder_q-layer.3": 84.4485, "encoder_q-layer.4": 93.3376, "encoder_q-layer.5": 101.3786, "encoder_q-layer.6": 108.2186, "encoder_q-layer.7": 125.7857, "encoder_q-layer.8": 181.4034, "encoder_q-layer.9": 170.2371, "epoch": 0.0, "inbatch_neg_score": 53.4206, "inbatch_pos_score": 53.7188, "learning_rate": 2.5e-06, "loss": 9.9028, "norm_diff": 0.089, "num_tokens_overlap": 5.5696, "num_tokens_union": 54.868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 221.558, "preclip_grad_norm_avg": 0.0, "query_norm": 7.478, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.72, "sent_len_1": 66.5307, "sent_len_max_0": 18.8325, "sent_len_max_1": 189.8487, "stdk": 0.0362, "stdq": 0.0698, "stdqueue_k": 0.0, "step": 500 }, { "accuracy": 26.3672, "doc_norm": 7.5962, "encoder_q-embeddings": 45.304, "encoder_q-layer.0": 37.3599, "encoder_q-layer.1": 35.8121, "encoder_q-layer.10": 163.0255, "encoder_q-layer.11": 159.0199, "encoder_q-layer.2": 41.9271, "encoder_q-layer.3": 45.7976, "encoder_q-layer.4": 53.3205, "encoder_q-layer.5": 53.997, "encoder_q-layer.6": 61.0464, "encoder_q-layer.7": 68.0424, "encoder_q-layer.8": 107.6682, "encoder_q-layer.9": 109.232, "epoch": 0.01, "inbatch_neg_score": 55.7354, "inbatch_pos_score": 56.0312, "learning_rate": 3e-06, "loss": 5.9139, "norm_diff": 0.0069, "num_tokens_overlap": 5.5697, "num_tokens_union": 55.193, "postclip_grad_norm": 1.0, "preclip_grad_norm": 116.1296, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5898, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.708, "sent_len_1": 67.0493, "sent_len_max_0": 18.9475, "sent_len_max_1": 188.8013, "stdk": 0.0349, "stdq": 0.0529, "stdqueue_k": 0.0, "step": 600 }, { "accuracy": 32.0312, "doc_norm": 7.5603, "encoder_q-embeddings": 34.0378, "encoder_q-layer.0": 26.3538, "encoder_q-layer.1": 25.6397, "encoder_q-layer.10": 88.877, "encoder_q-layer.11": 82.1513, "encoder_q-layer.2": 26.0068, "encoder_q-layer.3": 27.489, "encoder_q-layer.4": 28.4353, "encoder_q-layer.5": 27.6915, "encoder_q-layer.6": 30.1045, "encoder_q-layer.7": 34.2739, "encoder_q-layer.8": 49.009, "encoder_q-layer.9": 50.365, "epoch": 0.01, "inbatch_neg_score": 55.673, "inbatch_pos_score": 56.0, "learning_rate": 3.5000000000000004e-06, "loss": 4.5275, "norm_diff": 0.0078, "num_tokens_overlap": 5.5681, "num_tokens_union": 54.9396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 64.5374, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5682, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6939, "sent_len_1": 66.7269, "sent_len_max_0": 18.86, "sent_len_max_1": 191.7163, "stdk": 0.0348, "stdq": 0.0496, "stdqueue_k": 0.0, "step": 700 }, { "accuracy": 34.082, "doc_norm": 7.537, "encoder_q-embeddings": 31.9287, "encoder_q-layer.0": 24.6961, "encoder_q-layer.1": 23.363, "encoder_q-layer.10": 77.555, "encoder_q-layer.11": 77.7711, "encoder_q-layer.2": 24.3034, "encoder_q-layer.3": 25.9786, "encoder_q-layer.4": 26.7285, "encoder_q-layer.5": 27.2328, "encoder_q-layer.6": 28.2042, "encoder_q-layer.7": 34.4852, "encoder_q-layer.8": 46.6129, "encoder_q-layer.9": 50.1409, "epoch": 0.01, "inbatch_neg_score": 55.4682, "inbatch_pos_score": 55.7812, "learning_rate": 4.000000000000001e-06, "loss": 4.147, "norm_diff": 0.0099, "num_tokens_overlap": 5.5839, "num_tokens_union": 55.0028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 60.4376, "preclip_grad_norm_avg": 0.0, "query_norm": 7.547, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7301, "sent_len_1": 66.7245, "sent_len_max_0": 18.815, "sent_len_max_1": 187.2413, "stdk": 0.0359, "stdq": 0.0457, "stdqueue_k": 0.0, "step": 800 }, { "accuracy": 37.793, "doc_norm": 7.524, "encoder_q-embeddings": 30.3284, "encoder_q-layer.0": 22.232, "encoder_q-layer.1": 22.6103, "encoder_q-layer.10": 69.0838, "encoder_q-layer.11": 70.3429, "encoder_q-layer.2": 22.7736, "encoder_q-layer.3": 23.5125, "encoder_q-layer.4": 24.9892, "encoder_q-layer.5": 25.0999, "encoder_q-layer.6": 27.061, "encoder_q-layer.7": 30.8521, "encoder_q-layer.8": 43.0556, "encoder_q-layer.9": 46.1945, "epoch": 0.01, "inbatch_neg_score": 55.3297, "inbatch_pos_score": 55.6875, "learning_rate": 4.5e-06, "loss": 3.8781, "norm_diff": 0.0112, "num_tokens_overlap": 5.5911, "num_tokens_union": 55.029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 55.3683, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5352, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7257, "sent_len_1": 66.8238, "sent_len_max_0": 18.8487, "sent_len_max_1": 187.41, "stdk": 0.036, "stdq": 0.0461, "stdqueue_k": 0.0, "step": 900 }, { "accuracy": 36.7188, "doc_norm": 7.5168, "encoder_q-embeddings": 29.875, "encoder_q-layer.0": 22.0001, "encoder_q-layer.1": 21.6902, "encoder_q-layer.10": 83.4656, "encoder_q-layer.11": 78.3829, "encoder_q-layer.2": 24.3438, "encoder_q-layer.3": 26.9733, "encoder_q-layer.4": 29.1178, "encoder_q-layer.5": 28.533, "encoder_q-layer.6": 34.8575, "encoder_q-layer.7": 43.3357, "encoder_q-layer.8": 60.8437, "encoder_q-layer.9": 60.5355, "epoch": 0.01, "inbatch_neg_score": 55.2494, "inbatch_pos_score": 55.625, "learning_rate": 5e-06, "loss": 3.7794, "norm_diff": 0.0101, "num_tokens_overlap": 5.57, "num_tokens_union": 54.9162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 64.2608, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5269, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6968, "sent_len_1": 66.6656, "sent_len_max_0": 18.8663, "sent_len_max_1": 190.55, "stdk": 0.0365, "stdq": 0.0449, "stdqueue_k": 0.0, "step": 1000 }, { "accuracy": 38.1836, "doc_norm": 7.5122, "encoder_q-embeddings": 27.3416, "encoder_q-layer.0": 20.4438, "encoder_q-layer.1": 19.4531, "encoder_q-layer.10": 62.3869, "encoder_q-layer.11": 70.4063, "encoder_q-layer.2": 20.5073, "encoder_q-layer.3": 21.2804, "encoder_q-layer.4": 21.176, "encoder_q-layer.5": 21.9391, "encoder_q-layer.6": 24.5838, "encoder_q-layer.7": 29.3163, "encoder_q-layer.8": 36.4631, "encoder_q-layer.9": 40.2125, "epoch": 0.01, "inbatch_neg_score": 55.217, "inbatch_pos_score": 55.5625, "learning_rate": 5.500000000000001e-06, "loss": 3.645, "norm_diff": 0.0092, "num_tokens_overlap": 5.5807, "num_tokens_union": 55.0547, "postclip_grad_norm": 1.0, "preclip_grad_norm": 50.6897, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5214, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7016, "sent_len_1": 66.8711, "sent_len_max_0": 18.8313, "sent_len_max_1": 187.5, "stdk": 0.0362, "stdq": 0.0431, "stdqueue_k": 0.0, "step": 1100 }, { "accuracy": 39.6484, "doc_norm": 7.5062, "encoder_q-embeddings": 28.1447, "encoder_q-layer.0": 20.9839, "encoder_q-layer.1": 21.2784, "encoder_q-layer.10": 68.104, "encoder_q-layer.11": 62.7513, "encoder_q-layer.2": 23.5434, "encoder_q-layer.3": 24.192, "encoder_q-layer.4": 27.036, "encoder_q-layer.5": 27.8382, "encoder_q-layer.6": 33.8147, "encoder_q-layer.7": 36.1686, "encoder_q-layer.8": 51.0569, "encoder_q-layer.9": 52.262, "epoch": 0.01, "inbatch_neg_score": 55.1758, "inbatch_pos_score": 55.5, "learning_rate": 6e-06, "loss": 3.5577, "norm_diff": 0.007, "num_tokens_overlap": 5.5876, "num_tokens_union": 54.9626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 56.0065, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5132, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7265, "sent_len_1": 66.7045, "sent_len_max_0": 18.8212, "sent_len_max_1": 190.9512, "stdk": 0.0353, "stdq": 0.0413, "stdqueue_k": 0.0, "step": 1200 }, { "accuracy": 38.9648, "doc_norm": 7.5057, "encoder_q-embeddings": 25.7245, "encoder_q-layer.0": 19.3382, "encoder_q-layer.1": 18.1374, "encoder_q-layer.10": 58.2816, "encoder_q-layer.11": 69.0427, "encoder_q-layer.2": 18.7817, "encoder_q-layer.3": 19.6787, "encoder_q-layer.4": 20.4694, "encoder_q-layer.5": 21.144, "encoder_q-layer.6": 23.4933, "encoder_q-layer.7": 25.3416, "encoder_q-layer.8": 36.9415, "encoder_q-layer.9": 40.3243, "epoch": 0.01, "inbatch_neg_score": 55.1463, "inbatch_pos_score": 55.5, "learning_rate": 6.5000000000000004e-06, "loss": 3.4845, "norm_diff": 0.0062, "num_tokens_overlap": 5.6015, "num_tokens_union": 55.1115, "postclip_grad_norm": 1.0, "preclip_grad_norm": 48.4674, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5119, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7338, "sent_len_1": 66.9623, "sent_len_max_0": 18.8987, "sent_len_max_1": 188.7575, "stdk": 0.0358, "stdq": 0.0422, "stdqueue_k": 0.0, "step": 1300 }, { "accuracy": 39.0625, "doc_norm": 7.5118, "encoder_q-embeddings": 29.9032, "encoder_q-layer.0": 21.4064, "encoder_q-layer.1": 20.3707, "encoder_q-layer.10": 66.6699, "encoder_q-layer.11": 74.0445, "encoder_q-layer.2": 21.1869, "encoder_q-layer.3": 22.0289, "encoder_q-layer.4": 23.1031, "encoder_q-layer.5": 24.0467, "encoder_q-layer.6": 26.9979, "encoder_q-layer.7": 31.1884, "encoder_q-layer.8": 44.4675, "encoder_q-layer.9": 50.0739, "epoch": 0.01, "inbatch_neg_score": 55.1743, "inbatch_pos_score": 55.5625, "learning_rate": 7.000000000000001e-06, "loss": 3.4785, "norm_diff": 0.0071, "num_tokens_overlap": 5.5688, "num_tokens_union": 54.89, "postclip_grad_norm": 1.0, "preclip_grad_norm": 55.2615, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5189, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7213, "sent_len_1": 66.5615, "sent_len_max_0": 18.8875, "sent_len_max_1": 189.5737, "stdk": 0.0375, "stdq": 0.0429, "stdqueue_k": 0.0, "step": 1400 }, { "accuracy": 41.7969, "doc_norm": 7.51, "encoder_q-embeddings": 24.9857, "encoder_q-layer.0": 19.2321, "encoder_q-layer.1": 18.3827, "encoder_q-layer.10": 52.8863, "encoder_q-layer.11": 65.4736, "encoder_q-layer.2": 18.8989, "encoder_q-layer.3": 18.8619, "encoder_q-layer.4": 19.4536, "encoder_q-layer.5": 20.0064, "encoder_q-layer.6": 22.5471, "encoder_q-layer.7": 24.2141, "encoder_q-layer.8": 32.6444, "encoder_q-layer.9": 36.9646, "epoch": 0.01, "inbatch_neg_score": 55.1788, "inbatch_pos_score": 55.5625, "learning_rate": 7.5e-06, "loss": 3.4126, "norm_diff": 0.0055, "num_tokens_overlap": 5.5707, "num_tokens_union": 55.0811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 45.7603, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5155, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7196, "sent_len_1": 66.9068, "sent_len_max_0": 18.8, "sent_len_max_1": 189.4512, "stdk": 0.0369, "stdq": 0.0412, "stdqueue_k": 0.0, "step": 1500 }, { "accuracy": 43.1641, "doc_norm": 7.5081, "encoder_q-embeddings": 25.4159, "encoder_q-layer.0": 18.3769, "encoder_q-layer.1": 18.2843, "encoder_q-layer.10": 62.1617, "encoder_q-layer.11": 66.9803, "encoder_q-layer.2": 20.1275, "encoder_q-layer.3": 20.8505, "encoder_q-layer.4": 23.3659, "encoder_q-layer.5": 23.9872, "encoder_q-layer.6": 28.5914, "encoder_q-layer.7": 33.5994, "encoder_q-layer.8": 48.2448, "encoder_q-layer.9": 51.4824, "epoch": 0.02, "inbatch_neg_score": 55.195, "inbatch_pos_score": 55.5625, "learning_rate": 8.000000000000001e-06, "loss": 3.3289, "norm_diff": 0.0046, "num_tokens_overlap": 5.5924, "num_tokens_union": 55.0455, "postclip_grad_norm": 1.0, "preclip_grad_norm": 52.8292, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5126, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7466, "sent_len_1": 66.7972, "sent_len_max_0": 18.8538, "sent_len_max_1": 187.78, "stdk": 0.0361, "stdq": 0.0407, "stdqueue_k": 0.0, "step": 1600 }, { "accuracy": 42.4805, "doc_norm": 7.5092, "encoder_q-embeddings": 26.4089, "encoder_q-layer.0": 18.666, "encoder_q-layer.1": 18.3158, "encoder_q-layer.10": 47.789, "encoder_q-layer.11": 59.5252, "encoder_q-layer.2": 19.227, "encoder_q-layer.3": 19.0985, "encoder_q-layer.4": 19.9573, "encoder_q-layer.5": 19.549, "encoder_q-layer.6": 22.2564, "encoder_q-layer.7": 24.1539, "encoder_q-layer.8": 33.695, "encoder_q-layer.9": 35.406, "epoch": 0.02, "inbatch_neg_score": 55.2022, "inbatch_pos_score": 55.5625, "learning_rate": 8.500000000000002e-06, "loss": 3.3703, "norm_diff": 0.0033, "num_tokens_overlap": 5.5735, "num_tokens_union": 54.8392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 44.5704, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5125, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7032, "sent_len_1": 66.5471, "sent_len_max_0": 18.835, "sent_len_max_1": 190.8075, "stdk": 0.0369, "stdq": 0.0405, "stdqueue_k": 0.0, "step": 1700 }, { "accuracy": 42.0898, "doc_norm": 7.5095, "encoder_q-embeddings": 26.3118, "encoder_q-layer.0": 18.4709, "encoder_q-layer.1": 18.0452, "encoder_q-layer.10": 48.8682, "encoder_q-layer.11": 62.7663, "encoder_q-layer.2": 19.0716, "encoder_q-layer.3": 19.0503, "encoder_q-layer.4": 19.9202, "encoder_q-layer.5": 20.1338, "encoder_q-layer.6": 21.5183, "encoder_q-layer.7": 23.6602, "encoder_q-layer.8": 31.7164, "encoder_q-layer.9": 33.8817, "epoch": 0.02, "inbatch_neg_score": 55.1886, "inbatch_pos_score": 55.5625, "learning_rate": 9e-06, "loss": 3.4078, "norm_diff": 0.0017, "num_tokens_overlap": 5.5783, "num_tokens_union": 54.9774, "postclip_grad_norm": 1.0, "preclip_grad_norm": 44.9265, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5108, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7079, "sent_len_1": 66.7039, "sent_len_max_0": 18.7362, "sent_len_max_1": 188.7875, "stdk": 0.0369, "stdq": 0.0406, "stdqueue_k": 0.0, "step": 1800 }, { "accuracy": 44.3359, "doc_norm": 7.5166, "encoder_q-embeddings": 27.7654, "encoder_q-layer.0": 19.222, "encoder_q-layer.1": 18.9726, "encoder_q-layer.10": 57.7967, "encoder_q-layer.11": 72.2691, "encoder_q-layer.2": 20.8223, "encoder_q-layer.3": 21.3071, "encoder_q-layer.4": 22.8739, "encoder_q-layer.5": 24.8873, "encoder_q-layer.6": 28.1561, "encoder_q-layer.7": 32.9466, "encoder_q-layer.8": 45.4382, "encoder_q-layer.9": 46.608, "epoch": 0.02, "inbatch_neg_score": 55.2594, "inbatch_pos_score": 55.6562, "learning_rate": 9.5e-06, "loss": 3.291, "norm_diff": 0.0015, "num_tokens_overlap": 5.5782, "num_tokens_union": 55.0492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 52.9109, "preclip_grad_norm_avg": 0.0, "query_norm": 7.517, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7034, "sent_len_1": 66.8481, "sent_len_max_0": 18.9, "sent_len_max_1": 189.8313, "stdk": 0.0376, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 1900 }, { "accuracy": 43.3594, "doc_norm": 7.5173, "encoder_q-embeddings": 25.1908, "encoder_q-layer.0": 18.1284, "encoder_q-layer.1": 18.0791, "encoder_q-layer.10": 49.1993, "encoder_q-layer.11": 65.6388, "encoder_q-layer.2": 18.4589, "encoder_q-layer.3": 19.4787, "encoder_q-layer.4": 19.1641, "encoder_q-layer.5": 20.3092, "encoder_q-layer.6": 22.2367, "encoder_q-layer.7": 24.8026, "encoder_q-layer.8": 34.2458, "encoder_q-layer.9": 35.6574, "epoch": 0.02, "inbatch_neg_score": 55.2626, "inbatch_pos_score": 55.6562, "learning_rate": 1e-05, "loss": 3.2411, "norm_diff": 0.0009, "num_tokens_overlap": 5.583, "num_tokens_union": 54.9871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 44.9131, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5164, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7267, "sent_len_1": 66.7117, "sent_len_max_0": 18.8487, "sent_len_max_1": 189.0625, "stdk": 0.0375, "stdq": 0.0404, "stdqueue_k": 0.0, "step": 2000 }, { "accuracy": 42.8711, "doc_norm": 7.507, "encoder_q-embeddings": 50.9063, "encoder_q-layer.0": 36.3924, "encoder_q-layer.1": 35.2426, "encoder_q-layer.10": 107.1215, "encoder_q-layer.11": 137.2201, "encoder_q-layer.2": 36.7318, "encoder_q-layer.3": 37.061, "encoder_q-layer.4": 39.2797, "encoder_q-layer.5": 41.036, "encoder_q-layer.6": 45.231, "encoder_q-layer.7": 54.1869, "encoder_q-layer.8": 79.1661, "encoder_q-layer.9": 77.8303, "epoch": 0.02, "inbatch_neg_score": 55.1447, "inbatch_pos_score": 55.5312, "learning_rate": 1.05e-05, "loss": 3.2616, "norm_diff": 0.001, "num_tokens_overlap": 5.5805, "num_tokens_union": 55.0208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 96.5402, "preclip_grad_norm_avg": 0.0, "query_norm": 7.5075, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.709, "sent_len_1": 66.8023, "sent_len_max_0": 18.8087, "sent_len_max_1": 188.4175, "stdk": 0.037, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 2100 }, { "accuracy": 44.8242, "doc_norm": 7.4923, "encoder_q-embeddings": 53.1683, "encoder_q-layer.0": 36.8439, "encoder_q-layer.1": 35.5683, "encoder_q-layer.10": 86.5468, "encoder_q-layer.11": 115.3696, "encoder_q-layer.2": 36.9446, "encoder_q-layer.3": 37.5188, "encoder_q-layer.4": 37.9387, "encoder_q-layer.5": 37.5113, "encoder_q-layer.6": 42.784, "encoder_q-layer.7": 45.6234, "encoder_q-layer.8": 60.4952, "encoder_q-layer.9": 63.5486, "epoch": 0.02, "inbatch_neg_score": 54.929, "inbatch_pos_score": 55.3438, "learning_rate": 1.1000000000000001e-05, "loss": 3.2542, "norm_diff": 0.0014, "num_tokens_overlap": 5.585, "num_tokens_union": 54.9887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 84.7221, "preclip_grad_norm_avg": 0.0, "query_norm": 7.4928, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7279, "sent_len_1": 66.7686, "sent_len_max_0": 18.7737, "sent_len_max_1": 191.285, "stdk": 0.0365, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 2200 }, { "accuracy": 44.043, "doc_norm": 7.4765, "encoder_q-embeddings": 47.5449, "encoder_q-layer.0": 34.2285, "encoder_q-layer.1": 32.7544, "encoder_q-layer.10": 87.3575, "encoder_q-layer.11": 111.7634, "encoder_q-layer.2": 33.9301, "encoder_q-layer.3": 35.8621, "encoder_q-layer.4": 36.2536, "encoder_q-layer.5": 36.0811, "encoder_q-layer.6": 40.0732, "encoder_q-layer.7": 43.0054, "encoder_q-layer.8": 54.4533, "encoder_q-layer.9": 58.3841, "epoch": 0.02, "inbatch_neg_score": 54.7056, "inbatch_pos_score": 55.0938, "learning_rate": 1.1500000000000002e-05, "loss": 3.2038, "norm_diff": 0.002, "num_tokens_overlap": 5.5722, "num_tokens_union": 54.9937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 80.3568, "preclip_grad_norm_avg": 0.0, "query_norm": 7.4755, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7006, "sent_len_1": 66.7477, "sent_len_max_0": 18.89, "sent_len_max_1": 190.2988, "stdk": 0.0367, "stdq": 0.0407, "stdqueue_k": 0.0, "step": 2300 }, { "accuracy": 42.5781, "doc_norm": 7.4593, "encoder_q-embeddings": 45.464, "encoder_q-layer.0": 34.1481, "encoder_q-layer.1": 32.7669, "encoder_q-layer.10": 98.813, "encoder_q-layer.11": 133.3354, "encoder_q-layer.2": 34.9825, "encoder_q-layer.3": 35.9725, "encoder_q-layer.4": 37.0187, "encoder_q-layer.5": 36.5405, "encoder_q-layer.6": 41.0878, "encoder_q-layer.7": 46.8758, "encoder_q-layer.8": 60.0359, "encoder_q-layer.9": 68.0728, "epoch": 0.02, "inbatch_neg_score": 54.442, "inbatch_pos_score": 54.8125, "learning_rate": 1.2e-05, "loss": 3.1773, "norm_diff": 0.0033, "num_tokens_overlap": 5.5812, "num_tokens_union": 55.0222, "postclip_grad_norm": 1.0, "preclip_grad_norm": 88.0126, "preclip_grad_norm_avg": 0.0, "query_norm": 7.456, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7308, "sent_len_1": 66.8192, "sent_len_max_0": 18.8463, "sent_len_max_1": 191.6375, "stdk": 0.0372, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 2400 }, { "accuracy": 44.3359, "doc_norm": 7.4424, "encoder_q-embeddings": 47.148, "encoder_q-layer.0": 34.233, "encoder_q-layer.1": 33.5136, "encoder_q-layer.10": 92.7533, "encoder_q-layer.11": 129.5151, "encoder_q-layer.2": 36.1871, "encoder_q-layer.3": 36.0812, "encoder_q-layer.4": 38.2411, "encoder_q-layer.5": 39.0989, "encoder_q-layer.6": 44.3186, "encoder_q-layer.7": 50.7712, "encoder_q-layer.8": 65.3644, "encoder_q-layer.9": 71.3288, "epoch": 0.02, "inbatch_neg_score": 54.1766, "inbatch_pos_score": 54.5625, "learning_rate": 1.25e-05, "loss": 3.1814, "norm_diff": 0.0028, "num_tokens_overlap": 5.5799, "num_tokens_union": 54.9391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 88.1559, "preclip_grad_norm_avg": 0.0, "query_norm": 7.4396, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7366, "sent_len_1": 66.6587, "sent_len_max_0": 18.96, "sent_len_max_1": 190.9462, "stdk": 0.0375, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 2500 }, { "accuracy": 43.75, "doc_norm": 7.426, "encoder_q-embeddings": 46.1879, "encoder_q-layer.0": 33.523, "encoder_q-layer.1": 32.82, "encoder_q-layer.10": 92.4542, "encoder_q-layer.11": 128.1656, "encoder_q-layer.2": 34.1842, "encoder_q-layer.3": 36.382, "encoder_q-layer.4": 36.4794, "encoder_q-layer.5": 36.8284, "encoder_q-layer.6": 43.76, "encoder_q-layer.7": 45.3058, "encoder_q-layer.8": 62.8306, "encoder_q-layer.9": 65.7791, "epoch": 0.03, "inbatch_neg_score": 53.9366, "inbatch_pos_score": 54.3125, "learning_rate": 1.3000000000000001e-05, "loss": 3.1407, "norm_diff": 0.0035, "num_tokens_overlap": 5.5656, "num_tokens_union": 55.0342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 86.0766, "preclip_grad_norm_avg": 0.0, "query_norm": 7.4225, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6971, "sent_len_1": 66.8083, "sent_len_max_0": 18.8062, "sent_len_max_1": 188.0675, "stdk": 0.037, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 2600 }, { "accuracy": 43.1641, "doc_norm": 7.4115, "encoder_q-embeddings": 50.4858, "encoder_q-layer.0": 34.8555, "encoder_q-layer.1": 33.523, "encoder_q-layer.10": 100.1716, "encoder_q-layer.11": 124.3523, "encoder_q-layer.2": 35.6621, "encoder_q-layer.3": 35.6282, "encoder_q-layer.4": 36.617, "encoder_q-layer.5": 37.3888, "encoder_q-layer.6": 42.8936, "encoder_q-layer.7": 51.0437, "encoder_q-layer.8": 65.2066, "encoder_q-layer.9": 72.8953, "epoch": 0.03, "inbatch_neg_score": 53.6966, "inbatch_pos_score": 54.0938, "learning_rate": 1.3500000000000001e-05, "loss": 3.1742, "norm_diff": 0.0036, "num_tokens_overlap": 5.5767, "num_tokens_union": 55.0265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 86.948, "preclip_grad_norm_avg": 0.0, "query_norm": 7.4079, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.7876, "sent_len_max_0": 18.8637, "sent_len_max_1": 189.9162, "stdk": 0.0364, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 2700 }, { "accuracy": 45.8984, "doc_norm": 7.3902, "encoder_q-embeddings": 46.1863, "encoder_q-layer.0": 32.7984, "encoder_q-layer.1": 32.9713, "encoder_q-layer.10": 86.8191, "encoder_q-layer.11": 114.5372, "encoder_q-layer.2": 35.5425, "encoder_q-layer.3": 34.6353, "encoder_q-layer.4": 35.7055, "encoder_q-layer.5": 35.4927, "encoder_q-layer.6": 41.2316, "encoder_q-layer.7": 44.8895, "encoder_q-layer.8": 54.7569, "encoder_q-layer.9": 60.1933, "epoch": 0.03, "inbatch_neg_score": 53.3717, "inbatch_pos_score": 53.7812, "learning_rate": 1.4000000000000001e-05, "loss": 3.1265, "norm_diff": 0.0056, "num_tokens_overlap": 5.5831, "num_tokens_union": 54.9804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 81.4981, "preclip_grad_norm_avg": 0.0, "query_norm": 7.3846, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7154, "sent_len_1": 66.753, "sent_len_max_0": 18.8938, "sent_len_max_1": 192.3487, "stdk": 0.0379, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 2800 }, { "accuracy": 44.7266, "doc_norm": 7.3824, "encoder_q-embeddings": 49.9015, "encoder_q-layer.0": 35.381, "encoder_q-layer.1": 33.9903, "encoder_q-layer.10": 125.1096, "encoder_q-layer.11": 134.4484, "encoder_q-layer.2": 38.0375, "encoder_q-layer.3": 38.6827, "encoder_q-layer.4": 40.4323, "encoder_q-layer.5": 42.3584, "encoder_q-layer.6": 50.0061, "encoder_q-layer.7": 58.8885, "encoder_q-layer.8": 76.3963, "encoder_q-layer.9": 85.6238, "epoch": 0.03, "inbatch_neg_score": 53.1969, "inbatch_pos_score": 53.625, "learning_rate": 1.45e-05, "loss": 3.1583, "norm_diff": 0.0067, "num_tokens_overlap": 5.563, "num_tokens_union": 54.865, "postclip_grad_norm": 1.0, "preclip_grad_norm": 97.8649, "preclip_grad_norm_avg": 0.0, "query_norm": 7.3757, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.701, "sent_len_1": 66.5409, "sent_len_max_0": 18.775, "sent_len_max_1": 187.13, "stdk": 0.0387, "stdq": 0.04, "stdqueue_k": 0.0, "step": 2900 }, { "accuracy": 43.8477, "doc_norm": 7.3587, "encoder_q-embeddings": 46.1862, "encoder_q-layer.0": 33.461, "encoder_q-layer.1": 32.6707, "encoder_q-layer.10": 89.3184, "encoder_q-layer.11": 116.1012, "encoder_q-layer.2": 34.4435, "encoder_q-layer.3": 35.4106, "encoder_q-layer.4": 37.8798, "encoder_q-layer.5": 39.9212, "encoder_q-layer.6": 44.7255, "encoder_q-layer.7": 50.2996, "encoder_q-layer.8": 64.5725, "encoder_q-layer.9": 70.1474, "epoch": 0.03, "inbatch_neg_score": 52.9202, "inbatch_pos_score": 53.3125, "learning_rate": 1.5e-05, "loss": 3.0838, "norm_diff": 0.0039, "num_tokens_overlap": 5.5809, "num_tokens_union": 54.9114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 83.4543, "preclip_grad_norm_avg": 0.0, "query_norm": 7.3548, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7425, "sent_len_1": 66.5458, "sent_len_max_0": 18.8062, "sent_len_max_1": 187.6225, "stdk": 0.0379, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 3000 }, { "accuracy": 44.6289, "doc_norm": 7.3428, "encoder_q-embeddings": 51.2116, "encoder_q-layer.0": 35.3606, "encoder_q-layer.1": 34.0081, "encoder_q-layer.10": 91.3173, "encoder_q-layer.11": 120.6372, "encoder_q-layer.2": 36.4488, "encoder_q-layer.3": 37.4511, "encoder_q-layer.4": 39.2739, "encoder_q-layer.5": 38.9853, "encoder_q-layer.6": 45.2236, "encoder_q-layer.7": 49.615, "encoder_q-layer.8": 65.3381, "encoder_q-layer.9": 68.7351, "epoch": 0.03, "inbatch_neg_score": 52.6556, "inbatch_pos_score": 53.0625, "learning_rate": 1.55e-05, "loss": 3.1018, "norm_diff": 0.0063, "num_tokens_overlap": 5.5766, "num_tokens_union": 54.9787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 86.0481, "preclip_grad_norm_avg": 0.0, "query_norm": 7.3365, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7123, "sent_len_1": 66.7134, "sent_len_max_0": 18.9325, "sent_len_max_1": 189.2525, "stdk": 0.0381, "stdq": 0.039, "stdqueue_k": 0.0, "step": 3100 }, { "accuracy": 46.1914, "doc_norm": 7.3299, "encoder_q-embeddings": 50.172, "encoder_q-layer.0": 35.7275, "encoder_q-layer.1": 34.4075, "encoder_q-layer.10": 90.8124, "encoder_q-layer.11": 119.4036, "encoder_q-layer.2": 35.9394, "encoder_q-layer.3": 36.4062, "encoder_q-layer.4": 39.3988, "encoder_q-layer.5": 41.3764, "encoder_q-layer.6": 43.6048, "encoder_q-layer.7": 46.3415, "encoder_q-layer.8": 62.3012, "encoder_q-layer.9": 63.6545, "epoch": 0.03, "inbatch_neg_score": 52.4268, "inbatch_pos_score": 52.8438, "learning_rate": 1.6000000000000003e-05, "loss": 3.0645, "norm_diff": 0.0072, "num_tokens_overlap": 5.5755, "num_tokens_union": 55.0137, "postclip_grad_norm": 1.0, "preclip_grad_norm": 85.038, "preclip_grad_norm_avg": 0.0, "query_norm": 7.3227, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7057, "sent_len_1": 66.8291, "sent_len_max_0": 18.895, "sent_len_max_1": 188.3575, "stdk": 0.0383, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 3200 }, { "accuracy": 44.5312, "doc_norm": 7.3082, "encoder_q-embeddings": 50.265, "encoder_q-layer.0": 36.6515, "encoder_q-layer.1": 34.9694, "encoder_q-layer.10": 128.161, "encoder_q-layer.11": 142.48, "encoder_q-layer.2": 37.3018, "encoder_q-layer.3": 39.1324, "encoder_q-layer.4": 39.968, "encoder_q-layer.5": 40.9666, "encoder_q-layer.6": 49.1748, "encoder_q-layer.7": 58.8612, "encoder_q-layer.8": 78.7757, "encoder_q-layer.9": 93.3501, "epoch": 0.03, "inbatch_neg_score": 52.1183, "inbatch_pos_score": 52.5312, "learning_rate": 1.65e-05, "loss": 3.0226, "norm_diff": 0.0051, "num_tokens_overlap": 5.5793, "num_tokens_union": 55.0146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 101.6593, "preclip_grad_norm_avg": 0.0, "query_norm": 7.3031, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7274, "sent_len_1": 66.7883, "sent_len_max_0": 18.87, "sent_len_max_1": 192.4062, "stdk": 0.0388, "stdq": 0.0406, "stdqueue_k": 0.0, "step": 3300 }, { "accuracy": 46.0938, "doc_norm": 7.2888, "encoder_q-embeddings": 46.0696, "encoder_q-layer.0": 33.1702, "encoder_q-layer.1": 32.0863, "encoder_q-layer.10": 83.663, "encoder_q-layer.11": 118.6938, "encoder_q-layer.2": 33.6793, "encoder_q-layer.3": 33.4918, "encoder_q-layer.4": 35.2555, "encoder_q-layer.5": 34.7667, "encoder_q-layer.6": 38.4637, "encoder_q-layer.7": 42.8896, "encoder_q-layer.8": 54.4441, "encoder_q-layer.9": 61.0531, "epoch": 0.03, "inbatch_neg_score": 51.8624, "inbatch_pos_score": 52.25, "learning_rate": 1.7000000000000003e-05, "loss": 3.0258, "norm_diff": 0.0083, "num_tokens_overlap": 5.5889, "num_tokens_union": 55.2032, "postclip_grad_norm": 1.0, "preclip_grad_norm": 80.2406, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2805, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7191, "sent_len_1": 67.1155, "sent_len_max_0": 18.8625, "sent_len_max_1": 191.4913, "stdk": 0.0373, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 3400 }, { "accuracy": 47.0703, "doc_norm": 7.2663, "encoder_q-embeddings": 46.0915, "encoder_q-layer.0": 32.6651, "encoder_q-layer.1": 32.6675, "encoder_q-layer.10": 77.271, "encoder_q-layer.11": 112.4905, "encoder_q-layer.2": 34.4981, "encoder_q-layer.3": 35.3494, "encoder_q-layer.4": 36.9592, "encoder_q-layer.5": 36.563, "encoder_q-layer.6": 42.9897, "encoder_q-layer.7": 43.5845, "encoder_q-layer.8": 52.8259, "encoder_q-layer.9": 58.0145, "epoch": 0.03, "inbatch_neg_score": 51.5765, "inbatch_pos_score": 52.0, "learning_rate": 1.75e-05, "loss": 3.0382, "norm_diff": 0.0051, "num_tokens_overlap": 5.5849, "num_tokens_union": 55.1269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 77.2773, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2611, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7206, "sent_len_1": 66.9713, "sent_len_max_0": 18.8775, "sent_len_max_1": 190.4013, "stdk": 0.0372, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 3500 }, { "accuracy": 45.6055, "doc_norm": 7.2484, "encoder_q-embeddings": 45.8856, "encoder_q-layer.0": 33.346, "encoder_q-layer.1": 33.1841, "encoder_q-layer.10": 89.7142, "encoder_q-layer.11": 118.0397, "encoder_q-layer.2": 34.7321, "encoder_q-layer.3": 35.858, "encoder_q-layer.4": 35.9534, "encoder_q-layer.5": 35.5191, "encoder_q-layer.6": 41.2268, "encoder_q-layer.7": 45.421, "encoder_q-layer.8": 59.9543, "encoder_q-layer.9": 68.655, "epoch": 0.04, "inbatch_neg_score": 51.2595, "inbatch_pos_score": 51.6875, "learning_rate": 1.8e-05, "loss": 3.0884, "norm_diff": 0.0065, "num_tokens_overlap": 5.5765, "num_tokens_union": 55.118, "postclip_grad_norm": 1.0, "preclip_grad_norm": 82.6975, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2419, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7322, "sent_len_1": 66.9082, "sent_len_max_0": 18.9013, "sent_len_max_1": 189.34, "stdk": 0.0387, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 3600 }, { "accuracy": 44.1406, "doc_norm": 7.2361, "encoder_q-embeddings": 47.8438, "encoder_q-layer.0": 34.6095, "encoder_q-layer.1": 33.4211, "encoder_q-layer.10": 130.1411, "encoder_q-layer.11": 141.6141, "encoder_q-layer.2": 36.187, "encoder_q-layer.3": 38.445, "encoder_q-layer.4": 40.5869, "encoder_q-layer.5": 43.6883, "encoder_q-layer.6": 52.6951, "encoder_q-layer.7": 62.7617, "encoder_q-layer.8": 87.0974, "encoder_q-layer.9": 95.0917, "epoch": 0.04, "inbatch_neg_score": 51.0653, "inbatch_pos_score": 51.4688, "learning_rate": 1.85e-05, "loss": 3.026, "norm_diff": 0.0087, "num_tokens_overlap": 5.5913, "num_tokens_union": 55.1694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 103.6972, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2274, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7343, "sent_len_1": 66.9843, "sent_len_max_0": 18.8738, "sent_len_max_1": 188.9638, "stdk": 0.0382, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 3700 }, { "accuracy": 48.7305, "doc_norm": 7.2149, "encoder_q-embeddings": 42.6525, "encoder_q-layer.0": 30.6534, "encoder_q-layer.1": 31.7967, "encoder_q-layer.10": 169.5878, "encoder_q-layer.11": 146.9875, "encoder_q-layer.2": 37.6263, "encoder_q-layer.3": 42.2076, "encoder_q-layer.4": 49.0002, "encoder_q-layer.5": 54.2466, "encoder_q-layer.6": 66.062, "encoder_q-layer.7": 73.7585, "encoder_q-layer.8": 107.576, "encoder_q-layer.9": 129.3706, "epoch": 0.04, "inbatch_neg_score": 50.89, "inbatch_pos_score": 51.2812, "learning_rate": 1.9e-05, "loss": 3.0297, "norm_diff": 0.0052, "num_tokens_overlap": 5.5788, "num_tokens_union": 55.1363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 122.4149, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2097, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7247, "sent_len_1": 66.9821, "sent_len_max_0": 18.8062, "sent_len_max_1": 189.8775, "stdk": 0.0371, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 3800 }, { "accuracy": 48.0469, "doc_norm": 7.2115, "encoder_q-embeddings": 47.9575, "encoder_q-layer.0": 34.0661, "encoder_q-layer.1": 33.1605, "encoder_q-layer.10": 95.4806, "encoder_q-layer.11": 118.8817, "encoder_q-layer.2": 35.7649, "encoder_q-layer.3": 35.6688, "encoder_q-layer.4": 36.081, "encoder_q-layer.5": 36.5964, "encoder_q-layer.6": 40.3609, "encoder_q-layer.7": 43.9928, "encoder_q-layer.8": 62.7251, "encoder_q-layer.9": 67.4355, "epoch": 0.04, "inbatch_neg_score": 50.6818, "inbatch_pos_score": 51.125, "learning_rate": 1.9500000000000003e-05, "loss": 2.9972, "norm_diff": 0.008, "num_tokens_overlap": 5.5802, "num_tokens_union": 55.1303, "postclip_grad_norm": 1.0, "preclip_grad_norm": 82.9694, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2035, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.714, "sent_len_1": 66.9687, "sent_len_max_0": 18.9187, "sent_len_max_1": 188.3262, "stdk": 0.0402, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 3900 }, { "accuracy": 49.0234, "doc_norm": 7.1926, "encoder_q-embeddings": 46.6087, "encoder_q-layer.0": 32.1847, "encoder_q-layer.1": 30.7701, "encoder_q-layer.10": 73.6699, "encoder_q-layer.11": 103.2787, "encoder_q-layer.2": 32.391, "encoder_q-layer.3": 33.5567, "encoder_q-layer.4": 34.2392, "encoder_q-layer.5": 33.964, "encoder_q-layer.6": 36.7239, "encoder_q-layer.7": 39.6298, "encoder_q-layer.8": 49.3986, "encoder_q-layer.9": 50.5385, "epoch": 0.04, "inbatch_neg_score": 50.4955, "inbatch_pos_score": 50.9062, "learning_rate": 2e-05, "loss": 2.9948, "norm_diff": 0.0059, "num_tokens_overlap": 5.5859, "num_tokens_union": 55.059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 72.9838, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1867, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7252, "sent_len_1": 66.7584, "sent_len_max_0": 18.8863, "sent_len_max_1": 186.3675, "stdk": 0.039, "stdq": 0.039, "stdqueue_k": 0.0, "step": 4000 }, { "accuracy": 46.582, "doc_norm": 7.1893, "encoder_q-embeddings": 85.6494, "encoder_q-layer.0": 61.3529, "encoder_q-layer.1": 63.6143, "encoder_q-layer.10": 238.2862, "encoder_q-layer.11": 289.8391, "encoder_q-layer.2": 67.4142, "encoder_q-layer.3": 70.2813, "encoder_q-layer.4": 73.6332, "encoder_q-layer.5": 74.2673, "encoder_q-layer.6": 84.9275, "encoder_q-layer.7": 100.8847, "encoder_q-layer.8": 141.3738, "encoder_q-layer.9": 171.9337, "epoch": 0.04, "inbatch_neg_score": 50.4611, "inbatch_pos_score": 50.8438, "learning_rate": 2.05e-05, "loss": 3.0063, "norm_diff": 0.009, "num_tokens_overlap": 5.5871, "num_tokens_union": 55.0647, "postclip_grad_norm": 1.0, "preclip_grad_norm": 188.5022, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1803, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.8945, "sent_len_max_0": 18.825, "sent_len_max_1": 191.3225, "stdk": 0.0384, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 4100 }, { "accuracy": 49.3164, "doc_norm": 7.1891, "encoder_q-embeddings": 94.4332, "encoder_q-layer.0": 64.9663, "encoder_q-layer.1": 61.8835, "encoder_q-layer.10": 164.8918, "encoder_q-layer.11": 231.1194, "encoder_q-layer.2": 66.0143, "encoder_q-layer.3": 67.4636, "encoder_q-layer.4": 71.2411, "encoder_q-layer.5": 70.8906, "encoder_q-layer.6": 79.5082, "encoder_q-layer.7": 86.9412, "encoder_q-layer.8": 110.9872, "encoder_q-layer.9": 121.1663, "epoch": 0.04, "inbatch_neg_score": 50.405, "inbatch_pos_score": 50.8438, "learning_rate": 2.1e-05, "loss": 2.9545, "norm_diff": 0.0071, "num_tokens_overlap": 5.5739, "num_tokens_union": 54.9867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 157.8901, "preclip_grad_norm_avg": 0.0, "query_norm": 7.182, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7243, "sent_len_1": 66.7629, "sent_len_max_0": 18.8625, "sent_len_max_1": 191.3862, "stdk": 0.04, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 4200 }, { "accuracy": 49.0234, "doc_norm": 7.1842, "encoder_q-embeddings": 85.8421, "encoder_q-layer.0": 62.9356, "encoder_q-layer.1": 62.7042, "encoder_q-layer.10": 168.1524, "encoder_q-layer.11": 227.8277, "encoder_q-layer.2": 68.0198, "encoder_q-layer.3": 73.1174, "encoder_q-layer.4": 77.4331, "encoder_q-layer.5": 78.2915, "encoder_q-layer.6": 96.8489, "encoder_q-layer.7": 99.7522, "encoder_q-layer.8": 119.2713, "encoder_q-layer.9": 121.2463, "epoch": 0.04, "inbatch_neg_score": 50.3569, "inbatch_pos_score": 50.7812, "learning_rate": 2.15e-05, "loss": 2.9543, "norm_diff": 0.0083, "num_tokens_overlap": 5.5792, "num_tokens_union": 54.9628, "postclip_grad_norm": 1.0, "preclip_grad_norm": 160.5367, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1759, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7229, "sent_len_1": 66.7489, "sent_len_max_0": 18.9075, "sent_len_max_1": 190.9975, "stdk": 0.037, "stdq": 0.0377, "stdqueue_k": 0.0, "step": 4300 }, { "accuracy": 48.1445, "doc_norm": 7.1847, "encoder_q-embeddings": 93.2305, "encoder_q-layer.0": 65.191, "encoder_q-layer.1": 64.6236, "encoder_q-layer.10": 185.7372, "encoder_q-layer.11": 243.441, "encoder_q-layer.2": 67.7736, "encoder_q-layer.3": 66.7436, "encoder_q-layer.4": 70.2608, "encoder_q-layer.5": 71.4391, "encoder_q-layer.6": 80.0737, "encoder_q-layer.7": 87.3032, "encoder_q-layer.8": 106.6496, "encoder_q-layer.9": 134.6759, "epoch": 0.04, "inbatch_neg_score": 50.3117, "inbatch_pos_score": 50.75, "learning_rate": 2.2000000000000003e-05, "loss": 2.938, "norm_diff": 0.0102, "num_tokens_overlap": 5.6041, "num_tokens_union": 55.0968, "postclip_grad_norm": 1.0, "preclip_grad_norm": 164.2242, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1745, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7598, "sent_len_1": 66.8692, "sent_len_max_0": 19.13, "sent_len_max_1": 188.4913, "stdk": 0.04, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 4400 }, { "accuracy": 46.2891, "doc_norm": 7.1906, "encoder_q-embeddings": 104.2038, "encoder_q-layer.0": 68.9782, "encoder_q-layer.1": 65.8932, "encoder_q-layer.10": 247.8949, "encoder_q-layer.11": 285.0684, "encoder_q-layer.2": 72.8799, "encoder_q-layer.3": 75.7882, "encoder_q-layer.4": 77.3359, "encoder_q-layer.5": 80.1416, "encoder_q-layer.6": 95.7344, "encoder_q-layer.7": 112.8756, "encoder_q-layer.8": 145.282, "encoder_q-layer.9": 180.4401, "epoch": 0.04, "inbatch_neg_score": 50.382, "inbatch_pos_score": 50.8125, "learning_rate": 2.25e-05, "loss": 2.9304, "norm_diff": 0.0092, "num_tokens_overlap": 5.5746, "num_tokens_union": 55.0927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 196.3139, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1814, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7105, "sent_len_1": 66.9003, "sent_len_max_0": 18.68, "sent_len_max_1": 188.7438, "stdk": 0.0388, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 4500 }, { "accuracy": 48.8281, "doc_norm": 7.1855, "encoder_q-embeddings": 92.5031, "encoder_q-layer.0": 63.2427, "encoder_q-layer.1": 61.1707, "encoder_q-layer.10": 146.8521, "encoder_q-layer.11": 200.5063, "encoder_q-layer.2": 64.1282, "encoder_q-layer.3": 63.9793, "encoder_q-layer.4": 68.1987, "encoder_q-layer.5": 65.0451, "encoder_q-layer.6": 73.4956, "encoder_q-layer.7": 87.2226, "encoder_q-layer.8": 105.3963, "encoder_q-layer.9": 105.9916, "epoch": 0.04, "inbatch_neg_score": 50.3659, "inbatch_pos_score": 50.7812, "learning_rate": 2.3000000000000003e-05, "loss": 2.9538, "norm_diff": 0.0089, "num_tokens_overlap": 5.5768, "num_tokens_union": 54.9843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 144.6703, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1766, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7103, "sent_len_1": 66.7075, "sent_len_max_0": 18.8038, "sent_len_max_1": 190.9288, "stdk": 0.0384, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 4600 }, { "accuracy": 45.0195, "doc_norm": 7.1793, "encoder_q-embeddings": 86.5703, "encoder_q-layer.0": 61.4387, "encoder_q-layer.1": 61.305, "encoder_q-layer.10": 238.346, "encoder_q-layer.11": 246.985, "encoder_q-layer.2": 67.5151, "encoder_q-layer.3": 69.9755, "encoder_q-layer.4": 75.6992, "encoder_q-layer.5": 81.7271, "encoder_q-layer.6": 92.923, "encoder_q-layer.7": 100.7563, "encoder_q-layer.8": 127.5874, "encoder_q-layer.9": 153.0665, "epoch": 0.05, "inbatch_neg_score": 50.3162, "inbatch_pos_score": 50.7188, "learning_rate": 2.35e-05, "loss": 2.9175, "norm_diff": 0.0088, "num_tokens_overlap": 5.5904, "num_tokens_union": 54.9603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 176.6178, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1705, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7305, "sent_len_1": 66.6481, "sent_len_max_0": 18.815, "sent_len_max_1": 186.9288, "stdk": 0.038, "stdq": 0.0374, "stdqueue_k": 0.0, "step": 4700 }, { "accuracy": 46.4844, "doc_norm": 7.1822, "encoder_q-embeddings": 93.1189, "encoder_q-layer.0": 65.4832, "encoder_q-layer.1": 65.8693, "encoder_q-layer.10": 251.9013, "encoder_q-layer.11": 291.1109, "encoder_q-layer.2": 72.4602, "encoder_q-layer.3": 73.7902, "encoder_q-layer.4": 79.8696, "encoder_q-layer.5": 85.9338, "encoder_q-layer.6": 102.7291, "encoder_q-layer.7": 107.0858, "encoder_q-layer.8": 136.4651, "encoder_q-layer.9": 153.6561, "epoch": 0.05, "inbatch_neg_score": 50.2842, "inbatch_pos_score": 50.7188, "learning_rate": 2.4e-05, "loss": 2.9463, "norm_diff": 0.0085, "num_tokens_overlap": 5.5708, "num_tokens_union": 54.8339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 195.0337, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1736, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6988, "sent_len_1": 66.5236, "sent_len_max_0": 18.8263, "sent_len_max_1": 189.83, "stdk": 0.0382, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 4800 }, { "accuracy": 47.8516, "doc_norm": 7.1939, "encoder_q-embeddings": 97.3955, "encoder_q-layer.0": 69.885, "encoder_q-layer.1": 68.4824, "encoder_q-layer.10": 209.677, "encoder_q-layer.11": 264.062, "encoder_q-layer.2": 70.8038, "encoder_q-layer.3": 72.0838, "encoder_q-layer.4": 76.1111, "encoder_q-layer.5": 75.6335, "encoder_q-layer.6": 82.5416, "encoder_q-layer.7": 92.2165, "encoder_q-layer.8": 116.9047, "encoder_q-layer.9": 132.9184, "epoch": 0.05, "inbatch_neg_score": 50.3852, "inbatch_pos_score": 50.8125, "learning_rate": 2.45e-05, "loss": 2.8751, "norm_diff": 0.0148, "num_tokens_overlap": 5.5721, "num_tokens_union": 55.0393, "postclip_grad_norm": 1.0, "preclip_grad_norm": 175.3073, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1791, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6914, "sent_len_1": 66.8506, "sent_len_max_0": 18.7812, "sent_len_max_1": 190.62, "stdk": 0.0395, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 4900 }, { "accuracy": 47.8516, "doc_norm": 7.1936, "encoder_q-embeddings": 100.5425, "encoder_q-layer.0": 69.3781, "encoder_q-layer.1": 69.1036, "encoder_q-layer.10": 249.6969, "encoder_q-layer.11": 289.7053, "encoder_q-layer.2": 74.1825, "encoder_q-layer.3": 78.0167, "encoder_q-layer.4": 81.5646, "encoder_q-layer.5": 81.5091, "encoder_q-layer.6": 93.7933, "encoder_q-layer.7": 109.2996, "encoder_q-layer.8": 138.0034, "encoder_q-layer.9": 162.5682, "epoch": 0.05, "inbatch_neg_score": 50.4043, "inbatch_pos_score": 50.8438, "learning_rate": 2.5e-05, "loss": 2.8594, "norm_diff": 0.0084, "num_tokens_overlap": 5.5978, "num_tokens_union": 55.0143, "postclip_grad_norm": 1.0, "preclip_grad_norm": 196.2015, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1853, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7321, "sent_len_1": 66.7665, "sent_len_max_0": 18.7975, "sent_len_max_1": 191.2463, "stdk": 0.0386, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 5000 }, { "accuracy": 44.9219, "doc_norm": 7.1924, "encoder_q-embeddings": 93.3451, "encoder_q-layer.0": 65.7055, "encoder_q-layer.1": 66.3269, "encoder_q-layer.10": 185.056, "encoder_q-layer.11": 261.9641, "encoder_q-layer.2": 70.5123, "encoder_q-layer.3": 73.3578, "encoder_q-layer.4": 78.3408, "encoder_q-layer.5": 78.619, "encoder_q-layer.6": 91.0204, "encoder_q-layer.7": 97.6948, "encoder_q-layer.8": 127.0266, "encoder_q-layer.9": 135.821, "epoch": 0.05, "inbatch_neg_score": 50.4229, "inbatch_pos_score": 50.8438, "learning_rate": 2.5500000000000003e-05, "loss": 2.8833, "norm_diff": 0.0105, "num_tokens_overlap": 5.5962, "num_tokens_union": 55.0269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 172.888, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1819, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7384, "sent_len_1": 66.7412, "sent_len_max_0": 18.82, "sent_len_max_1": 188.9387, "stdk": 0.0386, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 5100 }, { "accuracy": 48.9258, "doc_norm": 7.1933, "encoder_q-embeddings": 90.1833, "encoder_q-layer.0": 60.9584, "encoder_q-layer.1": 58.9373, "encoder_q-layer.10": 131.397, "encoder_q-layer.11": 211.1122, "encoder_q-layer.2": 62.8477, "encoder_q-layer.3": 66.4907, "encoder_q-layer.4": 66.2216, "encoder_q-layer.5": 65.1, "encoder_q-layer.6": 70.3581, "encoder_q-layer.7": 76.7991, "encoder_q-layer.8": 92.017, "encoder_q-layer.9": 99.0844, "epoch": 0.05, "inbatch_neg_score": 50.4769, "inbatch_pos_score": 50.9062, "learning_rate": 2.6000000000000002e-05, "loss": 2.8914, "norm_diff": 0.0089, "num_tokens_overlap": 5.5798, "num_tokens_union": 55.2234, "postclip_grad_norm": 1.0, "preclip_grad_norm": 139.8709, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1844, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7272, "sent_len_1": 67.1224, "sent_len_max_0": 18.8588, "sent_len_max_1": 190.5712, "stdk": 0.0382, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 5200 }, { "accuracy": 45.5078, "doc_norm": 7.1931, "encoder_q-embeddings": 86.0849, "encoder_q-layer.0": 60.2457, "encoder_q-layer.1": 59.8939, "encoder_q-layer.10": 170.501, "encoder_q-layer.11": 226.9914, "encoder_q-layer.2": 65.7118, "encoder_q-layer.3": 64.6972, "encoder_q-layer.4": 65.4253, "encoder_q-layer.5": 65.7107, "encoder_q-layer.6": 74.0605, "encoder_q-layer.7": 82.0683, "encoder_q-layer.8": 103.1177, "encoder_q-layer.9": 118.3813, "epoch": 0.05, "inbatch_neg_score": 50.4679, "inbatch_pos_score": 50.875, "learning_rate": 2.6500000000000004e-05, "loss": 2.8988, "norm_diff": 0.0081, "num_tokens_overlap": 5.5762, "num_tokens_union": 54.9666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 152.5647, "preclip_grad_norm_avg": 0.0, "query_norm": 7.185, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7004, "sent_len_1": 66.7073, "sent_len_max_0": 18.7538, "sent_len_max_1": 190.105, "stdk": 0.0389, "stdq": 0.039, "stdqueue_k": 0.0, "step": 5300 }, { "accuracy": 45.6055, "doc_norm": 7.211, "encoder_q-embeddings": 92.5959, "encoder_q-layer.0": 65.9726, "encoder_q-layer.1": 66.0281, "encoder_q-layer.10": 234.0411, "encoder_q-layer.11": 287.5378, "encoder_q-layer.2": 70.3811, "encoder_q-layer.3": 73.9089, "encoder_q-layer.4": 75.4344, "encoder_q-layer.5": 78.6184, "encoder_q-layer.6": 89.7699, "encoder_q-layer.7": 108.8996, "encoder_q-layer.8": 148.8093, "encoder_q-layer.9": 174.7591, "epoch": 0.05, "inbatch_neg_score": 50.6834, "inbatch_pos_score": 51.0938, "learning_rate": 2.7000000000000002e-05, "loss": 2.8564, "norm_diff": 0.0132, "num_tokens_overlap": 5.5697, "num_tokens_union": 54.9215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 190.7541, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1978, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7048, "sent_len_1": 66.6644, "sent_len_max_0": 18.8675, "sent_len_max_1": 190.4975, "stdk": 0.0395, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 5400 }, { "accuracy": 49.5117, "doc_norm": 7.2053, "encoder_q-embeddings": 91.5372, "encoder_q-layer.0": 63.8871, "encoder_q-layer.1": 63.0344, "encoder_q-layer.10": 145.8915, "encoder_q-layer.11": 219.7766, "encoder_q-layer.2": 66.2528, "encoder_q-layer.3": 65.7257, "encoder_q-layer.4": 67.7818, "encoder_q-layer.5": 64.9888, "encoder_q-layer.6": 71.297, "encoder_q-layer.7": 79.6827, "encoder_q-layer.8": 97.43, "encoder_q-layer.9": 101.4402, "epoch": 0.05, "inbatch_neg_score": 50.6273, "inbatch_pos_score": 51.0625, "learning_rate": 2.7500000000000004e-05, "loss": 2.8432, "norm_diff": 0.0099, "num_tokens_overlap": 5.5869, "num_tokens_union": 54.9391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 145.8927, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1953, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7375, "sent_len_1": 66.6928, "sent_len_max_0": 18.8225, "sent_len_max_1": 191.7962, "stdk": 0.0391, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 5500 }, { "accuracy": 47.168, "doc_norm": 7.2073, "encoder_q-embeddings": 87.6751, "encoder_q-layer.0": 59.921, "encoder_q-layer.1": 61.193, "encoder_q-layer.10": 157.1841, "encoder_q-layer.11": 216.097, "encoder_q-layer.2": 65.4997, "encoder_q-layer.3": 67.0747, "encoder_q-layer.4": 67.5233, "encoder_q-layer.5": 67.5603, "encoder_q-layer.6": 75.9852, "encoder_q-layer.7": 85.6602, "encoder_q-layer.8": 101.4716, "encoder_q-layer.9": 103.8618, "epoch": 0.05, "inbatch_neg_score": 50.6602, "inbatch_pos_score": 51.0625, "learning_rate": 2.8000000000000003e-05, "loss": 2.8482, "norm_diff": 0.011, "num_tokens_overlap": 5.5798, "num_tokens_union": 55.0053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 147.8715, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1963, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7311, "sent_len_1": 66.77, "sent_len_max_0": 18.7638, "sent_len_max_1": 189.555, "stdk": 0.0396, "stdq": 0.0377, "stdqueue_k": 0.0, "step": 5600 }, { "accuracy": 47.2656, "doc_norm": 7.2156, "encoder_q-embeddings": 105.7297, "encoder_q-layer.0": 70.1426, "encoder_q-layer.1": 71.0975, "encoder_q-layer.10": 312.055, "encoder_q-layer.11": 312.2825, "encoder_q-layer.2": 76.4907, "encoder_q-layer.3": 82.0064, "encoder_q-layer.4": 88.422, "encoder_q-layer.5": 94.1549, "encoder_q-layer.6": 110.1506, "encoder_q-layer.7": 128.2757, "encoder_q-layer.8": 176.5293, "encoder_q-layer.9": 230.6092, "epoch": 0.06, "inbatch_neg_score": 50.7112, "inbatch_pos_score": 51.1562, "learning_rate": 2.8499999999999998e-05, "loss": 2.8272, "norm_diff": 0.0094, "num_tokens_overlap": 5.6023, "num_tokens_union": 55.1131, "postclip_grad_norm": 1.0, "preclip_grad_norm": 230.1849, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2062, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7505, "sent_len_1": 66.8953, "sent_len_max_0": 18.8212, "sent_len_max_1": 188.2287, "stdk": 0.0414, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 5700 }, { "accuracy": 47.168, "doc_norm": 7.2182, "encoder_q-embeddings": 90.5599, "encoder_q-layer.0": 63.0292, "encoder_q-layer.1": 61.6172, "encoder_q-layer.10": 214.8357, "encoder_q-layer.11": 257.524, "encoder_q-layer.2": 68.0278, "encoder_q-layer.3": 69.315, "encoder_q-layer.4": 70.8425, "encoder_q-layer.5": 73.3942, "encoder_q-layer.6": 83.4176, "encoder_q-layer.7": 90.026, "encoder_q-layer.8": 114.7123, "encoder_q-layer.9": 142.9655, "epoch": 0.06, "inbatch_neg_score": 50.8321, "inbatch_pos_score": 51.25, "learning_rate": 2.9e-05, "loss": 2.8803, "norm_diff": 0.0121, "num_tokens_overlap": 5.5841, "num_tokens_union": 55.0249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 172.4016, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2061, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7181, "sent_len_1": 66.8399, "sent_len_max_0": 18.7875, "sent_len_max_1": 189.4988, "stdk": 0.0382, "stdq": 0.0376, "stdqueue_k": 0.0, "step": 5800 }, { "accuracy": 47.4609, "doc_norm": 7.2275, "encoder_q-embeddings": 85.931, "encoder_q-layer.0": 60.5768, "encoder_q-layer.1": 60.0123, "encoder_q-layer.10": 134.0798, "encoder_q-layer.11": 215.8945, "encoder_q-layer.2": 64.3689, "encoder_q-layer.3": 64.6682, "encoder_q-layer.4": 67.0823, "encoder_q-layer.5": 65.7499, "encoder_q-layer.6": 74.431, "encoder_q-layer.7": 80.3353, "encoder_q-layer.8": 93.8164, "encoder_q-layer.9": 94.9376, "epoch": 0.06, "inbatch_neg_score": 50.9166, "inbatch_pos_score": 51.3438, "learning_rate": 2.95e-05, "loss": 2.8924, "norm_diff": 0.0112, "num_tokens_overlap": 5.5876, "num_tokens_union": 55.0927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 141.787, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2163, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7084, "sent_len_1": 66.9566, "sent_len_max_0": 18.8562, "sent_len_max_1": 190.175, "stdk": 0.039, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 5900 }, { "accuracy": 46.3867, "doc_norm": 7.2297, "encoder_q-embeddings": 92.3296, "encoder_q-layer.0": 62.375, "encoder_q-layer.1": 64.0155, "encoder_q-layer.10": 216.7428, "encoder_q-layer.11": 264.2921, "encoder_q-layer.2": 66.8248, "encoder_q-layer.3": 70.2095, "encoder_q-layer.4": 74.7065, "encoder_q-layer.5": 73.2597, "encoder_q-layer.6": 84.1114, "encoder_q-layer.7": 96.582, "encoder_q-layer.8": 135.42, "encoder_q-layer.9": 158.5029, "epoch": 0.06, "inbatch_neg_score": 51.002, "inbatch_pos_score": 51.4062, "learning_rate": 3e-05, "loss": 2.8589, "norm_diff": 0.0078, "num_tokens_overlap": 5.5698, "num_tokens_union": 54.907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 178.1373, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2219, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7365, "sent_len_1": 66.5598, "sent_len_max_0": 18.8313, "sent_len_max_1": 189.725, "stdk": 0.0383, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 6000 }, { "accuracy": 52.6367, "doc_norm": 7.2265, "encoder_q-embeddings": 180.0557, "encoder_q-layer.0": 121.8945, "encoder_q-layer.1": 124.3041, "encoder_q-layer.10": 340.5005, "encoder_q-layer.11": 482.1429, "encoder_q-layer.2": 130.5497, "encoder_q-layer.3": 132.9833, "encoder_q-layer.4": 135.0971, "encoder_q-layer.5": 131.6101, "encoder_q-layer.6": 152.1528, "encoder_q-layer.7": 178.2399, "encoder_q-layer.8": 227.4774, "encoder_q-layer.9": 233.6731, "epoch": 0.06, "inbatch_neg_score": 50.8972, "inbatch_pos_score": 51.3438, "learning_rate": 3.05e-05, "loss": 2.8097, "norm_diff": 0.0119, "num_tokens_overlap": 5.5735, "num_tokens_union": 54.9607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 313.6809, "preclip_grad_norm_avg": 0.0, "query_norm": 7.2146, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6949, "sent_len_1": 66.6747, "sent_len_max_0": 18.8987, "sent_len_max_1": 187.04, "stdk": 0.0396, "stdq": 0.039, "stdqueue_k": 0.0, "step": 6100 }, { "accuracy": 47.6562, "doc_norm": 7.203, "encoder_q-embeddings": 179.0519, "encoder_q-layer.0": 122.9647, "encoder_q-layer.1": 122.4603, "encoder_q-layer.10": 281.7508, "encoder_q-layer.11": 449.4203, "encoder_q-layer.2": 128.2305, "encoder_q-layer.3": 130.0198, "encoder_q-layer.4": 132.9282, "encoder_q-layer.5": 134.0403, "encoder_q-layer.6": 155.2356, "encoder_q-layer.7": 166.6879, "encoder_q-layer.8": 197.6128, "encoder_q-layer.9": 202.603, "epoch": 0.06, "inbatch_neg_score": 50.5838, "inbatch_pos_score": 51.0, "learning_rate": 3.1e-05, "loss": 2.8627, "norm_diff": 0.0115, "num_tokens_overlap": 5.5825, "num_tokens_union": 54.9287, "postclip_grad_norm": 1.0, "preclip_grad_norm": 293.4325, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1914, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7402, "sent_len_1": 66.6127, "sent_len_max_0": 18.7838, "sent_len_max_1": 189.6213, "stdk": 0.0385, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 6200 }, { "accuracy": 47.8516, "doc_norm": 7.1917, "encoder_q-embeddings": 183.2375, "encoder_q-layer.0": 127.1077, "encoder_q-layer.1": 119.4236, "encoder_q-layer.10": 304.2175, "encoder_q-layer.11": 443.7066, "encoder_q-layer.2": 127.2498, "encoder_q-layer.3": 134.7224, "encoder_q-layer.4": 135.566, "encoder_q-layer.5": 131.5587, "encoder_q-layer.6": 151.2444, "encoder_q-layer.7": 151.4393, "encoder_q-layer.8": 184.1524, "encoder_q-layer.9": 197.7512, "epoch": 0.06, "inbatch_neg_score": 50.4047, "inbatch_pos_score": 50.8438, "learning_rate": 3.15e-05, "loss": 2.8345, "norm_diff": 0.0109, "num_tokens_overlap": 5.5894, "num_tokens_union": 54.9379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 290.3616, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1808, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7138, "sent_len_1": 66.7666, "sent_len_max_0": 18.7763, "sent_len_max_1": 190.4675, "stdk": 0.0387, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 6300 }, { "accuracy": 49.6094, "doc_norm": 7.1857, "encoder_q-embeddings": 180.8956, "encoder_q-layer.0": 122.0565, "encoder_q-layer.1": 121.3337, "encoder_q-layer.10": 263.0206, "encoder_q-layer.11": 437.674, "encoder_q-layer.2": 130.8127, "encoder_q-layer.3": 129.7541, "encoder_q-layer.4": 133.3807, "encoder_q-layer.5": 130.8784, "encoder_q-layer.6": 145.0253, "encoder_q-layer.7": 151.9375, "encoder_q-layer.8": 184.7718, "encoder_q-layer.9": 184.6716, "epoch": 0.06, "inbatch_neg_score": 50.3188, "inbatch_pos_score": 50.75, "learning_rate": 3.2000000000000005e-05, "loss": 2.8496, "norm_diff": 0.0124, "num_tokens_overlap": 5.5779, "num_tokens_union": 54.8821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 282.926, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1733, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7233, "sent_len_1": 66.633, "sent_len_max_0": 18.8875, "sent_len_max_1": 192.19, "stdk": 0.0389, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 6400 }, { "accuracy": 48.6328, "doc_norm": 7.173, "encoder_q-embeddings": 176.1617, "encoder_q-layer.0": 119.6275, "encoder_q-layer.1": 120.5409, "encoder_q-layer.10": 297.601, "encoder_q-layer.11": 472.6913, "encoder_q-layer.2": 128.2263, "encoder_q-layer.3": 131.7874, "encoder_q-layer.4": 136.8845, "encoder_q-layer.5": 138.601, "encoder_q-layer.6": 157.8264, "encoder_q-layer.7": 170.305, "encoder_q-layer.8": 190.317, "encoder_q-layer.9": 209.6799, "epoch": 0.06, "inbatch_neg_score": 50.1725, "inbatch_pos_score": 50.5938, "learning_rate": 3.2500000000000004e-05, "loss": 2.8204, "norm_diff": 0.0103, "num_tokens_overlap": 5.5795, "num_tokens_union": 54.9107, "postclip_grad_norm": 1.0, "preclip_grad_norm": 299.0692, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1627, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7216, "sent_len_1": 66.6203, "sent_len_max_0": 19.0375, "sent_len_max_1": 190.645, "stdk": 0.0387, "stdq": 0.0377, "stdqueue_k": 0.0, "step": 6500 }, { "accuracy": 50.5859, "doc_norm": 7.1589, "encoder_q-embeddings": 163.225, "encoder_q-layer.0": 111.0154, "encoder_q-layer.1": 113.84, "encoder_q-layer.10": 297.0187, "encoder_q-layer.11": 419.0809, "encoder_q-layer.2": 120.4143, "encoder_q-layer.3": 121.4759, "encoder_q-layer.4": 122.8888, "encoder_q-layer.5": 121.3321, "encoder_q-layer.6": 139.057, "encoder_q-layer.7": 147.2187, "encoder_q-layer.8": 176.8526, "encoder_q-layer.9": 201.9382, "epoch": 0.06, "inbatch_neg_score": 50.0022, "inbatch_pos_score": 50.4375, "learning_rate": 3.3e-05, "loss": 2.8288, "norm_diff": 0.0101, "num_tokens_overlap": 5.5853, "num_tokens_union": 54.993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 276.815, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1488, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7222, "sent_len_1": 66.7525, "sent_len_max_0": 18.8637, "sent_len_max_1": 186.7312, "stdk": 0.0383, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 6600 }, { "accuracy": 50.6836, "doc_norm": 7.1604, "encoder_q-embeddings": 181.8841, "encoder_q-layer.0": 123.8199, "encoder_q-layer.1": 120.235, "encoder_q-layer.10": 309.7723, "encoder_q-layer.11": 473.9697, "encoder_q-layer.2": 127.9758, "encoder_q-layer.3": 130.1879, "encoder_q-layer.4": 136.6262, "encoder_q-layer.5": 137.9985, "encoder_q-layer.6": 152.9162, "encoder_q-layer.7": 172.9394, "encoder_q-layer.8": 193.1227, "encoder_q-layer.9": 215.4159, "epoch": 0.07, "inbatch_neg_score": 49.9264, "inbatch_pos_score": 50.375, "learning_rate": 3.35e-05, "loss": 2.8124, "norm_diff": 0.0124, "num_tokens_overlap": 5.5844, "num_tokens_union": 55.0266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 299.2817, "preclip_grad_norm_avg": 0.0, "query_norm": 7.148, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7464, "sent_len_1": 66.7729, "sent_len_max_0": 18.8362, "sent_len_max_1": 188.3075, "stdk": 0.0403, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 6700 }, { "accuracy": 49.0234, "doc_norm": 7.1492, "encoder_q-embeddings": 189.1025, "encoder_q-layer.0": 128.5149, "encoder_q-layer.1": 125.6055, "encoder_q-layer.10": 272.189, "encoder_q-layer.11": 418.4303, "encoder_q-layer.2": 131.7304, "encoder_q-layer.3": 132.165, "encoder_q-layer.4": 135.0464, "encoder_q-layer.5": 132.4132, "encoder_q-layer.6": 144.5857, "encoder_q-layer.7": 154.2517, "encoder_q-layer.8": 184.1914, "encoder_q-layer.9": 186.799, "epoch": 0.07, "inbatch_neg_score": 49.7898, "inbatch_pos_score": 50.2188, "learning_rate": 3.4000000000000007e-05, "loss": 2.8045, "norm_diff": 0.012, "num_tokens_overlap": 5.5582, "num_tokens_union": 54.9853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 282.9026, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1372, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.682, "sent_len_1": 66.7693, "sent_len_max_0": 18.8313, "sent_len_max_1": 190.6712, "stdk": 0.0398, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 6800 }, { "accuracy": 50.1953, "doc_norm": 7.133, "encoder_q-embeddings": 171.8804, "encoder_q-layer.0": 117.4598, "encoder_q-layer.1": 116.6039, "encoder_q-layer.10": 418.2762, "encoder_q-layer.11": 540.6285, "encoder_q-layer.2": 128.624, "encoder_q-layer.3": 129.5153, "encoder_q-layer.4": 133.8165, "encoder_q-layer.5": 133.5878, "encoder_q-layer.6": 148.2207, "encoder_q-layer.7": 173.0632, "encoder_q-layer.8": 221.6319, "encoder_q-layer.9": 273.8593, "epoch": 0.07, "inbatch_neg_score": 49.5908, "inbatch_pos_score": 50.0, "learning_rate": 3.45e-05, "loss": 2.8018, "norm_diff": 0.0126, "num_tokens_overlap": 5.5782, "num_tokens_union": 55.0783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 333.1966, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1204, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7324, "sent_len_1": 66.8867, "sent_len_max_0": 18.8738, "sent_len_max_1": 188.5563, "stdk": 0.0395, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 6900 }, { "accuracy": 46.7773, "doc_norm": 7.1342, "encoder_q-embeddings": 185.9694, "encoder_q-layer.0": 129.3501, "encoder_q-layer.1": 129.7969, "encoder_q-layer.10": 307.8031, "encoder_q-layer.11": 556.5276, "encoder_q-layer.2": 139.6526, "encoder_q-layer.3": 141.4785, "encoder_q-layer.4": 141.7079, "encoder_q-layer.5": 141.8676, "encoder_q-layer.6": 160.5707, "encoder_q-layer.7": 173.8863, "encoder_q-layer.8": 208.0288, "encoder_q-layer.9": 206.1568, "epoch": 0.07, "inbatch_neg_score": 49.5272, "inbatch_pos_score": 49.9375, "learning_rate": 3.5e-05, "loss": 2.8046, "norm_diff": 0.0164, "num_tokens_overlap": 5.5724, "num_tokens_union": 54.8781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 326.2539, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1178, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7028, "sent_len_1": 66.5546, "sent_len_max_0": 18.9288, "sent_len_max_1": 190.1875, "stdk": 0.0393, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 7000 }, { "accuracy": 49.2188, "doc_norm": 7.1223, "encoder_q-embeddings": 174.7246, "encoder_q-layer.0": 120.2831, "encoder_q-layer.1": 119.0489, "encoder_q-layer.10": 272.3337, "encoder_q-layer.11": 450.2322, "encoder_q-layer.2": 125.4265, "encoder_q-layer.3": 129.5128, "encoder_q-layer.4": 129.6485, "encoder_q-layer.5": 126.2432, "encoder_q-layer.6": 145.0661, "encoder_q-layer.7": 167.4868, "encoder_q-layer.8": 191.8309, "encoder_q-layer.9": 190.0528, "epoch": 0.07, "inbatch_neg_score": 49.4217, "inbatch_pos_score": 49.8438, "learning_rate": 3.55e-05, "loss": 2.7857, "norm_diff": 0.0113, "num_tokens_overlap": 5.5774, "num_tokens_union": 55.1353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 286.4665, "preclip_grad_norm_avg": 0.0, "query_norm": 7.111, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7225, "sent_len_1": 66.8945, "sent_len_max_0": 18.8588, "sent_len_max_1": 187.7988, "stdk": 0.039, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 7100 }, { "accuracy": 49.707, "doc_norm": 7.1152, "encoder_q-embeddings": 165.3068, "encoder_q-layer.0": 115.7189, "encoder_q-layer.1": 117.8233, "encoder_q-layer.10": 247.4297, "encoder_q-layer.11": 419.6462, "encoder_q-layer.2": 122.6204, "encoder_q-layer.3": 130.088, "encoder_q-layer.4": 128.1828, "encoder_q-layer.5": 124.5647, "encoder_q-layer.6": 143.7417, "encoder_q-layer.7": 147.6933, "encoder_q-layer.8": 171.6893, "encoder_q-layer.9": 183.9531, "epoch": 0.07, "inbatch_neg_score": 49.33, "inbatch_pos_score": 49.75, "learning_rate": 3.6e-05, "loss": 2.7672, "norm_diff": 0.0108, "num_tokens_overlap": 5.5823, "num_tokens_union": 55.0008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 268.8153, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1045, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7243, "sent_len_1": 66.785, "sent_len_max_0": 18.845, "sent_len_max_1": 189.4387, "stdk": 0.0392, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 7200 }, { "accuracy": 49.8047, "doc_norm": 7.1122, "encoder_q-embeddings": 178.4379, "encoder_q-layer.0": 121.411, "encoder_q-layer.1": 117.9437, "encoder_q-layer.10": 320.0771, "encoder_q-layer.11": 438.9532, "encoder_q-layer.2": 128.8754, "encoder_q-layer.3": 130.9278, "encoder_q-layer.4": 139.2527, "encoder_q-layer.5": 137.361, "encoder_q-layer.6": 158.4315, "encoder_q-layer.7": 176.3903, "encoder_q-layer.8": 195.9603, "encoder_q-layer.9": 207.1845, "epoch": 0.07, "inbatch_neg_score": 49.1936, "inbatch_pos_score": 49.6562, "learning_rate": 3.65e-05, "loss": 2.7449, "norm_diff": 0.014, "num_tokens_overlap": 5.58, "num_tokens_union": 54.9056, "postclip_grad_norm": 1.0, "preclip_grad_norm": 293.9541, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0982, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7153, "sent_len_1": 66.6091, "sent_len_max_0": 18.75, "sent_len_max_1": 186.7237, "stdk": 0.0398, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 7300 }, { "accuracy": 45.5078, "doc_norm": 7.1035, "encoder_q-embeddings": 170.8649, "encoder_q-layer.0": 119.99, "encoder_q-layer.1": 115.9427, "encoder_q-layer.10": 256.6625, "encoder_q-layer.11": 398.7049, "encoder_q-layer.2": 125.8536, "encoder_q-layer.3": 127.774, "encoder_q-layer.4": 128.4686, "encoder_q-layer.5": 124.5698, "encoder_q-layer.6": 142.4948, "encoder_q-layer.7": 155.2965, "encoder_q-layer.8": 172.9562, "encoder_q-layer.9": 188.629, "epoch": 0.07, "inbatch_neg_score": 49.1678, "inbatch_pos_score": 49.5625, "learning_rate": 3.7e-05, "loss": 2.8062, "norm_diff": 0.0125, "num_tokens_overlap": 5.5871, "num_tokens_union": 54.9433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 267.2935, "preclip_grad_norm_avg": 0.0, "query_norm": 7.091, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7406, "sent_len_1": 66.6123, "sent_len_max_0": 18.9187, "sent_len_max_1": 188.4563, "stdk": 0.0385, "stdq": 0.0376, "stdqueue_k": 0.0, "step": 7400 }, { "accuracy": 48.0469, "doc_norm": 7.0982, "encoder_q-embeddings": 197.8332, "encoder_q-layer.0": 124.8296, "encoder_q-layer.1": 121.2748, "encoder_q-layer.10": 285.5076, "encoder_q-layer.11": 442.1642, "encoder_q-layer.2": 129.2373, "encoder_q-layer.3": 141.2069, "encoder_q-layer.4": 144.7186, "encoder_q-layer.5": 145.1203, "encoder_q-layer.6": 168.7468, "encoder_q-layer.7": 193.8142, "encoder_q-layer.8": 216.2962, "encoder_q-layer.9": 220.57, "epoch": 0.07, "inbatch_neg_score": 49.0585, "inbatch_pos_score": 49.5, "learning_rate": 3.7500000000000003e-05, "loss": 2.7953, "norm_diff": 0.0126, "num_tokens_overlap": 5.5774, "num_tokens_union": 55.1019, "postclip_grad_norm": 1.0, "preclip_grad_norm": 304.563, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0856, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7022, "sent_len_1": 66.9692, "sent_len_max_0": 18.83, "sent_len_max_1": 187.8013, "stdk": 0.0387, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 7500 }, { "accuracy": 48.4375, "doc_norm": 7.0944, "encoder_q-embeddings": 182.4572, "encoder_q-layer.0": 122.7374, "encoder_q-layer.1": 121.456, "encoder_q-layer.10": 429.0602, "encoder_q-layer.11": 479.122, "encoder_q-layer.2": 134.1838, "encoder_q-layer.3": 137.2151, "encoder_q-layer.4": 148.9778, "encoder_q-layer.5": 149.7973, "encoder_q-layer.6": 176.4563, "encoder_q-layer.7": 198.674, "encoder_q-layer.8": 244.3774, "encoder_q-layer.9": 269.0052, "epoch": 0.07, "inbatch_neg_score": 48.9361, "inbatch_pos_score": 49.4062, "learning_rate": 3.8e-05, "loss": 2.7527, "norm_diff": 0.0135, "num_tokens_overlap": 5.581, "num_tokens_union": 55.0256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 333.9704, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0809, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6907, "sent_len_1": 66.8616, "sent_len_max_0": 18.7937, "sent_len_max_1": 188.9875, "stdk": 0.0399, "stdq": 0.039, "stdqueue_k": 0.0, "step": 7600 }, { "accuracy": 50.1953, "doc_norm": 7.0839, "encoder_q-embeddings": 180.6118, "encoder_q-layer.0": 124.1732, "encoder_q-layer.1": 123.8427, "encoder_q-layer.10": 234.2885, "encoder_q-layer.11": 404.8853, "encoder_q-layer.2": 128.6452, "encoder_q-layer.3": 129.9267, "encoder_q-layer.4": 130.8904, "encoder_q-layer.5": 128.2981, "encoder_q-layer.6": 142.3889, "encoder_q-layer.7": 149.2071, "encoder_q-layer.8": 177.9496, "encoder_q-layer.9": 178.9732, "epoch": 0.08, "inbatch_neg_score": 48.8383, "inbatch_pos_score": 49.3125, "learning_rate": 3.85e-05, "loss": 2.7771, "norm_diff": 0.0117, "num_tokens_overlap": 5.5866, "num_tokens_union": 55.0202, "postclip_grad_norm": 1.0, "preclip_grad_norm": 270.3138, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0721, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.717, "sent_len_1": 66.8311, "sent_len_max_0": 18.8625, "sent_len_max_1": 189.4462, "stdk": 0.0391, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 7700 }, { "accuracy": 47.6562, "doc_norm": 7.0818, "encoder_q-embeddings": 167.6215, "encoder_q-layer.0": 112.3096, "encoder_q-layer.1": 111.409, "encoder_q-layer.10": 328.7772, "encoder_q-layer.11": 468.3002, "encoder_q-layer.2": 122.2754, "encoder_q-layer.3": 127.6537, "encoder_q-layer.4": 129.1735, "encoder_q-layer.5": 132.5505, "encoder_q-layer.6": 163.7582, "encoder_q-layer.7": 170.3032, "encoder_q-layer.8": 190.8965, "encoder_q-layer.9": 206.0972, "epoch": 0.08, "inbatch_neg_score": 48.8768, "inbatch_pos_score": 49.2812, "learning_rate": 3.9000000000000006e-05, "loss": 2.7805, "norm_diff": 0.0131, "num_tokens_overlap": 5.5737, "num_tokens_union": 55.1012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 296.6122, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0688, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6931, "sent_len_1": 66.9305, "sent_len_max_0": 18.93, "sent_len_max_1": 190.9863, "stdk": 0.0383, "stdq": 0.0377, "stdqueue_k": 0.0, "step": 7800 }, { "accuracy": 50.3906, "doc_norm": 7.0816, "encoder_q-embeddings": 174.392, "encoder_q-layer.0": 122.1721, "encoder_q-layer.1": 120.0512, "encoder_q-layer.10": 274.0211, "encoder_q-layer.11": 427.8618, "encoder_q-layer.2": 126.9087, "encoder_q-layer.3": 129.9382, "encoder_q-layer.4": 131.3514, "encoder_q-layer.5": 130.4783, "encoder_q-layer.6": 139.8686, "encoder_q-layer.7": 150.4748, "encoder_q-layer.8": 176.9709, "encoder_q-layer.9": 184.2227, "epoch": 0.08, "inbatch_neg_score": 48.7723, "inbatch_pos_score": 49.2188, "learning_rate": 3.9500000000000005e-05, "loss": 2.7484, "norm_diff": 0.0134, "num_tokens_overlap": 5.585, "num_tokens_union": 55.0411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 277.3991, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0682, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.727, "sent_len_1": 66.8541, "sent_len_max_0": 18.7563, "sent_len_max_1": 190.3512, "stdk": 0.0401, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 7900 }, { "accuracy": 48.4375, "doc_norm": 7.0682, "encoder_q-embeddings": 166.8816, "encoder_q-layer.0": 117.5408, "encoder_q-layer.1": 117.7629, "encoder_q-layer.10": 318.8311, "encoder_q-layer.11": 454.1931, "encoder_q-layer.2": 129.3703, "encoder_q-layer.3": 130.5492, "encoder_q-layer.4": 130.8045, "encoder_q-layer.5": 129.691, "encoder_q-layer.6": 154.1625, "encoder_q-layer.7": 161.6813, "encoder_q-layer.8": 202.737, "encoder_q-layer.9": 202.4877, "epoch": 0.08, "inbatch_neg_score": 48.6235, "inbatch_pos_score": 49.0625, "learning_rate": 4e-05, "loss": 2.7111, "norm_diff": 0.0115, "num_tokens_overlap": 5.5787, "num_tokens_union": 54.9545, "postclip_grad_norm": 1.0, "preclip_grad_norm": 290.9065, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0567, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7325, "sent_len_1": 66.7172, "sent_len_max_0": 18.9, "sent_len_max_1": 189.7237, "stdk": 0.0398, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 8000 }, { "accuracy": 48.3398, "doc_norm": 7.0754, "encoder_q-embeddings": 320.4355, "encoder_q-layer.0": 222.6, "encoder_q-layer.1": 216.115, "encoder_q-layer.10": 577.0467, "encoder_q-layer.11": 875.7931, "encoder_q-layer.2": 237.9823, "encoder_q-layer.3": 244.1683, "encoder_q-layer.4": 253.3378, "encoder_q-layer.5": 242.7558, "encoder_q-layer.6": 277.123, "encoder_q-layer.7": 346.2018, "encoder_q-layer.8": 366.7809, "encoder_q-layer.9": 379.6499, "epoch": 0.08, "inbatch_neg_score": 48.7544, "inbatch_pos_score": 49.1875, "learning_rate": 4.05e-05, "loss": 2.7463, "norm_diff": 0.0122, "num_tokens_overlap": 5.5779, "num_tokens_union": 54.892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 551.397, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0631, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7158, "sent_len_1": 66.5886, "sent_len_max_0": 18.9812, "sent_len_max_1": 187.9913, "stdk": 0.0388, "stdq": 0.038, "stdqueue_k": 0.0, "step": 8100 }, { "accuracy": 51.1719, "doc_norm": 7.0772, "encoder_q-embeddings": 319.2898, "encoder_q-layer.0": 230.3888, "encoder_q-layer.1": 228.9651, "encoder_q-layer.10": 462.1965, "encoder_q-layer.11": 799.8048, "encoder_q-layer.2": 244.7878, "encoder_q-layer.3": 246.308, "encoder_q-layer.4": 251.9473, "encoder_q-layer.5": 252.7272, "encoder_q-layer.6": 269.7748, "encoder_q-layer.7": 325.4489, "encoder_q-layer.8": 366.9361, "encoder_q-layer.9": 340.1211, "epoch": 0.08, "inbatch_neg_score": 48.7502, "inbatch_pos_score": 49.1875, "learning_rate": 4.1e-05, "loss": 2.7308, "norm_diff": 0.0126, "num_tokens_overlap": 5.5855, "num_tokens_union": 55.0362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 521.4959, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0646, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7325, "sent_len_1": 66.8025, "sent_len_max_0": 18.7475, "sent_len_max_1": 188.9387, "stdk": 0.0398, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 8200 }, { "accuracy": 52.0508, "doc_norm": 7.0692, "encoder_q-embeddings": 322.1541, "encoder_q-layer.0": 215.4633, "encoder_q-layer.1": 218.5941, "encoder_q-layer.10": 597.0316, "encoder_q-layer.11": 815.0615, "encoder_q-layer.2": 242.0481, "encoder_q-layer.3": 247.5721, "encoder_q-layer.4": 258.9035, "encoder_q-layer.5": 262.7281, "encoder_q-layer.6": 304.4714, "encoder_q-layer.7": 327.9889, "encoder_q-layer.8": 399.8048, "encoder_q-layer.9": 437.308, "epoch": 0.08, "inbatch_neg_score": 48.6877, "inbatch_pos_score": 49.125, "learning_rate": 4.15e-05, "loss": 2.779, "norm_diff": 0.0116, "num_tokens_overlap": 5.5802, "num_tokens_union": 55.081, "postclip_grad_norm": 1.0, "preclip_grad_norm": 552.1802, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0575, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7099, "sent_len_1": 66.8871, "sent_len_max_0": 18.8925, "sent_len_max_1": 187.8113, "stdk": 0.0385, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 8300 }, { "accuracy": 46.9727, "doc_norm": 7.0682, "encoder_q-embeddings": 344.3846, "encoder_q-layer.0": 234.7399, "encoder_q-layer.1": 238.6707, "encoder_q-layer.10": 587.5576, "encoder_q-layer.11": 877.6313, "encoder_q-layer.2": 256.0522, "encoder_q-layer.3": 260.3961, "encoder_q-layer.4": 264.675, "encoder_q-layer.5": 262.646, "encoder_q-layer.6": 305.2932, "encoder_q-layer.7": 330.1585, "encoder_q-layer.8": 394.567, "encoder_q-layer.9": 410.6457, "epoch": 0.08, "inbatch_neg_score": 48.6006, "inbatch_pos_score": 49.0312, "learning_rate": 4.2e-05, "loss": 2.743, "norm_diff": 0.0137, "num_tokens_overlap": 5.5742, "num_tokens_union": 54.8061, "postclip_grad_norm": 1.0, "preclip_grad_norm": 570.7438, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0545, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7102, "sent_len_1": 66.4104, "sent_len_max_0": 18.8275, "sent_len_max_1": 188.5087, "stdk": 0.0403, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 8400 }, { "accuracy": 49.9023, "doc_norm": 7.069, "encoder_q-embeddings": 328.8143, "encoder_q-layer.0": 231.2011, "encoder_q-layer.1": 230.2982, "encoder_q-layer.10": 514.0532, "encoder_q-layer.11": 774.6528, "encoder_q-layer.2": 249.2679, "encoder_q-layer.3": 254.998, "encoder_q-layer.4": 253.2408, "encoder_q-layer.5": 254.3203, "encoder_q-layer.6": 303.477, "encoder_q-layer.7": 327.8961, "encoder_q-layer.8": 350.1881, "encoder_q-layer.9": 363.3672, "epoch": 0.08, "inbatch_neg_score": 48.6701, "inbatch_pos_score": 49.0938, "learning_rate": 4.25e-05, "loss": 2.7427, "norm_diff": 0.009, "num_tokens_overlap": 5.5746, "num_tokens_union": 55.079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 524.5077, "preclip_grad_norm_avg": 0.0, "query_norm": 7.06, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7178, "sent_len_1": 66.8741, "sent_len_max_0": 18.765, "sent_len_max_1": 188.3675, "stdk": 0.039, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 8500 }, { "accuracy": 50.4883, "doc_norm": 7.0703, "encoder_q-embeddings": 359.2561, "encoder_q-layer.0": 231.9334, "encoder_q-layer.1": 226.1098, "encoder_q-layer.10": 491.0881, "encoder_q-layer.11": 787.7576, "encoder_q-layer.2": 242.3127, "encoder_q-layer.3": 244.3342, "encoder_q-layer.4": 261.8193, "encoder_q-layer.5": 267.615, "encoder_q-layer.6": 283.1466, "encoder_q-layer.7": 309.8337, "encoder_q-layer.8": 352.8877, "encoder_q-layer.9": 361.6452, "epoch": 0.08, "inbatch_neg_score": 48.6648, "inbatch_pos_score": 49.125, "learning_rate": 4.3e-05, "loss": 2.7428, "norm_diff": 0.014, "num_tokens_overlap": 5.5576, "num_tokens_union": 54.8642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 534.7091, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0563, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6876, "sent_len_1": 66.5714, "sent_len_max_0": 18.8712, "sent_len_max_1": 190.0337, "stdk": 0.0381, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 8600 }, { "accuracy": 49.2188, "doc_norm": 7.0802, "encoder_q-embeddings": 361.9117, "encoder_q-layer.0": 240.7684, "encoder_q-layer.1": 243.0259, "encoder_q-layer.10": 418.9084, "encoder_q-layer.11": 760.6833, "encoder_q-layer.2": 258.4816, "encoder_q-layer.3": 262.7701, "encoder_q-layer.4": 265.9312, "encoder_q-layer.5": 260.4848, "encoder_q-layer.6": 294.8222, "encoder_q-layer.7": 328.3493, "encoder_q-layer.8": 339.6386, "encoder_q-layer.9": 326.7654, "epoch": 0.08, "inbatch_neg_score": 48.7439, "inbatch_pos_score": 49.1875, "learning_rate": 4.35e-05, "loss": 2.7341, "norm_diff": 0.015, "num_tokens_overlap": 5.5908, "num_tokens_union": 55.0836, "postclip_grad_norm": 1.0, "preclip_grad_norm": 524.3797, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0652, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7373, "sent_len_1": 66.8923, "sent_len_max_0": 18.8625, "sent_len_max_1": 189.9688, "stdk": 0.0389, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 8700 }, { "accuracy": 50.3906, "doc_norm": 7.0846, "encoder_q-embeddings": 338.4961, "encoder_q-layer.0": 232.1058, "encoder_q-layer.1": 226.4026, "encoder_q-layer.10": 491.1982, "encoder_q-layer.11": 825.7674, "encoder_q-layer.2": 246.3261, "encoder_q-layer.3": 243.6477, "encoder_q-layer.4": 245.3072, "encoder_q-layer.5": 244.6311, "encoder_q-layer.6": 272.782, "encoder_q-layer.7": 283.8136, "encoder_q-layer.8": 331.5516, "encoder_q-layer.9": 329.5518, "epoch": 0.09, "inbatch_neg_score": 48.8253, "inbatch_pos_score": 49.2812, "learning_rate": 4.4000000000000006e-05, "loss": 2.7258, "norm_diff": 0.0144, "num_tokens_overlap": 5.5861, "num_tokens_union": 54.9695, "postclip_grad_norm": 1.0, "preclip_grad_norm": 529.6155, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0702, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7267, "sent_len_1": 66.7606, "sent_len_max_0": 18.83, "sent_len_max_1": 189.9963, "stdk": 0.0408, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 8800 }, { "accuracy": 50.0, "doc_norm": 7.0858, "encoder_q-embeddings": 362.3266, "encoder_q-layer.0": 244.8467, "encoder_q-layer.1": 243.8945, "encoder_q-layer.10": 541.1585, "encoder_q-layer.11": 834.4323, "encoder_q-layer.2": 262.1297, "encoder_q-layer.3": 276.4298, "encoder_q-layer.4": 278.8768, "encoder_q-layer.5": 257.9373, "encoder_q-layer.6": 277.4548, "encoder_q-layer.7": 300.0722, "encoder_q-layer.8": 365.4904, "encoder_q-layer.9": 344.2726, "epoch": 0.09, "inbatch_neg_score": 48.8727, "inbatch_pos_score": 49.3125, "learning_rate": 4.4500000000000004e-05, "loss": 2.7354, "norm_diff": 0.0109, "num_tokens_overlap": 5.5699, "num_tokens_union": 54.8851, "postclip_grad_norm": 1.0, "preclip_grad_norm": 555.4682, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0749, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6845, "sent_len_1": 66.6149, "sent_len_max_0": 18.9062, "sent_len_max_1": 188.9863, "stdk": 0.0405, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 8900 }, { "accuracy": 49.9023, "doc_norm": 7.0786, "encoder_q-embeddings": 308.5774, "encoder_q-layer.0": 221.6529, "encoder_q-layer.1": 218.4532, "encoder_q-layer.10": 435.2959, "encoder_q-layer.11": 782.1512, "encoder_q-layer.2": 234.794, "encoder_q-layer.3": 237.9814, "encoder_q-layer.4": 249.6585, "encoder_q-layer.5": 236.7941, "encoder_q-layer.6": 254.385, "encoder_q-layer.7": 261.8575, "encoder_q-layer.8": 310.841, "encoder_q-layer.9": 322.8547, "epoch": 0.09, "inbatch_neg_score": 48.8034, "inbatch_pos_score": 49.25, "learning_rate": 4.5e-05, "loss": 2.7191, "norm_diff": 0.0122, "num_tokens_overlap": 5.5793, "num_tokens_union": 55.0113, "postclip_grad_norm": 1.0, "preclip_grad_norm": 502.3342, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0664, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7126, "sent_len_1": 66.7308, "sent_len_max_0": 18.7387, "sent_len_max_1": 189.775, "stdk": 0.0382, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 9000 }, { "accuracy": 49.2188, "doc_norm": 7.0799, "encoder_q-embeddings": 343.2187, "encoder_q-layer.0": 241.9198, "encoder_q-layer.1": 236.1812, "encoder_q-layer.10": 479.7291, "encoder_q-layer.11": 844.3557, "encoder_q-layer.2": 246.1434, "encoder_q-layer.3": 251.5905, "encoder_q-layer.4": 262.3069, "encoder_q-layer.5": 252.7324, "encoder_q-layer.6": 280.8347, "encoder_q-layer.7": 294.2088, "encoder_q-layer.8": 355.9575, "encoder_q-layer.9": 350.7682, "epoch": 0.09, "inbatch_neg_score": 48.7254, "inbatch_pos_score": 49.1875, "learning_rate": 4.55e-05, "loss": 2.7105, "norm_diff": 0.016, "num_tokens_overlap": 5.5784, "num_tokens_union": 55.0763, "postclip_grad_norm": 1.0, "preclip_grad_norm": 538.9884, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0639, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7176, "sent_len_1": 66.8702, "sent_len_max_0": 18.7888, "sent_len_max_1": 191.33, "stdk": 0.0405, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 9100 }, { "accuracy": 48.9258, "doc_norm": 7.0684, "encoder_q-embeddings": 358.2409, "encoder_q-layer.0": 238.7817, "encoder_q-layer.1": 232.7826, "encoder_q-layer.10": 449.228, "encoder_q-layer.11": 742.5127, "encoder_q-layer.2": 249.5504, "encoder_q-layer.3": 250.6623, "encoder_q-layer.4": 256.7518, "encoder_q-layer.5": 257.609, "encoder_q-layer.6": 272.5652, "encoder_q-layer.7": 280.0581, "encoder_q-layer.8": 352.2604, "encoder_q-layer.9": 361.6331, "epoch": 0.09, "inbatch_neg_score": 48.6372, "inbatch_pos_score": 49.0625, "learning_rate": 4.600000000000001e-05, "loss": 2.7631, "norm_diff": 0.0121, "num_tokens_overlap": 5.572, "num_tokens_union": 55.0259, "postclip_grad_norm": 1.0, "preclip_grad_norm": 519.7415, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0563, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.699, "sent_len_1": 66.8308, "sent_len_max_0": 18.7625, "sent_len_max_1": 190.83, "stdk": 0.0387, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 9200 }, { "accuracy": 51.9531, "doc_norm": 7.0644, "encoder_q-embeddings": 322.3112, "encoder_q-layer.0": 220.0885, "encoder_q-layer.1": 220.7519, "encoder_q-layer.10": 517.6379, "encoder_q-layer.11": 802.1455, "encoder_q-layer.2": 237.2225, "encoder_q-layer.3": 236.6648, "encoder_q-layer.4": 246.7802, "encoder_q-layer.5": 236.471, "encoder_q-layer.6": 263.0403, "encoder_q-layer.7": 278.2198, "encoder_q-layer.8": 321.1451, "encoder_q-layer.9": 337.2242, "epoch": 0.09, "inbatch_neg_score": 48.5711, "inbatch_pos_score": 49.0312, "learning_rate": 4.6500000000000005e-05, "loss": 2.6956, "norm_diff": 0.013, "num_tokens_overlap": 5.5936, "num_tokens_union": 55.0774, "postclip_grad_norm": 1.0, "preclip_grad_norm": 517.0227, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0514, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7528, "sent_len_1": 66.8309, "sent_len_max_0": 18.885, "sent_len_max_1": 187.165, "stdk": 0.0391, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 9300 }, { "accuracy": 47.2656, "doc_norm": 7.0778, "encoder_q-embeddings": 350.5266, "encoder_q-layer.0": 243.7677, "encoder_q-layer.1": 243.4446, "encoder_q-layer.10": 504.2085, "encoder_q-layer.11": 892.8817, "encoder_q-layer.2": 254.8758, "encoder_q-layer.3": 258.663, "encoder_q-layer.4": 276.5839, "encoder_q-layer.5": 268.7491, "encoder_q-layer.6": 289.0764, "encoder_q-layer.7": 309.3573, "encoder_q-layer.8": 368.2994, "encoder_q-layer.9": 382.1634, "epoch": 0.09, "inbatch_neg_score": 48.7343, "inbatch_pos_score": 49.1562, "learning_rate": 4.7e-05, "loss": 2.7089, "norm_diff": 0.0139, "num_tokens_overlap": 5.5904, "num_tokens_union": 55.1921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 563.0227, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0639, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7373, "sent_len_1": 67.0119, "sent_len_max_0": 18.9875, "sent_len_max_1": 189.7363, "stdk": 0.0397, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 9400 }, { "accuracy": 51.1719, "doc_norm": 7.0723, "encoder_q-embeddings": 338.273, "encoder_q-layer.0": 227.7386, "encoder_q-layer.1": 226.6711, "encoder_q-layer.10": 577.8579, "encoder_q-layer.11": 790.4043, "encoder_q-layer.2": 247.1096, "encoder_q-layer.3": 243.6979, "encoder_q-layer.4": 252.8822, "encoder_q-layer.5": 247.5256, "encoder_q-layer.6": 268.2798, "encoder_q-layer.7": 289.7972, "encoder_q-layer.8": 345.6668, "encoder_q-layer.9": 365.8347, "epoch": 0.09, "inbatch_neg_score": 48.6613, "inbatch_pos_score": 49.0938, "learning_rate": 4.75e-05, "loss": 2.7164, "norm_diff": 0.0139, "num_tokens_overlap": 5.5917, "num_tokens_union": 54.9115, "postclip_grad_norm": 1.0, "preclip_grad_norm": 535.2658, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0584, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7455, "sent_len_1": 66.6073, "sent_len_max_0": 18.7512, "sent_len_max_1": 191.0437, "stdk": 0.0401, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 9500 }, { "accuracy": 47.168, "doc_norm": 7.0629, "encoder_q-embeddings": 339.7016, "encoder_q-layer.0": 224.0468, "encoder_q-layer.1": 229.2721, "encoder_q-layer.10": 454.8504, "encoder_q-layer.11": 849.2988, "encoder_q-layer.2": 239.6802, "encoder_q-layer.3": 242.1073, "encoder_q-layer.4": 245.4573, "encoder_q-layer.5": 241.7568, "encoder_q-layer.6": 277.8821, "encoder_q-layer.7": 282.2662, "encoder_q-layer.8": 333.889, "encoder_q-layer.9": 314.983, "epoch": 0.09, "inbatch_neg_score": 48.6101, "inbatch_pos_score": 49.0312, "learning_rate": 4.8e-05, "loss": 2.7477, "norm_diff": 0.0122, "num_tokens_overlap": 5.5711, "num_tokens_union": 54.9892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 524.3058, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0507, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.715, "sent_len_1": 66.8349, "sent_len_max_0": 18.9175, "sent_len_max_1": 192.9137, "stdk": 0.0383, "stdq": 0.0373, "stdqueue_k": 0.0, "step": 9600 }, { "accuracy": 48.4375, "doc_norm": 7.0755, "encoder_q-embeddings": 344.1329, "encoder_q-layer.0": 238.0069, "encoder_q-layer.1": 240.3939, "encoder_q-layer.10": 789.6578, "encoder_q-layer.11": 995.9926, "encoder_q-layer.2": 269.9435, "encoder_q-layer.3": 275.8615, "encoder_q-layer.4": 282.0085, "encoder_q-layer.5": 283.7986, "encoder_q-layer.6": 315.683, "encoder_q-layer.7": 346.5597, "encoder_q-layer.8": 381.0472, "encoder_q-layer.9": 466.6123, "epoch": 0.09, "inbatch_neg_score": 48.6832, "inbatch_pos_score": 49.125, "learning_rate": 4.85e-05, "loss": 2.6871, "norm_diff": 0.0103, "num_tokens_overlap": 5.5738, "num_tokens_union": 55.0775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 635.2534, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0652, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.696, "sent_len_1": 66.8836, "sent_len_max_0": 18.7375, "sent_len_max_1": 188.4137, "stdk": 0.0401, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 9700 }, { "accuracy": 51.1719, "doc_norm": 7.069, "encoder_q-embeddings": 319.3911, "encoder_q-layer.0": 228.41, "encoder_q-layer.1": 230.6144, "encoder_q-layer.10": 474.944, "encoder_q-layer.11": 790.6906, "encoder_q-layer.2": 246.8668, "encoder_q-layer.3": 249.4886, "encoder_q-layer.4": 253.4575, "encoder_q-layer.5": 257.9269, "encoder_q-layer.6": 280.8621, "encoder_q-layer.7": 278.2886, "encoder_q-layer.8": 333.9854, "encoder_q-layer.9": 325.4114, "epoch": 0.1, "inbatch_neg_score": 48.6163, "inbatch_pos_score": 49.0625, "learning_rate": 4.9e-05, "loss": 2.6792, "norm_diff": 0.0134, "num_tokens_overlap": 5.5814, "num_tokens_union": 54.9333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 514.5177, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0556, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.72, "sent_len_1": 66.6438, "sent_len_max_0": 18.8, "sent_len_max_1": 188.9288, "stdk": 0.0397, "stdq": 0.039, "stdqueue_k": 0.0, "step": 9800 }, { "accuracy": 49.6094, "doc_norm": 7.0745, "encoder_q-embeddings": 322.7308, "encoder_q-layer.0": 219.8598, "encoder_q-layer.1": 228.9268, "encoder_q-layer.10": 417.3398, "encoder_q-layer.11": 754.8224, "encoder_q-layer.2": 244.615, "encoder_q-layer.3": 243.4917, "encoder_q-layer.4": 248.7782, "encoder_q-layer.5": 243.6246, "encoder_q-layer.6": 274.6678, "encoder_q-layer.7": 270.0921, "encoder_q-layer.8": 308.1178, "encoder_q-layer.9": 304.9738, "epoch": 0.1, "inbatch_neg_score": 48.6775, "inbatch_pos_score": 49.125, "learning_rate": 4.9500000000000004e-05, "loss": 2.6851, "norm_diff": 0.0142, "num_tokens_overlap": 5.5925, "num_tokens_union": 55.0495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 494.0101, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0603, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.73, "sent_len_1": 66.8093, "sent_len_max_0": 18.8712, "sent_len_max_1": 189.0637, "stdk": 0.0394, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 9900 }, { "accuracy": 49.9023, "doc_norm": 7.07, "encoder_q-embeddings": 356.6564, "encoder_q-layer.0": 244.5125, "encoder_q-layer.1": 240.4826, "encoder_q-layer.10": 544.293, "encoder_q-layer.11": 865.3846, "encoder_q-layer.2": 256.8445, "encoder_q-layer.3": 258.6122, "encoder_q-layer.4": 263.9006, "encoder_q-layer.5": 265.672, "encoder_q-layer.6": 298.1664, "encoder_q-layer.7": 315.1208, "encoder_q-layer.8": 349.9863, "encoder_q-layer.9": 371.3069, "epoch": 0.1, "inbatch_neg_score": 48.6231, "inbatch_pos_score": 49.0625, "learning_rate": 5e-05, "loss": 2.6979, "norm_diff": 0.0115, "num_tokens_overlap": 5.5685, "num_tokens_union": 54.8807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 561.8139, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0585, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7148, "sent_len_1": 66.5539, "sent_len_max_0": 18.8625, "sent_len_max_1": 189.2025, "stdk": 0.0401, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 10000 }, { "dev_runtime": 30.6742, "dev_samples_per_second": 2.086, "dev_steps_per_second": 0.033, "epoch": 0.1, "step": 10000, "test_accuracy": 8.697509765625, "test_doc_norm": 7.06818962097168, "test_inbatch_neg_score": 49.06257629394531, "test_inbatch_pos_score": 49.710205078125, "test_loss": 3.7887144088745117, "test_norm_diff": 0.0011394545435905457, "test_query_norm": 7.067959308624268, "test_queue_k_norm": 0.0, "test_stdk": 0.03362690284848213, "test_stdq": 0.03356219083070755, "test_stdqueue_k": 0.0 }, { "dev_runtime": 30.6742, "dev_samples_per_second": 2.086, "dev_steps_per_second": 0.033, "epoch": 0.1, "eval_beir-arguana_ndcg@10": 0.36794, "eval_beir-arguana_recall@10": 0.64794, "eval_beir-arguana_recall@100": 0.95519, "eval_beir-arguana_recall@20": 0.82219, "eval_beir-avg_ndcg@10": 0.37048783333333335, "eval_beir-avg_recall@10": 0.43999816666666663, "eval_beir-avg_recall@100": 0.6195675, "eval_beir-avg_recall@20": 0.5043038333333334, "eval_beir-cqadupstack_ndcg@10": 0.26745833333333335, "eval_beir-cqadupstack_recall@10": 0.3610616666666666, "eval_beir-cqadupstack_recall@100": 0.5881849999999998, "eval_beir-cqadupstack_recall@20": 0.42613833333333334, "eval_beir-fiqa_ndcg@10": 0.23824, "eval_beir-fiqa_recall@10": 0.29881, "eval_beir-fiqa_recall@100": 0.56644, "eval_beir-fiqa_recall@20": 0.36547, "eval_beir-nfcorpus_ndcg@10": 0.2874, "eval_beir-nfcorpus_recall@10": 0.14111, "eval_beir-nfcorpus_recall@100": 0.27774, "eval_beir-nfcorpus_recall@20": 0.17277, "eval_beir-nq_ndcg@10": 0.26016, "eval_beir-nq_recall@10": 0.4326, "eval_beir-nq_recall@100": 0.75797, "eval_beir-nq_recall@20": 0.54444, "eval_beir-quora_ndcg@10": 0.79353, "eval_beir-quora_recall@10": 0.89199, "eval_beir-quora_recall@100": 0.97736, "eval_beir-quora_recall@20": 0.9309, "eval_beir-scidocs_ndcg@10": 0.14772, "eval_beir-scidocs_recall@10": 0.15338, "eval_beir-scidocs_recall@100": 0.36013, "eval_beir-scidocs_recall@20": 0.21338, "eval_beir-scifact_ndcg@10": 0.58505, "eval_beir-scifact_recall@10": 0.73317, "eval_beir-scifact_recall@100": 0.87756, "eval_beir-scifact_recall@20": 0.80189, "eval_beir-trec-covid_ndcg@10": 0.58955, "eval_beir-trec-covid_recall@10": 0.614, "eval_beir-trec-covid_recall@100": 0.4292, "eval_beir-trec-covid_recall@20": 0.575, "eval_beir-webis-touche2020_ndcg@10": 0.16783, "eval_beir-webis-touche2020_recall@10": 0.12592, "eval_beir-webis-touche2020_recall@100": 0.4059, "eval_beir-webis-touche2020_recall@20": 0.19086, "eval_senteval-avg_sts": 0.7514190324744924, "eval_senteval-sickr_spearman": 0.7367528113851222, "eval_senteval-stsb_spearman": 0.7660852535638626, "step": 10000, "test_accuracy": 8.697509765625, "test_doc_norm": 7.06818962097168, "test_inbatch_neg_score": 49.06257629394531, "test_inbatch_pos_score": 49.710205078125, "test_loss": 3.7887144088745117, "test_norm_diff": 0.0011394545435905457, "test_query_norm": 7.067959308624268, "test_queue_k_norm": 0.0, "test_stdk": 0.03362690284848213, "test_stdq": 0.03356219083070755, "test_stdqueue_k": 0.0 }, { "accuracy": 52.4414, "doc_norm": 7.0768, "encoder_q-embeddings": 685.8259, "encoder_q-layer.0": 440.5613, "encoder_q-layer.1": 438.5612, "encoder_q-layer.10": 881.1891, "encoder_q-layer.11": 1467.2939, "encoder_q-layer.2": 472.3007, "encoder_q-layer.3": 481.6523, "encoder_q-layer.4": 476.529, "encoder_q-layer.5": 465.9895, "encoder_q-layer.6": 506.6188, "encoder_q-layer.7": 542.3544, "encoder_q-layer.8": 616.0112, "encoder_q-layer.9": 623.8632, "epoch": 0.1, "inbatch_neg_score": 48.7316, "inbatch_pos_score": 49.1875, "learning_rate": 4.994444444444445e-05, "loss": 2.6737, "norm_diff": 0.0123, "num_tokens_overlap": 5.5729, "num_tokens_union": 54.8955, "postclip_grad_norm": 1.0, "preclip_grad_norm": 969.1411, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0646, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6897, "sent_len_1": 66.6854, "sent_len_max_0": 18.7613, "sent_len_max_1": 190.125, "stdk": 0.0382, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 10100 }, { "accuracy": 47.7539, "doc_norm": 7.0857, "encoder_q-embeddings": 669.6775, "encoder_q-layer.0": 463.0582, "encoder_q-layer.1": 463.9305, "encoder_q-layer.10": 959.8615, "encoder_q-layer.11": 1778.0581, "encoder_q-layer.2": 496.7356, "encoder_q-layer.3": 492.5664, "encoder_q-layer.4": 499.005, "encoder_q-layer.5": 486.4565, "encoder_q-layer.6": 519.2849, "encoder_q-layer.7": 569.8843, "encoder_q-layer.8": 655.2502, "encoder_q-layer.9": 686.6893, "epoch": 0.1, "inbatch_neg_score": 48.8922, "inbatch_pos_score": 49.3125, "learning_rate": 4.9888888888888894e-05, "loss": 2.7008, "norm_diff": 0.0126, "num_tokens_overlap": 5.5793, "num_tokens_union": 54.952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1076.7627, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0731, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7365, "sent_len_1": 66.6804, "sent_len_max_0": 18.8925, "sent_len_max_1": 189.3125, "stdk": 0.0393, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 10200 }, { "accuracy": 49.8047, "doc_norm": 7.1094, "encoder_q-embeddings": 592.1243, "encoder_q-layer.0": 420.676, "encoder_q-layer.1": 436.37, "encoder_q-layer.10": 1251.068, "encoder_q-layer.11": 1899.0265, "encoder_q-layer.2": 475.8992, "encoder_q-layer.3": 478.2503, "encoder_q-layer.4": 476.4453, "encoder_q-layer.5": 463.9053, "encoder_q-layer.6": 514.6949, "encoder_q-layer.7": 579.6254, "encoder_q-layer.8": 708.4634, "encoder_q-layer.9": 817.0085, "epoch": 0.1, "inbatch_neg_score": 49.2104, "inbatch_pos_score": 49.6562, "learning_rate": 4.9833333333333336e-05, "loss": 2.6374, "norm_diff": 0.0132, "num_tokens_overlap": 5.5793, "num_tokens_union": 55.0549, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1113.7407, "preclip_grad_norm_avg": 0.0, "query_norm": 7.0962, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7176, "sent_len_1": 66.8331, "sent_len_max_0": 18.7987, "sent_len_max_1": 187.7475, "stdk": 0.0382, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 10300 }, { "accuracy": 52.0508, "doc_norm": 7.1139, "encoder_q-embeddings": 627.4532, "encoder_q-layer.0": 437.1695, "encoder_q-layer.1": 433.0941, "encoder_q-layer.10": 1116.9429, "encoder_q-layer.11": 1669.0929, "encoder_q-layer.2": 464.3457, "encoder_q-layer.3": 472.3463, "encoder_q-layer.4": 485.1372, "encoder_q-layer.5": 475.2433, "encoder_q-layer.6": 527.0017, "encoder_q-layer.7": 581.6995, "encoder_q-layer.8": 702.7116, "encoder_q-layer.9": 770.637, "epoch": 0.1, "inbatch_neg_score": 49.2404, "inbatch_pos_score": 49.6875, "learning_rate": 4.977777777777778e-05, "loss": 2.682, "norm_diff": 0.0159, "num_tokens_overlap": 5.5794, "num_tokens_union": 55.0078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1059.2781, "preclip_grad_norm_avg": 0.0, "query_norm": 7.098, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7142, "sent_len_1": 66.7579, "sent_len_max_0": 18.9725, "sent_len_max_1": 190.6362, "stdk": 0.039, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 10400 }, { "accuracy": 51.7578, "doc_norm": 7.1151, "encoder_q-embeddings": 667.3245, "encoder_q-layer.0": 450.385, "encoder_q-layer.1": 452.9295, "encoder_q-layer.10": 870.6861, "encoder_q-layer.11": 1765.3434, "encoder_q-layer.2": 474.7642, "encoder_q-layer.3": 497.8126, "encoder_q-layer.4": 511.3102, "encoder_q-layer.5": 494.7884, "encoder_q-layer.6": 551.2045, "encoder_q-layer.7": 623.0709, "encoder_q-layer.8": 683.2817, "encoder_q-layer.9": 669.0684, "epoch": 0.1, "inbatch_neg_score": 49.2413, "inbatch_pos_score": 49.6875, "learning_rate": 4.972222222222223e-05, "loss": 2.6608, "norm_diff": 0.0145, "num_tokens_overlap": 5.5788, "num_tokens_union": 55.063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1058.6954, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1006, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7254, "sent_len_1": 66.855, "sent_len_max_0": 19.0075, "sent_len_max_1": 189.94, "stdk": 0.0396, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 10500 }, { "accuracy": 51.2695, "doc_norm": 7.1178, "encoder_q-embeddings": 665.834, "encoder_q-layer.0": 448.9002, "encoder_q-layer.1": 469.2202, "encoder_q-layer.10": 871.1981, "encoder_q-layer.11": 1665.8398, "encoder_q-layer.2": 488.8536, "encoder_q-layer.3": 491.1535, "encoder_q-layer.4": 501.8985, "encoder_q-layer.5": 471.0514, "encoder_q-layer.6": 515.8949, "encoder_q-layer.7": 571.3726, "encoder_q-layer.8": 616.968, "encoder_q-layer.9": 604.048, "epoch": 0.1, "inbatch_neg_score": 49.296, "inbatch_pos_score": 49.75, "learning_rate": 4.966666666666667e-05, "loss": 2.6371, "norm_diff": 0.0138, "num_tokens_overlap": 5.5828, "num_tokens_union": 54.9452, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1026.3018, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1039, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7203, "sent_len_1": 66.6995, "sent_len_max_0": 18.8375, "sent_len_max_1": 189.6712, "stdk": 0.0396, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 10600 }, { "accuracy": 52.832, "doc_norm": 7.1189, "encoder_q-embeddings": 645.3484, "encoder_q-layer.0": 434.0625, "encoder_q-layer.1": 430.6322, "encoder_q-layer.10": 916.0026, "encoder_q-layer.11": 1690.9579, "encoder_q-layer.2": 455.1787, "encoder_q-layer.3": 464.694, "encoder_q-layer.4": 481.4196, "encoder_q-layer.5": 480.0939, "encoder_q-layer.6": 555.9432, "encoder_q-layer.7": 606.1246, "encoder_q-layer.8": 688.3116, "encoder_q-layer.9": 638.0313, "epoch": 0.1, "inbatch_neg_score": 49.2997, "inbatch_pos_score": 49.75, "learning_rate": 4.961111111111111e-05, "loss": 2.7055, "norm_diff": 0.0147, "num_tokens_overlap": 5.5728, "num_tokens_union": 54.8272, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1024.2555, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1042, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7176, "sent_len_1": 66.4723, "sent_len_max_0": 18.7288, "sent_len_max_1": 189.2175, "stdk": 0.0399, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 10700 }, { "accuracy": 51.7578, "doc_norm": 7.1128, "encoder_q-embeddings": 699.6958, "encoder_q-layer.0": 473.13, "encoder_q-layer.1": 471.8325, "encoder_q-layer.10": 1096.8337, "encoder_q-layer.11": 1935.9357, "encoder_q-layer.2": 503.7458, "encoder_q-layer.3": 513.4376, "encoder_q-layer.4": 529.9604, "encoder_q-layer.5": 519.9365, "encoder_q-layer.6": 574.0186, "encoder_q-layer.7": 597.2359, "encoder_q-layer.8": 720.7485, "encoder_q-layer.9": 725.8525, "epoch": 0.11, "inbatch_neg_score": 49.1895, "inbatch_pos_score": 49.6562, "learning_rate": 4.955555555555556e-05, "loss": 2.7058, "norm_diff": 0.0123, "num_tokens_overlap": 5.5862, "num_tokens_union": 54.9984, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1147.8038, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1005, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.734, "sent_len_1": 66.7498, "sent_len_max_0": 18.9387, "sent_len_max_1": 189.83, "stdk": 0.0393, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 10800 }, { "accuracy": 51.2695, "doc_norm": 7.1293, "encoder_q-embeddings": 656.559, "encoder_q-layer.0": 456.2281, "encoder_q-layer.1": 454.0376, "encoder_q-layer.10": 964.5019, "encoder_q-layer.11": 1591.0775, "encoder_q-layer.2": 484.6209, "encoder_q-layer.3": 489.8696, "encoder_q-layer.4": 503.728, "encoder_q-layer.5": 481.1076, "encoder_q-layer.6": 519.7097, "encoder_q-layer.7": 552.9406, "encoder_q-layer.8": 652.2267, "encoder_q-layer.9": 650.8235, "epoch": 0.11, "inbatch_neg_score": 49.3949, "inbatch_pos_score": 49.875, "learning_rate": 4.9500000000000004e-05, "loss": 2.6918, "norm_diff": 0.0132, "num_tokens_overlap": 5.5806, "num_tokens_union": 55.0949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1028.0178, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1162, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7268, "sent_len_1": 66.8537, "sent_len_max_0": 18.9713, "sent_len_max_1": 190.165, "stdk": 0.0407, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 10900 }, { "accuracy": 50.7812, "doc_norm": 7.1221, "encoder_q-embeddings": 663.2349, "encoder_q-layer.0": 460.4537, "encoder_q-layer.1": 453.2679, "encoder_q-layer.10": 1021.1927, "encoder_q-layer.11": 1791.1888, "encoder_q-layer.2": 486.0013, "encoder_q-layer.3": 480.0324, "encoder_q-layer.4": 487.1822, "encoder_q-layer.5": 492.0098, "encoder_q-layer.6": 551.7219, "encoder_q-layer.7": 563.2823, "encoder_q-layer.8": 691.5601, "encoder_q-layer.9": 704.8307, "epoch": 0.11, "inbatch_neg_score": 49.3341, "inbatch_pos_score": 49.8125, "learning_rate": 4.9444444444444446e-05, "loss": 2.667, "norm_diff": 0.0123, "num_tokens_overlap": 5.5701, "num_tokens_union": 55.0146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1087.683, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1098, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7144, "sent_len_1": 66.8253, "sent_len_max_0": 18.8863, "sent_len_max_1": 189.6488, "stdk": 0.0399, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 11000 }, { "accuracy": 50.0, "doc_norm": 7.1231, "encoder_q-embeddings": 657.4246, "encoder_q-layer.0": 453.3944, "encoder_q-layer.1": 441.1694, "encoder_q-layer.10": 826.0762, "encoder_q-layer.11": 1525.9564, "encoder_q-layer.2": 482.0025, "encoder_q-layer.3": 480.2925, "encoder_q-layer.4": 500.6992, "encoder_q-layer.5": 464.2161, "encoder_q-layer.6": 517.4811, "encoder_q-layer.7": 545.8135, "encoder_q-layer.8": 620.7342, "encoder_q-layer.9": 608.2479, "epoch": 0.11, "inbatch_neg_score": 49.4153, "inbatch_pos_score": 49.875, "learning_rate": 4.938888888888889e-05, "loss": 2.6604, "norm_diff": 0.0093, "num_tokens_overlap": 5.576, "num_tokens_union": 55.1927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 984.3631, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1138, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6985, "sent_len_1": 67.1005, "sent_len_max_0": 18.9163, "sent_len_max_1": 189.7163, "stdk": 0.0386, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 11100 }, { "accuracy": 51.5625, "doc_norm": 7.1365, "encoder_q-embeddings": 629.3859, "encoder_q-layer.0": 422.7112, "encoder_q-layer.1": 430.701, "encoder_q-layer.10": 964.9653, "encoder_q-layer.11": 1592.8407, "encoder_q-layer.2": 461.8134, "encoder_q-layer.3": 476.1648, "encoder_q-layer.4": 488.6884, "encoder_q-layer.5": 503.8689, "encoder_q-layer.6": 560.7495, "encoder_q-layer.7": 601.8192, "encoder_q-layer.8": 635.7471, "encoder_q-layer.9": 614.4966, "epoch": 0.11, "inbatch_neg_score": 49.5697, "inbatch_pos_score": 50.0312, "learning_rate": 4.933333333333334e-05, "loss": 2.6557, "norm_diff": 0.0163, "num_tokens_overlap": 5.5746, "num_tokens_union": 54.9248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1000.5551, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1202, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7166, "sent_len_1": 66.628, "sent_len_max_0": 18.8137, "sent_len_max_1": 188.3363, "stdk": 0.04, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 11200 }, { "accuracy": 50.1953, "doc_norm": 7.1401, "encoder_q-embeddings": 745.9161, "encoder_q-layer.0": 482.1497, "encoder_q-layer.1": 471.6649, "encoder_q-layer.10": 934.799, "encoder_q-layer.11": 1687.7085, "encoder_q-layer.2": 512.0803, "encoder_q-layer.3": 489.1863, "encoder_q-layer.4": 511.6214, "encoder_q-layer.5": 490.4235, "encoder_q-layer.6": 552.0115, "encoder_q-layer.7": 563.2812, "encoder_q-layer.8": 658.5358, "encoder_q-layer.9": 653.1374, "epoch": 0.11, "inbatch_neg_score": 49.5454, "inbatch_pos_score": 50.0312, "learning_rate": 4.927777777777778e-05, "loss": 2.676, "norm_diff": 0.0176, "num_tokens_overlap": 5.5652, "num_tokens_union": 54.9715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1071.5043, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1225, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7104, "sent_len_1": 66.7516, "sent_len_max_0": 18.8487, "sent_len_max_1": 190.37, "stdk": 0.0403, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 11300 }, { "accuracy": 50.0977, "doc_norm": 7.1334, "encoder_q-embeddings": 623.3936, "encoder_q-layer.0": 440.2192, "encoder_q-layer.1": 446.6443, "encoder_q-layer.10": 827.717, "encoder_q-layer.11": 1455.0555, "encoder_q-layer.2": 467.406, "encoder_q-layer.3": 489.3309, "encoder_q-layer.4": 482.547, "encoder_q-layer.5": 468.9755, "encoder_q-layer.6": 536.407, "encoder_q-layer.7": 584.2645, "encoder_q-layer.8": 671.3929, "encoder_q-layer.9": 614.8173, "epoch": 0.11, "inbatch_neg_score": 49.5472, "inbatch_pos_score": 50.0, "learning_rate": 4.922222222222222e-05, "loss": 2.6272, "norm_diff": 0.0104, "num_tokens_overlap": 5.5812, "num_tokens_union": 54.9088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 961.5768, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1229, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7114, "sent_len_1": 66.5956, "sent_len_max_0": 18.7825, "sent_len_max_1": 187.9963, "stdk": 0.0401, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 11400 }, { "accuracy": 51.2695, "doc_norm": 7.149, "encoder_q-embeddings": 663.2375, "encoder_q-layer.0": 450.7389, "encoder_q-layer.1": 477.6053, "encoder_q-layer.10": 1331.0276, "encoder_q-layer.11": 1875.803, "encoder_q-layer.2": 501.5644, "encoder_q-layer.3": 511.0521, "encoder_q-layer.4": 524.0795, "encoder_q-layer.5": 523.6381, "encoder_q-layer.6": 589.0203, "encoder_q-layer.7": 617.6648, "encoder_q-layer.8": 707.2116, "encoder_q-layer.9": 742.8395, "epoch": 0.11, "inbatch_neg_score": 49.6979, "inbatch_pos_score": 50.1562, "learning_rate": 4.9166666666666665e-05, "loss": 2.6439, "norm_diff": 0.0132, "num_tokens_overlap": 5.5796, "num_tokens_union": 55.1243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1168.1836, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1357, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7297, "sent_len_1": 66.9544, "sent_len_max_0": 18.92, "sent_len_max_1": 189.015, "stdk": 0.0402, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 11500 }, { "accuracy": 51.1719, "doc_norm": 7.1498, "encoder_q-embeddings": 642.627, "encoder_q-layer.0": 436.6455, "encoder_q-layer.1": 432.4995, "encoder_q-layer.10": 786.4391, "encoder_q-layer.11": 1481.7415, "encoder_q-layer.2": 463.9503, "encoder_q-layer.3": 471.0818, "encoder_q-layer.4": 477.882, "encoder_q-layer.5": 460.9769, "encoder_q-layer.6": 501.0715, "encoder_q-layer.7": 542.7455, "encoder_q-layer.8": 623.4266, "encoder_q-layer.9": 609.8973, "epoch": 0.11, "inbatch_neg_score": 49.7446, "inbatch_pos_score": 50.1875, "learning_rate": 4.9111111111111114e-05, "loss": 2.6714, "norm_diff": 0.0145, "num_tokens_overlap": 5.5899, "num_tokens_union": 54.9639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 972.6194, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1353, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7394, "sent_len_1": 66.669, "sent_len_max_0": 18.8725, "sent_len_max_1": 188.4988, "stdk": 0.0397, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 11600 }, { "accuracy": 51.5625, "doc_norm": 7.1519, "encoder_q-embeddings": 654.0939, "encoder_q-layer.0": 461.0854, "encoder_q-layer.1": 459.9431, "encoder_q-layer.10": 1145.6772, "encoder_q-layer.11": 1797.37, "encoder_q-layer.2": 498.2341, "encoder_q-layer.3": 501.056, "encoder_q-layer.4": 509.5609, "encoder_q-layer.5": 496.4512, "encoder_q-layer.6": 569.0857, "encoder_q-layer.7": 613.6355, "encoder_q-layer.8": 720.0151, "encoder_q-layer.9": 729.1011, "epoch": 0.11, "inbatch_neg_score": 49.7362, "inbatch_pos_score": 50.2188, "learning_rate": 4.905555555555556e-05, "loss": 2.6186, "norm_diff": 0.0134, "num_tokens_overlap": 5.5848, "num_tokens_union": 55.1149, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1106.4072, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1386, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7133, "sent_len_1": 66.9844, "sent_len_max_0": 18.9825, "sent_len_max_1": 191.7675, "stdk": 0.0407, "stdq": 0.0402, "stdqueue_k": 0.0, "step": 11700 }, { "accuracy": 50.4883, "doc_norm": 7.1541, "encoder_q-embeddings": 620.1844, "encoder_q-layer.0": 434.9379, "encoder_q-layer.1": 436.4301, "encoder_q-layer.10": 785.9696, "encoder_q-layer.11": 1419.2521, "encoder_q-layer.2": 479.4569, "encoder_q-layer.3": 486.6297, "encoder_q-layer.4": 489.5668, "encoder_q-layer.5": 482.5974, "encoder_q-layer.6": 517.9999, "encoder_q-layer.7": 544.23, "encoder_q-layer.8": 641.7507, "encoder_q-layer.9": 594.9381, "epoch": 0.12, "inbatch_neg_score": 49.8469, "inbatch_pos_score": 50.2812, "learning_rate": 4.9e-05, "loss": 2.6385, "norm_diff": 0.0124, "num_tokens_overlap": 5.5834, "num_tokens_union": 54.9801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 948.4908, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1417, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7334, "sent_len_1": 66.7529, "sent_len_max_0": 18.8475, "sent_len_max_1": 189.9412, "stdk": 0.0386, "stdq": 0.039, "stdqueue_k": 0.0, "step": 11800 }, { "accuracy": 52.832, "doc_norm": 7.1529, "encoder_q-embeddings": 646.8387, "encoder_q-layer.0": 448.7039, "encoder_q-layer.1": 470.7398, "encoder_q-layer.10": 819.4918, "encoder_q-layer.11": 1602.2449, "encoder_q-layer.2": 501.8897, "encoder_q-layer.3": 512.4053, "encoder_q-layer.4": 506.0872, "encoder_q-layer.5": 503.2975, "encoder_q-layer.6": 580.673, "encoder_q-layer.7": 605.7269, "encoder_q-layer.8": 673.0825, "encoder_q-layer.9": 629.863, "epoch": 0.12, "inbatch_neg_score": 49.8119, "inbatch_pos_score": 50.2812, "learning_rate": 4.894444444444445e-05, "loss": 2.6365, "norm_diff": 0.0124, "num_tokens_overlap": 5.5903, "num_tokens_union": 54.9506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1017.6079, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1405, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7213, "sent_len_1": 66.6383, "sent_len_max_0": 18.7625, "sent_len_max_1": 187.715, "stdk": 0.0399, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 11900 }, { "accuracy": 49.8047, "doc_norm": 7.1585, "encoder_q-embeddings": 738.534, "encoder_q-layer.0": 491.2958, "encoder_q-layer.1": 478.0908, "encoder_q-layer.10": 898.0449, "encoder_q-layer.11": 1543.4619, "encoder_q-layer.2": 499.1405, "encoder_q-layer.3": 487.3222, "encoder_q-layer.4": 498.7748, "encoder_q-layer.5": 477.5791, "encoder_q-layer.6": 519.2003, "encoder_q-layer.7": 542.6299, "encoder_q-layer.8": 599.3464, "encoder_q-layer.9": 620.3951, "epoch": 0.12, "inbatch_neg_score": 49.858, "inbatch_pos_score": 50.3125, "learning_rate": 4.888888888888889e-05, "loss": 2.651, "norm_diff": 0.0135, "num_tokens_overlap": 5.5799, "num_tokens_union": 55.1217, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1027.7848, "preclip_grad_norm_avg": 0.0, "query_norm": 7.145, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7161, "sent_len_1": 66.9589, "sent_len_max_0": 18.9112, "sent_len_max_1": 190.6062, "stdk": 0.0403, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 12000 }, { "accuracy": 54.1992, "doc_norm": 7.1613, "encoder_q-embeddings": 1233.0164, "encoder_q-layer.0": 834.9495, "encoder_q-layer.1": 839.8041, "encoder_q-layer.10": 2310.8472, "encoder_q-layer.11": 3119.116, "encoder_q-layer.2": 905.2197, "encoder_q-layer.3": 928.2285, "encoder_q-layer.4": 944.3232, "encoder_q-layer.5": 949.3813, "encoder_q-layer.6": 1060.6152, "encoder_q-layer.7": 1106.3849, "encoder_q-layer.8": 1266.4274, "encoder_q-layer.9": 1407.1985, "epoch": 0.12, "inbatch_neg_score": 49.9302, "inbatch_pos_score": 50.4062, "learning_rate": 4.883333333333334e-05, "loss": 2.6606, "norm_diff": 0.0134, "num_tokens_overlap": 5.5861, "num_tokens_union": 55.0124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2021.7753, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1479, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7151, "sent_len_1": 66.7691, "sent_len_max_0": 18.8538, "sent_len_max_1": 190.1087, "stdk": 0.0394, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 12100 }, { "accuracy": 51.6602, "doc_norm": 7.1548, "encoder_q-embeddings": 1341.795, "encoder_q-layer.0": 903.1467, "encoder_q-layer.1": 885.382, "encoder_q-layer.10": 1609.9633, "encoder_q-layer.11": 2944.2141, "encoder_q-layer.2": 946.8554, "encoder_q-layer.3": 954.9578, "encoder_q-layer.4": 975.9514, "encoder_q-layer.5": 937.1509, "encoder_q-layer.6": 1032.0731, "encoder_q-layer.7": 1123.7067, "encoder_q-layer.8": 1200.7449, "encoder_q-layer.9": 1156.2711, "epoch": 0.12, "inbatch_neg_score": 49.7979, "inbatch_pos_score": 50.25, "learning_rate": 4.8777777777777775e-05, "loss": 2.5774, "norm_diff": 0.013, "num_tokens_overlap": 5.5797, "num_tokens_union": 55.0087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1935.5635, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1417, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7329, "sent_len_1": 66.7862, "sent_len_max_0": 18.805, "sent_len_max_1": 191.5037, "stdk": 0.0406, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 12200 }, { "accuracy": 51.7578, "doc_norm": 7.1505, "encoder_q-embeddings": 1255.2828, "encoder_q-layer.0": 853.867, "encoder_q-layer.1": 850.7391, "encoder_q-layer.10": 1558.1267, "encoder_q-layer.11": 2747.1746, "encoder_q-layer.2": 913.963, "encoder_q-layer.3": 915.9145, "encoder_q-layer.4": 914.0729, "encoder_q-layer.5": 890.6402, "encoder_q-layer.6": 1007.8748, "encoder_q-layer.7": 1048.416, "encoder_q-layer.8": 1218.4495, "encoder_q-layer.9": 1162.9287, "epoch": 0.12, "inbatch_neg_score": 49.744, "inbatch_pos_score": 50.1875, "learning_rate": 4.8722222222222224e-05, "loss": 2.6189, "norm_diff": 0.0157, "num_tokens_overlap": 5.5683, "num_tokens_union": 54.9019, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1853.8815, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1349, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6911, "sent_len_1": 66.603, "sent_len_max_0": 18.7237, "sent_len_max_1": 188.4338, "stdk": 0.0406, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 12300 }, { "accuracy": 50.0977, "doc_norm": 7.1479, "encoder_q-embeddings": 1288.6729, "encoder_q-layer.0": 877.7987, "encoder_q-layer.1": 854.4686, "encoder_q-layer.10": 1963.3124, "encoder_q-layer.11": 3134.1108, "encoder_q-layer.2": 945.5695, "encoder_q-layer.3": 940.6714, "encoder_q-layer.4": 966.2684, "encoder_q-layer.5": 960.709, "encoder_q-layer.6": 1055.2463, "encoder_q-layer.7": 1166.7874, "encoder_q-layer.8": 1365.535, "encoder_q-layer.9": 1298.7504, "epoch": 0.12, "inbatch_neg_score": 49.7381, "inbatch_pos_score": 50.1875, "learning_rate": 4.866666666666667e-05, "loss": 2.6418, "norm_diff": 0.0125, "num_tokens_overlap": 5.59, "num_tokens_union": 55.1129, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2044.5289, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1354, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7399, "sent_len_1": 66.9033, "sent_len_max_0": 18.865, "sent_len_max_1": 187.8738, "stdk": 0.0393, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 12400 }, { "accuracy": 50.4883, "doc_norm": 7.1478, "encoder_q-embeddings": 1368.9526, "encoder_q-layer.0": 891.2272, "encoder_q-layer.1": 908.2104, "encoder_q-layer.10": 1939.6531, "encoder_q-layer.11": 3660.2729, "encoder_q-layer.2": 956.2755, "encoder_q-layer.3": 967.3343, "encoder_q-layer.4": 981.2958, "encoder_q-layer.5": 944.7385, "encoder_q-layer.6": 1068.3938, "encoder_q-layer.7": 1165.6235, "encoder_q-layer.8": 1316.5186, "encoder_q-layer.9": 1323.0328, "epoch": 0.12, "inbatch_neg_score": 49.6678, "inbatch_pos_score": 50.125, "learning_rate": 4.8611111111111115e-05, "loss": 2.6007, "norm_diff": 0.0147, "num_tokens_overlap": 5.5816, "num_tokens_union": 55.0988, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2141.6888, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1331, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7174, "sent_len_1": 66.9403, "sent_len_max_0": 18.91, "sent_len_max_1": 188.46, "stdk": 0.041, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 12500 }, { "accuracy": 50.3906, "doc_norm": 7.1467, "encoder_q-embeddings": 1342.3971, "encoder_q-layer.0": 937.5366, "encoder_q-layer.1": 940.1067, "encoder_q-layer.10": 1799.0378, "encoder_q-layer.11": 3465.0291, "encoder_q-layer.2": 983.022, "encoder_q-layer.3": 1017.338, "encoder_q-layer.4": 1005.3442, "encoder_q-layer.5": 986.951, "encoder_q-layer.6": 1066.3107, "encoder_q-layer.7": 1113.0126, "encoder_q-layer.8": 1334.9806, "encoder_q-layer.9": 1349.0461, "epoch": 0.12, "inbatch_neg_score": 49.6451, "inbatch_pos_score": 50.0938, "learning_rate": 4.855555555555556e-05, "loss": 2.6147, "norm_diff": 0.016, "num_tokens_overlap": 5.5799, "num_tokens_union": 54.8721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.2569, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1306, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7217, "sent_len_1": 66.5883, "sent_len_max_0": 18.7913, "sent_len_max_1": 188.6375, "stdk": 0.0403, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 12600 }, { "accuracy": 55.5664, "doc_norm": 7.1383, "encoder_q-embeddings": 1271.4587, "encoder_q-layer.0": 841.8652, "encoder_q-layer.1": 837.4053, "encoder_q-layer.10": 3460.1951, "encoder_q-layer.11": 3997.426, "encoder_q-layer.2": 917.3992, "encoder_q-layer.3": 935.14, "encoder_q-layer.4": 987.3824, "encoder_q-layer.5": 1019.7654, "encoder_q-layer.6": 1216.8099, "encoder_q-layer.7": 1331.7329, "encoder_q-layer.8": 1686.1926, "encoder_q-layer.9": 2011.6241, "epoch": 0.12, "inbatch_neg_score": 49.6172, "inbatch_pos_score": 50.0625, "learning_rate": 4.85e-05, "loss": 2.5848, "norm_diff": 0.0126, "num_tokens_overlap": 5.5757, "num_tokens_union": 55.0772, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2559.0418, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1257, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7005, "sent_len_1": 66.86, "sent_len_max_0": 18.87, "sent_len_max_1": 188.4363, "stdk": 0.0388, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 12700 }, { "accuracy": 52.9297, "doc_norm": 7.1369, "encoder_q-embeddings": 1268.8344, "encoder_q-layer.0": 875.6966, "encoder_q-layer.1": 895.0642, "encoder_q-layer.10": 1576.9497, "encoder_q-layer.11": 2904.6584, "encoder_q-layer.2": 957.3753, "encoder_q-layer.3": 993.5831, "encoder_q-layer.4": 1004.6541, "encoder_q-layer.5": 961.5452, "encoder_q-layer.6": 1009.5657, "encoder_q-layer.7": 1154.6282, "encoder_q-layer.8": 1245.7926, "encoder_q-layer.9": 1172.6249, "epoch": 0.12, "inbatch_neg_score": 49.5371, "inbatch_pos_score": 50.0, "learning_rate": 4.844444444444445e-05, "loss": 2.662, "norm_diff": 0.0143, "num_tokens_overlap": 5.593, "num_tokens_union": 55.0202, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1936.652, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1225, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7343, "sent_len_1": 66.7658, "sent_len_max_0": 18.9, "sent_len_max_1": 186.9575, "stdk": 0.0412, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 12800 }, { "accuracy": 52.4414, "doc_norm": 7.1378, "encoder_q-embeddings": 1360.5027, "encoder_q-layer.0": 914.9313, "encoder_q-layer.1": 931.3312, "encoder_q-layer.10": 1516.6365, "encoder_q-layer.11": 2837.6614, "encoder_q-layer.2": 955.2205, "encoder_q-layer.3": 945.8522, "encoder_q-layer.4": 983.4341, "encoder_q-layer.5": 929.7734, "encoder_q-layer.6": 1033.7534, "encoder_q-layer.7": 1117.7013, "encoder_q-layer.8": 1204.0122, "encoder_q-layer.9": 1165.8032, "epoch": 0.13, "inbatch_neg_score": 49.5248, "inbatch_pos_score": 50.0, "learning_rate": 4.838888888888889e-05, "loss": 2.6115, "norm_diff": 0.0143, "num_tokens_overlap": 5.5807, "num_tokens_union": 55.0575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1921.7193, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1235, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.693, "sent_len_1": 66.9462, "sent_len_max_0": 18.905, "sent_len_max_1": 187.6875, "stdk": 0.0407, "stdq": 0.04, "stdqueue_k": 0.0, "step": 12900 }, { "accuracy": 49.8047, "doc_norm": 7.1321, "encoder_q-embeddings": 1383.7582, "encoder_q-layer.0": 912.1273, "encoder_q-layer.1": 921.4694, "encoder_q-layer.10": 1971.1477, "encoder_q-layer.11": 3161.0754, "encoder_q-layer.2": 975.1787, "encoder_q-layer.3": 1015.0165, "encoder_q-layer.4": 1015.6481, "encoder_q-layer.5": 998.4944, "encoder_q-layer.6": 1111.5336, "encoder_q-layer.7": 1168.8851, "encoder_q-layer.8": 1380.4396, "encoder_q-layer.9": 1363.8135, "epoch": 0.13, "inbatch_neg_score": 49.4623, "inbatch_pos_score": 49.9375, "learning_rate": 4.8333333333333334e-05, "loss": 2.6339, "norm_diff": 0.0139, "num_tokens_overlap": 5.583, "num_tokens_union": 54.8861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.5642, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1182, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7195, "sent_len_1": 66.6898, "sent_len_max_0": 18.9025, "sent_len_max_1": 191.4013, "stdk": 0.0404, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 13000 }, { "accuracy": 50.0977, "doc_norm": 7.1273, "encoder_q-embeddings": 1431.4172, "encoder_q-layer.0": 936.2855, "encoder_q-layer.1": 926.295, "encoder_q-layer.10": 1659.7473, "encoder_q-layer.11": 2833.3914, "encoder_q-layer.2": 992.7133, "encoder_q-layer.3": 973.2761, "encoder_q-layer.4": 993.6633, "encoder_q-layer.5": 947.383, "encoder_q-layer.6": 1037.2527, "encoder_q-layer.7": 1101.1425, "encoder_q-layer.8": 1300.6373, "encoder_q-layer.9": 1247.9647, "epoch": 0.13, "inbatch_neg_score": 49.3863, "inbatch_pos_score": 49.8438, "learning_rate": 4.8277777777777776e-05, "loss": 2.5843, "norm_diff": 0.0171, "num_tokens_overlap": 5.5803, "num_tokens_union": 55.061, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1994.9371, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1102, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7316, "sent_len_1": 66.8563, "sent_len_max_0": 18.925, "sent_len_max_1": 191.0762, "stdk": 0.0395, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 13100 }, { "accuracy": 53.125, "doc_norm": 7.1334, "encoder_q-embeddings": 1348.6982, "encoder_q-layer.0": 875.8, "encoder_q-layer.1": 882.5146, "encoder_q-layer.10": 1672.3153, "encoder_q-layer.11": 3223.8848, "encoder_q-layer.2": 930.0497, "encoder_q-layer.3": 980.0317, "encoder_q-layer.4": 982.3261, "encoder_q-layer.5": 968.8309, "encoder_q-layer.6": 1016.7012, "encoder_q-layer.7": 1123.1096, "encoder_q-layer.8": 1230.3085, "encoder_q-layer.9": 1222.353, "epoch": 0.13, "inbatch_neg_score": 49.4732, "inbatch_pos_score": 49.9375, "learning_rate": 4.8222222222222225e-05, "loss": 2.6289, "norm_diff": 0.0177, "num_tokens_overlap": 5.5821, "num_tokens_union": 54.9199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2030.2204, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1156, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7022, "sent_len_1": 66.699, "sent_len_max_0": 18.8175, "sent_len_max_1": 190.5337, "stdk": 0.0405, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 13200 }, { "accuracy": 52.0508, "doc_norm": 7.1246, "encoder_q-embeddings": 1231.0576, "encoder_q-layer.0": 853.7128, "encoder_q-layer.1": 868.31, "encoder_q-layer.10": 1671.0723, "encoder_q-layer.11": 2853.0703, "encoder_q-layer.2": 923.8277, "encoder_q-layer.3": 922.6677, "encoder_q-layer.4": 962.015, "encoder_q-layer.5": 920.8835, "encoder_q-layer.6": 1036.7877, "encoder_q-layer.7": 1078.9623, "encoder_q-layer.8": 1247.1621, "encoder_q-layer.9": 1252.4194, "epoch": 0.13, "inbatch_neg_score": 49.3247, "inbatch_pos_score": 49.7812, "learning_rate": 4.8166666666666674e-05, "loss": 2.6255, "norm_diff": 0.0184, "num_tokens_overlap": 5.5832, "num_tokens_union": 54.9706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1881.9653, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1062, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7186, "sent_len_1": 66.7427, "sent_len_max_0": 18.8075, "sent_len_max_1": 188.8075, "stdk": 0.0407, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 13300 }, { "accuracy": 53.418, "doc_norm": 7.1158, "encoder_q-embeddings": 1214.9125, "encoder_q-layer.0": 826.0278, "encoder_q-layer.1": 837.3075, "encoder_q-layer.10": 1536.4066, "encoder_q-layer.11": 2981.8271, "encoder_q-layer.2": 908.6284, "encoder_q-layer.3": 933.3614, "encoder_q-layer.4": 918.051, "encoder_q-layer.5": 900.3786, "encoder_q-layer.6": 1000.6721, "encoder_q-layer.7": 1064.2678, "encoder_q-layer.8": 1149.7916, "encoder_q-layer.9": 1147.252, "epoch": 0.13, "inbatch_neg_score": 49.2336, "inbatch_pos_score": 49.6875, "learning_rate": 4.811111111111111e-05, "loss": 2.5857, "norm_diff": 0.0142, "num_tokens_overlap": 5.5802, "num_tokens_union": 55.1575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1869.8205, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1015, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7097, "sent_len_1": 67.0388, "sent_len_max_0": 18.8788, "sent_len_max_1": 189.2688, "stdk": 0.0412, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 13400 }, { "accuracy": 49.4141, "doc_norm": 7.1241, "encoder_q-embeddings": 1279.1626, "encoder_q-layer.0": 862.6304, "encoder_q-layer.1": 868.1243, "encoder_q-layer.10": 1547.0146, "encoder_q-layer.11": 3012.4146, "encoder_q-layer.2": 918.5035, "encoder_q-layer.3": 927.9487, "encoder_q-layer.4": 950.558, "encoder_q-layer.5": 897.9612, "encoder_q-layer.6": 984.5632, "encoder_q-layer.7": 1037.1331, "encoder_q-layer.8": 1250.3135, "encoder_q-layer.9": 1155.0577, "epoch": 0.13, "inbatch_neg_score": 49.3681, "inbatch_pos_score": 49.8125, "learning_rate": 4.805555555555556e-05, "loss": 2.6432, "norm_diff": 0.0145, "num_tokens_overlap": 5.5788, "num_tokens_union": 54.8719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1907.0768, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1096, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7085, "sent_len_1": 66.6194, "sent_len_max_0": 18.8762, "sent_len_max_1": 189.04, "stdk": 0.041, "stdq": 0.039, "stdqueue_k": 0.0, "step": 13500 }, { "accuracy": 51.7578, "doc_norm": 7.1314, "encoder_q-embeddings": 1305.9429, "encoder_q-layer.0": 855.7212, "encoder_q-layer.1": 868.078, "encoder_q-layer.10": 1906.2983, "encoder_q-layer.11": 3147.4282, "encoder_q-layer.2": 920.3987, "encoder_q-layer.3": 917.7231, "encoder_q-layer.4": 951.4709, "encoder_q-layer.5": 980.2552, "encoder_q-layer.6": 1079.1576, "encoder_q-layer.7": 1109.078, "encoder_q-layer.8": 1302.6958, "encoder_q-layer.9": 1340.3529, "epoch": 0.13, "inbatch_neg_score": 49.4319, "inbatch_pos_score": 49.9062, "learning_rate": 4.8e-05, "loss": 2.5758, "norm_diff": 0.0156, "num_tokens_overlap": 5.5856, "num_tokens_union": 55.1352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2016.0963, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1158, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7324, "sent_len_1": 66.9616, "sent_len_max_0": 18.8538, "sent_len_max_1": 190.2738, "stdk": 0.0399, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 13600 }, { "accuracy": 53.2227, "doc_norm": 7.1258, "encoder_q-embeddings": 1233.3091, "encoder_q-layer.0": 833.3144, "encoder_q-layer.1": 833.1407, "encoder_q-layer.10": 1493.0198, "encoder_q-layer.11": 2839.1348, "encoder_q-layer.2": 931.837, "encoder_q-layer.3": 945.5524, "encoder_q-layer.4": 938.3829, "encoder_q-layer.5": 930.0264, "encoder_q-layer.6": 997.8876, "encoder_q-layer.7": 1059.7094, "encoder_q-layer.8": 1209.0815, "encoder_q-layer.9": 1164.1615, "epoch": 0.13, "inbatch_neg_score": 49.4069, "inbatch_pos_score": 49.875, "learning_rate": 4.794444444444445e-05, "loss": 2.6002, "norm_diff": 0.0145, "num_tokens_overlap": 5.5732, "num_tokens_union": 54.9216, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1858.5617, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1113, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7224, "sent_len_1": 66.6621, "sent_len_max_0": 18.7888, "sent_len_max_1": 191.7475, "stdk": 0.04, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 13700 }, { "accuracy": 50.7812, "doc_norm": 7.1305, "encoder_q-embeddings": 1341.542, "encoder_q-layer.0": 918.3022, "encoder_q-layer.1": 946.6238, "encoder_q-layer.10": 2218.4319, "encoder_q-layer.11": 3310.0947, "encoder_q-layer.2": 1010.7047, "encoder_q-layer.3": 986.5432, "encoder_q-layer.4": 1034.3583, "encoder_q-layer.5": 992.7151, "encoder_q-layer.6": 1119.9823, "encoder_q-layer.7": 1219.1626, "encoder_q-layer.8": 1373.4983, "encoder_q-layer.9": 1496.1705, "epoch": 0.13, "inbatch_neg_score": 49.4318, "inbatch_pos_score": 49.9062, "learning_rate": 4.7888888888888886e-05, "loss": 2.585, "norm_diff": 0.0114, "num_tokens_overlap": 5.5918, "num_tokens_union": 55.0256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2143.2361, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1191, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7489, "sent_len_1": 66.7348, "sent_len_max_0": 18.83, "sent_len_max_1": 188.0788, "stdk": 0.0405, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 13800 }, { "accuracy": 51.6602, "doc_norm": 7.1414, "encoder_q-embeddings": 1265.1956, "encoder_q-layer.0": 871.077, "encoder_q-layer.1": 867.2332, "encoder_q-layer.10": 1730.792, "encoder_q-layer.11": 2819.6416, "encoder_q-layer.2": 926.1115, "encoder_q-layer.3": 951.2626, "encoder_q-layer.4": 1021.1741, "encoder_q-layer.5": 977.2102, "encoder_q-layer.6": 1073.2545, "encoder_q-layer.7": 1166.9873, "encoder_q-layer.8": 1308.6553, "encoder_q-layer.9": 1235.2489, "epoch": 0.14, "inbatch_neg_score": 49.5314, "inbatch_pos_score": 50.0, "learning_rate": 4.7833333333333335e-05, "loss": 2.6008, "norm_diff": 0.0153, "num_tokens_overlap": 5.5626, "num_tokens_union": 55.0328, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1911.9586, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1261, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6815, "sent_len_1": 66.9194, "sent_len_max_0": 18.8463, "sent_len_max_1": 190.1375, "stdk": 0.0413, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 13900 }, { "accuracy": 54.4922, "doc_norm": 7.1408, "encoder_q-embeddings": 1266.5612, "encoder_q-layer.0": 843.3611, "encoder_q-layer.1": 890.584, "encoder_q-layer.10": 1710.6105, "encoder_q-layer.11": 2874.4412, "encoder_q-layer.2": 957.6152, "encoder_q-layer.3": 949.2399, "encoder_q-layer.4": 986.7649, "encoder_q-layer.5": 984.0339, "encoder_q-layer.6": 1103.7318, "encoder_q-layer.7": 1294.394, "encoder_q-layer.8": 1486.1472, "encoder_q-layer.9": 1340.6763, "epoch": 0.14, "inbatch_neg_score": 49.5673, "inbatch_pos_score": 50.0625, "learning_rate": 4.7777777777777784e-05, "loss": 2.5716, "norm_diff": 0.0169, "num_tokens_overlap": 5.5828, "num_tokens_union": 55.0655, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1974.3103, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1239, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.739, "sent_len_1": 66.8645, "sent_len_max_0": 18.8487, "sent_len_max_1": 188.305, "stdk": 0.0403, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 14000 }, { "accuracy": 52.9297, "doc_norm": 7.1406, "encoder_q-embeddings": 2489.8975, "encoder_q-layer.0": 1702.1724, "encoder_q-layer.1": 1716.3889, "encoder_q-layer.10": 3419.5759, "encoder_q-layer.11": 5842.333, "encoder_q-layer.2": 1802.249, "encoder_q-layer.3": 1788.3804, "encoder_q-layer.4": 1769.6108, "encoder_q-layer.5": 1777.3962, "encoder_q-layer.6": 1936.9609, "encoder_q-layer.7": 2027.5269, "encoder_q-layer.8": 2301.2144, "encoder_q-layer.9": 2383.0251, "epoch": 0.14, "inbatch_neg_score": 49.569, "inbatch_pos_score": 50.0625, "learning_rate": 4.7722222222222226e-05, "loss": 2.6233, "norm_diff": 0.0139, "num_tokens_overlap": 5.5801, "num_tokens_union": 55.0312, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3761.9477, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1268, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7208, "sent_len_1": 66.7783, "sent_len_max_0": 18.9013, "sent_len_max_1": 190.5163, "stdk": 0.0403, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 14100 }, { "accuracy": 48.7305, "doc_norm": 7.1354, "encoder_q-embeddings": 2817.3413, "encoder_q-layer.0": 1855.8185, "encoder_q-layer.1": 1862.7574, "encoder_q-layer.10": 4069.5813, "encoder_q-layer.11": 7086.9365, "encoder_q-layer.2": 2058.0378, "encoder_q-layer.3": 2020.0825, "encoder_q-layer.4": 2082.825, "encoder_q-layer.5": 2135.3489, "encoder_q-layer.6": 2323.7063, "encoder_q-layer.7": 2452.3835, "encoder_q-layer.8": 2963.051, "encoder_q-layer.9": 3126.0857, "epoch": 0.14, "inbatch_neg_score": 49.4983, "inbatch_pos_score": 49.9688, "learning_rate": 4.766666666666667e-05, "loss": 2.5652, "norm_diff": 0.0144, "num_tokens_overlap": 5.575, "num_tokens_union": 54.9126, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4408.9279, "preclip_grad_norm_avg": 0.0, "query_norm": 7.121, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.706, "sent_len_1": 66.6228, "sent_len_max_0": 18.8562, "sent_len_max_1": 188.4563, "stdk": 0.0402, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 14200 }, { "accuracy": 50.7812, "doc_norm": 7.1331, "encoder_q-embeddings": 2591.2659, "encoder_q-layer.0": 1757.4897, "encoder_q-layer.1": 1764.7307, "encoder_q-layer.10": 3045.4192, "encoder_q-layer.11": 5663.541, "encoder_q-layer.2": 1889.7596, "encoder_q-layer.3": 1944.5322, "encoder_q-layer.4": 1954.8513, "encoder_q-layer.5": 1933.011, "encoder_q-layer.6": 2077.958, "encoder_q-layer.7": 2162.137, "encoder_q-layer.8": 2445.5017, "encoder_q-layer.9": 2370.3254, "epoch": 0.14, "inbatch_neg_score": 49.4479, "inbatch_pos_score": 49.9062, "learning_rate": 4.761111111111111e-05, "loss": 2.5953, "norm_diff": 0.0152, "num_tokens_overlap": 5.5838, "num_tokens_union": 54.9663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3806.8764, "preclip_grad_norm_avg": 0.0, "query_norm": 7.118, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7329, "sent_len_1": 66.7021, "sent_len_max_0": 18.9075, "sent_len_max_1": 188.0662, "stdk": 0.0418, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 14300 }, { "accuracy": 53.9062, "doc_norm": 7.1331, "encoder_q-embeddings": 2455.8291, "encoder_q-layer.0": 1705.6812, "encoder_q-layer.1": 1672.1624, "encoder_q-layer.10": 3110.1729, "encoder_q-layer.11": 5401.4385, "encoder_q-layer.2": 1791.5056, "encoder_q-layer.3": 1851.0089, "encoder_q-layer.4": 1925.6304, "encoder_q-layer.5": 1824.884, "encoder_q-layer.6": 1961.4705, "encoder_q-layer.7": 2076.2925, "encoder_q-layer.8": 2376.9229, "encoder_q-layer.9": 2305.8833, "epoch": 0.14, "inbatch_neg_score": 49.4221, "inbatch_pos_score": 49.9062, "learning_rate": 4.755555555555556e-05, "loss": 2.5579, "norm_diff": 0.018, "num_tokens_overlap": 5.5819, "num_tokens_union": 55.0816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3666.1793, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1151, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7296, "sent_len_1": 66.902, "sent_len_max_0": 18.8175, "sent_len_max_1": 192.255, "stdk": 0.0405, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 14400 }, { "accuracy": 51.3672, "doc_norm": 7.1349, "encoder_q-embeddings": 2429.0967, "encoder_q-layer.0": 1694.5999, "encoder_q-layer.1": 1669.1107, "encoder_q-layer.10": 3238.8931, "encoder_q-layer.11": 5763.0684, "encoder_q-layer.2": 1797.572, "encoder_q-layer.3": 1790.2, "encoder_q-layer.4": 1789.0123, "encoder_q-layer.5": 1746.5349, "encoder_q-layer.6": 1903.3696, "encoder_q-layer.7": 2079.1797, "encoder_q-layer.8": 2435.4465, "encoder_q-layer.9": 2328.936, "epoch": 0.14, "inbatch_neg_score": 49.5258, "inbatch_pos_score": 50.0, "learning_rate": 4.75e-05, "loss": 2.5757, "norm_diff": 0.0135, "num_tokens_overlap": 5.5768, "num_tokens_union": 54.9852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3701.7662, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1214, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7303, "sent_len_1": 66.7345, "sent_len_max_0": 18.8113, "sent_len_max_1": 190.7363, "stdk": 0.0393, "stdq": 0.039, "stdqueue_k": 0.0, "step": 14500 }, { "accuracy": 55.7617, "doc_norm": 7.1404, "encoder_q-embeddings": 2546.3315, "encoder_q-layer.0": 1719.7313, "encoder_q-layer.1": 1683.1299, "encoder_q-layer.10": 4235.0327, "encoder_q-layer.11": 6435.9756, "encoder_q-layer.2": 1827.6981, "encoder_q-layer.3": 1801.7747, "encoder_q-layer.4": 1967.59, "encoder_q-layer.5": 1918.001, "encoder_q-layer.6": 2102.1165, "encoder_q-layer.7": 2264.552, "encoder_q-layer.8": 2496.8982, "encoder_q-layer.9": 2500.0049, "epoch": 0.14, "inbatch_neg_score": 49.5678, "inbatch_pos_score": 50.0625, "learning_rate": 4.7444444444444445e-05, "loss": 2.5936, "norm_diff": 0.0167, "num_tokens_overlap": 5.5826, "num_tokens_union": 55.0495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4085.9326, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1237, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7422, "sent_len_1": 66.7664, "sent_len_max_0": 19.01, "sent_len_max_1": 188.275, "stdk": 0.0403, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 14600 }, { "accuracy": 54.7852, "doc_norm": 7.1343, "encoder_q-embeddings": 2673.0137, "encoder_q-layer.0": 1725.1072, "encoder_q-layer.1": 1754.9661, "encoder_q-layer.10": 3086.197, "encoder_q-layer.11": 6038.9316, "encoder_q-layer.2": 1885.0287, "encoder_q-layer.3": 1916.4366, "encoder_q-layer.4": 2006.1138, "encoder_q-layer.5": 1906.6727, "encoder_q-layer.6": 2136.5608, "encoder_q-layer.7": 2231.5103, "encoder_q-layer.8": 2498.6482, "encoder_q-layer.9": 2391.8918, "epoch": 0.14, "inbatch_neg_score": 49.4788, "inbatch_pos_score": 49.9688, "learning_rate": 4.7388888888888894e-05, "loss": 2.5556, "norm_diff": 0.0158, "num_tokens_overlap": 5.5893, "num_tokens_union": 55.0488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3931.806, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1185, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7362, "sent_len_1": 66.7967, "sent_len_max_0": 18.785, "sent_len_max_1": 189.015, "stdk": 0.0403, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 14700 }, { "accuracy": 53.8086, "doc_norm": 7.133, "encoder_q-embeddings": 2667.1528, "encoder_q-layer.0": 1742.8892, "encoder_q-layer.1": 1736.548, "encoder_q-layer.10": 3313.001, "encoder_q-layer.11": 6302.5928, "encoder_q-layer.2": 1837.8602, "encoder_q-layer.3": 1888.4104, "encoder_q-layer.4": 1898.4485, "encoder_q-layer.5": 1832.3129, "encoder_q-layer.6": 1951.7314, "encoder_q-layer.7": 2112.2212, "encoder_q-layer.8": 2523.7805, "encoder_q-layer.9": 2439.3596, "epoch": 0.14, "inbatch_neg_score": 49.4636, "inbatch_pos_score": 49.9375, "learning_rate": 4.7333333333333336e-05, "loss": 2.5996, "norm_diff": 0.0142, "num_tokens_overlap": 5.5866, "num_tokens_union": 54.9916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3910.0205, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1188, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7369, "sent_len_1": 66.7706, "sent_len_max_0": 18.8725, "sent_len_max_1": 189.84, "stdk": 0.04, "stdq": 0.04, "stdqueue_k": 0.0, "step": 14800 }, { "accuracy": 54.1016, "doc_norm": 7.1274, "encoder_q-embeddings": 2515.8093, "encoder_q-layer.0": 1721.8516, "encoder_q-layer.1": 1729.2566, "encoder_q-layer.10": 2926.4756, "encoder_q-layer.11": 5353.9238, "encoder_q-layer.2": 1817.5071, "encoder_q-layer.3": 1826.9761, "encoder_q-layer.4": 1877.165, "encoder_q-layer.5": 1845.8721, "encoder_q-layer.6": 1974.9091, "encoder_q-layer.7": 2194.8596, "encoder_q-layer.8": 2507.679, "encoder_q-layer.9": 2231.6567, "epoch": 0.15, "inbatch_neg_score": 49.3448, "inbatch_pos_score": 49.8438, "learning_rate": 4.727777777777778e-05, "loss": 2.6126, "norm_diff": 0.0154, "num_tokens_overlap": 5.5772, "num_tokens_union": 54.903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3700.6017, "preclip_grad_norm_avg": 0.0, "query_norm": 7.112, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.704, "sent_len_1": 66.6444, "sent_len_max_0": 18.7913, "sent_len_max_1": 189.4437, "stdk": 0.0406, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 14900 }, { "accuracy": 52.9297, "doc_norm": 7.1216, "encoder_q-embeddings": 2544.5305, "encoder_q-layer.0": 1713.2043, "encoder_q-layer.1": 1720.5449, "encoder_q-layer.10": 2904.1824, "encoder_q-layer.11": 5544.6147, "encoder_q-layer.2": 1852.0741, "encoder_q-layer.3": 1857.6942, "encoder_q-layer.4": 1897.9091, "encoder_q-layer.5": 1805.6946, "encoder_q-layer.6": 1976.7334, "encoder_q-layer.7": 2050.5137, "encoder_q-layer.8": 2373.8398, "encoder_q-layer.9": 2253.2358, "epoch": 0.15, "inbatch_neg_score": 49.251, "inbatch_pos_score": 49.75, "learning_rate": 4.722222222222222e-05, "loss": 2.5712, "norm_diff": 0.0198, "num_tokens_overlap": 5.5747, "num_tokens_union": 55.0089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3707.7621, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1018, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6999, "sent_len_1": 66.7792, "sent_len_max_0": 18.81, "sent_len_max_1": 190.4712, "stdk": 0.041, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 15000 }, { "accuracy": 49.4141, "doc_norm": 7.118, "encoder_q-embeddings": 2534.8276, "encoder_q-layer.0": 1743.6593, "encoder_q-layer.1": 1756.314, "encoder_q-layer.10": 3125.5798, "encoder_q-layer.11": 5997.8452, "encoder_q-layer.2": 1964.9622, "encoder_q-layer.3": 1983.7799, "encoder_q-layer.4": 1973.0143, "encoder_q-layer.5": 1964.4264, "encoder_q-layer.6": 2087.4111, "encoder_q-layer.7": 2113.7729, "encoder_q-layer.8": 2418.1189, "encoder_q-layer.9": 2346.9299, "epoch": 0.15, "inbatch_neg_score": 49.2495, "inbatch_pos_score": 49.7188, "learning_rate": 4.716666666666667e-05, "loss": 2.566, "norm_diff": 0.0147, "num_tokens_overlap": 5.5871, "num_tokens_union": 55.1277, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3905.4542, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1033, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7412, "sent_len_1": 66.9618, "sent_len_max_0": 18.8012, "sent_len_max_1": 189.89, "stdk": 0.0404, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 15100 }, { "accuracy": 54.4922, "doc_norm": 7.1247, "encoder_q-embeddings": 2455.1147, "encoder_q-layer.0": 1689.6831, "encoder_q-layer.1": 1716.9476, "encoder_q-layer.10": 3507.6731, "encoder_q-layer.11": 5636.0879, "encoder_q-layer.2": 1883.9847, "encoder_q-layer.3": 1864.969, "encoder_q-layer.4": 1860.2996, "encoder_q-layer.5": 1802.3035, "encoder_q-layer.6": 1906.1583, "encoder_q-layer.7": 2121.0134, "encoder_q-layer.8": 2407.637, "encoder_q-layer.9": 2311.1687, "epoch": 0.15, "inbatch_neg_score": 49.2704, "inbatch_pos_score": 49.75, "learning_rate": 4.711111111111111e-05, "loss": 2.585, "norm_diff": 0.0202, "num_tokens_overlap": 5.5779, "num_tokens_union": 54.9242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3736.7206, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1045, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7205, "sent_len_1": 66.6646, "sent_len_max_0": 18.9037, "sent_len_max_1": 188.8113, "stdk": 0.0429, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 15200 }, { "accuracy": 53.2227, "doc_norm": 7.1268, "encoder_q-embeddings": 2413.7197, "encoder_q-layer.0": 1632.7632, "encoder_q-layer.1": 1635.1232, "encoder_q-layer.10": 4421.9756, "encoder_q-layer.11": 6258.2036, "encoder_q-layer.2": 1806.8408, "encoder_q-layer.3": 1806.3738, "encoder_q-layer.4": 1966.699, "encoder_q-layer.5": 1810.5227, "encoder_q-layer.6": 2077.9548, "encoder_q-layer.7": 2216.0112, "encoder_q-layer.8": 2627.8384, "encoder_q-layer.9": 2783.6604, "epoch": 0.15, "inbatch_neg_score": 49.3979, "inbatch_pos_score": 49.8438, "learning_rate": 4.7055555555555555e-05, "loss": 2.5338, "norm_diff": 0.0166, "num_tokens_overlap": 5.5796, "num_tokens_union": 54.8775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4052.0137, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1103, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7405, "sent_len_1": 66.5063, "sent_len_max_0": 18.8025, "sent_len_max_1": 188.9075, "stdk": 0.0399, "stdq": 0.039, "stdqueue_k": 0.0, "step": 15300 }, { "accuracy": 52.4414, "doc_norm": 7.1198, "encoder_q-embeddings": 2527.0476, "encoder_q-layer.0": 1674.4741, "encoder_q-layer.1": 1698.1471, "encoder_q-layer.10": 2984.9294, "encoder_q-layer.11": 5378.9033, "encoder_q-layer.2": 1799.7922, "encoder_q-layer.3": 1853.6077, "encoder_q-layer.4": 1822.6183, "encoder_q-layer.5": 1754.62, "encoder_q-layer.6": 1959.6108, "encoder_q-layer.7": 2016.6484, "encoder_q-layer.8": 2247.5454, "encoder_q-layer.9": 2173.2727, "epoch": 0.15, "inbatch_neg_score": 49.3073, "inbatch_pos_score": 49.7812, "learning_rate": 4.7e-05, "loss": 2.5619, "norm_diff": 0.0133, "num_tokens_overlap": 5.5656, "num_tokens_union": 55.0631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3632.7189, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1065, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6966, "sent_len_1": 66.9086, "sent_len_max_0": 18.9037, "sent_len_max_1": 192.2063, "stdk": 0.0398, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 15400 }, { "accuracy": 49.3164, "doc_norm": 7.1207, "encoder_q-embeddings": 2543.5217, "encoder_q-layer.0": 1805.8702, "encoder_q-layer.1": 1756.9478, "encoder_q-layer.10": 4060.2329, "encoder_q-layer.11": 6394.5845, "encoder_q-layer.2": 1881.7161, "encoder_q-layer.3": 1868.9827, "encoder_q-layer.4": 1893.3849, "encoder_q-layer.5": 1847.5919, "encoder_q-layer.6": 2040.0475, "encoder_q-layer.7": 2088.1609, "encoder_q-layer.8": 2380.9287, "encoder_q-layer.9": 2543.5293, "epoch": 0.15, "inbatch_neg_score": 49.2853, "inbatch_pos_score": 49.75, "learning_rate": 4.6944444444444446e-05, "loss": 2.5534, "norm_diff": 0.017, "num_tokens_overlap": 5.5775, "num_tokens_union": 55.0996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4042.1961, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1037, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7245, "sent_len_1": 66.9391, "sent_len_max_0": 18.9887, "sent_len_max_1": 191.6325, "stdk": 0.0405, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 15500 }, { "accuracy": 51.5625, "doc_norm": 7.1288, "encoder_q-embeddings": 2464.8533, "encoder_q-layer.0": 1662.5681, "encoder_q-layer.1": 1702.9423, "encoder_q-layer.10": 2876.5522, "encoder_q-layer.11": 5630.2651, "encoder_q-layer.2": 1795.2223, "encoder_q-layer.3": 1807.1348, "encoder_q-layer.4": 1843.9805, "encoder_q-layer.5": 1800.4487, "encoder_q-layer.6": 1997.8177, "encoder_q-layer.7": 2059.4966, "encoder_q-layer.8": 2384.166, "encoder_q-layer.9": 2137.8701, "epoch": 0.15, "inbatch_neg_score": 49.3968, "inbatch_pos_score": 49.875, "learning_rate": 4.6888888888888895e-05, "loss": 2.5534, "norm_diff": 0.0141, "num_tokens_overlap": 5.5857, "num_tokens_union": 54.9641, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3666.4726, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1148, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7012, "sent_len_1": 66.68, "sent_len_max_0": 18.8337, "sent_len_max_1": 187.7163, "stdk": 0.0407, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 15600 }, { "accuracy": 50.8789, "doc_norm": 7.1245, "encoder_q-embeddings": 2594.8013, "encoder_q-layer.0": 1778.8928, "encoder_q-layer.1": 1765.9014, "encoder_q-layer.10": 3132.2605, "encoder_q-layer.11": 5551.7422, "encoder_q-layer.2": 1858.8822, "encoder_q-layer.3": 1871.7451, "encoder_q-layer.4": 1925.9404, "encoder_q-layer.5": 1903.39, "encoder_q-layer.6": 1959.7721, "encoder_q-layer.7": 2155.5786, "encoder_q-layer.8": 2558.4341, "encoder_q-layer.9": 2245.7478, "epoch": 0.15, "inbatch_neg_score": 49.3291, "inbatch_pos_score": 49.8125, "learning_rate": 4.683333333333334e-05, "loss": 2.5793, "norm_diff": 0.0161, "num_tokens_overlap": 5.587, "num_tokens_union": 55.0764, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3748.6526, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1084, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7354, "sent_len_1": 66.8245, "sent_len_max_0": 18.7937, "sent_len_max_1": 189.7125, "stdk": 0.0405, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 15700 }, { "accuracy": 51.3672, "doc_norm": 7.1223, "encoder_q-embeddings": 2484.7874, "encoder_q-layer.0": 1709.1234, "encoder_q-layer.1": 1717.8668, "encoder_q-layer.10": 3202.6455, "encoder_q-layer.11": 5574.2725, "encoder_q-layer.2": 1864.1536, "encoder_q-layer.3": 1889.1438, "encoder_q-layer.4": 1894.7894, "encoder_q-layer.5": 1804.3091, "encoder_q-layer.6": 2009.5173, "encoder_q-layer.7": 2119.2856, "encoder_q-layer.8": 2482.9565, "encoder_q-layer.9": 2433.8936, "epoch": 0.15, "inbatch_neg_score": 49.2799, "inbatch_pos_score": 49.75, "learning_rate": 4.677777777777778e-05, "loss": 2.5099, "norm_diff": 0.0172, "num_tokens_overlap": 5.5956, "num_tokens_union": 55.1182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3693.0914, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1051, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7487, "sent_len_1": 66.9437, "sent_len_max_0": 18.8975, "sent_len_max_1": 189.3338, "stdk": 0.041, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 15800 }, { "accuracy": 53.418, "doc_norm": 7.1285, "encoder_q-embeddings": 2449.4097, "encoder_q-layer.0": 1737.5305, "encoder_q-layer.1": 1749.0002, "encoder_q-layer.10": 3061.2917, "encoder_q-layer.11": 6041.0166, "encoder_q-layer.2": 1867.7987, "encoder_q-layer.3": 1921.0359, "encoder_q-layer.4": 1978.11, "encoder_q-layer.5": 1964.4609, "encoder_q-layer.6": 2184.0198, "encoder_q-layer.7": 2242.9148, "encoder_q-layer.8": 2464.1655, "encoder_q-layer.9": 2328.4514, "epoch": 0.16, "inbatch_neg_score": 49.4227, "inbatch_pos_score": 49.875, "learning_rate": 4.672222222222222e-05, "loss": 2.5515, "norm_diff": 0.0151, "num_tokens_overlap": 5.5885, "num_tokens_union": 55.0884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3808.0367, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1134, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7291, "sent_len_1": 66.9053, "sent_len_max_0": 18.9713, "sent_len_max_1": 190.2675, "stdk": 0.0397, "stdq": 0.039, "stdqueue_k": 0.0, "step": 15900 }, { "accuracy": 52.2461, "doc_norm": 7.1337, "encoder_q-embeddings": 2634.0183, "encoder_q-layer.0": 1805.1871, "encoder_q-layer.1": 1811.5055, "encoder_q-layer.10": 2952.1892, "encoder_q-layer.11": 5329.5913, "encoder_q-layer.2": 1883.3745, "encoder_q-layer.3": 1928.2606, "encoder_q-layer.4": 2006.9033, "encoder_q-layer.5": 1978.4286, "encoder_q-layer.6": 2068.6187, "encoder_q-layer.7": 2182.2671, "encoder_q-layer.8": 2373.5837, "encoder_q-layer.9": 2209.4817, "epoch": 0.16, "inbatch_neg_score": 49.3843, "inbatch_pos_score": 49.875, "learning_rate": 4.666666666666667e-05, "loss": 2.5521, "norm_diff": 0.0194, "num_tokens_overlap": 5.5797, "num_tokens_union": 55.1265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3723.7443, "preclip_grad_norm_avg": 0.0, "query_norm": 7.1144, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7178, "sent_len_1": 66.9979, "sent_len_max_0": 18.8762, "sent_len_max_1": 190.685, "stdk": 0.0415, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 16000 }, { "accuracy": 54.9805, "doc_norm": 7.1246, "encoder_q-embeddings": 4793.8652, "encoder_q-layer.0": 3352.4434, "encoder_q-layer.1": 3400.2495, "encoder_q-layer.10": 6131.9219, "encoder_q-layer.11": 11882.7207, "encoder_q-layer.2": 3601.0317, "encoder_q-layer.3": 3592.6443, "encoder_q-layer.4": 3736.0977, "encoder_q-layer.5": 3716.3679, "encoder_q-layer.6": 3930.7112, "encoder_q-layer.7": 4189.7148, "encoder_q-layer.8": 4522.7461, "encoder_q-layer.9": 4503.6509, "epoch": 0.16, "inbatch_neg_score": 49.2993, "inbatch_pos_score": 49.7812, "learning_rate": 4.6611111111111114e-05, "loss": 2.5826, "norm_diff": 0.0186, "num_tokens_overlap": 5.5763, "num_tokens_union": 54.9961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7514.5949, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.106, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7162, "sent_len_1": 66.7941, "sent_len_max_0": 18.92, "sent_len_max_1": 189.7038, "stdk": 0.0403, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 16100 }, { "accuracy": 53.418, "doc_norm": 7.121, "encoder_q-embeddings": 5150.0859, "encoder_q-layer.0": 3502.366, "encoder_q-layer.1": 3555.6353, "encoder_q-layer.10": 5933.061, "encoder_q-layer.11": 11513.2637, "encoder_q-layer.2": 3643.1487, "encoder_q-layer.3": 3669.8823, "encoder_q-layer.4": 3749.5217, "encoder_q-layer.5": 3585.2607, "encoder_q-layer.6": 3812.4697, "encoder_q-layer.7": 4213.9346, "encoder_q-layer.8": 4615.7432, "encoder_q-layer.9": 4398.8052, "epoch": 0.16, "inbatch_neg_score": 49.3087, "inbatch_pos_score": 49.7812, "learning_rate": 4.6555555555555556e-05, "loss": 2.5321, "norm_diff": 0.0137, "num_tokens_overlap": 5.5644, "num_tokens_union": 54.8663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7517.3277, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.1073, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6899, "sent_len_1": 66.5775, "sent_len_max_0": 18.8388, "sent_len_max_1": 189.3013, "stdk": 0.0405, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 16200 }, { "accuracy": 56.1523, "doc_norm": 7.1171, "encoder_q-embeddings": 4929.5776, "encoder_q-layer.0": 3371.6772, "encoder_q-layer.1": 3511.845, "encoder_q-layer.10": 5851.1938, "encoder_q-layer.11": 10194.1064, "encoder_q-layer.2": 3733.8003, "encoder_q-layer.3": 3713.2759, "encoder_q-layer.4": 3707.7085, "encoder_q-layer.5": 3582.4617, "encoder_q-layer.6": 3671.5928, "encoder_q-layer.7": 3851.9055, "encoder_q-layer.8": 4209.4043, "encoder_q-layer.9": 4183.3101, "epoch": 0.16, "inbatch_neg_score": 49.1789, "inbatch_pos_score": 49.6875, "learning_rate": 4.6500000000000005e-05, "loss": 2.5708, "norm_diff": 0.0184, "num_tokens_overlap": 5.5746, "num_tokens_union": 54.9326, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7075.1093, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0987, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7138, "sent_len_1": 66.6563, "sent_len_max_0": 18.77, "sent_len_max_1": 188.9363, "stdk": 0.0421, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 16300 }, { "accuracy": 52.832, "doc_norm": 7.1171, "encoder_q-embeddings": 5094.6035, "encoder_q-layer.0": 3435.1348, "encoder_q-layer.1": 3457.7585, "encoder_q-layer.10": 6458.4956, "encoder_q-layer.11": 12057.8447, "encoder_q-layer.2": 3733.9849, "encoder_q-layer.3": 3651.3796, "encoder_q-layer.4": 3790.4578, "encoder_q-layer.5": 3669.2742, "encoder_q-layer.6": 4004.2998, "encoder_q-layer.7": 4305.125, "encoder_q-layer.8": 4642.8706, "encoder_q-layer.9": 4681.6479, "epoch": 0.16, "inbatch_neg_score": 49.208, "inbatch_pos_score": 49.6562, "learning_rate": 4.644444444444445e-05, "loss": 2.5453, "norm_diff": 0.0187, "num_tokens_overlap": 5.5686, "num_tokens_union": 54.9556, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7602.3207, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0984, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7229, "sent_len_1": 66.6759, "sent_len_max_0": 18.8463, "sent_len_max_1": 190.3088, "stdk": 0.0407, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 16400 }, { "accuracy": 54.2969, "doc_norm": 7.1152, "encoder_q-embeddings": 5236.9893, "encoder_q-layer.0": 3578.5972, "encoder_q-layer.1": 3613.9192, "encoder_q-layer.10": 7522.7734, "encoder_q-layer.11": 13152.5908, "encoder_q-layer.2": 3982.6545, "encoder_q-layer.3": 4048.8877, "encoder_q-layer.4": 4165.5859, "encoder_q-layer.5": 4183.936, "encoder_q-layer.6": 4440.1616, "encoder_q-layer.7": 4869.6562, "encoder_q-layer.8": 5406.6274, "encoder_q-layer.9": 5290.0645, "epoch": 0.16, "inbatch_neg_score": 49.1681, "inbatch_pos_score": 49.6562, "learning_rate": 4.638888888888889e-05, "loss": 2.5058, "norm_diff": 0.0165, "num_tokens_overlap": 5.5723, "num_tokens_union": 54.8992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8193.9128, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0986, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7075, "sent_len_1": 66.6421, "sent_len_max_0": 18.89, "sent_len_max_1": 190.905, "stdk": 0.0416, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 16500 }, { "accuracy": 53.6133, "doc_norm": 7.105, "encoder_q-embeddings": 4967.4082, "encoder_q-layer.0": 3369.2471, "encoder_q-layer.1": 3373.5525, "encoder_q-layer.10": 6186.9824, "encoder_q-layer.11": 11385.5859, "encoder_q-layer.2": 3554.3311, "encoder_q-layer.3": 3522.0227, "encoder_q-layer.4": 3672.5964, "encoder_q-layer.5": 3544.405, "encoder_q-layer.6": 4093.0063, "encoder_q-layer.7": 4584.6504, "encoder_q-layer.8": 4749.3931, "encoder_q-layer.9": 4497.2129, "epoch": 0.16, "inbatch_neg_score": 49.044, "inbatch_pos_score": 49.5312, "learning_rate": 4.633333333333333e-05, "loss": 2.5376, "norm_diff": 0.0174, "num_tokens_overlap": 5.5873, "num_tokens_union": 55.1531, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7381.7364, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0876, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7178, "sent_len_1": 67.0037, "sent_len_max_0": 18.8212, "sent_len_max_1": 187.5725, "stdk": 0.041, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 16600 }, { "accuracy": 50.5859, "doc_norm": 7.1107, "encoder_q-embeddings": 4656.874, "encoder_q-layer.0": 3312.4082, "encoder_q-layer.1": 3306.9224, "encoder_q-layer.10": 6829.9995, "encoder_q-layer.11": 11246.459, "encoder_q-layer.2": 3654.6187, "encoder_q-layer.3": 3734.9277, "encoder_q-layer.4": 3824.4653, "encoder_q-layer.5": 3774.4661, "encoder_q-layer.6": 4157.3159, "encoder_q-layer.7": 4548.9121, "encoder_q-layer.8": 5072.5146, "encoder_q-layer.9": 4714.29, "epoch": 0.16, "inbatch_neg_score": 49.0943, "inbatch_pos_score": 49.5625, "learning_rate": 4.627777777777778e-05, "loss": 2.5527, "norm_diff": 0.0175, "num_tokens_overlap": 5.5749, "num_tokens_union": 54.9913, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7513.2068, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0932, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7089, "sent_len_1": 66.7484, "sent_len_max_0": 19.0675, "sent_len_max_1": 192.2837, "stdk": 0.0408, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 16700 }, { "accuracy": 53.3203, "doc_norm": 7.1205, "encoder_q-embeddings": 5256.7871, "encoder_q-layer.0": 3581.1985, "encoder_q-layer.1": 3617.7236, "encoder_q-layer.10": 10953.7393, "encoder_q-layer.11": 15750.4434, "encoder_q-layer.2": 4009.3103, "encoder_q-layer.3": 4015.3242, "encoder_q-layer.4": 4162.4229, "encoder_q-layer.5": 4141.9214, "encoder_q-layer.6": 4599.6753, "encoder_q-layer.7": 5043.1348, "encoder_q-layer.8": 6511.583, "encoder_q-layer.9": 6736.1758, "epoch": 0.16, "inbatch_neg_score": 49.1599, "inbatch_pos_score": 49.6562, "learning_rate": 4.6222222222222224e-05, "loss": 2.545, "norm_diff": 0.02, "num_tokens_overlap": 5.5712, "num_tokens_union": 54.8986, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9532.3822, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.1005, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.722, "sent_len_1": 66.6135, "sent_len_max_0": 18.9025, "sent_len_max_1": 188.1337, "stdk": 0.0413, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 16800 }, { "accuracy": 52.6367, "doc_norm": 7.1055, "encoder_q-embeddings": 5577.1411, "encoder_q-layer.0": 3616.5186, "encoder_q-layer.1": 3639.2615, "encoder_q-layer.10": 6735.1602, "encoder_q-layer.11": 12491.7227, "encoder_q-layer.2": 3840.6758, "encoder_q-layer.3": 3787.8877, "encoder_q-layer.4": 3828.0977, "encoder_q-layer.5": 3836.8774, "encoder_q-layer.6": 4270.6401, "encoder_q-layer.7": 4627.873, "encoder_q-layer.8": 5658.0977, "encoder_q-layer.9": 4950.2104, "epoch": 0.16, "inbatch_neg_score": 49.0216, "inbatch_pos_score": 49.5, "learning_rate": 4.6166666666666666e-05, "loss": 2.5179, "norm_diff": 0.0186, "num_tokens_overlap": 5.5785, "num_tokens_union": 55.1057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8055.1614, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0868, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7045, "sent_len_1": 66.918, "sent_len_max_0": 18.9075, "sent_len_max_1": 187.405, "stdk": 0.0406, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 16900 }, { "accuracy": 56.1523, "doc_norm": 7.0987, "encoder_q-embeddings": 4888.165, "encoder_q-layer.0": 3247.1477, "encoder_q-layer.1": 3411.9573, "encoder_q-layer.10": 6751.1201, "encoder_q-layer.11": 13349.624, "encoder_q-layer.2": 3682.2195, "encoder_q-layer.3": 3652.4092, "encoder_q-layer.4": 3881.4814, "encoder_q-layer.5": 3845.8042, "encoder_q-layer.6": 4202.3066, "encoder_q-layer.7": 4439.9429, "encoder_q-layer.8": 4946.999, "encoder_q-layer.9": 4826.459, "epoch": 0.17, "inbatch_neg_score": 48.8899, "inbatch_pos_score": 49.4062, "learning_rate": 4.6111111111111115e-05, "loss": 2.5477, "norm_diff": 0.0176, "num_tokens_overlap": 5.5658, "num_tokens_union": 54.8674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7910.3503, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0811, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7146, "sent_len_1": 66.5162, "sent_len_max_0": 18.8938, "sent_len_max_1": 187.3575, "stdk": 0.0413, "stdq": 0.0403, "stdqueue_k": 0.0, "step": 17000 }, { "accuracy": 52.5391, "doc_norm": 7.0876, "encoder_q-embeddings": 4962.0283, "encoder_q-layer.0": 3428.6484, "encoder_q-layer.1": 3430.2451, "encoder_q-layer.10": 7086.0391, "encoder_q-layer.11": 11292.5771, "encoder_q-layer.2": 3791.2295, "encoder_q-layer.3": 3767.8916, "encoder_q-layer.4": 3866.4443, "encoder_q-layer.5": 3989.1929, "encoder_q-layer.6": 4282.4907, "encoder_q-layer.7": 4574.8237, "encoder_q-layer.8": 4879.001, "encoder_q-layer.9": 4738.8862, "epoch": 0.17, "inbatch_neg_score": 48.8787, "inbatch_pos_score": 49.3125, "learning_rate": 4.605555555555556e-05, "loss": 2.5303, "norm_diff": 0.015, "num_tokens_overlap": 5.5601, "num_tokens_union": 54.9102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7554.8865, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0726, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6831, "sent_len_1": 66.6885, "sent_len_max_0": 18.8287, "sent_len_max_1": 190.5637, "stdk": 0.0394, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 17100 }, { "accuracy": 54.2969, "doc_norm": 7.0967, "encoder_q-embeddings": 4735.9229, "encoder_q-layer.0": 3278.8008, "encoder_q-layer.1": 3353.6438, "encoder_q-layer.10": 5442.8896, "encoder_q-layer.11": 10565.5527, "encoder_q-layer.2": 3626.1697, "encoder_q-layer.3": 3598.4094, "encoder_q-layer.4": 3693.1772, "encoder_q-layer.5": 3530.8027, "encoder_q-layer.6": 3685.8372, "encoder_q-layer.7": 4079.7058, "encoder_q-layer.8": 4445.9614, "encoder_q-layer.9": 4138.6138, "epoch": 0.17, "inbatch_neg_score": 48.9172, "inbatch_pos_score": 49.4062, "learning_rate": 4.600000000000001e-05, "loss": 2.5114, "norm_diff": 0.0172, "num_tokens_overlap": 5.5886, "num_tokens_union": 55.0162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7052.6807, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0795, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7318, "sent_len_1": 66.832, "sent_len_max_0": 18.8975, "sent_len_max_1": 192.0062, "stdk": 0.0406, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 17200 }, { "accuracy": 51.3672, "doc_norm": 7.0995, "encoder_q-embeddings": 5538.3833, "encoder_q-layer.0": 3606.1135, "encoder_q-layer.1": 3616.2847, "encoder_q-layer.10": 6926.8296, "encoder_q-layer.11": 12538.6289, "encoder_q-layer.2": 3972.6467, "encoder_q-layer.3": 3951.7502, "encoder_q-layer.4": 3982.1025, "encoder_q-layer.5": 3813.6182, "encoder_q-layer.6": 4164.6035, "encoder_q-layer.7": 4537.1582, "encoder_q-layer.8": 4985.5493, "encoder_q-layer.9": 5254.6807, "epoch": 0.17, "inbatch_neg_score": 48.8771, "inbatch_pos_score": 49.375, "learning_rate": 4.594444444444444e-05, "loss": 2.5322, "norm_diff": 0.0221, "num_tokens_overlap": 5.5766, "num_tokens_union": 55.0083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8077.4746, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0774, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7179, "sent_len_1": 66.7627, "sent_len_max_0": 18.7963, "sent_len_max_1": 188.4625, "stdk": 0.0421, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 17300 }, { "accuracy": 52.4414, "doc_norm": 7.0882, "encoder_q-embeddings": 5120.2188, "encoder_q-layer.0": 3501.0315, "encoder_q-layer.1": 3500.5996, "encoder_q-layer.10": 7673.4385, "encoder_q-layer.11": 11887.877, "encoder_q-layer.2": 3805.9753, "encoder_q-layer.3": 3776.1218, "encoder_q-layer.4": 3726.6594, "encoder_q-layer.5": 3667.8313, "encoder_q-layer.6": 3860.2961, "encoder_q-layer.7": 4395.3276, "encoder_q-layer.8": 5143.1982, "encoder_q-layer.9": 4815.2295, "epoch": 0.17, "inbatch_neg_score": 48.8261, "inbatch_pos_score": 49.2812, "learning_rate": 4.588888888888889e-05, "loss": 2.532, "norm_diff": 0.0196, "num_tokens_overlap": 5.583, "num_tokens_union": 55.0187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7695.5659, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0686, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7203, "sent_len_1": 66.825, "sent_len_max_0": 19.0012, "sent_len_max_1": 190.8162, "stdk": 0.0404, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 17400 }, { "accuracy": 55.957, "doc_norm": 7.0897, "encoder_q-embeddings": 4680.6494, "encoder_q-layer.0": 3190.4492, "encoder_q-layer.1": 3241.2817, "encoder_q-layer.10": 6962.353, "encoder_q-layer.11": 12072.6289, "encoder_q-layer.2": 3630.0122, "encoder_q-layer.3": 3679.4995, "encoder_q-layer.4": 3807.8008, "encoder_q-layer.5": 3637.5273, "encoder_q-layer.6": 4067.2227, "encoder_q-layer.7": 4427.7861, "encoder_q-layer.8": 5083.0845, "encoder_q-layer.9": 4875.1665, "epoch": 0.17, "inbatch_neg_score": 48.8177, "inbatch_pos_score": 49.3125, "learning_rate": 4.5833333333333334e-05, "loss": 2.4999, "norm_diff": 0.0169, "num_tokens_overlap": 5.5867, "num_tokens_union": 55.0715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7608.1194, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0728, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7253, "sent_len_1": 66.8596, "sent_len_max_0": 18.8612, "sent_len_max_1": 190.17, "stdk": 0.0404, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 17500 }, { "accuracy": 52.832, "doc_norm": 7.0947, "encoder_q-embeddings": 5029.1191, "encoder_q-layer.0": 3391.9097, "encoder_q-layer.1": 3456.1304, "encoder_q-layer.10": 5771.7021, "encoder_q-layer.11": 11438.9541, "encoder_q-layer.2": 3753.5728, "encoder_q-layer.3": 3839.0042, "encoder_q-layer.4": 3928.7727, "encoder_q-layer.5": 3720.2947, "encoder_q-layer.6": 4393.6831, "encoder_q-layer.7": 4682.8525, "encoder_q-layer.8": 5205.2783, "encoder_q-layer.9": 4610.3413, "epoch": 0.17, "inbatch_neg_score": 48.897, "inbatch_pos_score": 49.375, "learning_rate": 4.577777777777778e-05, "loss": 2.5285, "norm_diff": 0.0151, "num_tokens_overlap": 5.5864, "num_tokens_union": 55.0307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7473.4422, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0796, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7101, "sent_len_1": 66.841, "sent_len_max_0": 18.9225, "sent_len_max_1": 189.0087, "stdk": 0.0397, "stdq": 0.0402, "stdqueue_k": 0.0, "step": 17600 }, { "accuracy": 50.293, "doc_norm": 7.1024, "encoder_q-embeddings": 5284.5576, "encoder_q-layer.0": 3563.4731, "encoder_q-layer.1": 3652.6853, "encoder_q-layer.10": 7398.748, "encoder_q-layer.11": 12882.0586, "encoder_q-layer.2": 4023.8242, "encoder_q-layer.3": 3980.782, "encoder_q-layer.4": 3981.115, "encoder_q-layer.5": 3763.8086, "encoder_q-layer.6": 4191.248, "encoder_q-layer.7": 4427.6279, "encoder_q-layer.8": 5245.2412, "encoder_q-layer.9": 4983.4575, "epoch": 0.17, "inbatch_neg_score": 48.9515, "inbatch_pos_score": 49.4375, "learning_rate": 4.572222222222222e-05, "loss": 2.5706, "norm_diff": 0.0206, "num_tokens_overlap": 5.5728, "num_tokens_union": 55.1438, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8154.8183, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0818, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6975, "sent_len_1": 67.0086, "sent_len_max_0": 18.6975, "sent_len_max_1": 189.8325, "stdk": 0.0418, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 17700 }, { "accuracy": 50.6836, "doc_norm": 7.0991, "encoder_q-embeddings": 4894.0659, "encoder_q-layer.0": 3503.5327, "encoder_q-layer.1": 3502.3481, "encoder_q-layer.10": 6500.6191, "encoder_q-layer.11": 11129.4912, "encoder_q-layer.2": 3740.9216, "encoder_q-layer.3": 3743.7463, "encoder_q-layer.4": 3923.9233, "encoder_q-layer.5": 3774.4883, "encoder_q-layer.6": 3934.4028, "encoder_q-layer.7": 4292.3237, "encoder_q-layer.8": 4692.0229, "encoder_q-layer.9": 4623.3975, "epoch": 0.17, "inbatch_neg_score": 48.9282, "inbatch_pos_score": 49.4062, "learning_rate": 4.566666666666667e-05, "loss": 2.5412, "norm_diff": 0.0174, "num_tokens_overlap": 5.5827, "num_tokens_union": 55.1787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7444.1891, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0817, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7428, "sent_len_1": 66.9906, "sent_len_max_0": 18.9937, "sent_len_max_1": 188.17, "stdk": 0.0412, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 17800 }, { "accuracy": 50.8789, "doc_norm": 7.0963, "encoder_q-embeddings": 4964.3745, "encoder_q-layer.0": 3562.6272, "encoder_q-layer.1": 3565.2969, "encoder_q-layer.10": 6489.2417, "encoder_q-layer.11": 10645.8164, "encoder_q-layer.2": 3687.6831, "encoder_q-layer.3": 3691.9788, "encoder_q-layer.4": 4053.8435, "encoder_q-layer.5": 3707.0139, "encoder_q-layer.6": 3857.5298, "encoder_q-layer.7": 4379.835, "encoder_q-layer.8": 4528.4868, "encoder_q-layer.9": 4719.937, "epoch": 0.17, "inbatch_neg_score": 48.8713, "inbatch_pos_score": 49.3438, "learning_rate": 4.561111111111112e-05, "loss": 2.4774, "norm_diff": 0.0194, "num_tokens_overlap": 5.5861, "num_tokens_union": 55.0557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7304.8229, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0769, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7176, "sent_len_1": 66.8778, "sent_len_max_0": 18.8625, "sent_len_max_1": 188.9512, "stdk": 0.0413, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 17900 }, { "accuracy": 49.3164, "doc_norm": 7.0949, "encoder_q-embeddings": 4981.2461, "encoder_q-layer.0": 3480.113, "encoder_q-layer.1": 3507.5134, "encoder_q-layer.10": 6289.7158, "encoder_q-layer.11": 12745.0645, "encoder_q-layer.2": 3846.0071, "encoder_q-layer.3": 3817.3887, "encoder_q-layer.4": 3838.3257, "encoder_q-layer.5": 3704.7263, "encoder_q-layer.6": 4131.2886, "encoder_q-layer.7": 4247.1377, "encoder_q-layer.8": 4661.061, "encoder_q-layer.9": 4403.9082, "epoch": 0.18, "inbatch_neg_score": 48.9111, "inbatch_pos_score": 49.375, "learning_rate": 4.555555555555556e-05, "loss": 2.5323, "norm_diff": 0.0158, "num_tokens_overlap": 5.5708, "num_tokens_union": 54.9377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7741.5126, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0791, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7097, "sent_len_1": 66.6817, "sent_len_max_0": 18.74, "sent_len_max_1": 190.5525, "stdk": 0.0412, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 18000 }, { "accuracy": 53.418, "doc_norm": 7.0967, "encoder_q-embeddings": 9417.5361, "encoder_q-layer.0": 6473.4243, "encoder_q-layer.1": 6601.8545, "encoder_q-layer.10": 15195.2295, "encoder_q-layer.11": 27769.6152, "encoder_q-layer.2": 7117.3032, "encoder_q-layer.3": 7031.8789, "encoder_q-layer.4": 7218.4277, "encoder_q-layer.5": 7301.7036, "encoder_q-layer.6": 8053.0068, "encoder_q-layer.7": 8772.1836, "encoder_q-layer.8": 10566.5293, "encoder_q-layer.9": 10186.0322, "epoch": 0.18, "inbatch_neg_score": 48.9579, "inbatch_pos_score": 49.4062, "learning_rate": 4.55e-05, "loss": 2.5217, "norm_diff": 0.0164, "num_tokens_overlap": 5.5827, "num_tokens_union": 55.0518, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16183.3402, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0803, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7162, "sent_len_1": 66.8884, "sent_len_max_0": 18.96, "sent_len_max_1": 189.71, "stdk": 0.0401, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 18100 }, { "accuracy": 52.4414, "doc_norm": 7.099, "encoder_q-embeddings": 10295.7891, "encoder_q-layer.0": 6884.4873, "encoder_q-layer.1": 7014.5361, "encoder_q-layer.10": 12157.5176, "encoder_q-layer.11": 22016.9551, "encoder_q-layer.2": 7445.4731, "encoder_q-layer.3": 7465.4399, "encoder_q-layer.4": 7395.8638, "encoder_q-layer.5": 7096.5923, "encoder_q-layer.6": 7483.7432, "encoder_q-layer.7": 8084.0112, "encoder_q-layer.8": 9030.8506, "encoder_q-layer.9": 8871.0039, "epoch": 0.18, "inbatch_neg_score": 48.9463, "inbatch_pos_score": 49.4062, "learning_rate": 4.5444444444444444e-05, "loss": 2.4807, "norm_diff": 0.0178, "num_tokens_overlap": 5.5798, "num_tokens_union": 54.9127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14832.6862, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0812, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7319, "sent_len_1": 66.602, "sent_len_max_0": 18.69, "sent_len_max_1": 190.3438, "stdk": 0.0413, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 18200 }, { "accuracy": 52.3438, "doc_norm": 7.0972, "encoder_q-embeddings": 10438.6963, "encoder_q-layer.0": 7036.001, "encoder_q-layer.1": 7396.1924, "encoder_q-layer.10": 12827.6465, "encoder_q-layer.11": 22456.0586, "encoder_q-layer.2": 7947.5649, "encoder_q-layer.3": 7979.7358, "encoder_q-layer.4": 8296.4141, "encoder_q-layer.5": 7812.6606, "encoder_q-layer.6": 8250.7471, "encoder_q-layer.7": 8879.6807, "encoder_q-layer.8": 10488.1094, "encoder_q-layer.9": 9659.7695, "epoch": 0.18, "inbatch_neg_score": 48.8572, "inbatch_pos_score": 49.3438, "learning_rate": 4.538888888888889e-05, "loss": 2.5318, "norm_diff": 0.0206, "num_tokens_overlap": 5.5707, "num_tokens_union": 55.0387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15455.2001, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0765, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7064, "sent_len_1": 66.8972, "sent_len_max_0": 18.8275, "sent_len_max_1": 190.0075, "stdk": 0.0417, "stdq": 0.0403, "stdqueue_k": 0.0, "step": 18300 }, { "accuracy": 52.832, "doc_norm": 7.0916, "encoder_q-embeddings": 10159.4785, "encoder_q-layer.0": 6895.7524, "encoder_q-layer.1": 7108.1719, "encoder_q-layer.10": 11567.6846, "encoder_q-layer.11": 20701.0449, "encoder_q-layer.2": 7500.2446, "encoder_q-layer.3": 7626.8076, "encoder_q-layer.4": 7791.1958, "encoder_q-layer.5": 7283.3955, "encoder_q-layer.6": 7882.376, "encoder_q-layer.7": 8292.3623, "encoder_q-layer.8": 9059.4365, "encoder_q-layer.9": 8505.2549, "epoch": 0.18, "inbatch_neg_score": 48.829, "inbatch_pos_score": 49.3125, "learning_rate": 4.5333333333333335e-05, "loss": 2.5043, "norm_diff": 0.0179, "num_tokens_overlap": 5.5871, "num_tokens_union": 54.9517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14550.7785, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0737, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7274, "sent_len_1": 66.6449, "sent_len_max_0": 18.8675, "sent_len_max_1": 188.4938, "stdk": 0.0412, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 18400 }, { "accuracy": 53.125, "doc_norm": 7.0919, "encoder_q-embeddings": 9845.9834, "encoder_q-layer.0": 6734.4961, "encoder_q-layer.1": 6690.1216, "encoder_q-layer.10": 12420.3564, "encoder_q-layer.11": 20639.8223, "encoder_q-layer.2": 7434.6313, "encoder_q-layer.3": 7456.9849, "encoder_q-layer.4": 7553.3911, "encoder_q-layer.5": 7466.5273, "encoder_q-layer.6": 8216.6484, "encoder_q-layer.7": 8809.3711, "encoder_q-layer.8": 9698.5195, "encoder_q-layer.9": 8908.6426, "epoch": 0.18, "inbatch_neg_score": 48.8165, "inbatch_pos_score": 49.3125, "learning_rate": 4.527777777777778e-05, "loss": 2.5319, "norm_diff": 0.0198, "num_tokens_overlap": 5.5824, "num_tokens_union": 55.0498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14460.0894, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0721, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.73, "sent_len_1": 66.8084, "sent_len_max_0": 18.8388, "sent_len_max_1": 188.84, "stdk": 0.0419, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 18500 }, { "accuracy": 55.1758, "doc_norm": 7.0959, "encoder_q-embeddings": 9989.8369, "encoder_q-layer.0": 6870.8687, "encoder_q-layer.1": 6917.0103, "encoder_q-layer.10": 11066.0273, "encoder_q-layer.11": 22419.3301, "encoder_q-layer.2": 7489.8364, "encoder_q-layer.3": 7585.7754, "encoder_q-layer.4": 7847.8027, "encoder_q-layer.5": 7686.6973, "encoder_q-layer.6": 8065.103, "encoder_q-layer.7": 8484.457, "encoder_q-layer.8": 9868.4883, "encoder_q-layer.9": 9060.5312, "epoch": 0.18, "inbatch_neg_score": 48.8372, "inbatch_pos_score": 49.3125, "learning_rate": 4.522222222222223e-05, "loss": 2.5115, "norm_diff": 0.0218, "num_tokens_overlap": 5.5712, "num_tokens_union": 55.0308, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14767.0169, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0741, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6926, "sent_len_1": 66.8456, "sent_len_max_0": 18.8675, "sent_len_max_1": 187.8375, "stdk": 0.0411, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 18600 }, { "accuracy": 51.4648, "doc_norm": 7.0965, "encoder_q-embeddings": 10250.5645, "encoder_q-layer.0": 6762.062, "encoder_q-layer.1": 6831.8276, "encoder_q-layer.10": 12285.0137, "encoder_q-layer.11": 21294.5469, "encoder_q-layer.2": 7394.0127, "encoder_q-layer.3": 7324.5996, "encoder_q-layer.4": 7556.8613, "encoder_q-layer.5": 7140.7939, "encoder_q-layer.6": 7849.71, "encoder_q-layer.7": 8412.0996, "encoder_q-layer.8": 9097.2568, "encoder_q-layer.9": 8300.1172, "epoch": 0.18, "inbatch_neg_score": 48.9122, "inbatch_pos_score": 49.375, "learning_rate": 4.516666666666667e-05, "loss": 2.507, "norm_diff": 0.0194, "num_tokens_overlap": 5.5637, "num_tokens_union": 54.908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14478.829, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0771, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7195, "sent_len_1": 66.5814, "sent_len_max_0": 18.8762, "sent_len_max_1": 189.0788, "stdk": 0.0411, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 18700 }, { "accuracy": 51.4648, "doc_norm": 7.0936, "encoder_q-embeddings": 9555.2939, "encoder_q-layer.0": 6619.7144, "encoder_q-layer.1": 6690.1274, "encoder_q-layer.10": 11863.8232, "encoder_q-layer.11": 21043.5938, "encoder_q-layer.2": 7117.3506, "encoder_q-layer.3": 7191.52, "encoder_q-layer.4": 7376.084, "encoder_q-layer.5": 7001.2256, "encoder_q-layer.6": 7634.7998, "encoder_q-layer.7": 8086.166, "encoder_q-layer.8": 8971.5059, "encoder_q-layer.9": 8397.1006, "epoch": 0.18, "inbatch_neg_score": 48.8878, "inbatch_pos_score": 49.3438, "learning_rate": 4.511111111111112e-05, "loss": 2.5344, "norm_diff": 0.0167, "num_tokens_overlap": 5.5941, "num_tokens_union": 55.0601, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14162.5194, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0769, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7399, "sent_len_1": 66.8165, "sent_len_max_0": 18.755, "sent_len_max_1": 188.6337, "stdk": 0.0418, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 18800 }, { "accuracy": 53.3203, "doc_norm": 7.0911, "encoder_q-embeddings": 9778.2344, "encoder_q-layer.0": 7026.3354, "encoder_q-layer.1": 7161.249, "encoder_q-layer.10": 15124.2568, "encoder_q-layer.11": 23614.6289, "encoder_q-layer.2": 7553.9238, "encoder_q-layer.3": 7508.1045, "encoder_q-layer.4": 7499.9731, "encoder_q-layer.5": 7420.4746, "encoder_q-layer.6": 8480.7734, "encoder_q-layer.7": 8961.6367, "encoder_q-layer.8": 9778.7344, "encoder_q-layer.9": 10055.6484, "epoch": 0.18, "inbatch_neg_score": 48.802, "inbatch_pos_score": 49.2812, "learning_rate": 4.5055555555555554e-05, "loss": 2.4977, "norm_diff": 0.0188, "num_tokens_overlap": 5.5754, "num_tokens_union": 55.139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15443.8058, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0723, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7192, "sent_len_1": 67.016, "sent_len_max_0": 18.7888, "sent_len_max_1": 189.725, "stdk": 0.042, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 18900 }, { "accuracy": 54.1992, "doc_norm": 7.0928, "encoder_q-embeddings": 9966.4277, "encoder_q-layer.0": 6550.2012, "encoder_q-layer.1": 6665.0732, "encoder_q-layer.10": 12033.3271, "encoder_q-layer.11": 26155.1465, "encoder_q-layer.2": 7245.8638, "encoder_q-layer.3": 7134.7969, "encoder_q-layer.4": 7354.895, "encoder_q-layer.5": 7187.2544, "encoder_q-layer.6": 8020.6577, "encoder_q-layer.7": 8544.7021, "encoder_q-layer.8": 9215.7383, "encoder_q-layer.9": 8888.6621, "epoch": 0.19, "inbatch_neg_score": 48.8158, "inbatch_pos_score": 49.3125, "learning_rate": 4.5e-05, "loss": 2.5384, "norm_diff": 0.018, "num_tokens_overlap": 5.5704, "num_tokens_union": 54.9892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15612.2609, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0748, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7135, "sent_len_1": 66.7629, "sent_len_max_0": 18.8012, "sent_len_max_1": 189.85, "stdk": 0.0413, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 19000 }, { "accuracy": 54.5898, "doc_norm": 7.0902, "encoder_q-embeddings": 8955.9736, "encoder_q-layer.0": 6317.2031, "encoder_q-layer.1": 6361.9912, "encoder_q-layer.10": 13432.957, "encoder_q-layer.11": 22879.5312, "encoder_q-layer.2": 6861.3862, "encoder_q-layer.3": 7154.9941, "encoder_q-layer.4": 7252.6562, "encoder_q-layer.5": 6888.2251, "encoder_q-layer.6": 7472.7808, "encoder_q-layer.7": 8095.4727, "encoder_q-layer.8": 9065.6953, "encoder_q-layer.9": 8725.4766, "epoch": 0.19, "inbatch_neg_score": 48.8069, "inbatch_pos_score": 49.2812, "learning_rate": 4.4944444444444445e-05, "loss": 2.472, "norm_diff": 0.0208, "num_tokens_overlap": 5.6038, "num_tokens_union": 55.1817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14240.1678, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0694, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7373, "sent_len_1": 66.9784, "sent_len_max_0": 18.9213, "sent_len_max_1": 188.9, "stdk": 0.0413, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 19100 }, { "accuracy": 53.9062, "doc_norm": 7.0918, "encoder_q-embeddings": 10298.5273, "encoder_q-layer.0": 7048.9497, "encoder_q-layer.1": 6920.6377, "encoder_q-layer.10": 12874.5498, "encoder_q-layer.11": 21814.3809, "encoder_q-layer.2": 7538.3037, "encoder_q-layer.3": 7399.7783, "encoder_q-layer.4": 7586.9214, "encoder_q-layer.5": 7392.6401, "encoder_q-layer.6": 7944.7642, "encoder_q-layer.7": 8487.3896, "encoder_q-layer.8": 9761.1611, "encoder_q-layer.9": 8978.5859, "epoch": 0.19, "inbatch_neg_score": 48.7795, "inbatch_pos_score": 49.2812, "learning_rate": 4.4888888888888894e-05, "loss": 2.5087, "norm_diff": 0.0186, "num_tokens_overlap": 5.5641, "num_tokens_union": 54.8701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14880.6682, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0731, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7071, "sent_len_1": 66.5328, "sent_len_max_0": 18.875, "sent_len_max_1": 188.1937, "stdk": 0.0422, "stdq": 0.0403, "stdqueue_k": 0.0, "step": 19200 }, { "accuracy": 56.543, "doc_norm": 7.0885, "encoder_q-embeddings": 9456.125, "encoder_q-layer.0": 6412.7363, "encoder_q-layer.1": 6640.3896, "encoder_q-layer.10": 11168.9326, "encoder_q-layer.11": 21082.0781, "encoder_q-layer.2": 7060.6904, "encoder_q-layer.3": 7022.5024, "encoder_q-layer.4": 7180.7974, "encoder_q-layer.5": 7032.5405, "encoder_q-layer.6": 7481.9683, "encoder_q-layer.7": 7897.3545, "encoder_q-layer.8": 8744.3887, "encoder_q-layer.9": 8376.3223, "epoch": 0.19, "inbatch_neg_score": 48.7434, "inbatch_pos_score": 49.2188, "learning_rate": 4.483333333333333e-05, "loss": 2.5186, "norm_diff": 0.021, "num_tokens_overlap": 5.5913, "num_tokens_union": 55.1022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13918.2498, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0676, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7332, "sent_len_1": 66.9384, "sent_len_max_0": 18.86, "sent_len_max_1": 191.3388, "stdk": 0.0421, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 19300 }, { "accuracy": 56.1523, "doc_norm": 7.0857, "encoder_q-embeddings": 8765.8672, "encoder_q-layer.0": 6085.0386, "encoder_q-layer.1": 6129.3599, "encoder_q-layer.10": 14646.2764, "encoder_q-layer.11": 24150.5391, "encoder_q-layer.2": 6832.7739, "encoder_q-layer.3": 6802.9702, "encoder_q-layer.4": 6959.4639, "encoder_q-layer.5": 6864.3369, "encoder_q-layer.6": 7220.1089, "encoder_q-layer.7": 7823.4771, "encoder_q-layer.8": 9078.2949, "encoder_q-layer.9": 9092.9082, "epoch": 0.19, "inbatch_neg_score": 48.7239, "inbatch_pos_score": 49.2188, "learning_rate": 4.477777777777778e-05, "loss": 2.4865, "norm_diff": 0.0204, "num_tokens_overlap": 5.5708, "num_tokens_union": 55.0511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14829.2956, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0653, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7073, "sent_len_1": 66.838, "sent_len_max_0": 18.8663, "sent_len_max_1": 189.9775, "stdk": 0.0413, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 19400 }, { "accuracy": 51.0742, "doc_norm": 7.085, "encoder_q-embeddings": 9648.4111, "encoder_q-layer.0": 6922.0742, "encoder_q-layer.1": 6850.022, "encoder_q-layer.10": 14254.5312, "encoder_q-layer.11": 23455.4727, "encoder_q-layer.2": 7806.7021, "encoder_q-layer.3": 7629.7983, "encoder_q-layer.4": 7898.541, "encoder_q-layer.5": 8050.3276, "encoder_q-layer.6": 9181.8174, "encoder_q-layer.7": 9387.0938, "encoder_q-layer.8": 10626.1621, "encoder_q-layer.9": 10245.3301, "epoch": 0.19, "inbatch_neg_score": 48.6969, "inbatch_pos_score": 49.1562, "learning_rate": 4.472222222222223e-05, "loss": 2.4952, "norm_diff": 0.0183, "num_tokens_overlap": 5.5787, "num_tokens_union": 55.0776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15551.3993, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0667, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7229, "sent_len_1": 66.8947, "sent_len_max_0": 18.8575, "sent_len_max_1": 189.5513, "stdk": 0.0407, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 19500 }, { "accuracy": 54.8828, "doc_norm": 7.0879, "encoder_q-embeddings": 9963.374, "encoder_q-layer.0": 6738.9453, "encoder_q-layer.1": 6710.686, "encoder_q-layer.10": 12948.7168, "encoder_q-layer.11": 25409.6895, "encoder_q-layer.2": 7235.855, "encoder_q-layer.3": 7223.6094, "encoder_q-layer.4": 7632.3955, "encoder_q-layer.5": 7688.9292, "encoder_q-layer.6": 7977.1626, "encoder_q-layer.7": 8134.96, "encoder_q-layer.8": 9222.0596, "encoder_q-layer.9": 9072.1494, "epoch": 0.19, "inbatch_neg_score": 48.7729, "inbatch_pos_score": 49.25, "learning_rate": 4.466666666666667e-05, "loss": 2.5147, "norm_diff": 0.0186, "num_tokens_overlap": 5.591, "num_tokens_union": 55.0687, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15425.4428, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0693, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7456, "sent_len_1": 66.8372, "sent_len_max_0": 18.9187, "sent_len_max_1": 189.6037, "stdk": 0.0397, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 19600 }, { "accuracy": 56.4453, "doc_norm": 7.094, "encoder_q-embeddings": 9408.2383, "encoder_q-layer.0": 6390.123, "encoder_q-layer.1": 6558.1494, "encoder_q-layer.10": 12707.2627, "encoder_q-layer.11": 22387.2168, "encoder_q-layer.2": 7061.9009, "encoder_q-layer.3": 7266.9902, "encoder_q-layer.4": 7401.9873, "encoder_q-layer.5": 7409.8032, "encoder_q-layer.6": 8288.6855, "encoder_q-layer.7": 9098.7334, "encoder_q-layer.8": 10792.1816, "encoder_q-layer.9": 9102.125, "epoch": 0.19, "inbatch_neg_score": 48.8419, "inbatch_pos_score": 49.3125, "learning_rate": 4.461111111111111e-05, "loss": 2.52, "norm_diff": 0.0225, "num_tokens_overlap": 5.5829, "num_tokens_union": 55.0076, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15039.4664, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0715, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.712, "sent_len_1": 66.8368, "sent_len_max_0": 19.0587, "sent_len_max_1": 189.7837, "stdk": 0.0404, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 19700 }, { "accuracy": 54.5898, "doc_norm": 7.0905, "encoder_q-embeddings": 9319.208, "encoder_q-layer.0": 6439.4087, "encoder_q-layer.1": 6583.1772, "encoder_q-layer.10": 12007.2529, "encoder_q-layer.11": 21381.3926, "encoder_q-layer.2": 7156.1758, "encoder_q-layer.3": 7201.833, "encoder_q-layer.4": 7457.1841, "encoder_q-layer.5": 7581.9453, "encoder_q-layer.6": 7880.2402, "encoder_q-layer.7": 9089.0107, "encoder_q-layer.8": 10097.0166, "encoder_q-layer.9": 8758.9355, "epoch": 0.19, "inbatch_neg_score": 48.7703, "inbatch_pos_score": 49.25, "learning_rate": 4.4555555555555555e-05, "loss": 2.4726, "norm_diff": 0.0198, "num_tokens_overlap": 5.568, "num_tokens_union": 54.9804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14372.4767, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0706, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7136, "sent_len_1": 66.69, "sent_len_max_0": 18.895, "sent_len_max_1": 189.9812, "stdk": 0.0412, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 19800 }, { "accuracy": 53.8086, "doc_norm": 7.0944, "encoder_q-embeddings": 9691.9932, "encoder_q-layer.0": 6732.875, "encoder_q-layer.1": 6912.084, "encoder_q-layer.10": 12617.4043, "encoder_q-layer.11": 23349.9336, "encoder_q-layer.2": 7400.8408, "encoder_q-layer.3": 7545.8579, "encoder_q-layer.4": 7703.4658, "encoder_q-layer.5": 7350.522, "encoder_q-layer.6": 7931.8364, "encoder_q-layer.7": 8326.0156, "encoder_q-layer.8": 9286.0381, "encoder_q-layer.9": 9422.5352, "epoch": 0.19, "inbatch_neg_score": 48.8421, "inbatch_pos_score": 49.3438, "learning_rate": 4.4500000000000004e-05, "loss": 2.4977, "norm_diff": 0.0199, "num_tokens_overlap": 5.5947, "num_tokens_union": 55.082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14871.8375, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0745, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7309, "sent_len_1": 66.8842, "sent_len_max_0": 18.8962, "sent_len_max_1": 190.2562, "stdk": 0.0419, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 19900 }, { "accuracy": 54.2969, "doc_norm": 7.0887, "encoder_q-embeddings": 9112.8887, "encoder_q-layer.0": 6179.9932, "encoder_q-layer.1": 6210.376, "encoder_q-layer.10": 12382.416, "encoder_q-layer.11": 23622.8477, "encoder_q-layer.2": 6683.1177, "encoder_q-layer.3": 6794.2007, "encoder_q-layer.4": 7149.1123, "encoder_q-layer.5": 6878.4678, "encoder_q-layer.6": 7794.7471, "encoder_q-layer.7": 8230.1797, "encoder_q-layer.8": 9383.3164, "encoder_q-layer.9": 8977.3213, "epoch": 0.2, "inbatch_neg_score": 48.8344, "inbatch_pos_score": 49.3125, "learning_rate": 4.4444444444444447e-05, "loss": 2.4519, "norm_diff": 0.0175, "num_tokens_overlap": 5.5729, "num_tokens_union": 54.9153, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14390.5232, "preclip_grad_norm_avg": 0.0001, "query_norm": 7.0712, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6906, "sent_len_1": 66.6627, "sent_len_max_0": 18.845, "sent_len_max_1": 189.29, "stdk": 0.0404, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 20000 }, { "dev_runtime": 26.6597, "dev_samples_per_second": 2.401, "dev_steps_per_second": 0.038, "epoch": 0.2, "step": 20000, "test_accuracy": 8.59222412109375, "test_doc_norm": 7.084954738616943, "test_inbatch_neg_score": 49.301292419433594, "test_inbatch_pos_score": 49.94174575805664, "test_loss": 3.908353805541992, "test_norm_diff": 0.0012386813759803772, "test_query_norm": 7.084763050079346, "test_queue_k_norm": 0.0, "test_stdk": 0.03358521685004234, "test_stdq": 0.033546216785907745, "test_stdqueue_k": 0.0 }, { "dev_runtime": 26.6597, "dev_samples_per_second": 2.401, "dev_steps_per_second": 0.038, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.36915, "eval_beir-arguana_recall@10": 0.65007, "eval_beir-arguana_recall@100": 0.96017, "eval_beir-arguana_recall@20": 0.82148, "eval_beir-avg_ndcg@10": 0.3748675, "eval_beir-avg_recall@10": 0.44301408333333336, "eval_beir-avg_recall@100": 0.6310493333333334, "eval_beir-avg_recall@20": 0.5096779166666667, "eval_beir-cqadupstack_ndcg@10": 0.28131500000000004, "eval_beir-cqadupstack_recall@10": 0.37760083333333333, "eval_beir-cqadupstack_recall@100": 0.6050633333333334, "eval_beir-cqadupstack_recall@20": 0.44336916666666676, "eval_beir-fiqa_ndcg@10": 0.25347, "eval_beir-fiqa_recall@10": 0.30944, "eval_beir-fiqa_recall@100": 0.58158, "eval_beir-fiqa_recall@20": 0.38444, "eval_beir-nfcorpus_ndcg@10": 0.31005, "eval_beir-nfcorpus_recall@10": 0.14412, "eval_beir-nfcorpus_recall@100": 0.28041, "eval_beir-nfcorpus_recall@20": 0.17993, "eval_beir-nq_ndcg@10": 0.24928, "eval_beir-nq_recall@10": 0.42159, "eval_beir-nq_recall@100": 0.76579, "eval_beir-nq_recall@20": 0.54118, "eval_beir-quora_ndcg@10": 0.79003, "eval_beir-quora_recall@10": 0.8906, "eval_beir-quora_recall@100": 0.97795, "eval_beir-quora_recall@20": 0.92905, "eval_beir-scidocs_ndcg@10": 0.1475, "eval_beir-scidocs_recall@10": 0.15453, "eval_beir-scidocs_recall@100": 0.36725, "eval_beir-scidocs_recall@20": 0.21268, "eval_beir-scifact_ndcg@10": 0.60373, "eval_beir-scifact_recall@10": 0.73467, "eval_beir-scifact_recall@100": 0.91822, "eval_beir-scifact_recall@20": 0.80356, "eval_beir-trec-covid_ndcg@10": 0.55634, "eval_beir-trec-covid_recall@10": 0.61, "eval_beir-trec-covid_recall@100": 0.4318, "eval_beir-trec-covid_recall@20": 0.577, "eval_beir-webis-touche2020_ndcg@10": 0.18781, "eval_beir-webis-touche2020_recall@10": 0.13752, "eval_beir-webis-touche2020_recall@100": 0.42226, "eval_beir-webis-touche2020_recall@20": 0.20409, "eval_senteval-avg_sts": 0.7443229249152019, "eval_senteval-sickr_spearman": 0.7257497506054765, "eval_senteval-stsb_spearman": 0.7628960992249272, "step": 20000, "test_accuracy": 8.59222412109375, "test_doc_norm": 7.084954738616943, "test_inbatch_neg_score": 49.301292419433594, "test_inbatch_pos_score": 49.94174575805664, "test_loss": 3.908353805541992, "test_norm_diff": 0.0012386813759803772, "test_query_norm": 7.084763050079346, "test_queue_k_norm": 0.0, "test_stdk": 0.03358521685004234, "test_stdq": 0.033546216785907745, "test_stdqueue_k": 0.0 }, { "accuracy": 54.8828, "doc_norm": 7.0863, "encoder_q-embeddings": 20151.6836, "encoder_q-layer.0": 13465.624, "encoder_q-layer.1": 13561.7695, "encoder_q-layer.10": 23186.3574, "encoder_q-layer.11": 39202.7891, "encoder_q-layer.2": 14494.3984, "encoder_q-layer.3": 14430.8203, "encoder_q-layer.4": 14895.4482, "encoder_q-layer.5": 14693.5957, "encoder_q-layer.6": 15643.3379, "encoder_q-layer.7": 16768.5547, "encoder_q-layer.8": 19356.416, "encoder_q-layer.9": 17884.5059, "epoch": 0.2, "inbatch_neg_score": 48.7626, "inbatch_pos_score": 49.25, "learning_rate": 4.438888888888889e-05, "loss": 2.4988, "norm_diff": 0.0191, "num_tokens_overlap": 5.5812, "num_tokens_union": 55.1346, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28453.526, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0671, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7011, "sent_len_1": 67.0653, "sent_len_max_0": 18.8275, "sent_len_max_1": 191.2262, "stdk": 0.0411, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 20100 }, { "accuracy": 55.0781, "doc_norm": 7.0902, "encoder_q-embeddings": 19635.8594, "encoder_q-layer.0": 13116.8506, "encoder_q-layer.1": 12981.3047, "encoder_q-layer.10": 22702.9258, "encoder_q-layer.11": 43433.25, "encoder_q-layer.2": 13455.3164, "encoder_q-layer.3": 13672.6914, "encoder_q-layer.4": 14164.6826, "encoder_q-layer.5": 13913.7891, "encoder_q-layer.6": 15322.3262, "encoder_q-layer.7": 15963.9043, "encoder_q-layer.8": 18145.8633, "encoder_q-layer.9": 16608.2559, "epoch": 0.2, "inbatch_neg_score": 48.808, "inbatch_pos_score": 49.2812, "learning_rate": 4.433333333333334e-05, "loss": 2.4504, "norm_diff": 0.0186, "num_tokens_overlap": 5.5842, "num_tokens_union": 55.0768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28135.0847, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0717, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7148, "sent_len_1": 66.9501, "sent_len_max_0": 18.7613, "sent_len_max_1": 189.9725, "stdk": 0.0417, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 20200 }, { "accuracy": 55.6641, "doc_norm": 7.0972, "encoder_q-embeddings": 21069.3086, "encoder_q-layer.0": 14128.3037, "encoder_q-layer.1": 14150.1426, "encoder_q-layer.10": 27927.0, "encoder_q-layer.11": 56677.625, "encoder_q-layer.2": 15307.9834, "encoder_q-layer.3": 15231.9365, "encoder_q-layer.4": 15941.9893, "encoder_q-layer.5": 15394.0957, "encoder_q-layer.6": 16243.9492, "encoder_q-layer.7": 18041.8027, "encoder_q-layer.8": 21718.1836, "encoder_q-layer.9": 19958.1562, "epoch": 0.2, "inbatch_neg_score": 48.8466, "inbatch_pos_score": 49.3438, "learning_rate": 4.427777777777778e-05, "loss": 2.4387, "norm_diff": 0.0222, "num_tokens_overlap": 5.5792, "num_tokens_union": 55.0207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 33067.2167, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.075, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.736, "sent_len_1": 66.8182, "sent_len_max_0": 18.8388, "sent_len_max_1": 190.2763, "stdk": 0.0419, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 20300 }, { "accuracy": 53.3203, "doc_norm": 7.0846, "encoder_q-embeddings": 19490.2109, "encoder_q-layer.0": 13110.7861, "encoder_q-layer.1": 13374.5039, "encoder_q-layer.10": 22497.6348, "encoder_q-layer.11": 44951.332, "encoder_q-layer.2": 14348.8906, "encoder_q-layer.3": 14413.0811, "encoder_q-layer.4": 14510.791, "encoder_q-layer.5": 13924.2559, "encoder_q-layer.6": 14912.3984, "encoder_q-layer.7": 15628.6455, "encoder_q-layer.8": 18174.3691, "encoder_q-layer.9": 17290.4238, "epoch": 0.2, "inbatch_neg_score": 48.7558, "inbatch_pos_score": 49.2188, "learning_rate": 4.422222222222222e-05, "loss": 2.5089, "norm_diff": 0.0167, "num_tokens_overlap": 5.5897, "num_tokens_union": 54.9128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28716.7524, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0679, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7503, "sent_len_1": 66.5969, "sent_len_max_0": 18.8637, "sent_len_max_1": 188.925, "stdk": 0.0404, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 20400 }, { "accuracy": 52.3438, "doc_norm": 7.0821, "encoder_q-embeddings": 19929.6289, "encoder_q-layer.0": 13336.1113, "encoder_q-layer.1": 13143.6924, "encoder_q-layer.10": 21445.5977, "encoder_q-layer.11": 41649.707, "encoder_q-layer.2": 14146.4756, "encoder_q-layer.3": 14214.5625, "encoder_q-layer.4": 15455.9141, "encoder_q-layer.5": 14233.1582, "encoder_q-layer.6": 14996.0322, "encoder_q-layer.7": 16201.9014, "encoder_q-layer.8": 18159.752, "encoder_q-layer.9": 16431.1758, "epoch": 0.2, "inbatch_neg_score": 48.7088, "inbatch_pos_score": 49.1875, "learning_rate": 4.4166666666666665e-05, "loss": 2.5083, "norm_diff": 0.0191, "num_tokens_overlap": 5.6004, "num_tokens_union": 55.0787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28290.029, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.063, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7331, "sent_len_1": 66.9181, "sent_len_max_0": 18.9187, "sent_len_max_1": 189.6275, "stdk": 0.041, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 20500 }, { "accuracy": 53.418, "doc_norm": 7.0918, "encoder_q-embeddings": 18797.2773, "encoder_q-layer.0": 13080.5596, "encoder_q-layer.1": 13005.6123, "encoder_q-layer.10": 28414.4824, "encoder_q-layer.11": 45229.0547, "encoder_q-layer.2": 13759.1406, "encoder_q-layer.3": 13959.3311, "encoder_q-layer.4": 14472.8125, "encoder_q-layer.5": 13992.4717, "encoder_q-layer.6": 14990.9141, "encoder_q-layer.7": 16532.1133, "encoder_q-layer.8": 19058.0977, "encoder_q-layer.9": 19131.5586, "epoch": 0.2, "inbatch_neg_score": 48.8232, "inbatch_pos_score": 49.3125, "learning_rate": 4.4111111111111114e-05, "loss": 2.4148, "norm_diff": 0.0213, "num_tokens_overlap": 5.5741, "num_tokens_union": 55.061, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29083.4824, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0705, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.72, "sent_len_1": 66.8198, "sent_len_max_0": 18.77, "sent_len_max_1": 188.4575, "stdk": 0.0411, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 20600 }, { "accuracy": 51.7578, "doc_norm": 7.0894, "encoder_q-embeddings": 19794.627, "encoder_q-layer.0": 13845.167, "encoder_q-layer.1": 14269.1172, "encoder_q-layer.10": 30913.3184, "encoder_q-layer.11": 49349.0273, "encoder_q-layer.2": 15158.5859, "encoder_q-layer.3": 15019.208, "encoder_q-layer.4": 15482.4434, "encoder_q-layer.5": 16426.8438, "encoder_q-layer.6": 18331.3594, "encoder_q-layer.7": 18751.5176, "encoder_q-layer.8": 20501.7969, "encoder_q-layer.9": 19968.8008, "epoch": 0.2, "inbatch_neg_score": 48.786, "inbatch_pos_score": 49.2812, "learning_rate": 4.4055555555555557e-05, "loss": 2.4512, "norm_diff": 0.0163, "num_tokens_overlap": 5.5852, "num_tokens_union": 54.9915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31779.662, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0731, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7166, "sent_len_1": 66.7983, "sent_len_max_0": 18.9887, "sent_len_max_1": 190.5062, "stdk": 0.0409, "stdq": 0.0405, "stdqueue_k": 0.0, "step": 20700 }, { "accuracy": 54.3945, "doc_norm": 7.0842, "encoder_q-embeddings": 19725.5605, "encoder_q-layer.0": 13625.7715, "encoder_q-layer.1": 13277.6494, "encoder_q-layer.10": 28652.418, "encoder_q-layer.11": 48095.2109, "encoder_q-layer.2": 13964.043, "encoder_q-layer.3": 14421.1367, "encoder_q-layer.4": 14901.9277, "encoder_q-layer.5": 14329.5127, "encoder_q-layer.6": 15088.6562, "encoder_q-layer.7": 16713.0254, "encoder_q-layer.8": 18319.4023, "encoder_q-layer.9": 17223.0332, "epoch": 0.2, "inbatch_neg_score": 48.771, "inbatch_pos_score": 49.25, "learning_rate": 4.4000000000000006e-05, "loss": 2.4677, "norm_diff": 0.0171, "num_tokens_overlap": 5.5749, "num_tokens_union": 54.9426, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29983.9471, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0671, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7087, "sent_len_1": 66.6448, "sent_len_max_0": 18.8513, "sent_len_max_1": 189.585, "stdk": 0.0415, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 20800 }, { "accuracy": 53.5156, "doc_norm": 7.0849, "encoder_q-embeddings": 19591.4297, "encoder_q-layer.0": 13451.1885, "encoder_q-layer.1": 13522.8457, "encoder_q-layer.10": 22256.9961, "encoder_q-layer.11": 43967.8945, "encoder_q-layer.2": 14766.9102, "encoder_q-layer.3": 14794.1953, "encoder_q-layer.4": 15618.2451, "encoder_q-layer.5": 14611.6279, "encoder_q-layer.6": 15155.0596, "encoder_q-layer.7": 16002.5811, "encoder_q-layer.8": 18293.7129, "encoder_q-layer.9": 17191.9941, "epoch": 0.2, "inbatch_neg_score": 48.7144, "inbatch_pos_score": 49.1875, "learning_rate": 4.394444444444445e-05, "loss": 2.5067, "norm_diff": 0.0196, "num_tokens_overlap": 5.578, "num_tokens_union": 55.0441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28781.2678, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0653, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7315, "sent_len_1": 66.8268, "sent_len_max_0": 18.8888, "sent_len_max_1": 189.9025, "stdk": 0.0421, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 20900 }, { "accuracy": 55.1758, "doc_norm": 7.0845, "encoder_q-embeddings": 17681.0723, "encoder_q-layer.0": 12495.2773, "encoder_q-layer.1": 12713.3232, "encoder_q-layer.10": 21841.6094, "encoder_q-layer.11": 40897.5391, "encoder_q-layer.2": 13623.1475, "encoder_q-layer.3": 13156.6396, "encoder_q-layer.4": 13188.8545, "encoder_q-layer.5": 13058.7891, "encoder_q-layer.6": 14860.8594, "encoder_q-layer.7": 15201.5273, "encoder_q-layer.8": 16455.4258, "encoder_q-layer.9": 16303.6562, "epoch": 0.21, "inbatch_neg_score": 48.737, "inbatch_pos_score": 49.2188, "learning_rate": 4.388888888888889e-05, "loss": 2.4424, "norm_diff": 0.0172, "num_tokens_overlap": 5.5934, "num_tokens_union": 55.1736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26930.504, "preclip_grad_norm_avg": 0.0002, "query_norm": 7.0673, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7223, "sent_len_1": 67.0524, "sent_len_max_0": 18.8175, "sent_len_max_1": 189.6962, "stdk": 0.0408, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 21000 }, { "accuracy": 52.3438, "doc_norm": 7.0863, "encoder_q-embeddings": 19868.0, "encoder_q-layer.0": 13565.5928, "encoder_q-layer.1": 13548.0801, "encoder_q-layer.10": 24446.7246, "encoder_q-layer.11": 43946.2812, "encoder_q-layer.2": 14571.5928, "encoder_q-layer.3": 14291.4326, "encoder_q-layer.4": 14399.2031, "encoder_q-layer.5": 14097.0117, "encoder_q-layer.6": 15352.3037, "encoder_q-layer.7": 16605.3574, "encoder_q-layer.8": 18111.707, "encoder_q-layer.9": 17997.6543, "epoch": 0.21, "inbatch_neg_score": 48.7596, "inbatch_pos_score": 49.2188, "learning_rate": 4.383333333333334e-05, "loss": 2.4986, "norm_diff": 0.0185, "num_tokens_overlap": 5.557, "num_tokens_union": 54.9379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28846.4631, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0679, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7095, "sent_len_1": 66.6479, "sent_len_max_0": 18.7613, "sent_len_max_1": 189.3175, "stdk": 0.0418, "stdq": 0.04, "stdqueue_k": 0.0, "step": 21100 }, { "accuracy": 56.6406, "doc_norm": 7.0902, "encoder_q-embeddings": 18554.8789, "encoder_q-layer.0": 12739.334, "encoder_q-layer.1": 13013.3301, "encoder_q-layer.10": 22829.6895, "encoder_q-layer.11": 39170.4102, "encoder_q-layer.2": 13705.999, "encoder_q-layer.3": 13748.7188, "encoder_q-layer.4": 14515.2666, "encoder_q-layer.5": 13350.5557, "encoder_q-layer.6": 15061.7139, "encoder_q-layer.7": 15526.8896, "encoder_q-layer.8": 17632.9375, "encoder_q-layer.9": 17050.7207, "epoch": 0.21, "inbatch_neg_score": 48.7734, "inbatch_pos_score": 49.25, "learning_rate": 4.377777777777778e-05, "loss": 2.4578, "norm_diff": 0.0208, "num_tokens_overlap": 5.5767, "num_tokens_union": 55.053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27075.6469, "preclip_grad_norm_avg": 0.0002, "query_norm": 7.0694, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7091, "sent_len_1": 66.8951, "sent_len_max_0": 18.915, "sent_len_max_1": 188.3025, "stdk": 0.0415, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 21200 }, { "accuracy": 54.7852, "doc_norm": 7.0915, "encoder_q-embeddings": 19732.373, "encoder_q-layer.0": 13381.9307, "encoder_q-layer.1": 13278.249, "encoder_q-layer.10": 21151.1875, "encoder_q-layer.11": 41487.3281, "encoder_q-layer.2": 14191.8828, "encoder_q-layer.3": 14095.2012, "encoder_q-layer.4": 13910.3379, "encoder_q-layer.5": 13836.6016, "encoder_q-layer.6": 14800.5049, "encoder_q-layer.7": 15640.2451, "encoder_q-layer.8": 17155.7051, "encoder_q-layer.9": 16296.0996, "epoch": 0.21, "inbatch_neg_score": 48.7719, "inbatch_pos_score": 49.25, "learning_rate": 4.3722222222222224e-05, "loss": 2.4877, "norm_diff": 0.0228, "num_tokens_overlap": 5.5652, "num_tokens_union": 54.8866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27901.9652, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0686, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7174, "sent_len_1": 66.5755, "sent_len_max_0": 18.8837, "sent_len_max_1": 190.3025, "stdk": 0.0425, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 21300 }, { "accuracy": 53.3203, "doc_norm": 7.0873, "encoder_q-embeddings": 19790.2461, "encoder_q-layer.0": 14151.2334, "encoder_q-layer.1": 14754.6533, "encoder_q-layer.10": 23123.7773, "encoder_q-layer.11": 50739.2773, "encoder_q-layer.2": 15976.3613, "encoder_q-layer.3": 15193.7598, "encoder_q-layer.4": 15015.7393, "encoder_q-layer.5": 14598.4307, "encoder_q-layer.6": 15639.4434, "encoder_q-layer.7": 16629.875, "encoder_q-layer.8": 19094.3477, "encoder_q-layer.9": 17648.4551, "epoch": 0.21, "inbatch_neg_score": 48.7344, "inbatch_pos_score": 49.2188, "learning_rate": 4.3666666666666666e-05, "loss": 2.4797, "norm_diff": 0.0199, "num_tokens_overlap": 5.5828, "num_tokens_union": 54.9556, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30628.5454, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0674, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7317, "sent_len_1": 66.7065, "sent_len_max_0": 18.8425, "sent_len_max_1": 189.7038, "stdk": 0.0419, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 21400 }, { "accuracy": 55.4688, "doc_norm": 7.0898, "encoder_q-embeddings": 21708.0938, "encoder_q-layer.0": 14417.8145, "encoder_q-layer.1": 15025.6641, "encoder_q-layer.10": 22757.9922, "encoder_q-layer.11": 44246.3203, "encoder_q-layer.2": 16202.1162, "encoder_q-layer.3": 15382.9248, "encoder_q-layer.4": 15539.3594, "encoder_q-layer.5": 14944.0361, "encoder_q-layer.6": 15783.25, "encoder_q-layer.7": 18213.1113, "encoder_q-layer.8": 19764.3984, "encoder_q-layer.9": 18862.8301, "epoch": 0.21, "inbatch_neg_score": 48.7034, "inbatch_pos_score": 49.2188, "learning_rate": 4.3611111111111116e-05, "loss": 2.4853, "norm_diff": 0.0237, "num_tokens_overlap": 5.5735, "num_tokens_union": 54.944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30255.5215, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0661, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7164, "sent_len_1": 66.6816, "sent_len_max_0": 18.8637, "sent_len_max_1": 187.8525, "stdk": 0.0422, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 21500 }, { "accuracy": 55.3711, "doc_norm": 7.0804, "encoder_q-embeddings": 18138.3203, "encoder_q-layer.0": 12392.6328, "encoder_q-layer.1": 12779.8652, "encoder_q-layer.10": 23670.4258, "encoder_q-layer.11": 46942.2617, "encoder_q-layer.2": 13720.8867, "encoder_q-layer.3": 14214.3584, "encoder_q-layer.4": 14453.6328, "encoder_q-layer.5": 14121.1006, "encoder_q-layer.6": 14965.5225, "encoder_q-layer.7": 16096.5303, "encoder_q-layer.8": 18566.5488, "encoder_q-layer.9": 17165.7539, "epoch": 0.21, "inbatch_neg_score": 48.638, "inbatch_pos_score": 49.125, "learning_rate": 4.355555555555556e-05, "loss": 2.4507, "norm_diff": 0.0219, "num_tokens_overlap": 5.5819, "num_tokens_union": 55.0383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28766.9957, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0584, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7289, "sent_len_1": 66.8228, "sent_len_max_0": 18.935, "sent_len_max_1": 189.4925, "stdk": 0.0422, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 21600 }, { "accuracy": 53.2227, "doc_norm": 7.0761, "encoder_q-embeddings": 20536.2441, "encoder_q-layer.0": 13656.7334, "encoder_q-layer.1": 13897.6953, "encoder_q-layer.10": 24113.3047, "encoder_q-layer.11": 49323.1484, "encoder_q-layer.2": 15041.9736, "encoder_q-layer.3": 15144.7861, "encoder_q-layer.4": 15298.5605, "encoder_q-layer.5": 14820.3223, "encoder_q-layer.6": 15757.7725, "encoder_q-layer.7": 17245.0996, "encoder_q-layer.8": 19341.4336, "encoder_q-layer.9": 18198.6953, "epoch": 0.21, "inbatch_neg_score": 48.5733, "inbatch_pos_score": 49.0625, "learning_rate": 4.35e-05, "loss": 2.4455, "norm_diff": 0.0203, "num_tokens_overlap": 5.5789, "num_tokens_union": 54.9461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30488.327, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0558, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7176, "sent_len_1": 66.7053, "sent_len_max_0": 18.8962, "sent_len_max_1": 188.6213, "stdk": 0.0414, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 21700 }, { "accuracy": 54.2969, "doc_norm": 7.0851, "encoder_q-embeddings": 18287.7461, "encoder_q-layer.0": 12896.9971, "encoder_q-layer.1": 12932.8721, "encoder_q-layer.10": 22164.9219, "encoder_q-layer.11": 40120.2031, "encoder_q-layer.2": 14164.0723, "encoder_q-layer.3": 14247.5254, "encoder_q-layer.4": 14523.123, "encoder_q-layer.5": 13913.6221, "encoder_q-layer.6": 14912.5938, "encoder_q-layer.7": 15928.6553, "encoder_q-layer.8": 18630.8047, "encoder_q-layer.9": 16472.4062, "epoch": 0.21, "inbatch_neg_score": 48.6786, "inbatch_pos_score": 49.1562, "learning_rate": 4.344444444444445e-05, "loss": 2.4891, "norm_diff": 0.0243, "num_tokens_overlap": 5.5692, "num_tokens_union": 54.9163, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27439.4219, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0607, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7038, "sent_len_1": 66.5956, "sent_len_max_0": 18.7862, "sent_len_max_1": 191.7025, "stdk": 0.0422, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 21800 }, { "accuracy": 56.1523, "doc_norm": 7.0769, "encoder_q-embeddings": 18804.3555, "encoder_q-layer.0": 12986.5674, "encoder_q-layer.1": 13405.3965, "encoder_q-layer.10": 40181.2969, "encoder_q-layer.11": 54322.1367, "encoder_q-layer.2": 14504.4004, "encoder_q-layer.3": 15006.416, "encoder_q-layer.4": 15199.251, "encoder_q-layer.5": 15219.9443, "encoder_q-layer.6": 17248.5762, "encoder_q-layer.7": 18365.7578, "encoder_q-layer.8": 21497.8594, "encoder_q-layer.9": 23675.1211, "epoch": 0.21, "inbatch_neg_score": 48.6462, "inbatch_pos_score": 49.125, "learning_rate": 4.338888888888889e-05, "loss": 2.4404, "norm_diff": 0.0216, "num_tokens_overlap": 5.5862, "num_tokens_union": 55.0059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 34255.4054, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0553, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7491, "sent_len_1": 66.7673, "sent_len_max_0": 18.925, "sent_len_max_1": 190.4125, "stdk": 0.0408, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 21900 }, { "accuracy": 51.9531, "doc_norm": 7.0762, "encoder_q-embeddings": 18626.625, "encoder_q-layer.0": 12860.668, "encoder_q-layer.1": 12880.9629, "encoder_q-layer.10": 23663.1094, "encoder_q-layer.11": 49138.9922, "encoder_q-layer.2": 13769.0547, "encoder_q-layer.3": 14086.7314, "encoder_q-layer.4": 14456.125, "encoder_q-layer.5": 14230.0107, "encoder_q-layer.6": 15263.8447, "encoder_q-layer.7": 17817.4688, "encoder_q-layer.8": 19350.4512, "encoder_q-layer.9": 18103.3359, "epoch": 0.21, "inbatch_neg_score": 48.5771, "inbatch_pos_score": 49.0625, "learning_rate": 4.3333333333333334e-05, "loss": 2.4662, "norm_diff": 0.0211, "num_tokens_overlap": 5.5925, "num_tokens_union": 55.0434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29813.633, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.055, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7452, "sent_len_1": 66.779, "sent_len_max_0": 18.7763, "sent_len_max_1": 189.3475, "stdk": 0.0413, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 22000 }, { "accuracy": 54.5898, "doc_norm": 7.0735, "encoder_q-embeddings": 19059.3652, "encoder_q-layer.0": 13117.416, "encoder_q-layer.1": 13156.9707, "encoder_q-layer.10": 25853.5703, "encoder_q-layer.11": 46120.4844, "encoder_q-layer.2": 14686.2842, "encoder_q-layer.3": 14935.4717, "encoder_q-layer.4": 15387.7979, "encoder_q-layer.5": 15389.6357, "encoder_q-layer.6": 17428.6191, "encoder_q-layer.7": 18054.6875, "encoder_q-layer.8": 19732.4004, "encoder_q-layer.9": 18991.2812, "epoch": 0.22, "inbatch_neg_score": 48.5157, "inbatch_pos_score": 49.0, "learning_rate": 4.3277777777777776e-05, "loss": 2.4482, "norm_diff": 0.0195, "num_tokens_overlap": 5.5689, "num_tokens_union": 54.7968, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30287.5663, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.054, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6868, "sent_len_1": 66.4808, "sent_len_max_0": 18.7638, "sent_len_max_1": 190.9038, "stdk": 0.0419, "stdq": 0.0402, "stdqueue_k": 0.0, "step": 22100 }, { "accuracy": 53.9062, "doc_norm": 7.0738, "encoder_q-embeddings": 20083.7734, "encoder_q-layer.0": 13710.4502, "encoder_q-layer.1": 14148.0635, "encoder_q-layer.10": 24563.4023, "encoder_q-layer.11": 42487.4766, "encoder_q-layer.2": 15303.9629, "encoder_q-layer.3": 15561.8193, "encoder_q-layer.4": 15475.0078, "encoder_q-layer.5": 14888.4492, "encoder_q-layer.6": 16113.1143, "encoder_q-layer.7": 17051.6621, "encoder_q-layer.8": 19776.1016, "encoder_q-layer.9": 18219.7461, "epoch": 0.22, "inbatch_neg_score": 48.4945, "inbatch_pos_score": 49.0, "learning_rate": 4.3222222222222226e-05, "loss": 2.4357, "norm_diff": 0.0232, "num_tokens_overlap": 5.5789, "num_tokens_union": 54.9966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29353.8098, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0507, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7314, "sent_len_1": 66.7681, "sent_len_max_0": 18.8762, "sent_len_max_1": 191.2463, "stdk": 0.0416, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 22200 }, { "accuracy": 53.3203, "doc_norm": 7.0713, "encoder_q-embeddings": 19995.7715, "encoder_q-layer.0": 13554.749, "encoder_q-layer.1": 13803.1191, "encoder_q-layer.10": 27122.4414, "encoder_q-layer.11": 49953.1836, "encoder_q-layer.2": 14946.7432, "encoder_q-layer.3": 14981.1455, "encoder_q-layer.4": 15539.6572, "encoder_q-layer.5": 15431.8818, "encoder_q-layer.6": 16690.7969, "encoder_q-layer.7": 17815.1836, "encoder_q-layer.8": 20426.9258, "encoder_q-layer.9": 19598.0137, "epoch": 0.22, "inbatch_neg_score": 48.4443, "inbatch_pos_score": 48.9375, "learning_rate": 4.316666666666667e-05, "loss": 2.4955, "norm_diff": 0.0247, "num_tokens_overlap": 5.5702, "num_tokens_union": 54.9773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30923.6342, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0466, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7046, "sent_len_1": 66.788, "sent_len_max_0": 18.9075, "sent_len_max_1": 191.0225, "stdk": 0.0432, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 22300 }, { "accuracy": 53.5156, "doc_norm": 7.0611, "encoder_q-embeddings": 19512.5156, "encoder_q-layer.0": 13326.4531, "encoder_q-layer.1": 12829.6309, "encoder_q-layer.10": 25221.5273, "encoder_q-layer.11": 44342.4492, "encoder_q-layer.2": 13724.3271, "encoder_q-layer.3": 13664.5098, "encoder_q-layer.4": 13972.4365, "encoder_q-layer.5": 13618.4482, "encoder_q-layer.6": 14861.6064, "encoder_q-layer.7": 16480.8027, "encoder_q-layer.8": 18237.4277, "encoder_q-layer.9": 17054.9766, "epoch": 0.22, "inbatch_neg_score": 48.3987, "inbatch_pos_score": 48.875, "learning_rate": 4.311111111111111e-05, "loss": 2.4696, "norm_diff": 0.0193, "num_tokens_overlap": 5.577, "num_tokens_union": 55.1599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28813.3221, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0418, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.718, "sent_len_1": 66.9794, "sent_len_max_0": 18.825, "sent_len_max_1": 189.42, "stdk": 0.042, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 22400 }, { "accuracy": 55.957, "doc_norm": 7.0635, "encoder_q-embeddings": 18747.7383, "encoder_q-layer.0": 12757.335, "encoder_q-layer.1": 12966.916, "encoder_q-layer.10": 25658.3145, "encoder_q-layer.11": 43766.3867, "encoder_q-layer.2": 14317.792, "encoder_q-layer.3": 14219.5322, "encoder_q-layer.4": 14374.9375, "encoder_q-layer.5": 13880.3574, "encoder_q-layer.6": 15364.7793, "encoder_q-layer.7": 16090.6807, "encoder_q-layer.8": 18161.2285, "encoder_q-layer.9": 18074.4707, "epoch": 0.22, "inbatch_neg_score": 48.4441, "inbatch_pos_score": 48.9062, "learning_rate": 4.305555555555556e-05, "loss": 2.4781, "norm_diff": 0.0181, "num_tokens_overlap": 5.5723, "num_tokens_union": 55.1306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28701.621, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0453, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6948, "sent_len_1": 66.9562, "sent_len_max_0": 18.7625, "sent_len_max_1": 188.9588, "stdk": 0.0398, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 22500 }, { "accuracy": 53.6133, "doc_norm": 7.0673, "encoder_q-embeddings": 20378.873, "encoder_q-layer.0": 13925.8076, "encoder_q-layer.1": 13929.3203, "encoder_q-layer.10": 23970.4238, "encoder_q-layer.11": 45371.3008, "encoder_q-layer.2": 15235.7568, "encoder_q-layer.3": 15014.4268, "encoder_q-layer.4": 15558.9072, "encoder_q-layer.5": 14924.1494, "encoder_q-layer.6": 15785.6719, "encoder_q-layer.7": 17077.6758, "encoder_q-layer.8": 19763.9629, "encoder_q-layer.9": 17941.5879, "epoch": 0.22, "inbatch_neg_score": 48.4513, "inbatch_pos_score": 48.9375, "learning_rate": 4.3e-05, "loss": 2.4528, "norm_diff": 0.0217, "num_tokens_overlap": 5.5758, "num_tokens_union": 55.0431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29774.9939, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0456, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7305, "sent_len_1": 66.8374, "sent_len_max_0": 18.905, "sent_len_max_1": 189.0125, "stdk": 0.0422, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 22600 }, { "accuracy": 52.2461, "doc_norm": 7.0662, "encoder_q-embeddings": 19530.7637, "encoder_q-layer.0": 13057.8994, "encoder_q-layer.1": 13373.4785, "encoder_q-layer.10": 22064.4258, "encoder_q-layer.11": 43256.3242, "encoder_q-layer.2": 14532.7363, "encoder_q-layer.3": 14741.9688, "encoder_q-layer.4": 15358.6221, "encoder_q-layer.5": 14877.1396, "encoder_q-layer.6": 16267.8604, "encoder_q-layer.7": 17617.4863, "encoder_q-layer.8": 19637.9121, "encoder_q-layer.9": 17705.0547, "epoch": 0.22, "inbatch_neg_score": 48.4485, "inbatch_pos_score": 48.9062, "learning_rate": 4.294444444444445e-05, "loss": 2.4673, "norm_diff": 0.0199, "num_tokens_overlap": 5.5702, "num_tokens_union": 54.9567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28669.2957, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0463, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6855, "sent_len_1": 66.7213, "sent_len_max_0": 18.8188, "sent_len_max_1": 188.7012, "stdk": 0.0413, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 22700 }, { "accuracy": 52.1484, "doc_norm": 7.0622, "encoder_q-embeddings": 19077.0547, "encoder_q-layer.0": 13106.8037, "encoder_q-layer.1": 13074.085, "encoder_q-layer.10": 20978.8066, "encoder_q-layer.11": 42005.5898, "encoder_q-layer.2": 14043.7725, "encoder_q-layer.3": 14355.7393, "encoder_q-layer.4": 14910.1562, "encoder_q-layer.5": 14431.1133, "encoder_q-layer.6": 15288.6748, "encoder_q-layer.7": 16822.4316, "encoder_q-layer.8": 17902.1133, "encoder_q-layer.9": 16439.1113, "epoch": 0.22, "inbatch_neg_score": 48.3889, "inbatch_pos_score": 48.875, "learning_rate": 4.2888888888888886e-05, "loss": 2.4266, "norm_diff": 0.0218, "num_tokens_overlap": 5.5867, "num_tokens_union": 55.1371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27945.9553, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0404, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7512, "sent_len_1": 66.9478, "sent_len_max_0": 18.9737, "sent_len_max_1": 190.0513, "stdk": 0.0424, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 22800 }, { "accuracy": 53.6133, "doc_norm": 7.0664, "encoder_q-embeddings": 18717.7754, "encoder_q-layer.0": 13002.6963, "encoder_q-layer.1": 13099.4238, "encoder_q-layer.10": 26524.3516, "encoder_q-layer.11": 41888.8984, "encoder_q-layer.2": 14400.4883, "encoder_q-layer.3": 14274.6836, "encoder_q-layer.4": 14240.6328, "encoder_q-layer.5": 13910.2617, "encoder_q-layer.6": 15091.7061, "encoder_q-layer.7": 15996.043, "encoder_q-layer.8": 18969.4863, "encoder_q-layer.9": 18224.5684, "epoch": 0.22, "inbatch_neg_score": 48.442, "inbatch_pos_score": 48.9375, "learning_rate": 4.2833333333333335e-05, "loss": 2.3919, "norm_diff": 0.0231, "num_tokens_overlap": 5.5886, "num_tokens_union": 55.0329, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28384.3974, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0433, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7183, "sent_len_1": 66.8313, "sent_len_max_0": 18.8062, "sent_len_max_1": 189.5062, "stdk": 0.0422, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 22900 }, { "accuracy": 53.5156, "doc_norm": 7.0721, "encoder_q-embeddings": 20047.834, "encoder_q-layer.0": 13400.0703, "encoder_q-layer.1": 13548.0625, "encoder_q-layer.10": 29772.0977, "encoder_q-layer.11": 48557.8594, "encoder_q-layer.2": 14758.6104, "encoder_q-layer.3": 15022.6738, "encoder_q-layer.4": 15429.29, "encoder_q-layer.5": 14310.1533, "encoder_q-layer.6": 15608.2295, "encoder_q-layer.7": 17169.8789, "encoder_q-layer.8": 19191.2637, "encoder_q-layer.9": 19755.1699, "epoch": 0.22, "inbatch_neg_score": 48.4609, "inbatch_pos_score": 48.9375, "learning_rate": 4.277777777777778e-05, "loss": 2.4699, "norm_diff": 0.0233, "num_tokens_overlap": 5.5752, "num_tokens_union": 54.9256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31202.6157, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0487, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.711, "sent_len_1": 66.6552, "sent_len_max_0": 18.7737, "sent_len_max_1": 189.3388, "stdk": 0.0428, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 23000 }, { "accuracy": 52.7344, "doc_norm": 7.0691, "encoder_q-embeddings": 20212.9199, "encoder_q-layer.0": 13621.6201, "encoder_q-layer.1": 14074.8965, "encoder_q-layer.10": 34075.3164, "encoder_q-layer.11": 51658.1484, "encoder_q-layer.2": 15312.2422, "encoder_q-layer.3": 15966.5205, "encoder_q-layer.4": 16857.3301, "encoder_q-layer.5": 15536.5049, "encoder_q-layer.6": 16611.2383, "encoder_q-layer.7": 17986.8887, "encoder_q-layer.8": 22037.8789, "encoder_q-layer.9": 21269.8633, "epoch": 0.23, "inbatch_neg_score": 48.4235, "inbatch_pos_score": 48.9062, "learning_rate": 4.272222222222223e-05, "loss": 2.5313, "norm_diff": 0.0222, "num_tokens_overlap": 5.584, "num_tokens_union": 54.9731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 33130.0079, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0469, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7048, "sent_len_1": 66.7301, "sent_len_max_0": 18.7975, "sent_len_max_1": 189.6912, "stdk": 0.0429, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 23100 }, { "accuracy": 52.9297, "doc_norm": 7.0676, "encoder_q-embeddings": 19181.0078, "encoder_q-layer.0": 13525.4326, "encoder_q-layer.1": 13545.7324, "encoder_q-layer.10": 21749.3242, "encoder_q-layer.11": 42239.6914, "encoder_q-layer.2": 14506.6045, "encoder_q-layer.3": 14719.5732, "encoder_q-layer.4": 14899.875, "encoder_q-layer.5": 14039.7275, "encoder_q-layer.6": 14975.877, "encoder_q-layer.7": 16411.6035, "encoder_q-layer.8": 18068.3262, "encoder_q-layer.9": 16900.2871, "epoch": 0.23, "inbatch_neg_score": 48.4495, "inbatch_pos_score": 48.9062, "learning_rate": 4.266666666666667e-05, "loss": 2.4493, "norm_diff": 0.0207, "num_tokens_overlap": 5.5814, "num_tokens_union": 55.1657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28476.0201, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.047, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.743, "sent_len_1": 66.965, "sent_len_max_0": 18.84, "sent_len_max_1": 190.8063, "stdk": 0.0411, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 23200 }, { "accuracy": 53.5156, "doc_norm": 7.0718, "encoder_q-embeddings": 19723.9375, "encoder_q-layer.0": 13520.749, "encoder_q-layer.1": 13734.917, "encoder_q-layer.10": 29199.6172, "encoder_q-layer.11": 47788.3164, "encoder_q-layer.2": 14939.2715, "encoder_q-layer.3": 15402.5635, "encoder_q-layer.4": 15911.5449, "encoder_q-layer.5": 15896.5225, "encoder_q-layer.6": 16894.5195, "encoder_q-layer.7": 17498.6309, "encoder_q-layer.8": 19859.0332, "encoder_q-layer.9": 20648.4141, "epoch": 0.23, "inbatch_neg_score": 48.4647, "inbatch_pos_score": 48.9375, "learning_rate": 4.261111111111111e-05, "loss": 2.4096, "norm_diff": 0.0223, "num_tokens_overlap": 5.5896, "num_tokens_union": 54.9944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31325.5588, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0495, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7147, "sent_len_1": 66.7389, "sent_len_max_0": 18.9275, "sent_len_max_1": 188.5525, "stdk": 0.0418, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 23300 }, { "accuracy": 54.3945, "doc_norm": 7.0643, "encoder_q-embeddings": 21324.5469, "encoder_q-layer.0": 14011.4316, "encoder_q-layer.1": 14027.2471, "encoder_q-layer.10": 26539.6719, "encoder_q-layer.11": 46787.0195, "encoder_q-layer.2": 15203.8301, "encoder_q-layer.3": 15297.3486, "encoder_q-layer.4": 15962.291, "encoder_q-layer.5": 14924.8916, "encoder_q-layer.6": 15736.7178, "encoder_q-layer.7": 18551.9004, "encoder_q-layer.8": 20474.1855, "encoder_q-layer.9": 18519.6855, "epoch": 0.23, "inbatch_neg_score": 48.4016, "inbatch_pos_score": 48.875, "learning_rate": 4.255555555555556e-05, "loss": 2.4291, "norm_diff": 0.0202, "num_tokens_overlap": 5.5712, "num_tokens_union": 55.0108, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30759.8909, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0441, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7103, "sent_len_1": 66.7667, "sent_len_max_0": 18.88, "sent_len_max_1": 189.0662, "stdk": 0.0417, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 23400 }, { "accuracy": 52.5391, "doc_norm": 7.0686, "encoder_q-embeddings": 19352.5801, "encoder_q-layer.0": 13177.7471, "encoder_q-layer.1": 13640.916, "encoder_q-layer.10": 23929.2441, "encoder_q-layer.11": 45715.3203, "encoder_q-layer.2": 14232.7646, "encoder_q-layer.3": 14615.5391, "encoder_q-layer.4": 14619.5615, "encoder_q-layer.5": 14396.6738, "encoder_q-layer.6": 15797.666, "encoder_q-layer.7": 17553.5215, "encoder_q-layer.8": 19998.2598, "encoder_q-layer.9": 18497.6074, "epoch": 0.23, "inbatch_neg_score": 48.4704, "inbatch_pos_score": 48.9375, "learning_rate": 4.25e-05, "loss": 2.4718, "norm_diff": 0.0211, "num_tokens_overlap": 5.564, "num_tokens_union": 55.0333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29302.9929, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0475, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6942, "sent_len_1": 66.8243, "sent_len_max_0": 18.8438, "sent_len_max_1": 190.9212, "stdk": 0.0408, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 23500 }, { "accuracy": 55.6641, "doc_norm": 7.0641, "encoder_q-embeddings": 18771.7051, "encoder_q-layer.0": 12813.7109, "encoder_q-layer.1": 12832.585, "encoder_q-layer.10": 22089.7793, "encoder_q-layer.11": 45845.418, "encoder_q-layer.2": 13745.2275, "encoder_q-layer.3": 13673.9805, "encoder_q-layer.4": 13877.3262, "encoder_q-layer.5": 13470.0684, "encoder_q-layer.6": 14877.168, "encoder_q-layer.7": 16192.8525, "encoder_q-layer.8": 17851.1133, "encoder_q-layer.9": 16189.6602, "epoch": 0.23, "inbatch_neg_score": 48.3955, "inbatch_pos_score": 48.875, "learning_rate": 4.2444444444444445e-05, "loss": 2.4178, "norm_diff": 0.0235, "num_tokens_overlap": 5.5829, "num_tokens_union": 54.9694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28281.637, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0406, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7418, "sent_len_1": 66.6614, "sent_len_max_0": 18.8738, "sent_len_max_1": 191.1213, "stdk": 0.0412, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 23600 }, { "accuracy": 54.8828, "doc_norm": 7.0695, "encoder_q-embeddings": 18444.8984, "encoder_q-layer.0": 12667.9932, "encoder_q-layer.1": 12967.2236, "encoder_q-layer.10": 21750.6055, "encoder_q-layer.11": 43009.0352, "encoder_q-layer.2": 14225.1875, "encoder_q-layer.3": 14116.9268, "encoder_q-layer.4": 14444.5264, "encoder_q-layer.5": 14237.7451, "encoder_q-layer.6": 15569.2549, "encoder_q-layer.7": 17548.6973, "encoder_q-layer.8": 19881.1484, "encoder_q-layer.9": 16767.9805, "epoch": 0.23, "inbatch_neg_score": 48.4205, "inbatch_pos_score": 48.9375, "learning_rate": 4.238888888888889e-05, "loss": 2.466, "norm_diff": 0.0253, "num_tokens_overlap": 5.5881, "num_tokens_union": 55.0252, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28417.1392, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0442, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.748, "sent_len_1": 66.8067, "sent_len_max_0": 18.9262, "sent_len_max_1": 189.2138, "stdk": 0.0429, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 23700 }, { "accuracy": 56.0547, "doc_norm": 7.0621, "encoder_q-embeddings": 18159.7539, "encoder_q-layer.0": 12790.3398, "encoder_q-layer.1": 12976.5078, "encoder_q-layer.10": 21300.7344, "encoder_q-layer.11": 43927.1367, "encoder_q-layer.2": 14086.6035, "encoder_q-layer.3": 14175.3047, "encoder_q-layer.4": 14912.4863, "encoder_q-layer.5": 14030.9531, "encoder_q-layer.6": 15024.4512, "encoder_q-layer.7": 16340.4395, "encoder_q-layer.8": 19121.8086, "encoder_q-layer.9": 17137.959, "epoch": 0.23, "inbatch_neg_score": 48.3345, "inbatch_pos_score": 48.8125, "learning_rate": 4.233333333333334e-05, "loss": 2.4569, "norm_diff": 0.0229, "num_tokens_overlap": 5.5803, "num_tokens_union": 55.0064, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28224.3889, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0392, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.745, "sent_len_1": 66.6638, "sent_len_max_0": 18.8863, "sent_len_max_1": 189.1188, "stdk": 0.0424, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 23800 }, { "accuracy": 53.5156, "doc_norm": 7.06, "encoder_q-embeddings": 19881.4219, "encoder_q-layer.0": 13203.916, "encoder_q-layer.1": 13175.1621, "encoder_q-layer.10": 27580.9336, "encoder_q-layer.11": 44095.918, "encoder_q-layer.2": 14224.8203, "encoder_q-layer.3": 14586.1074, "encoder_q-layer.4": 14722.0234, "encoder_q-layer.5": 14644.4346, "encoder_q-layer.6": 15521.0645, "encoder_q-layer.7": 15699.6484, "encoder_q-layer.8": 19117.1309, "encoder_q-layer.9": 17823.5508, "epoch": 0.23, "inbatch_neg_score": 48.332, "inbatch_pos_score": 48.8125, "learning_rate": 4.227777777777778e-05, "loss": 2.4612, "norm_diff": 0.0232, "num_tokens_overlap": 5.5746, "num_tokens_union": 54.9961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29241.2893, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0367, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7298, "sent_len_1": 66.7921, "sent_len_max_0": 18.9213, "sent_len_max_1": 189.8837, "stdk": 0.041, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 23900 }, { "accuracy": 52.9297, "doc_norm": 7.0546, "encoder_q-embeddings": 18366.2773, "encoder_q-layer.0": 13014.501, "encoder_q-layer.1": 13592.6875, "encoder_q-layer.10": 26165.9824, "encoder_q-layer.11": 42599.4922, "encoder_q-layer.2": 14702.835, "encoder_q-layer.3": 14681.6963, "encoder_q-layer.4": 15041.1416, "encoder_q-layer.5": 14845.2324, "encoder_q-layer.6": 15921.3496, "encoder_q-layer.7": 16751.7539, "encoder_q-layer.8": 18871.1953, "encoder_q-layer.9": 18169.8809, "epoch": 0.23, "inbatch_neg_score": 48.2636, "inbatch_pos_score": 48.75, "learning_rate": 4.222222222222222e-05, "loss": 2.4315, "norm_diff": 0.0224, "num_tokens_overlap": 5.5889, "num_tokens_union": 55.0427, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29074.8975, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0322, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.725, "sent_len_1": 66.812, "sent_len_max_0": 18.9563, "sent_len_max_1": 189.44, "stdk": 0.0407, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 24000 }, { "accuracy": 56.1523, "doc_norm": 7.056, "encoder_q-embeddings": 18521.5762, "encoder_q-layer.0": 12783.3271, "encoder_q-layer.1": 13544.3672, "encoder_q-layer.10": 21647.9316, "encoder_q-layer.11": 41627.168, "encoder_q-layer.2": 14229.0234, "encoder_q-layer.3": 14122.6777, "encoder_q-layer.4": 14349.6973, "encoder_q-layer.5": 14164.1855, "encoder_q-layer.6": 14823.6504, "encoder_q-layer.7": 15683.0537, "encoder_q-layer.8": 17621.9551, "encoder_q-layer.9": 16692.9453, "epoch": 0.24, "inbatch_neg_score": 48.1915, "inbatch_pos_score": 48.6875, "learning_rate": 4.216666666666667e-05, "loss": 2.4393, "norm_diff": 0.0264, "num_tokens_overlap": 5.5833, "num_tokens_union": 54.9721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27823.7557, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0296, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7028, "sent_len_1": 66.7855, "sent_len_max_0": 18.8275, "sent_len_max_1": 189.1925, "stdk": 0.0429, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 24100 }, { "accuracy": 55.5664, "doc_norm": 7.0544, "encoder_q-embeddings": 17613.4102, "encoder_q-layer.0": 12347.417, "encoder_q-layer.1": 12443.7725, "encoder_q-layer.10": 22491.2324, "encoder_q-layer.11": 44317.8359, "encoder_q-layer.2": 13836.4463, "encoder_q-layer.3": 13761.8164, "encoder_q-layer.4": 14336.415, "encoder_q-layer.5": 13603.7197, "encoder_q-layer.6": 14519.5361, "encoder_q-layer.7": 15313.1289, "encoder_q-layer.8": 17857.4434, "encoder_q-layer.9": 16624.0469, "epoch": 0.24, "inbatch_neg_score": 48.2468, "inbatch_pos_score": 48.75, "learning_rate": 4.211111111111111e-05, "loss": 2.4592, "norm_diff": 0.0225, "num_tokens_overlap": 5.5794, "num_tokens_union": 55.2173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27673.502, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0319, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7385, "sent_len_1": 67.0319, "sent_len_max_0": 18.8412, "sent_len_max_1": 190.1912, "stdk": 0.0414, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 24200 }, { "accuracy": 54.9805, "doc_norm": 7.0475, "encoder_q-embeddings": 17983.0566, "encoder_q-layer.0": 12734.6963, "encoder_q-layer.1": 12774.8369, "encoder_q-layer.10": 22428.0918, "encoder_q-layer.11": 40681.5234, "encoder_q-layer.2": 13684.8721, "encoder_q-layer.3": 13812.5918, "encoder_q-layer.4": 14304.4727, "encoder_q-layer.5": 13424.7773, "encoder_q-layer.6": 14784.8242, "encoder_q-layer.7": 16431.2461, "encoder_q-layer.8": 17730.9824, "encoder_q-layer.9": 16593.002, "epoch": 0.24, "inbatch_neg_score": 48.1802, "inbatch_pos_score": 48.6562, "learning_rate": 4.205555555555556e-05, "loss": 2.4604, "norm_diff": 0.0229, "num_tokens_overlap": 5.5714, "num_tokens_union": 55.1538, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27468.2786, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0246, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7248, "sent_len_1": 66.9966, "sent_len_max_0": 18.77, "sent_len_max_1": 191.9412, "stdk": 0.0416, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 24300 }, { "accuracy": 55.1758, "doc_norm": 7.0468, "encoder_q-embeddings": 20533.0684, "encoder_q-layer.0": 13413.3057, "encoder_q-layer.1": 14137.6543, "encoder_q-layer.10": 21387.5742, "encoder_q-layer.11": 40240.6289, "encoder_q-layer.2": 15426.6729, "encoder_q-layer.3": 15547.9102, "encoder_q-layer.4": 15064.3438, "encoder_q-layer.5": 14475.0391, "encoder_q-layer.6": 15587.625, "encoder_q-layer.7": 15941.251, "encoder_q-layer.8": 17923.7168, "encoder_q-layer.9": 16283.0801, "epoch": 0.24, "inbatch_neg_score": 48.1555, "inbatch_pos_score": 48.625, "learning_rate": 4.2e-05, "loss": 2.4471, "norm_diff": 0.0211, "num_tokens_overlap": 5.5761, "num_tokens_union": 54.9495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28481.6194, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0257, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7035, "sent_len_1": 66.6859, "sent_len_max_0": 18.945, "sent_len_max_1": 188.4888, "stdk": 0.0426, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 24400 }, { "accuracy": 52.7344, "doc_norm": 7.0454, "encoder_q-embeddings": 19264.1426, "encoder_q-layer.0": 13105.5898, "encoder_q-layer.1": 13694.3496, "encoder_q-layer.10": 20775.7871, "encoder_q-layer.11": 37508.9453, "encoder_q-layer.2": 14503.2275, "encoder_q-layer.3": 14882.126, "encoder_q-layer.4": 15257.667, "encoder_q-layer.5": 14957.3027, "encoder_q-layer.6": 15344.0723, "encoder_q-layer.7": 15419.9609, "encoder_q-layer.8": 16867.7637, "encoder_q-layer.9": 16016.3623, "epoch": 0.24, "inbatch_neg_score": 48.169, "inbatch_pos_score": 48.625, "learning_rate": 4.194444444444445e-05, "loss": 2.4386, "norm_diff": 0.0208, "num_tokens_overlap": 5.5828, "num_tokens_union": 54.9848, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27041.834, "preclip_grad_norm_avg": 0.0002, "query_norm": 7.0247, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7085, "sent_len_1": 66.7413, "sent_len_max_0": 18.8738, "sent_len_max_1": 190.0913, "stdk": 0.0419, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 24500 }, { "accuracy": 54.3945, "doc_norm": 7.0427, "encoder_q-embeddings": 18619.5234, "encoder_q-layer.0": 13213.4473, "encoder_q-layer.1": 13548.4619, "encoder_q-layer.10": 23587.6582, "encoder_q-layer.11": 48367.4492, "encoder_q-layer.2": 14119.1504, "encoder_q-layer.3": 14096.0713, "encoder_q-layer.4": 14388.998, "encoder_q-layer.5": 13822.9199, "encoder_q-layer.6": 15613.7793, "encoder_q-layer.7": 16410.1055, "encoder_q-layer.8": 19665.0566, "encoder_q-layer.9": 17370.0781, "epoch": 0.24, "inbatch_neg_score": 48.0932, "inbatch_pos_score": 48.5938, "learning_rate": 4.188888888888889e-05, "loss": 2.4116, "norm_diff": 0.0195, "num_tokens_overlap": 5.5793, "num_tokens_union": 54.968, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29539.8894, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0232, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7024, "sent_len_1": 66.7517, "sent_len_max_0": 18.82, "sent_len_max_1": 192.095, "stdk": 0.041, "stdq": 0.04, "stdqueue_k": 0.0, "step": 24600 }, { "accuracy": 54.1016, "doc_norm": 7.0462, "encoder_q-embeddings": 19690.5273, "encoder_q-layer.0": 12865.1416, "encoder_q-layer.1": 13155.3906, "encoder_q-layer.10": 21004.6719, "encoder_q-layer.11": 42846.3438, "encoder_q-layer.2": 14107.6436, "encoder_q-layer.3": 14448.9941, "encoder_q-layer.4": 14640.0029, "encoder_q-layer.5": 14318.2412, "encoder_q-layer.6": 15335.0742, "encoder_q-layer.7": 17830.0938, "encoder_q-layer.8": 17652.9531, "encoder_q-layer.9": 16524.3262, "epoch": 0.24, "inbatch_neg_score": 48.1787, "inbatch_pos_score": 48.6562, "learning_rate": 4.183333333333334e-05, "loss": 2.4091, "norm_diff": 0.0211, "num_tokens_overlap": 5.5892, "num_tokens_union": 55.0539, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28265.7428, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0251, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.712, "sent_len_1": 66.9029, "sent_len_max_0": 18.79, "sent_len_max_1": 189.175, "stdk": 0.041, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 24700 }, { "accuracy": 54.5898, "doc_norm": 7.0427, "encoder_q-embeddings": 19153.6777, "encoder_q-layer.0": 13079.458, "encoder_q-layer.1": 13602.4297, "encoder_q-layer.10": 24462.6367, "encoder_q-layer.11": 40341.1992, "encoder_q-layer.2": 14257.7725, "encoder_q-layer.3": 14297.9189, "encoder_q-layer.4": 14791.9932, "encoder_q-layer.5": 14618.708, "encoder_q-layer.6": 15029.8691, "encoder_q-layer.7": 16654.8555, "encoder_q-layer.8": 18592.0625, "encoder_q-layer.9": 17762.3047, "epoch": 0.24, "inbatch_neg_score": 48.1108, "inbatch_pos_score": 48.5938, "learning_rate": 4.177777777777778e-05, "loss": 2.4611, "norm_diff": 0.021, "num_tokens_overlap": 5.5853, "num_tokens_union": 55.025, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28265.5345, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0218, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7264, "sent_len_1": 66.8052, "sent_len_max_0": 18.8062, "sent_len_max_1": 188.1113, "stdk": 0.0414, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 24800 }, { "accuracy": 56.543, "doc_norm": 7.0396, "encoder_q-embeddings": 19090.1035, "encoder_q-layer.0": 13387.7148, "encoder_q-layer.1": 13585.7139, "encoder_q-layer.10": 21778.709, "encoder_q-layer.11": 39322.9453, "encoder_q-layer.2": 14333.5352, "encoder_q-layer.3": 14246.582, "encoder_q-layer.4": 14383.9658, "encoder_q-layer.5": 14117.0508, "encoder_q-layer.6": 15056.5752, "encoder_q-layer.7": 16234.3096, "encoder_q-layer.8": 17930.543, "encoder_q-layer.9": 17002.0977, "epoch": 0.24, "inbatch_neg_score": 48.0275, "inbatch_pos_score": 48.5312, "learning_rate": 4.172222222222222e-05, "loss": 2.4762, "norm_diff": 0.0231, "num_tokens_overlap": 5.5532, "num_tokens_union": 54.9927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27659.7364, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0165, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6679, "sent_len_1": 66.8372, "sent_len_max_0": 18.7763, "sent_len_max_1": 190.6287, "stdk": 0.0416, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 24900 }, { "accuracy": 57.7148, "doc_norm": 7.0435, "encoder_q-embeddings": 19465.1113, "encoder_q-layer.0": 13574.3203, "encoder_q-layer.1": 13473.6875, "encoder_q-layer.10": 24567.7246, "encoder_q-layer.11": 43885.6289, "encoder_q-layer.2": 14485.7383, "encoder_q-layer.3": 14481.6104, "encoder_q-layer.4": 14759.0049, "encoder_q-layer.5": 14187.9014, "encoder_q-layer.6": 15654.9658, "encoder_q-layer.7": 16939.9199, "encoder_q-layer.8": 19501.7871, "encoder_q-layer.9": 17868.0957, "epoch": 0.24, "inbatch_neg_score": 48.0544, "inbatch_pos_score": 48.5625, "learning_rate": 4.166666666666667e-05, "loss": 2.4278, "norm_diff": 0.0232, "num_tokens_overlap": 5.5816, "num_tokens_union": 55.0167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28927.1448, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0203, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7011, "sent_len_1": 66.8361, "sent_len_max_0": 18.92, "sent_len_max_1": 191.47, "stdk": 0.0435, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 25000 }, { "accuracy": 54.2969, "doc_norm": 7.0455, "encoder_q-embeddings": 17792.666, "encoder_q-layer.0": 12599.6797, "encoder_q-layer.1": 12626.2461, "encoder_q-layer.10": 23108.5215, "encoder_q-layer.11": 42051.3242, "encoder_q-layer.2": 13676.9688, "encoder_q-layer.3": 13977.3848, "encoder_q-layer.4": 15055.2871, "encoder_q-layer.5": 14003.2031, "encoder_q-layer.6": 14698.2363, "encoder_q-layer.7": 16650.707, "encoder_q-layer.8": 19475.7754, "encoder_q-layer.9": 16763.1035, "epoch": 0.25, "inbatch_neg_score": 48.1358, "inbatch_pos_score": 48.5938, "learning_rate": 4.1611111111111114e-05, "loss": 2.4343, "norm_diff": 0.0232, "num_tokens_overlap": 5.5719, "num_tokens_union": 55.0455, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28114.6321, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0223, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.715, "sent_len_1": 66.8312, "sent_len_max_0": 18.9112, "sent_len_max_1": 190.88, "stdk": 0.0416, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 25100 }, { "accuracy": 57.6172, "doc_norm": 7.049, "encoder_q-embeddings": 17918.252, "encoder_q-layer.0": 12331.4678, "encoder_q-layer.1": 12496.5938, "encoder_q-layer.10": 21540.0586, "encoder_q-layer.11": 38962.7461, "encoder_q-layer.2": 13569.8164, "encoder_q-layer.3": 13788.7607, "encoder_q-layer.4": 14312.4961, "encoder_q-layer.5": 13719.8779, "encoder_q-layer.6": 14941.3838, "encoder_q-layer.7": 16001.6758, "encoder_q-layer.8": 18050.1836, "encoder_q-layer.9": 16955.5332, "epoch": 0.25, "inbatch_neg_score": 48.1242, "inbatch_pos_score": 48.625, "learning_rate": 4.155555555555556e-05, "loss": 2.4485, "norm_diff": 0.0245, "num_tokens_overlap": 5.5886, "num_tokens_union": 54.9685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26876.5742, "preclip_grad_norm_avg": 0.0002, "query_norm": 7.0245, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7329, "sent_len_1": 66.7196, "sent_len_max_0": 18.8575, "sent_len_max_1": 190.015, "stdk": 0.0419, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 25200 }, { "accuracy": 53.418, "doc_norm": 7.0419, "encoder_q-embeddings": 19286.1309, "encoder_q-layer.0": 12787.4346, "encoder_q-layer.1": 13038.0439, "encoder_q-layer.10": 22982.0391, "encoder_q-layer.11": 42930.2188, "encoder_q-layer.2": 13940.5615, "encoder_q-layer.3": 14069.9111, "encoder_q-layer.4": 14475.1387, "encoder_q-layer.5": 14118.0918, "encoder_q-layer.6": 15225.4697, "encoder_q-layer.7": 16434.457, "encoder_q-layer.8": 18350.8242, "encoder_q-layer.9": 16828.6406, "epoch": 0.25, "inbatch_neg_score": 48.0699, "inbatch_pos_score": 48.5625, "learning_rate": 4.15e-05, "loss": 2.3925, "norm_diff": 0.0245, "num_tokens_overlap": 5.585, "num_tokens_union": 55.1072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28745.7512, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0173, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7303, "sent_len_1": 66.887, "sent_len_max_0": 18.87, "sent_len_max_1": 189.8525, "stdk": 0.0414, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 25300 }, { "accuracy": 54.3945, "doc_norm": 7.0478, "encoder_q-embeddings": 18250.998, "encoder_q-layer.0": 12924.502, "encoder_q-layer.1": 12947.2998, "encoder_q-layer.10": 21036.1113, "encoder_q-layer.11": 42701.4023, "encoder_q-layer.2": 14170.5166, "encoder_q-layer.3": 14238.4824, "encoder_q-layer.4": 14411.0928, "encoder_q-layer.5": 14224.167, "encoder_q-layer.6": 15121.3428, "encoder_q-layer.7": 15809.6914, "encoder_q-layer.8": 17509.1172, "encoder_q-layer.9": 16847.8125, "epoch": 0.25, "inbatch_neg_score": 48.1309, "inbatch_pos_score": 48.625, "learning_rate": 4.144444444444445e-05, "loss": 2.4366, "norm_diff": 0.0262, "num_tokens_overlap": 5.5857, "num_tokens_union": 55.0595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27595.9819, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0216, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7454, "sent_len_1": 66.8704, "sent_len_max_0": 18.8475, "sent_len_max_1": 190.8988, "stdk": 0.0419, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 25400 }, { "accuracy": 55.3711, "doc_norm": 7.0465, "encoder_q-embeddings": 19273.707, "encoder_q-layer.0": 13517.8281, "encoder_q-layer.1": 13647.8027, "encoder_q-layer.10": 20704.2578, "encoder_q-layer.11": 42315.9297, "encoder_q-layer.2": 14713.9863, "encoder_q-layer.3": 14859.6504, "encoder_q-layer.4": 15118.334, "encoder_q-layer.5": 14482.0303, "encoder_q-layer.6": 14809.6787, "encoder_q-layer.7": 16235.623, "encoder_q-layer.8": 17034.7734, "encoder_q-layer.9": 16278.0586, "epoch": 0.25, "inbatch_neg_score": 48.1089, "inbatch_pos_score": 48.625, "learning_rate": 4.138888888888889e-05, "loss": 2.4449, "norm_diff": 0.0242, "num_tokens_overlap": 5.5749, "num_tokens_union": 54.9288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28619.3244, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0223, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7079, "sent_len_1": 66.6582, "sent_len_max_0": 18.83, "sent_len_max_1": 189.7012, "stdk": 0.0421, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 25500 }, { "accuracy": 55.0781, "doc_norm": 7.0469, "encoder_q-embeddings": 17859.0273, "encoder_q-layer.0": 12556.5215, "encoder_q-layer.1": 12811.6875, "encoder_q-layer.10": 22058.9297, "encoder_q-layer.11": 40185.3984, "encoder_q-layer.2": 13761.7363, "encoder_q-layer.3": 14122.3428, "encoder_q-layer.4": 14498.5791, "encoder_q-layer.5": 14092.0107, "encoder_q-layer.6": 14829.5508, "encoder_q-layer.7": 15890.7197, "encoder_q-layer.8": 18211.1289, "encoder_q-layer.9": 16496.627, "epoch": 0.25, "inbatch_neg_score": 48.138, "inbatch_pos_score": 48.625, "learning_rate": 4.133333333333333e-05, "loss": 2.4269, "norm_diff": 0.0221, "num_tokens_overlap": 5.5849, "num_tokens_union": 55.0085, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26985.2967, "preclip_grad_norm_avg": 0.0002, "query_norm": 7.0247, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7268, "sent_len_1": 66.6907, "sent_len_max_0": 18.8313, "sent_len_max_1": 187.0538, "stdk": 0.0428, "stdq": 0.0402, "stdqueue_k": 0.0, "step": 25600 }, { "accuracy": 55.0781, "doc_norm": 7.0486, "encoder_q-embeddings": 19107.1699, "encoder_q-layer.0": 13253.9443, "encoder_q-layer.1": 13567.5752, "encoder_q-layer.10": 21832.5605, "encoder_q-layer.11": 42700.9883, "encoder_q-layer.2": 14860.7295, "encoder_q-layer.3": 14800.6572, "encoder_q-layer.4": 15098.1885, "encoder_q-layer.5": 14389.7227, "encoder_q-layer.6": 15883.4619, "encoder_q-layer.7": 16155.6367, "encoder_q-layer.8": 18547.7617, "encoder_q-layer.9": 17091.9512, "epoch": 0.25, "inbatch_neg_score": 48.1729, "inbatch_pos_score": 48.6562, "learning_rate": 4.127777777777778e-05, "loss": 2.4109, "norm_diff": 0.0218, "num_tokens_overlap": 5.5847, "num_tokens_union": 54.9674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28428.3337, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0267, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7026, "sent_len_1": 66.7576, "sent_len_max_0": 18.8625, "sent_len_max_1": 188.9062, "stdk": 0.0408, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 25700 }, { "accuracy": 52.0508, "doc_norm": 7.0424, "encoder_q-embeddings": 18392.6367, "encoder_q-layer.0": 13073.1162, "encoder_q-layer.1": 13828.8135, "encoder_q-layer.10": 31172.7168, "encoder_q-layer.11": 48673.8945, "encoder_q-layer.2": 14738.4131, "encoder_q-layer.3": 14641.3916, "encoder_q-layer.4": 15166.6562, "encoder_q-layer.5": 14723.5137, "encoder_q-layer.6": 16478.0254, "encoder_q-layer.7": 17324.5039, "encoder_q-layer.8": 20757.2363, "encoder_q-layer.9": 19851.9863, "epoch": 0.25, "inbatch_neg_score": 48.1065, "inbatch_pos_score": 48.5625, "learning_rate": 4.1222222222222224e-05, "loss": 2.462, "norm_diff": 0.0214, "num_tokens_overlap": 5.5705, "num_tokens_union": 54.9555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31188.6584, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.021, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7201, "sent_len_1": 66.7236, "sent_len_max_0": 18.885, "sent_len_max_1": 189.3413, "stdk": 0.0415, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 25800 }, { "accuracy": 56.8359, "doc_norm": 7.0442, "encoder_q-embeddings": 18750.0176, "encoder_q-layer.0": 13068.5654, "encoder_q-layer.1": 13309.4736, "encoder_q-layer.10": 20729.2695, "encoder_q-layer.11": 40695.3164, "encoder_q-layer.2": 14805.7393, "encoder_q-layer.3": 14775.2598, "encoder_q-layer.4": 14582.8311, "encoder_q-layer.5": 14125.916, "encoder_q-layer.6": 15893.5137, "encoder_q-layer.7": 16154.4678, "encoder_q-layer.8": 18341.002, "encoder_q-layer.9": 17049.7383, "epoch": 0.25, "inbatch_neg_score": 48.0772, "inbatch_pos_score": 48.5625, "learning_rate": 4.116666666666667e-05, "loss": 2.4339, "norm_diff": 0.0228, "num_tokens_overlap": 5.5922, "num_tokens_union": 55.0221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27991.5157, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0215, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7642, "sent_len_1": 66.7379, "sent_len_max_0": 18.8012, "sent_len_max_1": 188.43, "stdk": 0.0424, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 25900 }, { "accuracy": 55.7617, "doc_norm": 7.047, "encoder_q-embeddings": 18857.2012, "encoder_q-layer.0": 12922.4453, "encoder_q-layer.1": 13286.1875, "encoder_q-layer.10": 22726.3613, "encoder_q-layer.11": 38965.957, "encoder_q-layer.2": 14277.8096, "encoder_q-layer.3": 14229.5508, "encoder_q-layer.4": 14359.3066, "encoder_q-layer.5": 14606.5537, "encoder_q-layer.6": 16009.6484, "encoder_q-layer.7": 16714.5957, "encoder_q-layer.8": 18741.0234, "encoder_q-layer.9": 17257.5078, "epoch": 0.25, "inbatch_neg_score": 48.0675, "inbatch_pos_score": 48.5625, "learning_rate": 4.111111111111111e-05, "loss": 2.4743, "norm_diff": 0.0294, "num_tokens_overlap": 5.573, "num_tokens_union": 54.9857, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27888.4399, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0176, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.7079, "sent_len_max_0": 18.8212, "sent_len_max_1": 189.0775, "stdk": 0.0433, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 26000 }, { "accuracy": 54.2969, "doc_norm": 7.0312, "encoder_q-embeddings": 18771.6152, "encoder_q-layer.0": 12944.8926, "encoder_q-layer.1": 13467.6729, "encoder_q-layer.10": 25571.1797, "encoder_q-layer.11": 42376.8984, "encoder_q-layer.2": 14257.1875, "encoder_q-layer.3": 14893.8438, "encoder_q-layer.4": 14653.6191, "encoder_q-layer.5": 14308.0498, "encoder_q-layer.6": 14969.208, "encoder_q-layer.7": 15968.8682, "encoder_q-layer.8": 18995.2539, "encoder_q-layer.9": 17118.9551, "epoch": 0.25, "inbatch_neg_score": 47.8787, "inbatch_pos_score": 48.375, "learning_rate": 4.105555555555556e-05, "loss": 2.4632, "norm_diff": 0.0251, "num_tokens_overlap": 5.5744, "num_tokens_union": 54.9239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28426.1219, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0061, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7272, "sent_len_1": 66.6245, "sent_len_max_0": 18.8938, "sent_len_max_1": 188.6, "stdk": 0.0426, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 26100 }, { "accuracy": 54.1992, "doc_norm": 7.0225, "encoder_q-embeddings": 18085.502, "encoder_q-layer.0": 12610.3594, "encoder_q-layer.1": 13134.792, "encoder_q-layer.10": 23851.207, "encoder_q-layer.11": 41893.9141, "encoder_q-layer.2": 14147.8037, "encoder_q-layer.3": 13890.5088, "encoder_q-layer.4": 14094.4238, "encoder_q-layer.5": 13473.2148, "encoder_q-layer.6": 14400.6006, "encoder_q-layer.7": 15052.4385, "encoder_q-layer.8": 17693.627, "encoder_q-layer.9": 17042.7793, "epoch": 0.26, "inbatch_neg_score": 47.8433, "inbatch_pos_score": 48.3125, "learning_rate": 4.1e-05, "loss": 2.3859, "norm_diff": 0.0226, "num_tokens_overlap": 5.5738, "num_tokens_union": 54.9387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27459.9772, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9999, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7336, "sent_len_1": 66.6917, "sent_len_max_0": 18.825, "sent_len_max_1": 191.4575, "stdk": 0.0413, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 26200 }, { "accuracy": 52.9297, "doc_norm": 7.0233, "encoder_q-embeddings": 19462.4629, "encoder_q-layer.0": 12949.4844, "encoder_q-layer.1": 13188.5693, "encoder_q-layer.10": 26264.0918, "encoder_q-layer.11": 42716.0742, "encoder_q-layer.2": 14516.5625, "encoder_q-layer.3": 15287.9258, "encoder_q-layer.4": 15519.7148, "encoder_q-layer.5": 14794.1494, "encoder_q-layer.6": 15787.749, "encoder_q-layer.7": 17343.1777, "encoder_q-layer.8": 19413.3789, "encoder_q-layer.9": 18216.3242, "epoch": 0.26, "inbatch_neg_score": 47.8237, "inbatch_pos_score": 48.3125, "learning_rate": 4.094444444444445e-05, "loss": 2.4007, "norm_diff": 0.0217, "num_tokens_overlap": 5.5836, "num_tokens_union": 55.0433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29470.4251, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0017, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7234, "sent_len_1": 66.8757, "sent_len_max_0": 18.8637, "sent_len_max_1": 189.6788, "stdk": 0.0413, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 26300 }, { "accuracy": 54.0039, "doc_norm": 7.0317, "encoder_q-embeddings": 19041.8633, "encoder_q-layer.0": 12865.4492, "encoder_q-layer.1": 13799.8428, "encoder_q-layer.10": 25963.7539, "encoder_q-layer.11": 48421.293, "encoder_q-layer.2": 15128.5459, "encoder_q-layer.3": 15098.1104, "encoder_q-layer.4": 15172.9561, "encoder_q-layer.5": 15191.9775, "encoder_q-layer.6": 16923.7227, "encoder_q-layer.7": 18310.9141, "encoder_q-layer.8": 21856.3828, "encoder_q-layer.9": 19483.3848, "epoch": 0.26, "inbatch_neg_score": 47.8643, "inbatch_pos_score": 48.3438, "learning_rate": 4.088888888888889e-05, "loss": 2.4038, "norm_diff": 0.0262, "num_tokens_overlap": 5.5723, "num_tokens_union": 55.0519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30236.1829, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0054, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.729, "sent_len_1": 66.8004, "sent_len_max_0": 18.7662, "sent_len_max_1": 188.165, "stdk": 0.0431, "stdq": 0.0402, "stdqueue_k": 0.0, "step": 26400 }, { "accuracy": 54.5898, "doc_norm": 7.0288, "encoder_q-embeddings": 19544.666, "encoder_q-layer.0": 13581.8848, "encoder_q-layer.1": 14042.1514, "encoder_q-layer.10": 24853.3789, "encoder_q-layer.11": 43978.6875, "encoder_q-layer.2": 15242.9121, "encoder_q-layer.3": 15422.625, "encoder_q-layer.4": 16282.2324, "encoder_q-layer.5": 15010.0889, "encoder_q-layer.6": 15988.3047, "encoder_q-layer.7": 16834.1406, "encoder_q-layer.8": 18605.4531, "encoder_q-layer.9": 18468.6836, "epoch": 0.26, "inbatch_neg_score": 47.8291, "inbatch_pos_score": 48.3438, "learning_rate": 4.0833333333333334e-05, "loss": 2.3942, "norm_diff": 0.0226, "num_tokens_overlap": 5.5704, "num_tokens_union": 54.9316, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29503.7477, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0062, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7032, "sent_len_1": 66.5883, "sent_len_max_0": 18.9062, "sent_len_max_1": 186.7525, "stdk": 0.0435, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 26500 }, { "accuracy": 54.1016, "doc_norm": 7.0295, "encoder_q-embeddings": 18469.4824, "encoder_q-layer.0": 12861.3994, "encoder_q-layer.1": 12986.123, "encoder_q-layer.10": 22082.0762, "encoder_q-layer.11": 41345.3828, "encoder_q-layer.2": 14113.6846, "encoder_q-layer.3": 14331.8545, "encoder_q-layer.4": 15058.4209, "encoder_q-layer.5": 14757.5176, "encoder_q-layer.6": 16135.0312, "encoder_q-layer.7": 18239.5801, "encoder_q-layer.8": 20241.4395, "encoder_q-layer.9": 16992.8242, "epoch": 0.26, "inbatch_neg_score": 47.8928, "inbatch_pos_score": 48.375, "learning_rate": 4.0777777777777783e-05, "loss": 2.3919, "norm_diff": 0.023, "num_tokens_overlap": 5.582, "num_tokens_union": 54.851, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28330.6031, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0065, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7348, "sent_len_1": 66.4834, "sent_len_max_0": 18.9612, "sent_len_max_1": 188.8525, "stdk": 0.0418, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 26600 }, { "accuracy": 53.3203, "doc_norm": 7.0304, "encoder_q-embeddings": 19524.0332, "encoder_q-layer.0": 13581.625, "encoder_q-layer.1": 13689.3193, "encoder_q-layer.10": 28156.832, "encoder_q-layer.11": 48600.8711, "encoder_q-layer.2": 14920.6553, "encoder_q-layer.3": 15261.8984, "encoder_q-layer.4": 15848.1475, "encoder_q-layer.5": 14914.9307, "encoder_q-layer.6": 16005.2354, "encoder_q-layer.7": 17780.291, "encoder_q-layer.8": 20283.9785, "encoder_q-layer.9": 19176.8203, "epoch": 0.26, "inbatch_neg_score": 47.807, "inbatch_pos_score": 48.3125, "learning_rate": 4.0722222222222226e-05, "loss": 2.4, "norm_diff": 0.0275, "num_tokens_overlap": 5.5775, "num_tokens_union": 54.8778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30943.3659, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0029, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6947, "sent_len_1": 66.6133, "sent_len_max_0": 18.9138, "sent_len_max_1": 188.7088, "stdk": 0.043, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 26700 }, { "accuracy": 57.4219, "doc_norm": 7.0247, "encoder_q-embeddings": 18964.0488, "encoder_q-layer.0": 12736.916, "encoder_q-layer.1": 12980.1709, "encoder_q-layer.10": 26561.3066, "encoder_q-layer.11": 45945.5078, "encoder_q-layer.2": 14105.9922, "encoder_q-layer.3": 14183.3184, "encoder_q-layer.4": 14736.2617, "encoder_q-layer.5": 14126.75, "encoder_q-layer.6": 16688.1953, "encoder_q-layer.7": 17740.7969, "encoder_q-layer.8": 20205.7793, "encoder_q-layer.9": 18660.8262, "epoch": 0.26, "inbatch_neg_score": 47.8189, "inbatch_pos_score": 48.3125, "learning_rate": 4.066666666666667e-05, "loss": 2.4268, "norm_diff": 0.0242, "num_tokens_overlap": 5.5708, "num_tokens_union": 54.8963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29577.9776, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0006, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7209, "sent_len_1": 66.6288, "sent_len_max_0": 18.9262, "sent_len_max_1": 190.5737, "stdk": 0.0422, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 26800 }, { "accuracy": 53.9062, "doc_norm": 7.0284, "encoder_q-embeddings": 19720.959, "encoder_q-layer.0": 13129.7666, "encoder_q-layer.1": 13408.6875, "encoder_q-layer.10": 27688.75, "encoder_q-layer.11": 44358.0469, "encoder_q-layer.2": 14785.0303, "encoder_q-layer.3": 15108.6973, "encoder_q-layer.4": 15532.4248, "encoder_q-layer.5": 15164.7793, "encoder_q-layer.6": 16256.7539, "encoder_q-layer.7": 16798.4941, "encoder_q-layer.8": 19905.3047, "encoder_q-layer.9": 18314.0527, "epoch": 0.26, "inbatch_neg_score": 47.7871, "inbatch_pos_score": 48.3125, "learning_rate": 4.061111111111111e-05, "loss": 2.4015, "norm_diff": 0.0269, "num_tokens_overlap": 5.5671, "num_tokens_union": 55.0708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29766.6143, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0015, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7091, "sent_len_1": 66.829, "sent_len_max_0": 18.8438, "sent_len_max_1": 187.8537, "stdk": 0.0441, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 26900 }, { "accuracy": 55.8594, "doc_norm": 7.0219, "encoder_q-embeddings": 19058.5605, "encoder_q-layer.0": 13332.8516, "encoder_q-layer.1": 13050.626, "encoder_q-layer.10": 23770.5078, "encoder_q-layer.11": 44214.1719, "encoder_q-layer.2": 14228.873, "encoder_q-layer.3": 14375.0898, "encoder_q-layer.4": 14680.7422, "encoder_q-layer.5": 14156.3232, "encoder_q-layer.6": 15612.0752, "encoder_q-layer.7": 16482.7051, "encoder_q-layer.8": 19443.0371, "encoder_q-layer.9": 17302.4492, "epoch": 0.26, "inbatch_neg_score": 47.7276, "inbatch_pos_score": 48.2188, "learning_rate": 4.055555555555556e-05, "loss": 2.4375, "norm_diff": 0.0274, "num_tokens_overlap": 5.5809, "num_tokens_union": 55.1099, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28533.207, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9944, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7371, "sent_len_1": 66.8782, "sent_len_max_0": 18.8012, "sent_len_max_1": 188.7937, "stdk": 0.0428, "stdq": 0.039, "stdqueue_k": 0.0, "step": 27000 }, { "accuracy": 54.1016, "doc_norm": 7.0249, "encoder_q-embeddings": 19301.1367, "encoder_q-layer.0": 13503.7725, "encoder_q-layer.1": 13805.0557, "encoder_q-layer.10": 22452.2285, "encoder_q-layer.11": 42974.7344, "encoder_q-layer.2": 14653.5371, "encoder_q-layer.3": 14417.5498, "encoder_q-layer.4": 14481.7949, "encoder_q-layer.5": 14014.9424, "encoder_q-layer.6": 15151.1621, "encoder_q-layer.7": 16433.8613, "encoder_q-layer.8": 18671.8047, "encoder_q-layer.9": 17201.8496, "epoch": 0.26, "inbatch_neg_score": 47.7869, "inbatch_pos_score": 48.2812, "learning_rate": 4.05e-05, "loss": 2.4458, "norm_diff": 0.026, "num_tokens_overlap": 5.5742, "num_tokens_union": 55.0505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28886.9413, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9989, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7081, "sent_len_1": 66.8898, "sent_len_max_0": 18.7775, "sent_len_max_1": 190.8713, "stdk": 0.0417, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 27100 }, { "accuracy": 53.3203, "doc_norm": 7.0256, "encoder_q-embeddings": 18597.6582, "encoder_q-layer.0": 13270.1084, "encoder_q-layer.1": 13310.7871, "encoder_q-layer.10": 22803.2949, "encoder_q-layer.11": 43719.8789, "encoder_q-layer.2": 14613.0859, "encoder_q-layer.3": 14464.1035, "encoder_q-layer.4": 14524.457, "encoder_q-layer.5": 14521.5127, "encoder_q-layer.6": 15934.6104, "encoder_q-layer.7": 16413.3047, "encoder_q-layer.8": 18310.2422, "encoder_q-layer.9": 17322.3926, "epoch": 0.27, "inbatch_neg_score": 47.809, "inbatch_pos_score": 48.2812, "learning_rate": 4.0444444444444444e-05, "loss": 2.374, "norm_diff": 0.0243, "num_tokens_overlap": 5.5884, "num_tokens_union": 55.0855, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28538.0335, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0013, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.739, "sent_len_1": 66.8367, "sent_len_max_0": 18.7175, "sent_len_max_1": 188.735, "stdk": 0.0423, "stdq": 0.039, "stdqueue_k": 0.0, "step": 27200 }, { "accuracy": 56.0547, "doc_norm": 7.0256, "encoder_q-embeddings": 19657.207, "encoder_q-layer.0": 13690.042, "encoder_q-layer.1": 13632.8281, "encoder_q-layer.10": 22837.3828, "encoder_q-layer.11": 42945.5156, "encoder_q-layer.2": 14658.5127, "encoder_q-layer.3": 14781.1934, "encoder_q-layer.4": 15677.5098, "encoder_q-layer.5": 15196.8389, "encoder_q-layer.6": 15630.1895, "encoder_q-layer.7": 16458.7402, "encoder_q-layer.8": 17953.6719, "encoder_q-layer.9": 17041.6836, "epoch": 0.27, "inbatch_neg_score": 47.7931, "inbatch_pos_score": 48.2812, "learning_rate": 4.038888888888889e-05, "loss": 2.3893, "norm_diff": 0.027, "num_tokens_overlap": 5.5868, "num_tokens_union": 55.0653, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28503.9452, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9986, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7539, "sent_len_1": 66.8051, "sent_len_max_0": 18.8962, "sent_len_max_1": 190.6725, "stdk": 0.0422, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 27300 }, { "accuracy": 55.2734, "doc_norm": 7.0251, "encoder_q-embeddings": 19073.5391, "encoder_q-layer.0": 12647.084, "encoder_q-layer.1": 12847.9863, "encoder_q-layer.10": 22007.457, "encoder_q-layer.11": 45983.0469, "encoder_q-layer.2": 13918.9736, "encoder_q-layer.3": 14149.3496, "encoder_q-layer.4": 14302.6934, "encoder_q-layer.5": 14517.1426, "encoder_q-layer.6": 15357.8672, "encoder_q-layer.7": 17097.584, "encoder_q-layer.8": 18358.459, "encoder_q-layer.9": 16811.1309, "epoch": 0.27, "inbatch_neg_score": 47.7889, "inbatch_pos_score": 48.2812, "learning_rate": 4.0333333333333336e-05, "loss": 2.3863, "norm_diff": 0.0269, "num_tokens_overlap": 5.5843, "num_tokens_union": 55.1098, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28469.9638, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9982, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7402, "sent_len_1": 66.862, "sent_len_max_0": 18.7812, "sent_len_max_1": 189.1775, "stdk": 0.042, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 27400 }, { "accuracy": 54.9805, "doc_norm": 7.0255, "encoder_q-embeddings": 18762.5059, "encoder_q-layer.0": 13007.4102, "encoder_q-layer.1": 13079.6475, "encoder_q-layer.10": 23538.5117, "encoder_q-layer.11": 43171.0977, "encoder_q-layer.2": 14335.6729, "encoder_q-layer.3": 13877.3945, "encoder_q-layer.4": 14461.5332, "encoder_q-layer.5": 14266.1973, "encoder_q-layer.6": 15172.7666, "encoder_q-layer.7": 15963.4844, "encoder_q-layer.8": 18674.9844, "encoder_q-layer.9": 16942.6309, "epoch": 0.27, "inbatch_neg_score": 47.8235, "inbatch_pos_score": 48.3125, "learning_rate": 4.027777777777778e-05, "loss": 2.4045, "norm_diff": 0.0259, "num_tokens_overlap": 5.5908, "num_tokens_union": 55.1333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28448.5425, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9995, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7484, "sent_len_1": 66.9351, "sent_len_max_0": 18.7875, "sent_len_max_1": 189.3313, "stdk": 0.0425, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 27500 }, { "accuracy": 53.8086, "doc_norm": 7.0214, "encoder_q-embeddings": 19140.1367, "encoder_q-layer.0": 13502.8916, "encoder_q-layer.1": 13382.8389, "encoder_q-layer.10": 22919.4023, "encoder_q-layer.11": 42862.9727, "encoder_q-layer.2": 14661.7637, "encoder_q-layer.3": 14494.3887, "encoder_q-layer.4": 14517.2168, "encoder_q-layer.5": 14035.1104, "encoder_q-layer.6": 15870.2188, "encoder_q-layer.7": 16915.3984, "encoder_q-layer.8": 18173.9336, "encoder_q-layer.9": 17307.9609, "epoch": 0.27, "inbatch_neg_score": 47.7617, "inbatch_pos_score": 48.25, "learning_rate": 4.022222222222222e-05, "loss": 2.4386, "norm_diff": 0.0253, "num_tokens_overlap": 5.5921, "num_tokens_union": 55.0946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28810.0749, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9961, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7314, "sent_len_1": 66.9115, "sent_len_max_0": 19.03, "sent_len_max_1": 189.4263, "stdk": 0.0423, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 27600 }, { "accuracy": 56.25, "doc_norm": 7.0175, "encoder_q-embeddings": 18369.5215, "encoder_q-layer.0": 13045.4707, "encoder_q-layer.1": 13326.9004, "encoder_q-layer.10": 23880.9512, "encoder_q-layer.11": 48191.1094, "encoder_q-layer.2": 14782.1064, "encoder_q-layer.3": 14587.7031, "encoder_q-layer.4": 15084.7529, "encoder_q-layer.5": 14055.0918, "encoder_q-layer.6": 14985.0625, "encoder_q-layer.7": 16259.1045, "encoder_q-layer.8": 18656.5234, "encoder_q-layer.9": 17318.0879, "epoch": 0.27, "inbatch_neg_score": 47.7092, "inbatch_pos_score": 48.1875, "learning_rate": 4.016666666666667e-05, "loss": 2.3723, "norm_diff": 0.0235, "num_tokens_overlap": 5.5744, "num_tokens_union": 55.1254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29339.2215, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.994, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6886, "sent_len_1": 66.997, "sent_len_max_0": 18.9438, "sent_len_max_1": 190.6475, "stdk": 0.0429, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 27700 }, { "accuracy": 55.4688, "doc_norm": 7.0191, "encoder_q-embeddings": 18448.623, "encoder_q-layer.0": 12655.9004, "encoder_q-layer.1": 12899.7754, "encoder_q-layer.10": 20747.627, "encoder_q-layer.11": 42222.0352, "encoder_q-layer.2": 13961.9219, "encoder_q-layer.3": 14106.9854, "encoder_q-layer.4": 15128.9316, "encoder_q-layer.5": 13961.9697, "encoder_q-layer.6": 14745.2402, "encoder_q-layer.7": 15596.8711, "encoder_q-layer.8": 17092.5469, "encoder_q-layer.9": 16776.9883, "epoch": 0.27, "inbatch_neg_score": 47.7125, "inbatch_pos_score": 48.1875, "learning_rate": 4.011111111111111e-05, "loss": 2.3877, "norm_diff": 0.0258, "num_tokens_overlap": 5.5789, "num_tokens_union": 55.1237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27477.8211, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9933, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7065, "sent_len_1": 67.0486, "sent_len_max_0": 18.875, "sent_len_max_1": 191.9475, "stdk": 0.0427, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 27800 }, { "accuracy": 54.5898, "doc_norm": 7.0193, "encoder_q-embeddings": 18548.6641, "encoder_q-layer.0": 12914.5791, "encoder_q-layer.1": 13069.542, "encoder_q-layer.10": 21762.0645, "encoder_q-layer.11": 46370.2891, "encoder_q-layer.2": 14320.1123, "encoder_q-layer.3": 14131.4785, "encoder_q-layer.4": 14766.373, "encoder_q-layer.5": 14200.9062, "encoder_q-layer.6": 14769.3047, "encoder_q-layer.7": 16127.707, "encoder_q-layer.8": 17946.6367, "encoder_q-layer.9": 16638.5664, "epoch": 0.27, "inbatch_neg_score": 47.6982, "inbatch_pos_score": 48.1875, "learning_rate": 4.0055555555555554e-05, "loss": 2.381, "norm_diff": 0.0256, "num_tokens_overlap": 5.5786, "num_tokens_union": 55.0488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28525.5938, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9937, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7166, "sent_len_1": 66.8558, "sent_len_max_0": 18.78, "sent_len_max_1": 188.1937, "stdk": 0.0427, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 27900 }, { "accuracy": 56.7383, "doc_norm": 7.0233, "encoder_q-embeddings": 17783.8145, "encoder_q-layer.0": 12229.293, "encoder_q-layer.1": 12464.0342, "encoder_q-layer.10": 20972.541, "encoder_q-layer.11": 38217.3867, "encoder_q-layer.2": 13651.5576, "encoder_q-layer.3": 13811.7656, "encoder_q-layer.4": 14012.0264, "encoder_q-layer.5": 13874.8018, "encoder_q-layer.6": 14811.1504, "encoder_q-layer.7": 15348.4385, "encoder_q-layer.8": 17185.4297, "encoder_q-layer.9": 15733.5264, "epoch": 0.27, "inbatch_neg_score": 47.7888, "inbatch_pos_score": 48.2812, "learning_rate": 4e-05, "loss": 2.407, "norm_diff": 0.0241, "num_tokens_overlap": 5.5729, "num_tokens_union": 54.9292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26512.4345, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9992, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7011, "sent_len_1": 66.6573, "sent_len_max_0": 18.88, "sent_len_max_1": 189.245, "stdk": 0.0421, "stdq": 0.039, "stdqueue_k": 0.0, "step": 28000 }, { "accuracy": 55.957, "doc_norm": 7.0265, "encoder_q-embeddings": 18882.377, "encoder_q-layer.0": 13258.5225, "encoder_q-layer.1": 13432.2148, "encoder_q-layer.10": 21692.3789, "encoder_q-layer.11": 41326.8125, "encoder_q-layer.2": 14216.6396, "encoder_q-layer.3": 14439.7324, "encoder_q-layer.4": 14680.6807, "encoder_q-layer.5": 14768.6055, "encoder_q-layer.6": 15252.4023, "encoder_q-layer.7": 16328.4961, "encoder_q-layer.8": 19972.0176, "encoder_q-layer.9": 16878.7031, "epoch": 0.27, "inbatch_neg_score": 47.7877, "inbatch_pos_score": 48.3125, "learning_rate": 3.9944444444444446e-05, "loss": 2.4331, "norm_diff": 0.026, "num_tokens_overlap": 5.5851, "num_tokens_union": 55.0618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28505.3261, "preclip_grad_norm_avg": 0.0003, "query_norm": 7.0005, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7319, "sent_len_1": 66.8751, "sent_len_max_0": 18.855, "sent_len_max_1": 190.7575, "stdk": 0.0424, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 28100 }, { "accuracy": 55.6641, "doc_norm": 7.0243, "encoder_q-embeddings": 19945.6426, "encoder_q-layer.0": 13044.667, "encoder_q-layer.1": 13331.5918, "encoder_q-layer.10": 25032.4707, "encoder_q-layer.11": 42842.082, "encoder_q-layer.2": 14456.3193, "encoder_q-layer.3": 14767.0752, "encoder_q-layer.4": 15445.292, "encoder_q-layer.5": 14349.0322, "encoder_q-layer.6": 15269.001, "encoder_q-layer.7": 15972.1211, "encoder_q-layer.8": 18068.7715, "encoder_q-layer.9": 17479.0254, "epoch": 0.28, "inbatch_neg_score": 47.7173, "inbatch_pos_score": 48.2188, "learning_rate": 3.9888888888888895e-05, "loss": 2.3699, "norm_diff": 0.0292, "num_tokens_overlap": 5.5818, "num_tokens_union": 55.0507, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28981.5718, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9951, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7257, "sent_len_1": 66.843, "sent_len_max_0": 18.7538, "sent_len_max_1": 189.355, "stdk": 0.0438, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 28200 }, { "accuracy": 55.0781, "doc_norm": 7.0161, "encoder_q-embeddings": 18147.4609, "encoder_q-layer.0": 12674.877, "encoder_q-layer.1": 12993.7539, "encoder_q-layer.10": 24519.0293, "encoder_q-layer.11": 48362.8828, "encoder_q-layer.2": 14113.915, "encoder_q-layer.3": 14382.0186, "encoder_q-layer.4": 14364.9277, "encoder_q-layer.5": 14151.1768, "encoder_q-layer.6": 15713.999, "encoder_q-layer.7": 16737.6289, "encoder_q-layer.8": 20723.6055, "encoder_q-layer.9": 17401.1562, "epoch": 0.28, "inbatch_neg_score": 47.6836, "inbatch_pos_score": 48.1875, "learning_rate": 3.983333333333333e-05, "loss": 2.432, "norm_diff": 0.024, "num_tokens_overlap": 5.5504, "num_tokens_union": 54.8338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29361.607, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9921, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6826, "sent_len_1": 66.4843, "sent_len_max_0": 18.8438, "sent_len_max_1": 189.8925, "stdk": 0.0422, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 28300 }, { "accuracy": 54.5898, "doc_norm": 7.0156, "encoder_q-embeddings": 18798.1387, "encoder_q-layer.0": 13252.084, "encoder_q-layer.1": 13463.251, "encoder_q-layer.10": 24801.2676, "encoder_q-layer.11": 39941.2344, "encoder_q-layer.2": 14542.0156, "encoder_q-layer.3": 14401.3545, "encoder_q-layer.4": 15221.1963, "encoder_q-layer.5": 14296.6895, "encoder_q-layer.6": 15399.1709, "encoder_q-layer.7": 16009.4609, "encoder_q-layer.8": 19375.25, "encoder_q-layer.9": 18510.0469, "epoch": 0.28, "inbatch_neg_score": 47.6068, "inbatch_pos_score": 48.125, "learning_rate": 3.977777777777778e-05, "loss": 2.4111, "norm_diff": 0.0284, "num_tokens_overlap": 5.5794, "num_tokens_union": 54.97, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28285.1909, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9872, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7466, "sent_len_1": 66.6722, "sent_len_max_0": 18.95, "sent_len_max_1": 188.3187, "stdk": 0.0429, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 28400 }, { "accuracy": 54.1992, "doc_norm": 7.0047, "encoder_q-embeddings": 18848.4707, "encoder_q-layer.0": 13143.8369, "encoder_q-layer.1": 13943.791, "encoder_q-layer.10": 22050.4531, "encoder_q-layer.11": 44564.4531, "encoder_q-layer.2": 14722.041, "encoder_q-layer.3": 14686.0049, "encoder_q-layer.4": 14459.8623, "encoder_q-layer.5": 14214.6758, "encoder_q-layer.6": 15157.9307, "encoder_q-layer.7": 17561.7969, "encoder_q-layer.8": 20837.1719, "encoder_q-layer.9": 17223.9238, "epoch": 0.28, "inbatch_neg_score": 47.5223, "inbatch_pos_score": 48.0, "learning_rate": 3.972222222222222e-05, "loss": 2.4033, "norm_diff": 0.0242, "num_tokens_overlap": 5.5795, "num_tokens_union": 55.0948, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29278.3805, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9805, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7264, "sent_len_1": 66.8724, "sent_len_max_0": 18.775, "sent_len_max_1": 190.3575, "stdk": 0.0423, "stdq": 0.039, "stdqueue_k": 0.0, "step": 28500 }, { "accuracy": 53.418, "doc_norm": 7.0006, "encoder_q-embeddings": 18246.3105, "encoder_q-layer.0": 12818.2344, "encoder_q-layer.1": 13353.9434, "encoder_q-layer.10": 21455.3438, "encoder_q-layer.11": 45023.3359, "encoder_q-layer.2": 14551.5732, "encoder_q-layer.3": 14449.3965, "encoder_q-layer.4": 14952.9775, "encoder_q-layer.5": 14367.9014, "encoder_q-layer.6": 15497.5107, "encoder_q-layer.7": 16367.6719, "encoder_q-layer.8": 18384.4141, "encoder_q-layer.9": 16742.5527, "epoch": 0.28, "inbatch_neg_score": 47.504, "inbatch_pos_score": 47.9688, "learning_rate": 3.966666666666667e-05, "loss": 2.3793, "norm_diff": 0.0246, "num_tokens_overlap": 5.584, "num_tokens_union": 54.9509, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28735.0913, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.976, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7255, "sent_len_1": 66.6734, "sent_len_max_0": 18.8712, "sent_len_max_1": 187.7025, "stdk": 0.0427, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 28600 }, { "accuracy": 53.2227, "doc_norm": 7.0007, "encoder_q-embeddings": 18102.8496, "encoder_q-layer.0": 12777.4971, "encoder_q-layer.1": 13006.0537, "encoder_q-layer.10": 21859.2988, "encoder_q-layer.11": 40861.8359, "encoder_q-layer.2": 13952.0547, "encoder_q-layer.3": 13740.8691, "encoder_q-layer.4": 14229.4785, "encoder_q-layer.5": 13988.3359, "encoder_q-layer.6": 15215.7295, "encoder_q-layer.7": 15434.459, "encoder_q-layer.8": 18899.0996, "encoder_q-layer.9": 16365.1816, "epoch": 0.28, "inbatch_neg_score": 47.5162, "inbatch_pos_score": 47.9688, "learning_rate": 3.961111111111111e-05, "loss": 2.413, "norm_diff": 0.0217, "num_tokens_overlap": 5.5704, "num_tokens_union": 54.9708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27540.4143, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.979, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7173, "sent_len_1": 66.6598, "sent_len_max_0": 18.8313, "sent_len_max_1": 188.9837, "stdk": 0.042, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 28700 }, { "accuracy": 54.9805, "doc_norm": 7.0068, "encoder_q-embeddings": 19720.0957, "encoder_q-layer.0": 13383.7275, "encoder_q-layer.1": 13577.4326, "encoder_q-layer.10": 21161.752, "encoder_q-layer.11": 41992.8555, "encoder_q-layer.2": 14440.2031, "encoder_q-layer.3": 14325.5068, "encoder_q-layer.4": 14744.1006, "encoder_q-layer.5": 14070.1787, "encoder_q-layer.6": 14714.2812, "encoder_q-layer.7": 15524.1846, "encoder_q-layer.8": 17538.9727, "encoder_q-layer.9": 16348.9424, "epoch": 0.28, "inbatch_neg_score": 47.5242, "inbatch_pos_score": 48.0, "learning_rate": 3.9555555555555556e-05, "loss": 2.4031, "norm_diff": 0.0258, "num_tokens_overlap": 5.5694, "num_tokens_union": 54.8741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28089.7385, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.981, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6904, "sent_len_1": 66.5973, "sent_len_max_0": 18.7825, "sent_len_max_1": 190.9762, "stdk": 0.0427, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 28800 }, { "accuracy": 53.125, "doc_norm": 7.0038, "encoder_q-embeddings": 18385.8906, "encoder_q-layer.0": 12765.9072, "encoder_q-layer.1": 13050.6465, "encoder_q-layer.10": 21308.0723, "encoder_q-layer.11": 38804.3672, "encoder_q-layer.2": 14199.375, "encoder_q-layer.3": 14208.1406, "encoder_q-layer.4": 14448.9277, "encoder_q-layer.5": 13708.1367, "encoder_q-layer.6": 15384.4834, "encoder_q-layer.7": 15889.1494, "encoder_q-layer.8": 18419.6211, "encoder_q-layer.9": 17264.9668, "epoch": 0.28, "inbatch_neg_score": 47.5004, "inbatch_pos_score": 48.0, "learning_rate": 3.9500000000000005e-05, "loss": 2.4263, "norm_diff": 0.0259, "num_tokens_overlap": 5.5597, "num_tokens_union": 54.9927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27215.5084, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9779, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.683, "sent_len_1": 66.8579, "sent_len_max_0": 18.8663, "sent_len_max_1": 189.7575, "stdk": 0.0428, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 28900 }, { "accuracy": 55.0781, "doc_norm": 7.0008, "encoder_q-embeddings": 17987.5742, "encoder_q-layer.0": 12817.5342, "encoder_q-layer.1": 12717.9648, "encoder_q-layer.10": 21353.4805, "encoder_q-layer.11": 40235.2734, "encoder_q-layer.2": 13957.3711, "encoder_q-layer.3": 13743.2061, "encoder_q-layer.4": 14020.3779, "encoder_q-layer.5": 13811.5957, "encoder_q-layer.6": 14644.7881, "encoder_q-layer.7": 15383.3848, "encoder_q-layer.8": 17996.0859, "encoder_q-layer.9": 16301.1934, "epoch": 0.28, "inbatch_neg_score": 47.443, "inbatch_pos_score": 47.9375, "learning_rate": 3.944444444444445e-05, "loss": 2.3137, "norm_diff": 0.0254, "num_tokens_overlap": 5.5885, "num_tokens_union": 54.936, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26931.3005, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9754, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7231, "sent_len_1": 66.6981, "sent_len_max_0": 18.8825, "sent_len_max_1": 189.735, "stdk": 0.0414, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 29000 }, { "accuracy": 53.9062, "doc_norm": 7.0079, "encoder_q-embeddings": 19064.9336, "encoder_q-layer.0": 13351.0879, "encoder_q-layer.1": 13799.7451, "encoder_q-layer.10": 26845.4453, "encoder_q-layer.11": 43340.3828, "encoder_q-layer.2": 14623.5225, "encoder_q-layer.3": 14810.8975, "encoder_q-layer.4": 15130.5127, "encoder_q-layer.5": 15079.0166, "encoder_q-layer.6": 16067.3232, "encoder_q-layer.7": 16773.5176, "encoder_q-layer.8": 18749.2031, "encoder_q-layer.9": 18294.0117, "epoch": 0.28, "inbatch_neg_score": 47.4949, "inbatch_pos_score": 48.0, "learning_rate": 3.938888888888889e-05, "loss": 2.4095, "norm_diff": 0.0279, "num_tokens_overlap": 5.5779, "num_tokens_union": 55.0033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29406.6724, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.98, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7129, "sent_len_1": 66.8038, "sent_len_max_0": 18.7775, "sent_len_max_1": 189.8212, "stdk": 0.0431, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 29100 }, { "accuracy": 55.2734, "doc_norm": 6.9959, "encoder_q-embeddings": 18318.1699, "encoder_q-layer.0": 13322.9736, "encoder_q-layer.1": 13915.1729, "encoder_q-layer.10": 22194.5938, "encoder_q-layer.11": 43782.4609, "encoder_q-layer.2": 14848.1592, "encoder_q-layer.3": 14582.0713, "encoder_q-layer.4": 14644.165, "encoder_q-layer.5": 13748.4434, "encoder_q-layer.6": 14656.4834, "encoder_q-layer.7": 15448.4873, "encoder_q-layer.8": 18136.3105, "encoder_q-layer.9": 16455.8594, "epoch": 0.29, "inbatch_neg_score": 47.4309, "inbatch_pos_score": 47.9062, "learning_rate": 3.933333333333333e-05, "loss": 2.3906, "norm_diff": 0.0224, "num_tokens_overlap": 5.5875, "num_tokens_union": 55.0089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28040.1046, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9736, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.7801, "sent_len_max_0": 18.865, "sent_len_max_1": 191.9625, "stdk": 0.0423, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 29200 }, { "accuracy": 54.4922, "doc_norm": 6.9935, "encoder_q-embeddings": 19317.832, "encoder_q-layer.0": 13475.5703, "encoder_q-layer.1": 13654.4355, "encoder_q-layer.10": 23201.8047, "encoder_q-layer.11": 43919.5039, "encoder_q-layer.2": 14440.1416, "encoder_q-layer.3": 14068.0811, "encoder_q-layer.4": 14084.3164, "encoder_q-layer.5": 13728.4746, "encoder_q-layer.6": 14883.5693, "encoder_q-layer.7": 15912.0225, "encoder_q-layer.8": 17767.8613, "encoder_q-layer.9": 17284.1113, "epoch": 0.29, "inbatch_neg_score": 47.3305, "inbatch_pos_score": 47.8438, "learning_rate": 3.927777777777778e-05, "loss": 2.4306, "norm_diff": 0.0249, "num_tokens_overlap": 5.5689, "num_tokens_union": 54.8529, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28322.1048, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9686, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6984, "sent_len_1": 66.5485, "sent_len_max_0": 18.82, "sent_len_max_1": 187.595, "stdk": 0.0428, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 29300 }, { "accuracy": 55.3711, "doc_norm": 6.9946, "encoder_q-embeddings": 19009.6309, "encoder_q-layer.0": 12722.8457, "encoder_q-layer.1": 12938.666, "encoder_q-layer.10": 21202.7109, "encoder_q-layer.11": 41725.9219, "encoder_q-layer.2": 14265.7764, "encoder_q-layer.3": 14308.6318, "encoder_q-layer.4": 14761.25, "encoder_q-layer.5": 14177.9131, "encoder_q-layer.6": 15929.6436, "encoder_q-layer.7": 16696.7539, "encoder_q-layer.8": 20015.9434, "encoder_q-layer.9": 16543.6641, "epoch": 0.29, "inbatch_neg_score": 47.3761, "inbatch_pos_score": 47.875, "learning_rate": 3.922222222222223e-05, "loss": 2.4049, "norm_diff": 0.0255, "num_tokens_overlap": 5.5724, "num_tokens_union": 55.0506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27851.9989, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9691, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7023, "sent_len_1": 66.8589, "sent_len_max_0": 18.8663, "sent_len_max_1": 187.79, "stdk": 0.0424, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 29400 }, { "accuracy": 55.1758, "doc_norm": 6.9975, "encoder_q-embeddings": 18923.7773, "encoder_q-layer.0": 13088.4502, "encoder_q-layer.1": 13347.7549, "encoder_q-layer.10": 20727.6855, "encoder_q-layer.11": 40102.4648, "encoder_q-layer.2": 14056.0801, "encoder_q-layer.3": 13717.4902, "encoder_q-layer.4": 13984.6846, "encoder_q-layer.5": 13381.5498, "encoder_q-layer.6": 14239.417, "encoder_q-layer.7": 15467.9668, "encoder_q-layer.8": 17050.0625, "encoder_q-layer.9": 16151.2812, "epoch": 0.29, "inbatch_neg_score": 47.4072, "inbatch_pos_score": 47.9062, "learning_rate": 3.9166666666666665e-05, "loss": 2.4157, "norm_diff": 0.0256, "num_tokens_overlap": 5.5783, "num_tokens_union": 55.0253, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27268.6987, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9719, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7164, "sent_len_1": 66.8345, "sent_len_max_0": 18.8975, "sent_len_max_1": 188.8925, "stdk": 0.0421, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 29500 }, { "accuracy": 52.6367, "doc_norm": 6.9954, "encoder_q-embeddings": 19503.7949, "encoder_q-layer.0": 13354.2393, "encoder_q-layer.1": 13386.9004, "encoder_q-layer.10": 28870.8242, "encoder_q-layer.11": 47027.3672, "encoder_q-layer.2": 14366.832, "encoder_q-layer.3": 14654.5117, "encoder_q-layer.4": 14824.5234, "encoder_q-layer.5": 14266.6748, "encoder_q-layer.6": 15435.0068, "encoder_q-layer.7": 16928.0664, "encoder_q-layer.8": 19715.8574, "encoder_q-layer.9": 19354.0488, "epoch": 0.29, "inbatch_neg_score": 47.3997, "inbatch_pos_score": 47.9062, "learning_rate": 3.9111111111111115e-05, "loss": 2.3593, "norm_diff": 0.0224, "num_tokens_overlap": 5.5707, "num_tokens_union": 54.9667, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30411.9203, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.973, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7307, "sent_len_1": 66.6519, "sent_len_max_0": 18.9387, "sent_len_max_1": 187.685, "stdk": 0.042, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 29600 }, { "accuracy": 51.8555, "doc_norm": 6.9977, "encoder_q-embeddings": 20499.334, "encoder_q-layer.0": 13793.7617, "encoder_q-layer.1": 14412.0801, "encoder_q-layer.10": 24718.9297, "encoder_q-layer.11": 42828.3359, "encoder_q-layer.2": 14687.0576, "encoder_q-layer.3": 14755.4131, "encoder_q-layer.4": 15282.6064, "encoder_q-layer.5": 14723.9863, "encoder_q-layer.6": 16134.6455, "encoder_q-layer.7": 17918.5938, "encoder_q-layer.8": 20077.7441, "encoder_q-layer.9": 17174.4609, "epoch": 0.29, "inbatch_neg_score": 47.4063, "inbatch_pos_score": 47.875, "learning_rate": 3.905555555555556e-05, "loss": 2.3914, "norm_diff": 0.0273, "num_tokens_overlap": 5.5754, "num_tokens_union": 54.9242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29788.2396, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9705, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7026, "sent_len_1": 66.6022, "sent_len_max_0": 18.8388, "sent_len_max_1": 188.0062, "stdk": 0.0429, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 29700 }, { "accuracy": 55.8594, "doc_norm": 6.9904, "encoder_q-embeddings": 18442.8516, "encoder_q-layer.0": 12795.002, "encoder_q-layer.1": 13016.4541, "encoder_q-layer.10": 27276.9629, "encoder_q-layer.11": 41083.2812, "encoder_q-layer.2": 13792.6318, "encoder_q-layer.3": 14161.6611, "encoder_q-layer.4": 13970.457, "encoder_q-layer.5": 13904.6572, "encoder_q-layer.6": 14957.1553, "encoder_q-layer.7": 16682.8809, "encoder_q-layer.8": 19222.3691, "encoder_q-layer.9": 17153.582, "epoch": 0.29, "inbatch_neg_score": 47.3682, "inbatch_pos_score": 47.8438, "learning_rate": 3.9000000000000006e-05, "loss": 2.4055, "norm_diff": 0.0227, "num_tokens_overlap": 5.5706, "num_tokens_union": 54.8924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28221.5396, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9677, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7001, "sent_len_1": 66.6127, "sent_len_max_0": 18.8412, "sent_len_max_1": 188.8875, "stdk": 0.0418, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 29800 }, { "accuracy": 54.8828, "doc_norm": 6.9943, "encoder_q-embeddings": 18828.1621, "encoder_q-layer.0": 12768.9746, "encoder_q-layer.1": 13025.6182, "encoder_q-layer.10": 20702.8359, "encoder_q-layer.11": 41049.3711, "encoder_q-layer.2": 14344.8975, "encoder_q-layer.3": 14594.1621, "encoder_q-layer.4": 15161.6836, "encoder_q-layer.5": 14400.9346, "encoder_q-layer.6": 15007.1436, "encoder_q-layer.7": 15680.665, "encoder_q-layer.8": 17305.3125, "encoder_q-layer.9": 16817.0977, "epoch": 0.29, "inbatch_neg_score": 47.3693, "inbatch_pos_score": 47.875, "learning_rate": 3.894444444444444e-05, "loss": 2.4014, "norm_diff": 0.0241, "num_tokens_overlap": 5.5881, "num_tokens_union": 54.974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27698.8919, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9702, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7155, "sent_len_1": 66.7177, "sent_len_max_0": 18.7838, "sent_len_max_1": 190.4737, "stdk": 0.0418, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 29900 }, { "accuracy": 54.7852, "doc_norm": 6.9848, "encoder_q-embeddings": 18157.9688, "encoder_q-layer.0": 12440.8721, "encoder_q-layer.1": 12431.7295, "encoder_q-layer.10": 20717.6875, "encoder_q-layer.11": 37151.3555, "encoder_q-layer.2": 13651.0918, "encoder_q-layer.3": 14061.4092, "encoder_q-layer.4": 13988.0078, "encoder_q-layer.5": 13663.8721, "encoder_q-layer.6": 14673.8887, "encoder_q-layer.7": 16470.459, "encoder_q-layer.8": 17309.0977, "encoder_q-layer.9": 15659.5908, "epoch": 0.29, "inbatch_neg_score": 47.272, "inbatch_pos_score": 47.75, "learning_rate": 3.888888888888889e-05, "loss": 2.3894, "norm_diff": 0.0251, "num_tokens_overlap": 5.5837, "num_tokens_union": 55.0104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26401.862, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9597, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7274, "sent_len_1": 66.7521, "sent_len_max_0": 18.8425, "sent_len_max_1": 189.2287, "stdk": 0.0422, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 30000 }, { "dev_runtime": 27.2203, "dev_samples_per_second": 2.351, "dev_steps_per_second": 0.037, "epoch": 0.29, "step": 30000, "test_accuracy": 8.6395263671875, "test_doc_norm": 6.981309413909912, "test_inbatch_neg_score": 47.765113830566406, "test_inbatch_pos_score": 48.455360412597656, "test_loss": 3.9195399284362793, "test_norm_diff": 0.0013811811804771423, "test_query_norm": 6.981024265289307, "test_queue_k_norm": 0.0, "test_stdk": 0.035072941333055496, "test_stdq": 0.0350043810904026, "test_stdqueue_k": 0.0 }, { "dev_runtime": 27.2203, "dev_samples_per_second": 2.351, "dev_steps_per_second": 0.037, "epoch": 0.29, "eval_beir-arguana_ndcg@10": 0.37446, "eval_beir-arguana_recall@10": 0.65363, "eval_beir-arguana_recall@100": 0.96586, "eval_beir-arguana_recall@20": 0.83642, "eval_beir-avg_ndcg@10": 0.3825388333333334, "eval_beir-avg_recall@10": 0.45264058333333335, "eval_beir-avg_recall@100": 0.63527175, "eval_beir-avg_recall@20": 0.5179090000000001, "eval_beir-cqadupstack_ndcg@10": 0.28683833333333336, "eval_beir-cqadupstack_recall@10": 0.38546583333333334, "eval_beir-cqadupstack_recall@100": 0.6161175, "eval_beir-cqadupstack_recall@20": 0.45341000000000004, "eval_beir-fiqa_ndcg@10": 0.26245, "eval_beir-fiqa_recall@10": 0.32678, "eval_beir-fiqa_recall@100": 0.60244, "eval_beir-fiqa_recall@20": 0.40439, "eval_beir-nfcorpus_ndcg@10": 0.31888, "eval_beir-nfcorpus_recall@10": 0.15304, "eval_beir-nfcorpus_recall@100": 0.2961, "eval_beir-nfcorpus_recall@20": 0.1842, "eval_beir-nq_ndcg@10": 0.25693, "eval_beir-nq_recall@10": 0.43248, "eval_beir-nq_recall@100": 0.76572, "eval_beir-nq_recall@20": 0.54736, "eval_beir-quora_ndcg@10": 0.78897, "eval_beir-quora_recall@10": 0.89079, "eval_beir-quora_recall@100": 0.97825, "eval_beir-quora_recall@20": 0.93097, "eval_beir-scidocs_ndcg@10": 0.15066, "eval_beir-scidocs_recall@10": 0.15952, "eval_beir-scidocs_recall@100": 0.37258, "eval_beir-scidocs_recall@20": 0.21835, "eval_beir-scifact_ndcg@10": 0.61228, "eval_beir-scifact_recall@10": 0.76994, "eval_beir-scifact_recall@100": 0.906, "eval_beir-scifact_recall@20": 0.82689, "eval_beir-trec-covid_ndcg@10": 0.58779, "eval_beir-trec-covid_recall@10": 0.63, "eval_beir-trec-covid_recall@100": 0.4284, "eval_beir-trec-covid_recall@20": 0.588, "eval_beir-webis-touche2020_ndcg@10": 0.18613, "eval_beir-webis-touche2020_recall@10": 0.12476, "eval_beir-webis-touche2020_recall@100": 0.42125, "eval_beir-webis-touche2020_recall@20": 0.1891, "eval_senteval-avg_sts": 0.7419728623917532, "eval_senteval-sickr_spearman": 0.7279423158101598, "eval_senteval-stsb_spearman": 0.7560034089733467, "step": 30000, "test_accuracy": 8.6395263671875, "test_doc_norm": 6.981309413909912, "test_inbatch_neg_score": 47.765113830566406, "test_inbatch_pos_score": 48.455360412597656, "test_loss": 3.9195399284362793, "test_norm_diff": 0.0013811811804771423, "test_query_norm": 6.981024265289307, "test_queue_k_norm": 0.0, "test_stdk": 0.035072941333055496, "test_stdq": 0.0350043810904026, "test_stdqueue_k": 0.0 }, { "accuracy": 56.0547, "doc_norm": 6.9958, "encoder_q-embeddings": 18297.7559, "encoder_q-layer.0": 12630.1875, "encoder_q-layer.1": 13021.8701, "encoder_q-layer.10": 22177.4258, "encoder_q-layer.11": 39250.1641, "encoder_q-layer.2": 13982.4697, "encoder_q-layer.3": 13884.5957, "encoder_q-layer.4": 13918.626, "encoder_q-layer.5": 13521.2812, "encoder_q-layer.6": 14751.2832, "encoder_q-layer.7": 15191.8496, "encoder_q-layer.8": 17436.9648, "encoder_q-layer.9": 16800.0527, "epoch": 0.29, "inbatch_neg_score": 47.3287, "inbatch_pos_score": 47.8438, "learning_rate": 3.883333333333333e-05, "loss": 2.454, "norm_diff": 0.0294, "num_tokens_overlap": 5.5864, "num_tokens_union": 55.1288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26857.9837, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9665, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7366, "sent_len_1": 66.9058, "sent_len_max_0": 18.865, "sent_len_max_1": 188.875, "stdk": 0.0431, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 30100 }, { "accuracy": 55.7617, "doc_norm": 6.9892, "encoder_q-embeddings": 18352.0, "encoder_q-layer.0": 12222.7568, "encoder_q-layer.1": 12370.9561, "encoder_q-layer.10": 24516.1602, "encoder_q-layer.11": 43028.7188, "encoder_q-layer.2": 13683.1045, "encoder_q-layer.3": 14040.4365, "encoder_q-layer.4": 14169.6113, "encoder_q-layer.5": 14069.293, "encoder_q-layer.6": 15399.3721, "encoder_q-layer.7": 16254.8994, "encoder_q-layer.8": 19280.5684, "encoder_q-layer.9": 17200.3906, "epoch": 0.29, "inbatch_neg_score": 47.2835, "inbatch_pos_score": 47.7812, "learning_rate": 3.877777777777778e-05, "loss": 2.3718, "norm_diff": 0.0276, "num_tokens_overlap": 5.5738, "num_tokens_union": 55.0725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28009.218, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9616, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7389, "sent_len_1": 66.8372, "sent_len_max_0": 18.825, "sent_len_max_1": 190.9925, "stdk": 0.0428, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 30200 }, { "accuracy": 54.0039, "doc_norm": 6.9846, "encoder_q-embeddings": 18386.1152, "encoder_q-layer.0": 12901.0107, "encoder_q-layer.1": 13391.8779, "encoder_q-layer.10": 21807.5488, "encoder_q-layer.11": 40733.8164, "encoder_q-layer.2": 14504.9883, "encoder_q-layer.3": 14495.6768, "encoder_q-layer.4": 14639.9932, "encoder_q-layer.5": 14041.0137, "encoder_q-layer.6": 14874.7061, "encoder_q-layer.7": 15703.3252, "encoder_q-layer.8": 17510.3047, "encoder_q-layer.9": 15841.4717, "epoch": 0.3, "inbatch_neg_score": 47.2333, "inbatch_pos_score": 47.7188, "learning_rate": 3.8722222222222225e-05, "loss": 2.3844, "norm_diff": 0.0271, "num_tokens_overlap": 5.5749, "num_tokens_union": 54.8621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27821.3668, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9575, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7227, "sent_len_1": 66.5774, "sent_len_max_0": 18.9325, "sent_len_max_1": 189.9125, "stdk": 0.0427, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 30300 }, { "accuracy": 55.4688, "doc_norm": 6.9844, "encoder_q-embeddings": 19150.25, "encoder_q-layer.0": 12829.251, "encoder_q-layer.1": 13092.4854, "encoder_q-layer.10": 24369.0547, "encoder_q-layer.11": 40772.9648, "encoder_q-layer.2": 14385.5713, "encoder_q-layer.3": 13944.0234, "encoder_q-layer.4": 14453.9551, "encoder_q-layer.5": 14003.9375, "encoder_q-layer.6": 15344.0576, "encoder_q-layer.7": 15902.3105, "encoder_q-layer.8": 17438.2852, "encoder_q-layer.9": 16622.5645, "epoch": 0.3, "inbatch_neg_score": 47.2176, "inbatch_pos_score": 47.6875, "learning_rate": 3.866666666666667e-05, "loss": 2.4281, "norm_diff": 0.0267, "num_tokens_overlap": 5.5871, "num_tokens_union": 55.0583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27834.5604, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9577, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7302, "sent_len_1": 66.8457, "sent_len_max_0": 18.8412, "sent_len_max_1": 190.3587, "stdk": 0.0436, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 30400 }, { "accuracy": 56.0547, "doc_norm": 6.9822, "encoder_q-embeddings": 18381.7188, "encoder_q-layer.0": 12709.8232, "encoder_q-layer.1": 12962.5684, "encoder_q-layer.10": 22737.6719, "encoder_q-layer.11": 43983.5859, "encoder_q-layer.2": 14250.0498, "encoder_q-layer.3": 14654.3496, "encoder_q-layer.4": 15313.9141, "encoder_q-layer.5": 13994.1025, "encoder_q-layer.6": 15295.9258, "encoder_q-layer.7": 16173.0352, "encoder_q-layer.8": 18227.6094, "encoder_q-layer.9": 17368.7773, "epoch": 0.3, "inbatch_neg_score": 47.1895, "inbatch_pos_score": 47.6875, "learning_rate": 3.8611111111111116e-05, "loss": 2.3536, "norm_diff": 0.024, "num_tokens_overlap": 5.5883, "num_tokens_union": 54.8589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28371.3197, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9582, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7358, "sent_len_1": 66.5104, "sent_len_max_0": 18.7775, "sent_len_max_1": 187.8475, "stdk": 0.0427, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 30500 }, { "accuracy": 55.6641, "doc_norm": 6.9859, "encoder_q-embeddings": 19445.3945, "encoder_q-layer.0": 13573.708, "encoder_q-layer.1": 14089.0615, "encoder_q-layer.10": 30696.0898, "encoder_q-layer.11": 48263.1289, "encoder_q-layer.2": 14992.1094, "encoder_q-layer.3": 14878.9287, "encoder_q-layer.4": 15203.667, "encoder_q-layer.5": 14711.3242, "encoder_q-layer.6": 16176.6133, "encoder_q-layer.7": 17082.6875, "encoder_q-layer.8": 20196.7891, "encoder_q-layer.9": 19358.084, "epoch": 0.3, "inbatch_neg_score": 47.1698, "inbatch_pos_score": 47.6875, "learning_rate": 3.855555555555556e-05, "loss": 2.3998, "norm_diff": 0.0269, "num_tokens_overlap": 5.5741, "num_tokens_union": 55.1265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31119.448, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.959, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7086, "sent_len_1": 66.9962, "sent_len_max_0": 18.9325, "sent_len_max_1": 188.9613, "stdk": 0.0433, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 30600 }, { "accuracy": 55.5664, "doc_norm": 6.982, "encoder_q-embeddings": 18186.0996, "encoder_q-layer.0": 12431.9873, "encoder_q-layer.1": 12708.2334, "encoder_q-layer.10": 23167.5488, "encoder_q-layer.11": 42507.3242, "encoder_q-layer.2": 13961.125, "encoder_q-layer.3": 13779.8223, "encoder_q-layer.4": 14179.4531, "encoder_q-layer.5": 14507.1055, "encoder_q-layer.6": 15110.1807, "encoder_q-layer.7": 16150.793, "encoder_q-layer.8": 19422.5391, "encoder_q-layer.9": 17725.7383, "epoch": 0.3, "inbatch_neg_score": 47.2094, "inbatch_pos_score": 47.6875, "learning_rate": 3.85e-05, "loss": 2.3796, "norm_diff": 0.0269, "num_tokens_overlap": 5.5702, "num_tokens_union": 54.9451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27925.823, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9551, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6919, "sent_len_1": 66.6642, "sent_len_max_0": 18.905, "sent_len_max_1": 188.5337, "stdk": 0.0426, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 30700 }, { "accuracy": 53.3203, "doc_norm": 6.9863, "encoder_q-embeddings": 18368.9004, "encoder_q-layer.0": 12536.2207, "encoder_q-layer.1": 13130.8701, "encoder_q-layer.10": 21535.4004, "encoder_q-layer.11": 40915.9297, "encoder_q-layer.2": 14216.6191, "encoder_q-layer.3": 14405.8506, "encoder_q-layer.4": 14085.3555, "encoder_q-layer.5": 14097.9209, "encoder_q-layer.6": 14335.3193, "encoder_q-layer.7": 15119.8916, "encoder_q-layer.8": 17536.2188, "encoder_q-layer.9": 16322.0879, "epoch": 0.3, "inbatch_neg_score": 47.2382, "inbatch_pos_score": 47.75, "learning_rate": 3.844444444444444e-05, "loss": 2.4351, "norm_diff": 0.0262, "num_tokens_overlap": 5.5764, "num_tokens_union": 54.9917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27277.3908, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9601, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7033, "sent_len_1": 66.7724, "sent_len_max_0": 18.8275, "sent_len_max_1": 188.8175, "stdk": 0.0424, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 30800 }, { "accuracy": 55.3711, "doc_norm": 6.9855, "encoder_q-embeddings": 19371.7812, "encoder_q-layer.0": 12958.3574, "encoder_q-layer.1": 12968.0088, "encoder_q-layer.10": 20381.3867, "encoder_q-layer.11": 38663.9844, "encoder_q-layer.2": 14146.5117, "encoder_q-layer.3": 14744.4756, "encoder_q-layer.4": 14443.2998, "encoder_q-layer.5": 14327.7139, "encoder_q-layer.6": 15151.6338, "encoder_q-layer.7": 15216.4609, "encoder_q-layer.8": 17044.2871, "encoder_q-layer.9": 15877.5752, "epoch": 0.3, "inbatch_neg_score": 47.192, "inbatch_pos_score": 47.6875, "learning_rate": 3.838888888888889e-05, "loss": 2.3599, "norm_diff": 0.0283, "num_tokens_overlap": 5.571, "num_tokens_union": 55.0675, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27207.0746, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9572, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6803, "sent_len_1": 66.9228, "sent_len_max_0": 18.7638, "sent_len_max_1": 189.3562, "stdk": 0.0436, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 30900 }, { "accuracy": 54.1016, "doc_norm": 6.9853, "encoder_q-embeddings": 18412.916, "encoder_q-layer.0": 12726.7422, "encoder_q-layer.1": 13506.25, "encoder_q-layer.10": 26877.0469, "encoder_q-layer.11": 51188.2227, "encoder_q-layer.2": 14047.0752, "encoder_q-layer.3": 14064.8936, "encoder_q-layer.4": 14793.9746, "encoder_q-layer.5": 14441.5166, "encoder_q-layer.6": 15271.4385, "encoder_q-layer.7": 16716.7383, "encoder_q-layer.8": 18558.6348, "encoder_q-layer.9": 18545.0723, "epoch": 0.3, "inbatch_neg_score": 47.1712, "inbatch_pos_score": 47.6562, "learning_rate": 3.8333333333333334e-05, "loss": 2.4147, "norm_diff": 0.0291, "num_tokens_overlap": 5.5908, "num_tokens_union": 55.1031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30281.5096, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9562, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7512, "sent_len_1": 66.8755, "sent_len_max_0": 18.8113, "sent_len_max_1": 190.4737, "stdk": 0.0433, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 31000 }, { "accuracy": 54.8828, "doc_norm": 6.9802, "encoder_q-embeddings": 18276.0566, "encoder_q-layer.0": 12855.1533, "encoder_q-layer.1": 13493.1143, "encoder_q-layer.10": 20641.5176, "encoder_q-layer.11": 40234.2656, "encoder_q-layer.2": 14790.3389, "encoder_q-layer.3": 14708.2822, "encoder_q-layer.4": 15250.2773, "encoder_q-layer.5": 14457.2217, "encoder_q-layer.6": 15346.251, "encoder_q-layer.7": 16176.1562, "encoder_q-layer.8": 16964.9492, "encoder_q-layer.9": 16519.7891, "epoch": 0.3, "inbatch_neg_score": 47.1607, "inbatch_pos_score": 47.6562, "learning_rate": 3.827777777777778e-05, "loss": 2.3717, "norm_diff": 0.0257, "num_tokens_overlap": 5.5884, "num_tokens_union": 55.034, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27472.7528, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9545, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7223, "sent_len_1": 66.8649, "sent_len_max_0": 18.75, "sent_len_max_1": 189.7475, "stdk": 0.0426, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 31100 }, { "accuracy": 56.543, "doc_norm": 6.9759, "encoder_q-embeddings": 18996.1953, "encoder_q-layer.0": 12808.4834, "encoder_q-layer.1": 13082.0684, "encoder_q-layer.10": 20782.623, "encoder_q-layer.11": 40506.8359, "encoder_q-layer.2": 14070.1699, "encoder_q-layer.3": 14331.8154, "encoder_q-layer.4": 14575.7178, "encoder_q-layer.5": 14007.083, "encoder_q-layer.6": 14964.8467, "encoder_q-layer.7": 16073.6406, "encoder_q-layer.8": 17409.957, "encoder_q-layer.9": 16102.4434, "epoch": 0.3, "inbatch_neg_score": 47.0928, "inbatch_pos_score": 47.5938, "learning_rate": 3.8222222222222226e-05, "loss": 2.3754, "norm_diff": 0.0238, "num_tokens_overlap": 5.5848, "num_tokens_union": 55.0881, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27194.5441, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9521, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7336, "sent_len_1": 66.9534, "sent_len_max_0": 18.7975, "sent_len_max_1": 191.5012, "stdk": 0.0422, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 31200 }, { "accuracy": 57.1289, "doc_norm": 6.9791, "encoder_q-embeddings": 19293.5039, "encoder_q-layer.0": 12683.5645, "encoder_q-layer.1": 13255.7744, "encoder_q-layer.10": 22395.0957, "encoder_q-layer.11": 44095.9375, "encoder_q-layer.2": 14635.582, "encoder_q-layer.3": 14220.873, "encoder_q-layer.4": 14177.2949, "encoder_q-layer.5": 13754.8828, "encoder_q-layer.6": 14944.2559, "encoder_q-layer.7": 15652.1455, "encoder_q-layer.8": 17514.207, "encoder_q-layer.9": 16850.1152, "epoch": 0.31, "inbatch_neg_score": 47.1539, "inbatch_pos_score": 47.6562, "learning_rate": 3.816666666666667e-05, "loss": 2.3401, "norm_diff": 0.0228, "num_tokens_overlap": 5.5944, "num_tokens_union": 55.0965, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28281.6729, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9563, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7696, "sent_len_1": 66.8619, "sent_len_max_0": 18.8625, "sent_len_max_1": 188.9988, "stdk": 0.0425, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 31300 }, { "accuracy": 55.3711, "doc_norm": 6.9773, "encoder_q-embeddings": 19391.6504, "encoder_q-layer.0": 12965.8398, "encoder_q-layer.1": 13208.4795, "encoder_q-layer.10": 25555.4277, "encoder_q-layer.11": 48042.1094, "encoder_q-layer.2": 14298.8779, "encoder_q-layer.3": 14365.4326, "encoder_q-layer.4": 14139.0879, "encoder_q-layer.5": 14285.7461, "encoder_q-layer.6": 15890.3203, "encoder_q-layer.7": 16807.1855, "encoder_q-layer.8": 19361.1875, "encoder_q-layer.9": 18021.3281, "epoch": 0.31, "inbatch_neg_score": 47.0715, "inbatch_pos_score": 47.5938, "learning_rate": 3.811111111111112e-05, "loss": 2.4023, "norm_diff": 0.0289, "num_tokens_overlap": 5.5789, "num_tokens_union": 55.0506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29591.8015, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9484, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7292, "sent_len_1": 66.7776, "sent_len_max_0": 18.79, "sent_len_max_1": 186.4613, "stdk": 0.0432, "stdq": 0.039, "stdqueue_k": 0.0, "step": 31400 }, { "accuracy": 55.1758, "doc_norm": 6.97, "encoder_q-embeddings": 18540.5645, "encoder_q-layer.0": 12644.208, "encoder_q-layer.1": 12633.7852, "encoder_q-layer.10": 22764.5156, "encoder_q-layer.11": 42057.6953, "encoder_q-layer.2": 13600.2891, "encoder_q-layer.3": 13687.7861, "encoder_q-layer.4": 13717.8965, "encoder_q-layer.5": 13409.7666, "encoder_q-layer.6": 14995.5518, "encoder_q-layer.7": 16610.8965, "encoder_q-layer.8": 18990.0312, "encoder_q-layer.9": 17158.748, "epoch": 0.31, "inbatch_neg_score": 47.0351, "inbatch_pos_score": 47.5312, "learning_rate": 3.805555555555555e-05, "loss": 2.3666, "norm_diff": 0.0271, "num_tokens_overlap": 5.5707, "num_tokens_union": 54.9084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27558.9458, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9429, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7166, "sent_len_1": 66.6357, "sent_len_max_0": 18.8137, "sent_len_max_1": 188.5737, "stdk": 0.0423, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 31500 }, { "accuracy": 55.3711, "doc_norm": 6.9627, "encoder_q-embeddings": 18374.1504, "encoder_q-layer.0": 12571.1328, "encoder_q-layer.1": 12642.2129, "encoder_q-layer.10": 19837.4824, "encoder_q-layer.11": 39897.5234, "encoder_q-layer.2": 13476.0605, "encoder_q-layer.3": 13530.4678, "encoder_q-layer.4": 13839.1436, "encoder_q-layer.5": 13858.9033, "encoder_q-layer.6": 15175.4668, "encoder_q-layer.7": 15961.5703, "encoder_q-layer.8": 17161.8047, "encoder_q-layer.9": 15736.1602, "epoch": 0.31, "inbatch_neg_score": 46.9467, "inbatch_pos_score": 47.4375, "learning_rate": 3.8e-05, "loss": 2.3614, "norm_diff": 0.0258, "num_tokens_overlap": 5.579, "num_tokens_union": 55.0079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26759.5716, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9369, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7182, "sent_len_1": 66.8021, "sent_len_max_0": 18.7175, "sent_len_max_1": 189.9787, "stdk": 0.0422, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 31600 }, { "accuracy": 55.1758, "doc_norm": 6.9695, "encoder_q-embeddings": 18244.9043, "encoder_q-layer.0": 12971.8584, "encoder_q-layer.1": 13635.0859, "encoder_q-layer.10": 20678.7109, "encoder_q-layer.11": 41962.4453, "encoder_q-layer.2": 14334.6816, "encoder_q-layer.3": 14444.8477, "encoder_q-layer.4": 15424.2695, "encoder_q-layer.5": 14621.9482, "encoder_q-layer.6": 15126.6094, "encoder_q-layer.7": 16666.6289, "encoder_q-layer.8": 17605.1895, "encoder_q-layer.9": 16636.5762, "epoch": 0.31, "inbatch_neg_score": 46.9447, "inbatch_pos_score": 47.4375, "learning_rate": 3.7944444444444444e-05, "loss": 2.3398, "norm_diff": 0.0307, "num_tokens_overlap": 5.5885, "num_tokens_union": 54.9662, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27724.7003, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9388, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7366, "sent_len_1": 66.7187, "sent_len_max_0": 18.87, "sent_len_max_1": 189.1687, "stdk": 0.0432, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 31700 }, { "accuracy": 54.9805, "doc_norm": 6.9715, "encoder_q-embeddings": 18727.7871, "encoder_q-layer.0": 12837.3525, "encoder_q-layer.1": 12817.5342, "encoder_q-layer.10": 20462.582, "encoder_q-layer.11": 39591.3945, "encoder_q-layer.2": 13902.1074, "encoder_q-layer.3": 14055.3828, "encoder_q-layer.4": 14075.1621, "encoder_q-layer.5": 13988.2021, "encoder_q-layer.6": 14670.4502, "encoder_q-layer.7": 15370.8164, "encoder_q-layer.8": 17052.627, "encoder_q-layer.9": 15643.25, "epoch": 0.31, "inbatch_neg_score": 47.0143, "inbatch_pos_score": 47.5, "learning_rate": 3.7888888888888894e-05, "loss": 2.3374, "norm_diff": 0.0308, "num_tokens_overlap": 5.5898, "num_tokens_union": 55.0458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27065.7132, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9407, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.823, "sent_len_max_0": 18.82, "sent_len_max_1": 189.7475, "stdk": 0.0416, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 31800 }, { "accuracy": 53.9062, "doc_norm": 6.9661, "encoder_q-embeddings": 18440.2871, "encoder_q-layer.0": 12715.249, "encoder_q-layer.1": 13225.2959, "encoder_q-layer.10": 23929.9434, "encoder_q-layer.11": 42480.7891, "encoder_q-layer.2": 14977.7295, "encoder_q-layer.3": 14312.8359, "encoder_q-layer.4": 14987.7588, "encoder_q-layer.5": 14859.3232, "encoder_q-layer.6": 16229.3799, "encoder_q-layer.7": 16427.2754, "encoder_q-layer.8": 18158.7715, "encoder_q-layer.9": 17645.1152, "epoch": 0.31, "inbatch_neg_score": 46.9385, "inbatch_pos_score": 47.4375, "learning_rate": 3.7833333333333336e-05, "loss": 2.3951, "norm_diff": 0.0279, "num_tokens_overlap": 5.5641, "num_tokens_union": 54.9872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28346.2575, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9382, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7023, "sent_len_1": 66.6587, "sent_len_max_0": 18.87, "sent_len_max_1": 189.1075, "stdk": 0.0426, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 31900 }, { "accuracy": 59.5703, "doc_norm": 6.9662, "encoder_q-embeddings": 18832.9277, "encoder_q-layer.0": 12383.9883, "encoder_q-layer.1": 12555.627, "encoder_q-layer.10": 31332.4414, "encoder_q-layer.11": 50422.9062, "encoder_q-layer.2": 13614.2549, "encoder_q-layer.3": 13551.3291, "encoder_q-layer.4": 13783.6035, "encoder_q-layer.5": 13597.0342, "encoder_q-layer.6": 15012.8662, "encoder_q-layer.7": 15941.4043, "encoder_q-layer.8": 19840.0781, "encoder_q-layer.9": 19789.0371, "epoch": 0.31, "inbatch_neg_score": 46.9259, "inbatch_pos_score": 47.4375, "learning_rate": 3.777777777777778e-05, "loss": 2.3827, "norm_diff": 0.0281, "num_tokens_overlap": 5.5836, "num_tokens_union": 54.9362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30398.7194, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9381, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7419, "sent_len_1": 66.6015, "sent_len_max_0": 18.855, "sent_len_max_1": 189.7488, "stdk": 0.0441, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 32000 }, { "accuracy": 55.8594, "doc_norm": 6.9707, "encoder_q-embeddings": 17960.9219, "encoder_q-layer.0": 12320.6582, "encoder_q-layer.1": 12790.6201, "encoder_q-layer.10": 22554.2695, "encoder_q-layer.11": 44279.3945, "encoder_q-layer.2": 14175.0762, "encoder_q-layer.3": 14214.292, "encoder_q-layer.4": 14422.8896, "encoder_q-layer.5": 14212.6289, "encoder_q-layer.6": 14784.8691, "encoder_q-layer.7": 16080.4316, "encoder_q-layer.8": 18757.1309, "encoder_q-layer.9": 17008.4883, "epoch": 0.31, "inbatch_neg_score": 47.0148, "inbatch_pos_score": 47.5, "learning_rate": 3.772222222222223e-05, "loss": 2.358, "norm_diff": 0.0272, "num_tokens_overlap": 5.5743, "num_tokens_union": 55.0062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27916.2726, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9435, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7053, "sent_len_1": 66.8276, "sent_len_max_0": 18.7225, "sent_len_max_1": 191.1587, "stdk": 0.0427, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 32100 }, { "accuracy": 57.2266, "doc_norm": 6.973, "encoder_q-embeddings": 18678.3047, "encoder_q-layer.0": 12717.8203, "encoder_q-layer.1": 12761.7295, "encoder_q-layer.10": 27419.8066, "encoder_q-layer.11": 46617.6758, "encoder_q-layer.2": 13922.1123, "encoder_q-layer.3": 14531.1592, "encoder_q-layer.4": 14456.2256, "encoder_q-layer.5": 13831.8125, "encoder_q-layer.6": 14887.3252, "encoder_q-layer.7": 15596.2344, "encoder_q-layer.8": 19252.2832, "encoder_q-layer.9": 18551.7988, "epoch": 0.31, "inbatch_neg_score": 47.0352, "inbatch_pos_score": 47.5312, "learning_rate": 3.766666666666667e-05, "loss": 2.3362, "norm_diff": 0.0293, "num_tokens_overlap": 5.5708, "num_tokens_union": 55.1539, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29918.3085, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9437, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7372, "sent_len_1": 66.9704, "sent_len_max_0": 18.8712, "sent_len_max_1": 192.6625, "stdk": 0.0427, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 32200 }, { "accuracy": 56.1523, "doc_norm": 6.9627, "encoder_q-embeddings": 18770.791, "encoder_q-layer.0": 12879.2793, "encoder_q-layer.1": 13544.8369, "encoder_q-layer.10": 20277.2734, "encoder_q-layer.11": 41925.5, "encoder_q-layer.2": 14745.459, "encoder_q-layer.3": 14754.832, "encoder_q-layer.4": 14888.6045, "encoder_q-layer.5": 14708.1387, "encoder_q-layer.6": 15856.2363, "encoder_q-layer.7": 16746.3574, "encoder_q-layer.8": 19205.6699, "encoder_q-layer.9": 16917.4609, "epoch": 0.32, "inbatch_neg_score": 46.9287, "inbatch_pos_score": 47.4062, "learning_rate": 3.761111111111111e-05, "loss": 2.3894, "norm_diff": 0.0249, "num_tokens_overlap": 5.5747, "num_tokens_union": 55.0365, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28270.7489, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9378, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7315, "sent_len_1": 66.7977, "sent_len_max_0": 18.8412, "sent_len_max_1": 189.74, "stdk": 0.0432, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 32300 }, { "accuracy": 52.5391, "doc_norm": 6.9591, "encoder_q-embeddings": 19519.6562, "encoder_q-layer.0": 13095.6934, "encoder_q-layer.1": 13351.3555, "encoder_q-layer.10": 25835.1797, "encoder_q-layer.11": 46005.9492, "encoder_q-layer.2": 14527.9014, "encoder_q-layer.3": 14870.5693, "encoder_q-layer.4": 15116.8652, "encoder_q-layer.5": 15110.5889, "encoder_q-layer.6": 15755.3574, "encoder_q-layer.7": 16548.2656, "encoder_q-layer.8": 19072.375, "encoder_q-layer.9": 17331.4336, "epoch": 0.32, "inbatch_neg_score": 46.9024, "inbatch_pos_score": 47.375, "learning_rate": 3.7555555555555554e-05, "loss": 2.41, "norm_diff": 0.0254, "num_tokens_overlap": 5.5781, "num_tokens_union": 54.966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29469.7638, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9337, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7333, "sent_len_1": 66.6658, "sent_len_max_0": 18.8675, "sent_len_max_1": 191.4325, "stdk": 0.0421, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 32400 }, { "accuracy": 54.9805, "doc_norm": 6.9613, "encoder_q-embeddings": 18630.5781, "encoder_q-layer.0": 13045.5098, "encoder_q-layer.1": 13370.8164, "encoder_q-layer.10": 24094.9531, "encoder_q-layer.11": 42486.8359, "encoder_q-layer.2": 14701.8281, "encoder_q-layer.3": 14448.834, "encoder_q-layer.4": 14541.6055, "encoder_q-layer.5": 14161.8359, "encoder_q-layer.6": 14981.4932, "encoder_q-layer.7": 15833.1396, "encoder_q-layer.8": 18343.252, "encoder_q-layer.9": 16509.9355, "epoch": 0.32, "inbatch_neg_score": 46.8301, "inbatch_pos_score": 47.3438, "learning_rate": 3.7500000000000003e-05, "loss": 2.403, "norm_diff": 0.0294, "num_tokens_overlap": 5.578, "num_tokens_union": 55.0264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28413.1158, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9319, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7201, "sent_len_1": 66.8449, "sent_len_max_0": 18.8137, "sent_len_max_1": 191.1475, "stdk": 0.0433, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 32500 }, { "accuracy": 55.2734, "doc_norm": 6.9603, "encoder_q-embeddings": 19273.457, "encoder_q-layer.0": 13197.5996, "encoder_q-layer.1": 13505.4648, "encoder_q-layer.10": 22790.0039, "encoder_q-layer.11": 41860.7734, "encoder_q-layer.2": 14740.3164, "encoder_q-layer.3": 14880.0654, "encoder_q-layer.4": 15154.5361, "encoder_q-layer.5": 14743.4111, "encoder_q-layer.6": 15820.7061, "encoder_q-layer.7": 16230.4111, "encoder_q-layer.8": 18882.6758, "encoder_q-layer.9": 17800.584, "epoch": 0.32, "inbatch_neg_score": 46.849, "inbatch_pos_score": 47.375, "learning_rate": 3.7444444444444446e-05, "loss": 2.3793, "norm_diff": 0.027, "num_tokens_overlap": 5.5653, "num_tokens_union": 54.9318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28661.4508, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9333, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7099, "sent_len_1": 66.6834, "sent_len_max_0": 18.9013, "sent_len_max_1": 189.9162, "stdk": 0.0427, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 32600 }, { "accuracy": 56.7383, "doc_norm": 6.9581, "encoder_q-embeddings": 17687.3594, "encoder_q-layer.0": 12751.3916, "encoder_q-layer.1": 12698.1963, "encoder_q-layer.10": 21606.0684, "encoder_q-layer.11": 41425.3086, "encoder_q-layer.2": 13637.3467, "encoder_q-layer.3": 13698.5557, "encoder_q-layer.4": 14184.0791, "encoder_q-layer.5": 13990.4805, "encoder_q-layer.6": 15200.7773, "encoder_q-layer.7": 16311.458, "encoder_q-layer.8": 18119.2246, "encoder_q-layer.9": 16798.9355, "epoch": 0.32, "inbatch_neg_score": 46.8226, "inbatch_pos_score": 47.3125, "learning_rate": 3.738888888888889e-05, "loss": 2.4027, "norm_diff": 0.029, "num_tokens_overlap": 5.5688, "num_tokens_union": 54.9227, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27461.7227, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9291, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7071, "sent_len_1": 66.5876, "sent_len_max_0": 18.7275, "sent_len_max_1": 188.8162, "stdk": 0.0422, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 32700 }, { "accuracy": 55.3711, "doc_norm": 6.9589, "encoder_q-embeddings": 17851.8359, "encoder_q-layer.0": 12489.46, "encoder_q-layer.1": 12942.4512, "encoder_q-layer.10": 20875.5039, "encoder_q-layer.11": 43258.6172, "encoder_q-layer.2": 14189.7266, "encoder_q-layer.3": 14297.2627, "encoder_q-layer.4": 14255.4277, "encoder_q-layer.5": 14370.1826, "encoder_q-layer.6": 15463.125, "encoder_q-layer.7": 16239.3369, "encoder_q-layer.8": 17965.7891, "encoder_q-layer.9": 16371.4297, "epoch": 0.32, "inbatch_neg_score": 46.8339, "inbatch_pos_score": 47.3438, "learning_rate": 3.733333333333334e-05, "loss": 2.3524, "norm_diff": 0.0285, "num_tokens_overlap": 5.5658, "num_tokens_union": 55.057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27808.5802, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9305, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7211, "sent_len_1": 66.84, "sent_len_max_0": 18.8375, "sent_len_max_1": 187.9913, "stdk": 0.0422, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 32800 }, { "accuracy": 56.4453, "doc_norm": 6.9554, "encoder_q-embeddings": 17527.3223, "encoder_q-layer.0": 12135.6445, "encoder_q-layer.1": 12330.1992, "encoder_q-layer.10": 22852.9355, "encoder_q-layer.11": 41769.6094, "encoder_q-layer.2": 13711.1748, "encoder_q-layer.3": 13516.9131, "encoder_q-layer.4": 14279.3457, "encoder_q-layer.5": 14216.6777, "encoder_q-layer.6": 14942.4072, "encoder_q-layer.7": 15360.5703, "encoder_q-layer.8": 18963.041, "encoder_q-layer.9": 17705.3105, "epoch": 0.32, "inbatch_neg_score": 46.8226, "inbatch_pos_score": 47.3125, "learning_rate": 3.727777777777778e-05, "loss": 2.3777, "norm_diff": 0.0276, "num_tokens_overlap": 5.5876, "num_tokens_union": 55.0727, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27405.5936, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9278, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7093, "sent_len_1": 66.9504, "sent_len_max_0": 18.8875, "sent_len_max_1": 190.7388, "stdk": 0.0423, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 32900 }, { "accuracy": 56.3477, "doc_norm": 6.9522, "encoder_q-embeddings": 18708.1309, "encoder_q-layer.0": 12611.1016, "encoder_q-layer.1": 12870.4141, "encoder_q-layer.10": 21867.3613, "encoder_q-layer.11": 42054.2812, "encoder_q-layer.2": 13998.501, "encoder_q-layer.3": 14102.5508, "encoder_q-layer.4": 14540.0703, "encoder_q-layer.5": 14344.5488, "encoder_q-layer.6": 14716.124, "encoder_q-layer.7": 15611.6738, "encoder_q-layer.8": 17424.2715, "encoder_q-layer.9": 15956.6797, "epoch": 0.32, "inbatch_neg_score": 46.7747, "inbatch_pos_score": 47.2812, "learning_rate": 3.722222222222222e-05, "loss": 2.3505, "norm_diff": 0.0248, "num_tokens_overlap": 5.5756, "num_tokens_union": 55.1157, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27622.249, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9273, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.687, "sent_len_1": 67.0139, "sent_len_max_0": 18.815, "sent_len_max_1": 190.3487, "stdk": 0.0428, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 33000 }, { "accuracy": 55.1758, "doc_norm": 6.9487, "encoder_q-embeddings": 17866.6758, "encoder_q-layer.0": 12965.3418, "encoder_q-layer.1": 12986.1816, "encoder_q-layer.10": 22038.2949, "encoder_q-layer.11": 40995.7656, "encoder_q-layer.2": 13659.1182, "encoder_q-layer.3": 13963.2246, "encoder_q-layer.4": 14078.7939, "encoder_q-layer.5": 13489.1631, "encoder_q-layer.6": 14432.2529, "encoder_q-layer.7": 15307.252, "encoder_q-layer.8": 17274.4961, "encoder_q-layer.9": 16257.3252, "epoch": 0.32, "inbatch_neg_score": 46.7062, "inbatch_pos_score": 47.1875, "learning_rate": 3.7166666666666664e-05, "loss": 2.3456, "norm_diff": 0.0292, "num_tokens_overlap": 5.5813, "num_tokens_union": 55.0238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27340.8329, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9196, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.707, "sent_len_1": 66.8023, "sent_len_max_0": 18.8938, "sent_len_max_1": 188.2688, "stdk": 0.0428, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 33100 }, { "accuracy": 58.3984, "doc_norm": 6.9507, "encoder_q-embeddings": 17278.252, "encoder_q-layer.0": 11950.0586, "encoder_q-layer.1": 12206.3965, "encoder_q-layer.10": 21582.3281, "encoder_q-layer.11": 45057.6016, "encoder_q-layer.2": 13224.1807, "encoder_q-layer.3": 13273.6943, "encoder_q-layer.4": 14209.7725, "encoder_q-layer.5": 13545.8408, "encoder_q-layer.6": 14177.3955, "encoder_q-layer.7": 15148.7588, "encoder_q-layer.8": 17851.752, "encoder_q-layer.9": 17334.1621, "epoch": 0.32, "inbatch_neg_score": 46.6938, "inbatch_pos_score": 47.2188, "learning_rate": 3.7111111111111113e-05, "loss": 2.3632, "norm_diff": 0.0289, "num_tokens_overlap": 5.5842, "num_tokens_union": 55.1441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27492.393, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9218, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7281, "sent_len_1": 66.9533, "sent_len_max_0": 18.8275, "sent_len_max_1": 190.3475, "stdk": 0.0435, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 33200 }, { "accuracy": 56.7383, "doc_norm": 6.9442, "encoder_q-embeddings": 17277.252, "encoder_q-layer.0": 12234.9775, "encoder_q-layer.1": 12398.3711, "encoder_q-layer.10": 22093.4121, "encoder_q-layer.11": 41117.8242, "encoder_q-layer.2": 13950.9033, "encoder_q-layer.3": 13894.0947, "encoder_q-layer.4": 14152.8418, "encoder_q-layer.5": 13540.6953, "encoder_q-layer.6": 15193.1914, "encoder_q-layer.7": 15486.96, "encoder_q-layer.8": 16543.6465, "encoder_q-layer.9": 15285.1729, "epoch": 0.33, "inbatch_neg_score": 46.6451, "inbatch_pos_score": 47.1562, "learning_rate": 3.705555555555556e-05, "loss": 2.4258, "norm_diff": 0.0263, "num_tokens_overlap": 5.5716, "num_tokens_union": 55.1279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26725.0546, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.918, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7168, "sent_len_1": 66.9528, "sent_len_max_0": 18.9187, "sent_len_max_1": 191.0813, "stdk": 0.0431, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 33300 }, { "accuracy": 54.5898, "doc_norm": 6.9485, "encoder_q-embeddings": 20708.8398, "encoder_q-layer.0": 13931.1113, "encoder_q-layer.1": 14302.2529, "encoder_q-layer.10": 21234.5527, "encoder_q-layer.11": 43066.3711, "encoder_q-layer.2": 15359.4287, "encoder_q-layer.3": 15134.2725, "encoder_q-layer.4": 14949.8428, "encoder_q-layer.5": 14424.1123, "encoder_q-layer.6": 15694.042, "encoder_q-layer.7": 15748.1738, "encoder_q-layer.8": 17606.7598, "encoder_q-layer.9": 16265.8848, "epoch": 0.33, "inbatch_neg_score": 46.6599, "inbatch_pos_score": 47.1562, "learning_rate": 3.7e-05, "loss": 2.3347, "norm_diff": 0.0296, "num_tokens_overlap": 5.5803, "num_tokens_union": 55.0652, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28681.7979, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9189, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7267, "sent_len_1": 66.9085, "sent_len_max_0": 19.015, "sent_len_max_1": 189.655, "stdk": 0.0441, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 33400 }, { "accuracy": 53.3203, "doc_norm": 6.9465, "encoder_q-embeddings": 18944.8145, "encoder_q-layer.0": 13128.3252, "encoder_q-layer.1": 13663.5, "encoder_q-layer.10": 22281.3516, "encoder_q-layer.11": 40000.2891, "encoder_q-layer.2": 14504.2471, "encoder_q-layer.3": 14128.332, "encoder_q-layer.4": 14595.3252, "encoder_q-layer.5": 14367.8223, "encoder_q-layer.6": 15087.0391, "encoder_q-layer.7": 17060.5566, "encoder_q-layer.8": 20083.3242, "encoder_q-layer.9": 17269.5215, "epoch": 0.33, "inbatch_neg_score": 46.6665, "inbatch_pos_score": 47.1562, "learning_rate": 3.694444444444445e-05, "loss": 2.3909, "norm_diff": 0.0292, "num_tokens_overlap": 5.5772, "num_tokens_union": 54.9338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27861.6054, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9174, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7261, "sent_len_1": 66.6483, "sent_len_max_0": 18.83, "sent_len_max_1": 190.8075, "stdk": 0.0438, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 33500 }, { "accuracy": 51.6602, "doc_norm": 6.9398, "encoder_q-embeddings": 18463.5234, "encoder_q-layer.0": 12452.3779, "encoder_q-layer.1": 13206.7891, "encoder_q-layer.10": 19910.248, "encoder_q-layer.11": 38532.4727, "encoder_q-layer.2": 14728.6055, "encoder_q-layer.3": 14568.4004, "encoder_q-layer.4": 14297.2969, "encoder_q-layer.5": 13726.209, "encoder_q-layer.6": 15125.5996, "encoder_q-layer.7": 15594.4932, "encoder_q-layer.8": 17344.9023, "encoder_q-layer.9": 15924.2539, "epoch": 0.33, "inbatch_neg_score": 46.6474, "inbatch_pos_score": 47.125, "learning_rate": 3.688888888888889e-05, "loss": 2.3679, "norm_diff": 0.0254, "num_tokens_overlap": 5.5805, "num_tokens_union": 54.9914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26982.5884, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.9144, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7236, "sent_len_1": 66.7208, "sent_len_max_0": 18.8775, "sent_len_max_1": 189.8125, "stdk": 0.0405, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 33600 }, { "accuracy": 53.3203, "doc_norm": 6.9448, "encoder_q-embeddings": 19200.3828, "encoder_q-layer.0": 13218.4043, "encoder_q-layer.1": 13761.2021, "encoder_q-layer.10": 22916.9355, "encoder_q-layer.11": 41452.7812, "encoder_q-layer.2": 14606.8926, "encoder_q-layer.3": 14591.5049, "encoder_q-layer.4": 14940.5391, "encoder_q-layer.5": 14389.1113, "encoder_q-layer.6": 16190.7373, "encoder_q-layer.7": 17717.3203, "encoder_q-layer.8": 18676.5, "encoder_q-layer.9": 17782.9395, "epoch": 0.33, "inbatch_neg_score": 46.6637, "inbatch_pos_score": 47.1562, "learning_rate": 3.683333333333334e-05, "loss": 2.3638, "norm_diff": 0.0282, "num_tokens_overlap": 5.5825, "num_tokens_union": 55.0133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28601.6723, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9166, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7255, "sent_len_1": 66.8588, "sent_len_max_0": 18.9325, "sent_len_max_1": 190.5175, "stdk": 0.0428, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 33700 }, { "accuracy": 55.2734, "doc_norm": 6.9472, "encoder_q-embeddings": 18620.334, "encoder_q-layer.0": 13206.543, "encoder_q-layer.1": 13275.7812, "encoder_q-layer.10": 22146.7695, "encoder_q-layer.11": 40909.6562, "encoder_q-layer.2": 14867.8477, "encoder_q-layer.3": 15119.791, "encoder_q-layer.4": 14947.4033, "encoder_q-layer.5": 14613.7803, "encoder_q-layer.6": 16226.502, "encoder_q-layer.7": 16668.3086, "encoder_q-layer.8": 18197.0996, "encoder_q-layer.9": 16970.5117, "epoch": 0.33, "inbatch_neg_score": 46.6523, "inbatch_pos_score": 47.1562, "learning_rate": 3.677777777777778e-05, "loss": 2.3738, "norm_diff": 0.0278, "num_tokens_overlap": 5.5622, "num_tokens_union": 54.9644, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28250.1788, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9194, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6939, "sent_len_1": 66.7489, "sent_len_max_0": 18.8888, "sent_len_max_1": 190.9563, "stdk": 0.0427, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 33800 }, { "accuracy": 54.0039, "doc_norm": 6.9409, "encoder_q-embeddings": 18424.3379, "encoder_q-layer.0": 13315.1055, "encoder_q-layer.1": 13393.6348, "encoder_q-layer.10": 19444.2656, "encoder_q-layer.11": 41702.8594, "encoder_q-layer.2": 13968.6689, "encoder_q-layer.3": 13785.7275, "encoder_q-layer.4": 14523.8379, "encoder_q-layer.5": 13785.374, "encoder_q-layer.6": 14839.543, "encoder_q-layer.7": 15536.418, "encoder_q-layer.8": 17199.8125, "encoder_q-layer.9": 15817.7656, "epoch": 0.33, "inbatch_neg_score": 46.6021, "inbatch_pos_score": 47.0938, "learning_rate": 3.672222222222222e-05, "loss": 2.3505, "norm_diff": 0.0272, "num_tokens_overlap": 5.5797, "num_tokens_union": 54.8806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27473.4007, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9137, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7184, "sent_len_1": 66.5499, "sent_len_max_0": 18.8388, "sent_len_max_1": 187.8562, "stdk": 0.0418, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 33900 }, { "accuracy": 53.0273, "doc_norm": 6.9387, "encoder_q-embeddings": 20046.752, "encoder_q-layer.0": 13837.124, "encoder_q-layer.1": 14235.7129, "encoder_q-layer.10": 24190.9512, "encoder_q-layer.11": 41640.668, "encoder_q-layer.2": 15314.1953, "encoder_q-layer.3": 14943.1562, "encoder_q-layer.4": 15023.6162, "encoder_q-layer.5": 14841.9209, "encoder_q-layer.6": 15718.2705, "encoder_q-layer.7": 16678.4141, "encoder_q-layer.8": 18151.1836, "encoder_q-layer.9": 17294.2793, "epoch": 0.33, "inbatch_neg_score": 46.5109, "inbatch_pos_score": 47.0, "learning_rate": 3.6666666666666666e-05, "loss": 2.3492, "norm_diff": 0.0301, "num_tokens_overlap": 5.5863, "num_tokens_union": 54.9698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29355.5945, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9086, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7337, "sent_len_1": 66.6585, "sent_len_max_0": 18.8412, "sent_len_max_1": 188.3787, "stdk": 0.0427, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 34000 }, { "accuracy": 54.1992, "doc_norm": 6.9372, "encoder_q-embeddings": 17786.7715, "encoder_q-layer.0": 12422.5547, "encoder_q-layer.1": 12759.957, "encoder_q-layer.10": 23722.5449, "encoder_q-layer.11": 44242.0195, "encoder_q-layer.2": 13820.0439, "encoder_q-layer.3": 13420.0586, "encoder_q-layer.4": 13884.8643, "encoder_q-layer.5": 13861.1318, "encoder_q-layer.6": 14556.7588, "encoder_q-layer.7": 16373.959, "encoder_q-layer.8": 17935.7207, "encoder_q-layer.9": 16773.2676, "epoch": 0.33, "inbatch_neg_score": 46.5542, "inbatch_pos_score": 47.0625, "learning_rate": 3.6611111111111115e-05, "loss": 2.3262, "norm_diff": 0.0288, "num_tokens_overlap": 5.5747, "num_tokens_union": 54.9189, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28070.0093, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9085, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7258, "sent_len_1": 66.6994, "sent_len_max_0": 18.7725, "sent_len_max_1": 191.205, "stdk": 0.0433, "stdq": 0.039, "stdqueue_k": 0.0, "step": 34100 }, { "accuracy": 54.4922, "doc_norm": 6.9357, "encoder_q-embeddings": 19588.0332, "encoder_q-layer.0": 13632.4551, "encoder_q-layer.1": 13540.5049, "encoder_q-layer.10": 24723.8711, "encoder_q-layer.11": 39821.2617, "encoder_q-layer.2": 14642.6533, "encoder_q-layer.3": 14631.7852, "encoder_q-layer.4": 14721.6934, "encoder_q-layer.5": 14337.5742, "encoder_q-layer.6": 15394.6035, "encoder_q-layer.7": 15784.5127, "encoder_q-layer.8": 17414.8105, "encoder_q-layer.9": 17078.123, "epoch": 0.33, "inbatch_neg_score": 46.4907, "inbatch_pos_score": 47.0, "learning_rate": 3.655555555555556e-05, "loss": 2.4155, "norm_diff": 0.0296, "num_tokens_overlap": 5.5693, "num_tokens_union": 54.8889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28092.5305, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9061, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6989, "sent_len_1": 66.5903, "sent_len_max_0": 19.0113, "sent_len_max_1": 188.7775, "stdk": 0.0426, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 34200 }, { "accuracy": 58.7891, "doc_norm": 6.9325, "encoder_q-embeddings": 18130.2168, "encoder_q-layer.0": 12506.1523, "encoder_q-layer.1": 12668.2871, "encoder_q-layer.10": 22167.3066, "encoder_q-layer.11": 47126.7812, "encoder_q-layer.2": 14000.8438, "encoder_q-layer.3": 13859.8477, "encoder_q-layer.4": 14199.2646, "encoder_q-layer.5": 13679.1426, "encoder_q-layer.6": 14501.6699, "encoder_q-layer.7": 15691.5137, "encoder_q-layer.8": 17652.9629, "encoder_q-layer.9": 17215.3516, "epoch": 0.33, "inbatch_neg_score": 46.454, "inbatch_pos_score": 46.9688, "learning_rate": 3.65e-05, "loss": 2.3616, "norm_diff": 0.0295, "num_tokens_overlap": 5.5822, "num_tokens_union": 54.9565, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28300.712, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.903, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7259, "sent_len_1": 66.7628, "sent_len_max_0": 18.845, "sent_len_max_1": 190.725, "stdk": 0.0426, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 34300 }, { "accuracy": 54.8828, "doc_norm": 6.9298, "encoder_q-embeddings": 18584.6289, "encoder_q-layer.0": 12734.2891, "encoder_q-layer.1": 12848.5557, "encoder_q-layer.10": 20741.5078, "encoder_q-layer.11": 39505.2695, "encoder_q-layer.2": 13925.2539, "encoder_q-layer.3": 14154.7969, "encoder_q-layer.4": 14382.4717, "encoder_q-layer.5": 13724.0371, "encoder_q-layer.6": 14438.8535, "encoder_q-layer.7": 16150.5576, "encoder_q-layer.8": 18208.7871, "encoder_q-layer.9": 16762.1855, "epoch": 0.34, "inbatch_neg_score": 46.4241, "inbatch_pos_score": 46.9375, "learning_rate": 3.644444444444445e-05, "loss": 2.3647, "norm_diff": 0.0278, "num_tokens_overlap": 5.557, "num_tokens_union": 54.8978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27143.7427, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.902, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.696, "sent_len_1": 66.6125, "sent_len_max_0": 18.8463, "sent_len_max_1": 189.8738, "stdk": 0.0431, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 34400 }, { "accuracy": 54.6875, "doc_norm": 6.9262, "encoder_q-embeddings": 19243.123, "encoder_q-layer.0": 12978.0049, "encoder_q-layer.1": 13425.3789, "encoder_q-layer.10": 25338.3281, "encoder_q-layer.11": 45620.0078, "encoder_q-layer.2": 14605.2246, "encoder_q-layer.3": 14946.9639, "encoder_q-layer.4": 15557.998, "encoder_q-layer.5": 15000.7959, "encoder_q-layer.6": 15701.9238, "encoder_q-layer.7": 16184.541, "encoder_q-layer.8": 18038.0176, "encoder_q-layer.9": 17743.1875, "epoch": 0.34, "inbatch_neg_score": 46.3931, "inbatch_pos_score": 46.875, "learning_rate": 3.638888888888889e-05, "loss": 2.3368, "norm_diff": 0.0253, "num_tokens_overlap": 5.5932, "num_tokens_union": 55.0998, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29871.1954, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.9008, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7367, "sent_len_1": 66.935, "sent_len_max_0": 18.8888, "sent_len_max_1": 189.1975, "stdk": 0.0424, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 34500 }, { "accuracy": 55.2734, "doc_norm": 6.9248, "encoder_q-embeddings": 18568.4766, "encoder_q-layer.0": 12857.3408, "encoder_q-layer.1": 13871.3643, "encoder_q-layer.10": 22288.8965, "encoder_q-layer.11": 46599.3633, "encoder_q-layer.2": 14608.2578, "encoder_q-layer.3": 14513.6045, "encoder_q-layer.4": 14842.4316, "encoder_q-layer.5": 14187.1016, "encoder_q-layer.6": 15604.0811, "encoder_q-layer.7": 15831.9375, "encoder_q-layer.8": 17815.2324, "encoder_q-layer.9": 16828.4258, "epoch": 0.34, "inbatch_neg_score": 46.3902, "inbatch_pos_score": 46.875, "learning_rate": 3.633333333333333e-05, "loss": 2.3691, "norm_diff": 0.0271, "num_tokens_overlap": 5.5832, "num_tokens_union": 55.213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29022.4667, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8977, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.72, "sent_len_1": 67.0649, "sent_len_max_0": 18.8375, "sent_len_max_1": 190.1738, "stdk": 0.0431, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 34600 }, { "accuracy": 54.4922, "doc_norm": 6.9275, "encoder_q-embeddings": 18645.5664, "encoder_q-layer.0": 12914.5859, "encoder_q-layer.1": 13203.1611, "encoder_q-layer.10": 26376.1445, "encoder_q-layer.11": 43228.8281, "encoder_q-layer.2": 14384.9629, "encoder_q-layer.3": 14667.4746, "encoder_q-layer.4": 14609.3877, "encoder_q-layer.5": 14392.0771, "encoder_q-layer.6": 16104.8174, "encoder_q-layer.7": 16826.1133, "encoder_q-layer.8": 19657.3613, "encoder_q-layer.9": 17752.0391, "epoch": 0.34, "inbatch_neg_score": 46.3714, "inbatch_pos_score": 46.875, "learning_rate": 3.6277777777777776e-05, "loss": 2.3879, "norm_diff": 0.0291, "num_tokens_overlap": 5.5732, "num_tokens_union": 54.9766, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29079.4934, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8984, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7167, "sent_len_1": 66.7467, "sent_len_max_0": 18.86, "sent_len_max_1": 189.7488, "stdk": 0.0438, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 34700 }, { "accuracy": 56.6406, "doc_norm": 6.9193, "encoder_q-embeddings": 18964.5996, "encoder_q-layer.0": 13323.2861, "encoder_q-layer.1": 13183.5703, "encoder_q-layer.10": 20688.7793, "encoder_q-layer.11": 38973.6016, "encoder_q-layer.2": 14289.0996, "encoder_q-layer.3": 13970.7148, "encoder_q-layer.4": 14440.5459, "encoder_q-layer.5": 13762.9277, "encoder_q-layer.6": 14503.9404, "encoder_q-layer.7": 14976.4873, "encoder_q-layer.8": 16786.7559, "encoder_q-layer.9": 16545.4297, "epoch": 0.34, "inbatch_neg_score": 46.3021, "inbatch_pos_score": 46.8125, "learning_rate": 3.6222222222222225e-05, "loss": 2.3249, "norm_diff": 0.025, "num_tokens_overlap": 5.5722, "num_tokens_union": 54.9528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27036.6138, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8943, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7144, "sent_len_1": 66.7102, "sent_len_max_0": 18.91, "sent_len_max_1": 188.6875, "stdk": 0.0429, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 34800 }, { "accuracy": 55.7617, "doc_norm": 6.9223, "encoder_q-embeddings": 18873.7852, "encoder_q-layer.0": 13000.1836, "encoder_q-layer.1": 13536.1465, "encoder_q-layer.10": 20788.7695, "encoder_q-layer.11": 39389.5547, "encoder_q-layer.2": 14403.6406, "encoder_q-layer.3": 14352.9238, "encoder_q-layer.4": 14441.8799, "encoder_q-layer.5": 13932.0137, "encoder_q-layer.6": 15195.3291, "encoder_q-layer.7": 15621.0781, "encoder_q-layer.8": 17202.9414, "encoder_q-layer.9": 16800.4102, "epoch": 0.34, "inbatch_neg_score": 46.29, "inbatch_pos_score": 46.8125, "learning_rate": 3.6166666666666674e-05, "loss": 2.3468, "norm_diff": 0.0296, "num_tokens_overlap": 5.5818, "num_tokens_union": 54.9762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27414.2064, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8927, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7495, "sent_len_1": 66.6615, "sent_len_max_0": 18.795, "sent_len_max_1": 191.6975, "stdk": 0.0434, "stdq": 0.039, "stdqueue_k": 0.0, "step": 34900 }, { "accuracy": 52.0508, "doc_norm": 6.9219, "encoder_q-embeddings": 18272.1895, "encoder_q-layer.0": 12494.9619, "encoder_q-layer.1": 13313.4404, "encoder_q-layer.10": 24348.7734, "encoder_q-layer.11": 44972.1641, "encoder_q-layer.2": 14627.5977, "encoder_q-layer.3": 15011.6729, "encoder_q-layer.4": 14989.0068, "encoder_q-layer.5": 14331.8848, "encoder_q-layer.6": 15461.8711, "encoder_q-layer.7": 16306.6914, "encoder_q-layer.8": 18974.2852, "encoder_q-layer.9": 18064.0547, "epoch": 0.34, "inbatch_neg_score": 46.2632, "inbatch_pos_score": 46.75, "learning_rate": 3.611111111111111e-05, "loss": 2.3646, "norm_diff": 0.0329, "num_tokens_overlap": 5.5864, "num_tokens_union": 55.0167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28613.4922, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.889, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7212, "sent_len_1": 66.7784, "sent_len_max_0": 18.765, "sent_len_max_1": 191.2012, "stdk": 0.0434, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 35000 }, { "accuracy": 56.9336, "doc_norm": 6.9184, "encoder_q-embeddings": 17390.8047, "encoder_q-layer.0": 12081.8105, "encoder_q-layer.1": 12604.1309, "encoder_q-layer.10": 21890.1387, "encoder_q-layer.11": 42604.0039, "encoder_q-layer.2": 13538.3057, "encoder_q-layer.3": 13931.5049, "encoder_q-layer.4": 14419.5166, "encoder_q-layer.5": 13940.4922, "encoder_q-layer.6": 15121.6406, "encoder_q-layer.7": 15596.249, "encoder_q-layer.8": 17787.0996, "encoder_q-layer.9": 16390.2539, "epoch": 0.34, "inbatch_neg_score": 46.3251, "inbatch_pos_score": 46.8125, "learning_rate": 3.605555555555556e-05, "loss": 2.3485, "norm_diff": 0.0264, "num_tokens_overlap": 5.5826, "num_tokens_union": 55.018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27075.3799, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.892, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7119, "sent_len_1": 66.8556, "sent_len_max_0": 18.91, "sent_len_max_1": 190.1662, "stdk": 0.0422, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 35100 }, { "accuracy": 55.4688, "doc_norm": 6.9252, "encoder_q-embeddings": 19550.5176, "encoder_q-layer.0": 13633.7734, "encoder_q-layer.1": 13995.1357, "encoder_q-layer.10": 24186.1289, "encoder_q-layer.11": 46928.1406, "encoder_q-layer.2": 15027.5273, "encoder_q-layer.3": 15115.0029, "encoder_q-layer.4": 15335.9746, "encoder_q-layer.5": 14773.5391, "encoder_q-layer.6": 15871.7295, "encoder_q-layer.7": 18468.9473, "encoder_q-layer.8": 20308.7676, "encoder_q-layer.9": 18798.6016, "epoch": 0.34, "inbatch_neg_score": 46.3486, "inbatch_pos_score": 46.8438, "learning_rate": 3.6e-05, "loss": 2.3686, "norm_diff": 0.0286, "num_tokens_overlap": 5.5715, "num_tokens_union": 55.0562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30176.8091, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8966, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7048, "sent_len_1": 66.8055, "sent_len_max_0": 18.8287, "sent_len_max_1": 188.5613, "stdk": 0.044, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 35200 }, { "accuracy": 54.4922, "doc_norm": 6.9184, "encoder_q-embeddings": 17904.4395, "encoder_q-layer.0": 12710.0957, "encoder_q-layer.1": 13400.0566, "encoder_q-layer.10": 20513.0586, "encoder_q-layer.11": 38746.4883, "encoder_q-layer.2": 14751.918, "encoder_q-layer.3": 14790.4629, "encoder_q-layer.4": 14839.3125, "encoder_q-layer.5": 14191.4463, "encoder_q-layer.6": 14756.4316, "encoder_q-layer.7": 15602.2578, "encoder_q-layer.8": 17320.5527, "encoder_q-layer.9": 16090.4551, "epoch": 0.34, "inbatch_neg_score": 46.2855, "inbatch_pos_score": 46.75, "learning_rate": 3.594444444444445e-05, "loss": 2.3875, "norm_diff": 0.028, "num_tokens_overlap": 5.5842, "num_tokens_union": 55.0816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27096.6586, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8903, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7095, "sent_len_1": 66.8292, "sent_len_max_0": 18.815, "sent_len_max_1": 188.2163, "stdk": 0.0426, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 35300 }, { "accuracy": 55.3711, "doc_norm": 6.9164, "encoder_q-embeddings": 17495.998, "encoder_q-layer.0": 12614.3809, "encoder_q-layer.1": 13086.4766, "encoder_q-layer.10": 20266.1836, "encoder_q-layer.11": 39191.6602, "encoder_q-layer.2": 13789.4209, "encoder_q-layer.3": 13906.9893, "encoder_q-layer.4": 14138.4707, "encoder_q-layer.5": 13648.8584, "encoder_q-layer.6": 14645.8438, "encoder_q-layer.7": 15499.7695, "encoder_q-layer.8": 17443.8164, "encoder_q-layer.9": 16604.7227, "epoch": 0.35, "inbatch_neg_score": 46.2741, "inbatch_pos_score": 46.7812, "learning_rate": 3.5888888888888886e-05, "loss": 2.3741, "norm_diff": 0.0249, "num_tokens_overlap": 5.5843, "num_tokens_union": 55.0208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26572.4382, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8915, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.746, "sent_len_1": 66.7466, "sent_len_max_0": 18.9812, "sent_len_max_1": 190.7075, "stdk": 0.0426, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 35400 }, { "accuracy": 54.6875, "doc_norm": 6.9162, "encoder_q-embeddings": 18329.082, "encoder_q-layer.0": 12719.3994, "encoder_q-layer.1": 13294.418, "encoder_q-layer.10": 23351.0977, "encoder_q-layer.11": 45052.2422, "encoder_q-layer.2": 14245.543, "encoder_q-layer.3": 14460.5947, "encoder_q-layer.4": 14988.2783, "encoder_q-layer.5": 14417.1191, "encoder_q-layer.6": 15169.751, "encoder_q-layer.7": 17054.6562, "encoder_q-layer.8": 18703.5469, "encoder_q-layer.9": 17744.1074, "epoch": 0.35, "inbatch_neg_score": 46.2212, "inbatch_pos_score": 46.7188, "learning_rate": 3.5833333333333335e-05, "loss": 2.3758, "norm_diff": 0.0288, "num_tokens_overlap": 5.5819, "num_tokens_union": 55.0053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28460.3973, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8873, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7428, "sent_len_1": 66.7999, "sent_len_max_0": 18.7987, "sent_len_max_1": 190.9075, "stdk": 0.0431, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 35500 }, { "accuracy": 55.1758, "doc_norm": 6.9134, "encoder_q-embeddings": 18802.7871, "encoder_q-layer.0": 12950.209, "encoder_q-layer.1": 13213.6289, "encoder_q-layer.10": 25497.8828, "encoder_q-layer.11": 47255.4062, "encoder_q-layer.2": 14075.5186, "encoder_q-layer.3": 14813.4434, "encoder_q-layer.4": 14914.1465, "encoder_q-layer.5": 14653.2314, "encoder_q-layer.6": 15250.4922, "encoder_q-layer.7": 17471.1719, "encoder_q-layer.8": 19033.125, "encoder_q-layer.9": 17920.6055, "epoch": 0.35, "inbatch_neg_score": 46.1965, "inbatch_pos_score": 46.7188, "learning_rate": 3.577777777777778e-05, "loss": 2.388, "norm_diff": 0.0262, "num_tokens_overlap": 5.573, "num_tokens_union": 55.0209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29408.2364, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8872, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7062, "sent_len_1": 66.7959, "sent_len_max_0": 18.8238, "sent_len_max_1": 189.8812, "stdk": 0.0431, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 35600 }, { "accuracy": 56.6406, "doc_norm": 6.91, "encoder_q-embeddings": 17549.1602, "encoder_q-layer.0": 12305.8076, "encoder_q-layer.1": 12605.54, "encoder_q-layer.10": 29822.5254, "encoder_q-layer.11": 40391.8672, "encoder_q-layer.2": 13566.9004, "encoder_q-layer.3": 13742.7529, "encoder_q-layer.4": 14166.3594, "encoder_q-layer.5": 14374.1367, "encoder_q-layer.6": 15421.6074, "encoder_q-layer.7": 16973.0586, "encoder_q-layer.8": 20524.9258, "encoder_q-layer.9": 19416.7109, "epoch": 0.35, "inbatch_neg_score": 46.1994, "inbatch_pos_score": 46.6875, "learning_rate": 3.5722222222222226e-05, "loss": 2.3179, "norm_diff": 0.0293, "num_tokens_overlap": 5.5926, "num_tokens_union": 54.9922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28391.6782, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8807, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7529, "sent_len_1": 66.7208, "sent_len_max_0": 18.8137, "sent_len_max_1": 187.31, "stdk": 0.0422, "stdq": 0.0377, "stdqueue_k": 0.0, "step": 35700 }, { "accuracy": 51.4648, "doc_norm": 6.9094, "encoder_q-embeddings": 18767.3125, "encoder_q-layer.0": 12946.5967, "encoder_q-layer.1": 13380.7412, "encoder_q-layer.10": 21304.9453, "encoder_q-layer.11": 40492.4062, "encoder_q-layer.2": 14251.5225, "encoder_q-layer.3": 14657.4023, "encoder_q-layer.4": 15083.3447, "encoder_q-layer.5": 14561.1768, "encoder_q-layer.6": 15238.541, "encoder_q-layer.7": 17194.5742, "encoder_q-layer.8": 18476.4023, "encoder_q-layer.9": 16754.5039, "epoch": 0.35, "inbatch_neg_score": 46.1893, "inbatch_pos_score": 46.6562, "learning_rate": 3.566666666666667e-05, "loss": 2.3983, "norm_diff": 0.0276, "num_tokens_overlap": 5.5889, "num_tokens_union": 55.1436, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27979.1982, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8818, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7192, "sent_len_1": 66.8947, "sent_len_max_0": 18.805, "sent_len_max_1": 187.2713, "stdk": 0.0429, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 35800 }, { "accuracy": 52.1484, "doc_norm": 6.9096, "encoder_q-embeddings": 19632.0391, "encoder_q-layer.0": 13099.6025, "encoder_q-layer.1": 13270.2217, "encoder_q-layer.10": 22140.1582, "encoder_q-layer.11": 39499.4102, "encoder_q-layer.2": 14574.1367, "encoder_q-layer.3": 14715.1729, "encoder_q-layer.4": 14977.4941, "encoder_q-layer.5": 14657.5557, "encoder_q-layer.6": 15459.375, "encoder_q-layer.7": 16847.7676, "encoder_q-layer.8": 20152.1914, "encoder_q-layer.9": 17773.9629, "epoch": 0.35, "inbatch_neg_score": 46.1668, "inbatch_pos_score": 46.6562, "learning_rate": 3.561111111111111e-05, "loss": 2.3157, "norm_diff": 0.0285, "num_tokens_overlap": 5.5877, "num_tokens_union": 54.9636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28509.4615, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8812, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7149, "sent_len_1": 66.7584, "sent_len_max_0": 18.74, "sent_len_max_1": 188.645, "stdk": 0.0421, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 35900 }, { "accuracy": 54.6875, "doc_norm": 6.9111, "encoder_q-embeddings": 18487.3555, "encoder_q-layer.0": 12731.458, "encoder_q-layer.1": 13376.0557, "encoder_q-layer.10": 24785.457, "encoder_q-layer.11": 45949.7617, "encoder_q-layer.2": 14543.1562, "encoder_q-layer.3": 14711.9971, "encoder_q-layer.4": 15081.3301, "encoder_q-layer.5": 15406.0488, "encoder_q-layer.6": 16230.7832, "encoder_q-layer.7": 16792.7695, "encoder_q-layer.8": 19306.4961, "encoder_q-layer.9": 17981.1875, "epoch": 0.35, "inbatch_neg_score": 46.1324, "inbatch_pos_score": 46.625, "learning_rate": 3.555555555555556e-05, "loss": 2.3485, "norm_diff": 0.0306, "num_tokens_overlap": 5.5802, "num_tokens_union": 54.8639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29287.4017, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8805, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7091, "sent_len_1": 66.5905, "sent_len_max_0": 18.805, "sent_len_max_1": 189.8125, "stdk": 0.0438, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 36000 }, { "accuracy": 54.7852, "doc_norm": 6.9037, "encoder_q-embeddings": 38083.3906, "encoder_q-layer.0": 25345.1152, "encoder_q-layer.1": 25842.9512, "encoder_q-layer.10": 46716.2188, "encoder_q-layer.11": 85376.8906, "encoder_q-layer.2": 28050.625, "encoder_q-layer.3": 27486.0898, "encoder_q-layer.4": 28716.0137, "encoder_q-layer.5": 27903.5957, "encoder_q-layer.6": 29307.8555, "encoder_q-layer.7": 32579.4727, "encoder_q-layer.8": 34929.6367, "encoder_q-layer.9": 31726.3652, "epoch": 0.35, "inbatch_neg_score": 46.0408, "inbatch_pos_score": 46.5312, "learning_rate": 3.55e-05, "loss": 2.3316, "norm_diff": 0.0304, "num_tokens_overlap": 5.5857, "num_tokens_union": 55.1764, "postclip_grad_norm": 1.0, "preclip_grad_norm": 55451.3733, "preclip_grad_norm_avg": 0.0005, "query_norm": 6.8733, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7632, "sent_len_1": 66.9716, "sent_len_max_0": 18.8813, "sent_len_max_1": 189.0875, "stdk": 0.0436, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 36100 }, { "accuracy": 55.8594, "doc_norm": 6.8992, "encoder_q-embeddings": 18594.4336, "encoder_q-layer.0": 13112.9248, "encoder_q-layer.1": 13409.0801, "encoder_q-layer.10": 22019.2266, "encoder_q-layer.11": 41631.9023, "encoder_q-layer.2": 13796.499, "encoder_q-layer.3": 13690.5801, "encoder_q-layer.4": 14315.5117, "encoder_q-layer.5": 13704.3721, "encoder_q-layer.6": 14779.8535, "encoder_q-layer.7": 16564.2363, "encoder_q-layer.8": 18778.0059, "encoder_q-layer.9": 16704.2188, "epoch": 0.35, "inbatch_neg_score": 46.036, "inbatch_pos_score": 46.5, "learning_rate": 3.5444444444444445e-05, "loss": 2.3314, "norm_diff": 0.028, "num_tokens_overlap": 5.5798, "num_tokens_union": 55.0155, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27361.1701, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8712, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7163, "sent_len_1": 66.7874, "sent_len_max_0": 18.8337, "sent_len_max_1": 188.5575, "stdk": 0.042, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 36200 }, { "accuracy": 57.4219, "doc_norm": 6.9039, "encoder_q-embeddings": 17491.0625, "encoder_q-layer.0": 12563.2822, "encoder_q-layer.1": 12874.54, "encoder_q-layer.10": 21063.1152, "encoder_q-layer.11": 40407.6992, "encoder_q-layer.2": 14149.1533, "encoder_q-layer.3": 14264.4004, "encoder_q-layer.4": 14057.0645, "encoder_q-layer.5": 13650.4971, "encoder_q-layer.6": 14440.2373, "encoder_q-layer.7": 15633.0078, "encoder_q-layer.8": 17350.0195, "encoder_q-layer.9": 16264.2393, "epoch": 0.35, "inbatch_neg_score": 46.0276, "inbatch_pos_score": 46.5625, "learning_rate": 3.538888888888889e-05, "loss": 2.3817, "norm_diff": 0.0298, "num_tokens_overlap": 5.5736, "num_tokens_union": 54.9832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26664.7193, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8741, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.701, "sent_len_1": 66.786, "sent_len_max_0": 18.8175, "sent_len_max_1": 190.6012, "stdk": 0.0435, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 36300 }, { "accuracy": 56.1523, "doc_norm": 6.9058, "encoder_q-embeddings": 18269.5684, "encoder_q-layer.0": 12534.7529, "encoder_q-layer.1": 12990.6729, "encoder_q-layer.10": 27891.0996, "encoder_q-layer.11": 45943.2852, "encoder_q-layer.2": 14472.5322, "encoder_q-layer.3": 14606.3213, "encoder_q-layer.4": 15044.3672, "encoder_q-layer.5": 14738.1611, "encoder_q-layer.6": 16068.3682, "encoder_q-layer.7": 17429.0137, "encoder_q-layer.8": 19725.5469, "encoder_q-layer.9": 18995.084, "epoch": 0.36, "inbatch_neg_score": 45.9845, "inbatch_pos_score": 46.5, "learning_rate": 3.5333333333333336e-05, "loss": 2.3367, "norm_diff": 0.0327, "num_tokens_overlap": 5.5855, "num_tokens_union": 55.0722, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29967.5328, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8731, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.709, "sent_len_1": 66.8969, "sent_len_max_0": 18.8912, "sent_len_max_1": 190.1475, "stdk": 0.0438, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 36400 }, { "accuracy": 58.4961, "doc_norm": 6.8951, "encoder_q-embeddings": 17305.8828, "encoder_q-layer.0": 12259.2139, "encoder_q-layer.1": 12700.3135, "encoder_q-layer.10": 19681.4727, "encoder_q-layer.11": 39659.5586, "encoder_q-layer.2": 13723.2178, "encoder_q-layer.3": 13796.4902, "encoder_q-layer.4": 13862.7129, "encoder_q-layer.5": 13811.9912, "encoder_q-layer.6": 14745.2266, "encoder_q-layer.7": 15812.1367, "encoder_q-layer.8": 17174.6699, "encoder_q-layer.9": 16173.4287, "epoch": 0.36, "inbatch_neg_score": 45.9398, "inbatch_pos_score": 46.4688, "learning_rate": 3.527777777777778e-05, "loss": 2.4152, "norm_diff": 0.0265, "num_tokens_overlap": 5.5736, "num_tokens_union": 55.0532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26567.7729, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8685, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7354, "sent_len_1": 66.8348, "sent_len_max_0": 18.8463, "sent_len_max_1": 190.3512, "stdk": 0.0433, "stdq": 0.04, "stdqueue_k": 0.0, "step": 36500 }, { "accuracy": 59.082, "doc_norm": 6.8889, "encoder_q-embeddings": 18106.2012, "encoder_q-layer.0": 12289.0859, "encoder_q-layer.1": 12545.3857, "encoder_q-layer.10": 23236.0645, "encoder_q-layer.11": 38926.9922, "encoder_q-layer.2": 13436.4492, "encoder_q-layer.3": 13517.748, "encoder_q-layer.4": 13745.1045, "encoder_q-layer.5": 13378.418, "encoder_q-layer.6": 14267.5693, "encoder_q-layer.7": 15518.3535, "encoder_q-layer.8": 17737.4434, "encoder_q-layer.9": 16233.6758, "epoch": 0.36, "inbatch_neg_score": 45.8591, "inbatch_pos_score": 46.375, "learning_rate": 3.522222222222222e-05, "loss": 2.3596, "norm_diff": 0.0288, "num_tokens_overlap": 5.5871, "num_tokens_union": 55.0136, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26950.2984, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8601, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7188, "sent_len_1": 66.7798, "sent_len_max_0": 18.8837, "sent_len_max_1": 188.6213, "stdk": 0.0425, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 36600 }, { "accuracy": 54.4922, "doc_norm": 6.8894, "encoder_q-embeddings": 18297.1914, "encoder_q-layer.0": 13008.0537, "encoder_q-layer.1": 13886.7715, "encoder_q-layer.10": 24623.9336, "encoder_q-layer.11": 41400.1914, "encoder_q-layer.2": 14808.3701, "encoder_q-layer.3": 15344.7979, "encoder_q-layer.4": 15408.2949, "encoder_q-layer.5": 14758.7158, "encoder_q-layer.6": 15776.5898, "encoder_q-layer.7": 16116.916, "encoder_q-layer.8": 17901.8242, "encoder_q-layer.9": 17227.0352, "epoch": 0.36, "inbatch_neg_score": 45.8138, "inbatch_pos_score": 46.3125, "learning_rate": 3.516666666666667e-05, "loss": 2.346, "norm_diff": 0.0322, "num_tokens_overlap": 5.5735, "num_tokens_union": 55.0157, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28545.2976, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8572, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7034, "sent_len_1": 66.7561, "sent_len_max_0": 18.9575, "sent_len_max_1": 187.765, "stdk": 0.0439, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 36700 }, { "accuracy": 56.25, "doc_norm": 6.8879, "encoder_q-embeddings": 19096.2363, "encoder_q-layer.0": 13100.1494, "encoder_q-layer.1": 13357.9688, "encoder_q-layer.10": 21187.9902, "encoder_q-layer.11": 39778.4609, "encoder_q-layer.2": 14563.1982, "encoder_q-layer.3": 14689.3066, "encoder_q-layer.4": 14595.3047, "encoder_q-layer.5": 14447.4355, "encoder_q-layer.6": 15242.2559, "encoder_q-layer.7": 15848.707, "encoder_q-layer.8": 17387.8223, "encoder_q-layer.9": 16465.6309, "epoch": 0.36, "inbatch_neg_score": 45.824, "inbatch_pos_score": 46.3438, "learning_rate": 3.511111111111111e-05, "loss": 2.3517, "norm_diff": 0.0308, "num_tokens_overlap": 5.5633, "num_tokens_union": 55.0456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27291.6225, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8572, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7229, "sent_len_1": 66.7596, "sent_len_max_0": 18.8287, "sent_len_max_1": 186.5725, "stdk": 0.0431, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 36800 }, { "accuracy": 57.0312, "doc_norm": 6.8903, "encoder_q-embeddings": 18768.6016, "encoder_q-layer.0": 12823.0869, "encoder_q-layer.1": 13126.8281, "encoder_q-layer.10": 20218.3613, "encoder_q-layer.11": 38209.293, "encoder_q-layer.2": 14131.1377, "encoder_q-layer.3": 13920.2568, "encoder_q-layer.4": 14615.4619, "encoder_q-layer.5": 14177.082, "encoder_q-layer.6": 15036.2998, "encoder_q-layer.7": 16414.3926, "encoder_q-layer.8": 17861.3945, "encoder_q-layer.9": 15707.3545, "epoch": 0.36, "inbatch_neg_score": 45.8856, "inbatch_pos_score": 46.375, "learning_rate": 3.505555555555556e-05, "loss": 2.3622, "norm_diff": 0.0284, "num_tokens_overlap": 5.5671, "num_tokens_union": 55.036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26937.9594, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8618, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7161, "sent_len_1": 66.7825, "sent_len_max_0": 18.8075, "sent_len_max_1": 189.1362, "stdk": 0.0422, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 36900 }, { "accuracy": 55.2734, "doc_norm": 6.8938, "encoder_q-embeddings": 18905.168, "encoder_q-layer.0": 12650.8027, "encoder_q-layer.1": 12752.2979, "encoder_q-layer.10": 21996.3262, "encoder_q-layer.11": 39933.4023, "encoder_q-layer.2": 13900.3535, "encoder_q-layer.3": 14426.2305, "encoder_q-layer.4": 14320.1025, "encoder_q-layer.5": 13797.1025, "encoder_q-layer.6": 14781.0459, "encoder_q-layer.7": 15784.1221, "encoder_q-layer.8": 17699.3242, "encoder_q-layer.9": 16106.1553, "epoch": 0.36, "inbatch_neg_score": 45.8915, "inbatch_pos_score": 46.4062, "learning_rate": 3.5e-05, "loss": 2.3719, "norm_diff": 0.0312, "num_tokens_overlap": 5.575, "num_tokens_union": 54.9626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27503.8348, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8626, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7191, "sent_len_1": 66.7394, "sent_len_max_0": 18.8463, "sent_len_max_1": 190.3363, "stdk": 0.0447, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 37000 }, { "accuracy": 57.9102, "doc_norm": 6.8902, "encoder_q-embeddings": 18069.0, "encoder_q-layer.0": 12555.8506, "encoder_q-layer.1": 12542.0674, "encoder_q-layer.10": 21790.5098, "encoder_q-layer.11": 43132.3984, "encoder_q-layer.2": 13586.0078, "encoder_q-layer.3": 13870.6504, "encoder_q-layer.4": 13832.5742, "encoder_q-layer.5": 13409.2861, "encoder_q-layer.6": 15230.8945, "encoder_q-layer.7": 17037.2695, "encoder_q-layer.8": 17987.4883, "encoder_q-layer.9": 17475.3008, "epoch": 0.36, "inbatch_neg_score": 45.8457, "inbatch_pos_score": 46.375, "learning_rate": 3.4944444444444446e-05, "loss": 2.3791, "norm_diff": 0.0325, "num_tokens_overlap": 5.5851, "num_tokens_union": 54.9123, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27648.665, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8577, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7172, "sent_len_1": 66.6072, "sent_len_max_0": 18.9788, "sent_len_max_1": 187.755, "stdk": 0.0445, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 37100 }, { "accuracy": 55.6641, "doc_norm": 6.8842, "encoder_q-embeddings": 18357.1719, "encoder_q-layer.0": 13027.502, "encoder_q-layer.1": 13327.2705, "encoder_q-layer.10": 20222.5293, "encoder_q-layer.11": 42245.207, "encoder_q-layer.2": 14427.2529, "encoder_q-layer.3": 14102.127, "encoder_q-layer.4": 14405.9873, "encoder_q-layer.5": 13833.833, "encoder_q-layer.6": 14908.9883, "encoder_q-layer.7": 15996.6299, "encoder_q-layer.8": 17591.8301, "encoder_q-layer.9": 16212.5469, "epoch": 0.36, "inbatch_neg_score": 45.7542, "inbatch_pos_score": 46.2812, "learning_rate": 3.4888888888888895e-05, "loss": 2.3506, "norm_diff": 0.0299, "num_tokens_overlap": 5.5813, "num_tokens_union": 55.1521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27588.9817, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8543, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7344, "sent_len_1": 67.0577, "sent_len_max_0": 18.8625, "sent_len_max_1": 188.8825, "stdk": 0.0434, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 37200 }, { "accuracy": 55.957, "doc_norm": 6.8858, "encoder_q-embeddings": 18076.5605, "encoder_q-layer.0": 12681.7539, "encoder_q-layer.1": 13236.584, "encoder_q-layer.10": 21143.4512, "encoder_q-layer.11": 43285.7656, "encoder_q-layer.2": 14507.0117, "encoder_q-layer.3": 14832.376, "encoder_q-layer.4": 14910.915, "encoder_q-layer.5": 14785.0723, "encoder_q-layer.6": 16069.084, "encoder_q-layer.7": 17805.9688, "encoder_q-layer.8": 18800.7539, "encoder_q-layer.9": 17072.3301, "epoch": 0.36, "inbatch_neg_score": 45.7491, "inbatch_pos_score": 46.25, "learning_rate": 3.483333333333334e-05, "loss": 2.3312, "norm_diff": 0.0333, "num_tokens_overlap": 5.5758, "num_tokens_union": 54.937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28412.1943, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8526, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7181, "sent_len_1": 66.6525, "sent_len_max_0": 18.8337, "sent_len_max_1": 187.6125, "stdk": 0.0441, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 37300 }, { "accuracy": 53.7109, "doc_norm": 6.8779, "encoder_q-embeddings": 18876.2617, "encoder_q-layer.0": 12712.4404, "encoder_q-layer.1": 13595.541, "encoder_q-layer.10": 24855.4277, "encoder_q-layer.11": 46102.2422, "encoder_q-layer.2": 14670.9668, "encoder_q-layer.3": 14928.0869, "encoder_q-layer.4": 15701.6729, "encoder_q-layer.5": 14854.4043, "encoder_q-layer.6": 16103.4834, "encoder_q-layer.7": 17239.2793, "encoder_q-layer.8": 19558.6133, "encoder_q-layer.9": 19035.4453, "epoch": 0.37, "inbatch_neg_score": 45.6737, "inbatch_pos_score": 46.1562, "learning_rate": 3.477777777777778e-05, "loss": 2.3085, "norm_diff": 0.0306, "num_tokens_overlap": 5.5703, "num_tokens_union": 54.996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29604.9281, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8472, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6982, "sent_len_1": 66.8147, "sent_len_max_0": 18.8038, "sent_len_max_1": 192.6188, "stdk": 0.0432, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 37400 }, { "accuracy": 59.8633, "doc_norm": 6.8801, "encoder_q-embeddings": 17700.9238, "encoder_q-layer.0": 12353.6748, "encoder_q-layer.1": 12776.5508, "encoder_q-layer.10": 20655.166, "encoder_q-layer.11": 43511.1055, "encoder_q-layer.2": 14094.2402, "encoder_q-layer.3": 13813.1172, "encoder_q-layer.4": 14412.7188, "encoder_q-layer.5": 13767.3184, "encoder_q-layer.6": 14761.1475, "encoder_q-layer.7": 16309.7246, "encoder_q-layer.8": 18052.5859, "encoder_q-layer.9": 16344.4746, "epoch": 0.37, "inbatch_neg_score": 45.6633, "inbatch_pos_score": 46.1875, "learning_rate": 3.472222222222222e-05, "loss": 2.3478, "norm_diff": 0.0329, "num_tokens_overlap": 5.6013, "num_tokens_union": 54.997, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27616.9551, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8472, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.727, "sent_len_1": 66.7811, "sent_len_max_0": 18.7913, "sent_len_max_1": 187.8775, "stdk": 0.0438, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 37500 }, { "accuracy": 57.7148, "doc_norm": 6.8669, "encoder_q-embeddings": 17208.5117, "encoder_q-layer.0": 12553.8984, "encoder_q-layer.1": 12606.6377, "encoder_q-layer.10": 26966.5176, "encoder_q-layer.11": 42762.0625, "encoder_q-layer.2": 13787.8984, "encoder_q-layer.3": 13600.3916, "encoder_q-layer.4": 13805.9385, "encoder_q-layer.5": 13700.584, "encoder_q-layer.6": 14849.3945, "encoder_q-layer.7": 16719.6094, "encoder_q-layer.8": 18494.5996, "encoder_q-layer.9": 16957.0254, "epoch": 0.37, "inbatch_neg_score": 45.5879, "inbatch_pos_score": 46.0625, "learning_rate": 3.466666666666667e-05, "loss": 2.3009, "norm_diff": 0.0293, "num_tokens_overlap": 5.5787, "num_tokens_union": 54.9642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28021.2527, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8376, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7201, "sent_len_1": 66.6167, "sent_len_max_0": 18.8575, "sent_len_max_1": 189.7325, "stdk": 0.0425, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 37600 }, { "accuracy": 55.957, "doc_norm": 6.8736, "encoder_q-embeddings": 18632.959, "encoder_q-layer.0": 13246.1924, "encoder_q-layer.1": 13604.8398, "encoder_q-layer.10": 21755.5273, "encoder_q-layer.11": 44927.5469, "encoder_q-layer.2": 14508.7334, "encoder_q-layer.3": 14555.7529, "encoder_q-layer.4": 14519.8701, "encoder_q-layer.5": 14434.4131, "encoder_q-layer.6": 14893.6133, "encoder_q-layer.7": 15960.4766, "encoder_q-layer.8": 18640.8945, "encoder_q-layer.9": 17270.9121, "epoch": 0.37, "inbatch_neg_score": 45.6351, "inbatch_pos_score": 46.125, "learning_rate": 3.4611111111111114e-05, "loss": 2.3544, "norm_diff": 0.0301, "num_tokens_overlap": 5.5872, "num_tokens_union": 55.0611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28422.9108, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8434, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7438, "sent_len_1": 66.8502, "sent_len_max_0": 18.8762, "sent_len_max_1": 188.5412, "stdk": 0.0431, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 37700 }, { "accuracy": 55.7617, "doc_norm": 6.8711, "encoder_q-embeddings": 18424.293, "encoder_q-layer.0": 12892.9551, "encoder_q-layer.1": 13221.3906, "encoder_q-layer.10": 25811.3809, "encoder_q-layer.11": 52055.4375, "encoder_q-layer.2": 14340.6816, "encoder_q-layer.3": 14544.8096, "encoder_q-layer.4": 14734.7969, "encoder_q-layer.5": 14468.1309, "encoder_q-layer.6": 15307.541, "encoder_q-layer.7": 16794.3867, "encoder_q-layer.8": 18883.4648, "encoder_q-layer.9": 18135.3887, "epoch": 0.37, "inbatch_neg_score": 45.5326, "inbatch_pos_score": 46.0625, "learning_rate": 3.4555555555555556e-05, "loss": 2.3532, "norm_diff": 0.0357, "num_tokens_overlap": 5.5835, "num_tokens_union": 55.0351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30583.7627, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8354, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7298, "sent_len_1": 66.7865, "sent_len_max_0": 18.7638, "sent_len_max_1": 189.655, "stdk": 0.045, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 37800 }, { "accuracy": 57.4219, "doc_norm": 6.8662, "encoder_q-embeddings": 19076.4727, "encoder_q-layer.0": 12579.6387, "encoder_q-layer.1": 12959.4375, "encoder_q-layer.10": 26009.8535, "encoder_q-layer.11": 43358.3945, "encoder_q-layer.2": 13865.2852, "encoder_q-layer.3": 13694.9082, "encoder_q-layer.4": 14532.8857, "encoder_q-layer.5": 13759.6045, "encoder_q-layer.6": 14931.0762, "encoder_q-layer.7": 15929.251, "encoder_q-layer.8": 18049.4727, "encoder_q-layer.9": 17915.6914, "epoch": 0.37, "inbatch_neg_score": 45.5607, "inbatch_pos_score": 46.0625, "learning_rate": 3.45e-05, "loss": 2.3522, "norm_diff": 0.0292, "num_tokens_overlap": 5.5784, "num_tokens_union": 54.8858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28461.8625, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.837, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7185, "sent_len_1": 66.58, "sent_len_max_0": 18.785, "sent_len_max_1": 188.59, "stdk": 0.0431, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 37900 }, { "accuracy": 54.5898, "doc_norm": 6.8676, "encoder_q-embeddings": 19962.6758, "encoder_q-layer.0": 13363.9473, "encoder_q-layer.1": 13474.4209, "encoder_q-layer.10": 24639.2832, "encoder_q-layer.11": 40084.8008, "encoder_q-layer.2": 14488.2344, "encoder_q-layer.3": 14578.8887, "encoder_q-layer.4": 15133.8203, "encoder_q-layer.5": 14541.2266, "encoder_q-layer.6": 15605.04, "encoder_q-layer.7": 16160.1104, "encoder_q-layer.8": 17966.5469, "encoder_q-layer.9": 17160.4746, "epoch": 0.37, "inbatch_neg_score": 45.6275, "inbatch_pos_score": 46.0938, "learning_rate": 3.444444444444445e-05, "loss": 2.3345, "norm_diff": 0.026, "num_tokens_overlap": 5.5808, "num_tokens_union": 55.0068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28244.8591, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8415, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7172, "sent_len_1": 66.7635, "sent_len_max_0": 18.875, "sent_len_max_1": 189.725, "stdk": 0.0428, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 38000 }, { "accuracy": 56.0547, "doc_norm": 6.8729, "encoder_q-embeddings": 18182.5918, "encoder_q-layer.0": 13017.4854, "encoder_q-layer.1": 13405.5742, "encoder_q-layer.10": 21784.9277, "encoder_q-layer.11": 42611.6562, "encoder_q-layer.2": 14286.4336, "encoder_q-layer.3": 14384.375, "encoder_q-layer.4": 14313.6113, "encoder_q-layer.5": 13980.3604, "encoder_q-layer.6": 15256.8125, "encoder_q-layer.7": 15609.793, "encoder_q-layer.8": 18566.2363, "encoder_q-layer.9": 16200.9111, "epoch": 0.37, "inbatch_neg_score": 45.5962, "inbatch_pos_score": 46.0938, "learning_rate": 3.438888888888889e-05, "loss": 2.322, "norm_diff": 0.0323, "num_tokens_overlap": 5.5774, "num_tokens_union": 55.0554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27607.1541, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8407, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7207, "sent_len_1": 66.8881, "sent_len_max_0": 18.9413, "sent_len_max_1": 190.2075, "stdk": 0.0434, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 38100 }, { "accuracy": 55.957, "doc_norm": 6.8746, "encoder_q-embeddings": 18590.2637, "encoder_q-layer.0": 13137.1963, "encoder_q-layer.1": 13170.707, "encoder_q-layer.10": 25580.8398, "encoder_q-layer.11": 41406.9922, "encoder_q-layer.2": 14556.3516, "encoder_q-layer.3": 14532.8555, "encoder_q-layer.4": 15556.0703, "encoder_q-layer.5": 15178.5732, "encoder_q-layer.6": 17086.4434, "encoder_q-layer.7": 18342.459, "encoder_q-layer.8": 20684.3535, "encoder_q-layer.9": 18959.2363, "epoch": 0.37, "inbatch_neg_score": 45.5536, "inbatch_pos_score": 46.0625, "learning_rate": 3.433333333333333e-05, "loss": 2.3758, "norm_diff": 0.0331, "num_tokens_overlap": 5.5662, "num_tokens_union": 54.8951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29170.8589, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8414, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7041, "sent_len_1": 66.6138, "sent_len_max_0": 18.8825, "sent_len_max_1": 189.9563, "stdk": 0.044, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 38200 }, { "accuracy": 55.0781, "doc_norm": 6.8674, "encoder_q-embeddings": 17619.1152, "encoder_q-layer.0": 12405.4277, "encoder_q-layer.1": 12655.1709, "encoder_q-layer.10": 21538.0996, "encoder_q-layer.11": 38074.125, "encoder_q-layer.2": 13638.7832, "encoder_q-layer.3": 14102.6982, "encoder_q-layer.4": 14593.8164, "encoder_q-layer.5": 13786.5742, "encoder_q-layer.6": 15146.2236, "encoder_q-layer.7": 15846.6191, "encoder_q-layer.8": 17944.6777, "encoder_q-layer.9": 16284.623, "epoch": 0.37, "inbatch_neg_score": 45.5378, "inbatch_pos_score": 46.0312, "learning_rate": 3.427777777777778e-05, "loss": 2.3432, "norm_diff": 0.0314, "num_tokens_overlap": 5.5699, "num_tokens_union": 54.7512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26841.3542, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.836, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7002, "sent_len_1": 66.4115, "sent_len_max_0": 18.7925, "sent_len_max_1": 187.4462, "stdk": 0.0429, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 38300 }, { "accuracy": 54.9805, "doc_norm": 6.8589, "encoder_q-embeddings": 18052.6816, "encoder_q-layer.0": 12431.9092, "encoder_q-layer.1": 13127.9736, "encoder_q-layer.10": 21260.0859, "encoder_q-layer.11": 39631.6484, "encoder_q-layer.2": 14066.5049, "encoder_q-layer.3": 14362.3906, "encoder_q-layer.4": 14479.8174, "encoder_q-layer.5": 13539.6318, "encoder_q-layer.6": 14689.1484, "encoder_q-layer.7": 15820.5137, "encoder_q-layer.8": 17011.0547, "encoder_q-layer.9": 16024.0664, "epoch": 0.37, "inbatch_neg_score": 45.4285, "inbatch_pos_score": 45.9375, "learning_rate": 3.4222222222222224e-05, "loss": 2.3564, "norm_diff": 0.0317, "num_tokens_overlap": 5.5656, "num_tokens_union": 55.0007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27058.248, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8272, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7169, "sent_len_1": 66.7555, "sent_len_max_0": 18.8125, "sent_len_max_1": 190.3587, "stdk": 0.0434, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 38400 }, { "accuracy": 52.9297, "doc_norm": 6.8586, "encoder_q-embeddings": 18927.7207, "encoder_q-layer.0": 13204.1729, "encoder_q-layer.1": 13205.7363, "encoder_q-layer.10": 21986.2539, "encoder_q-layer.11": 40081.6016, "encoder_q-layer.2": 14514.0254, "encoder_q-layer.3": 14531.4424, "encoder_q-layer.4": 14801.8115, "encoder_q-layer.5": 14780.2402, "encoder_q-layer.6": 15238.8701, "encoder_q-layer.7": 17752.2109, "encoder_q-layer.8": 19054.1562, "encoder_q-layer.9": 16600.582, "epoch": 0.38, "inbatch_neg_score": 45.4277, "inbatch_pos_score": 45.9062, "learning_rate": 3.4166666666666666e-05, "loss": 2.2753, "norm_diff": 0.03, "num_tokens_overlap": 5.5954, "num_tokens_union": 55.1979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28000.9866, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8286, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7429, "sent_len_1": 67.0778, "sent_len_max_0": 18.915, "sent_len_max_1": 191.3663, "stdk": 0.043, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 38500 }, { "accuracy": 54.9805, "doc_norm": 6.8564, "encoder_q-embeddings": 18170.0078, "encoder_q-layer.0": 12338.4629, "encoder_q-layer.1": 12720.4854, "encoder_q-layer.10": 20159.1465, "encoder_q-layer.11": 37619.4453, "encoder_q-layer.2": 13602.1592, "encoder_q-layer.3": 13756.0664, "encoder_q-layer.4": 13785.2217, "encoder_q-layer.5": 13767.6064, "encoder_q-layer.6": 14922.5908, "encoder_q-layer.7": 15645.2861, "encoder_q-layer.8": 19414.793, "encoder_q-layer.9": 16458.6719, "epoch": 0.38, "inbatch_neg_score": 45.3976, "inbatch_pos_score": 45.9062, "learning_rate": 3.411111111111111e-05, "loss": 2.3517, "norm_diff": 0.0309, "num_tokens_overlap": 5.5777, "num_tokens_union": 55.0763, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26912.1362, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8255, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7138, "sent_len_1": 66.8775, "sent_len_max_0": 18.8612, "sent_len_max_1": 188.6138, "stdk": 0.0434, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 38600 }, { "accuracy": 58.1055, "doc_norm": 6.857, "encoder_q-embeddings": 17520.5938, "encoder_q-layer.0": 11980.2314, "encoder_q-layer.1": 12271.2119, "encoder_q-layer.10": 22636.5312, "encoder_q-layer.11": 43251.2031, "encoder_q-layer.2": 13527.6943, "encoder_q-layer.3": 13630.2031, "encoder_q-layer.4": 13929.3145, "encoder_q-layer.5": 13475.3301, "encoder_q-layer.6": 14517.1836, "encoder_q-layer.7": 15206.7998, "encoder_q-layer.8": 17283.4102, "encoder_q-layer.9": 16477.0625, "epoch": 0.38, "inbatch_neg_score": 45.4026, "inbatch_pos_score": 45.9062, "learning_rate": 3.405555555555556e-05, "loss": 2.358, "norm_diff": 0.0322, "num_tokens_overlap": 5.5773, "num_tokens_union": 54.9163, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27054.6388, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8248, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7157, "sent_len_1": 66.6407, "sent_len_max_0": 18.8863, "sent_len_max_1": 191.3, "stdk": 0.0431, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 38700 }, { "accuracy": 54.2969, "doc_norm": 6.8534, "encoder_q-embeddings": 19464.4688, "encoder_q-layer.0": 13191.7246, "encoder_q-layer.1": 13754.5986, "encoder_q-layer.10": 23424.2793, "encoder_q-layer.11": 48082.4883, "encoder_q-layer.2": 14700.9014, "encoder_q-layer.3": 15253.0928, "encoder_q-layer.4": 15568.2812, "encoder_q-layer.5": 15254.583, "encoder_q-layer.6": 16253.6924, "encoder_q-layer.7": 17154.457, "encoder_q-layer.8": 18410.7402, "encoder_q-layer.9": 17549.2734, "epoch": 0.38, "inbatch_neg_score": 45.3264, "inbatch_pos_score": 45.8438, "learning_rate": 3.4000000000000007e-05, "loss": 2.3515, "norm_diff": 0.0287, "num_tokens_overlap": 5.5781, "num_tokens_union": 55.0528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29906.0661, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8247, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7241, "sent_len_1": 66.8404, "sent_len_max_0": 18.745, "sent_len_max_1": 189.9925, "stdk": 0.0439, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 38800 }, { "accuracy": 56.3477, "doc_norm": 6.8523, "encoder_q-embeddings": 18119.291, "encoder_q-layer.0": 12676.2373, "encoder_q-layer.1": 12925.0537, "encoder_q-layer.10": 19255.4258, "encoder_q-layer.11": 39051.3906, "encoder_q-layer.2": 13930.4785, "encoder_q-layer.3": 14297.3027, "encoder_q-layer.4": 14860.416, "encoder_q-layer.5": 14575.9824, "encoder_q-layer.6": 14686.3379, "encoder_q-layer.7": 16420.0254, "encoder_q-layer.8": 17250.2246, "encoder_q-layer.9": 15798.876, "epoch": 0.38, "inbatch_neg_score": 45.3807, "inbatch_pos_score": 45.875, "learning_rate": 3.394444444444444e-05, "loss": 2.3179, "norm_diff": 0.0307, "num_tokens_overlap": 5.5839, "num_tokens_union": 55.0378, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26653.9427, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8215, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7047, "sent_len_1": 66.8299, "sent_len_max_0": 18.905, "sent_len_max_1": 190.1562, "stdk": 0.0421, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 38900 }, { "accuracy": 54.4922, "doc_norm": 6.8534, "encoder_q-embeddings": 19320.4277, "encoder_q-layer.0": 13837.3555, "encoder_q-layer.1": 13637.1973, "encoder_q-layer.10": 21762.668, "encoder_q-layer.11": 40312.4805, "encoder_q-layer.2": 14909.8145, "encoder_q-layer.3": 14609.0488, "encoder_q-layer.4": 14972.6768, "encoder_q-layer.5": 14314.874, "encoder_q-layer.6": 15205.2656, "encoder_q-layer.7": 15383.8389, "encoder_q-layer.8": 17598.1172, "encoder_q-layer.9": 16486.248, "epoch": 0.38, "inbatch_neg_score": 45.3543, "inbatch_pos_score": 45.8438, "learning_rate": 3.388888888888889e-05, "loss": 2.3543, "norm_diff": 0.0309, "num_tokens_overlap": 5.589, "num_tokens_union": 54.9993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28069.7308, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8225, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7301, "sent_len_1": 66.7686, "sent_len_max_0": 18.7725, "sent_len_max_1": 190.3388, "stdk": 0.0433, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 39000 }, { "accuracy": 55.6641, "doc_norm": 6.8533, "encoder_q-embeddings": 19905.3652, "encoder_q-layer.0": 13293.5186, "encoder_q-layer.1": 13590.2988, "encoder_q-layer.10": 25457.5449, "encoder_q-layer.11": 42627.8359, "encoder_q-layer.2": 14268.7744, "encoder_q-layer.3": 14088.7832, "encoder_q-layer.4": 14368.0859, "encoder_q-layer.5": 14293.9531, "encoder_q-layer.6": 15975.3652, "encoder_q-layer.7": 17228.2832, "encoder_q-layer.8": 18073.1875, "encoder_q-layer.9": 17222.9883, "epoch": 0.38, "inbatch_neg_score": 45.3052, "inbatch_pos_score": 45.8125, "learning_rate": 3.3833333333333334e-05, "loss": 2.2928, "norm_diff": 0.032, "num_tokens_overlap": 5.5803, "num_tokens_union": 55.091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29224.3035, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8213, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7213, "sent_len_1": 66.8975, "sent_len_max_0": 18.9112, "sent_len_max_1": 188.7188, "stdk": 0.0441, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 39100 }, { "accuracy": 55.957, "doc_norm": 6.8567, "encoder_q-embeddings": 18426.9297, "encoder_q-layer.0": 12924.3633, "encoder_q-layer.1": 13242.752, "encoder_q-layer.10": 25566.9883, "encoder_q-layer.11": 43807.7461, "encoder_q-layer.2": 14331.4141, "encoder_q-layer.3": 14303.4209, "encoder_q-layer.4": 14357.6631, "encoder_q-layer.5": 13843.8301, "encoder_q-layer.6": 15226.793, "encoder_q-layer.7": 16923.9395, "encoder_q-layer.8": 19628.2949, "encoder_q-layer.9": 18009.9688, "epoch": 0.38, "inbatch_neg_score": 45.3526, "inbatch_pos_score": 45.875, "learning_rate": 3.377777777777778e-05, "loss": 2.3304, "norm_diff": 0.0332, "num_tokens_overlap": 5.5875, "num_tokens_union": 54.9476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28420.5551, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8235, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7149, "sent_len_1": 66.6947, "sent_len_max_0": 18.8525, "sent_len_max_1": 190.185, "stdk": 0.0445, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 39200 }, { "accuracy": 55.8594, "doc_norm": 6.8515, "encoder_q-embeddings": 18115.1367, "encoder_q-layer.0": 12661.0029, "encoder_q-layer.1": 12873.9385, "encoder_q-layer.10": 21894.2363, "encoder_q-layer.11": 39073.625, "encoder_q-layer.2": 14292.666, "encoder_q-layer.3": 13896.3154, "encoder_q-layer.4": 14491.457, "encoder_q-layer.5": 14267.3281, "encoder_q-layer.6": 15285.4678, "encoder_q-layer.7": 15739.0859, "encoder_q-layer.8": 17920.3379, "encoder_q-layer.9": 16898.5645, "epoch": 0.38, "inbatch_neg_score": 45.2866, "inbatch_pos_score": 45.7812, "learning_rate": 3.3722222222222225e-05, "loss": 2.2828, "norm_diff": 0.0337, "num_tokens_overlap": 5.582, "num_tokens_union": 55.1569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27158.2162, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8177, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7254, "sent_len_1": 67.0057, "sent_len_max_0": 18.9337, "sent_len_max_1": 190.59, "stdk": 0.0453, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 39300 }, { "accuracy": 53.7109, "doc_norm": 6.8446, "encoder_q-embeddings": 19364.5254, "encoder_q-layer.0": 13403.8154, "encoder_q-layer.1": 13412.8203, "encoder_q-layer.10": 19480.1465, "encoder_q-layer.11": 36335.3203, "encoder_q-layer.2": 14536.2109, "encoder_q-layer.3": 13983.3643, "encoder_q-layer.4": 14478.1621, "encoder_q-layer.5": 13361.8525, "encoder_q-layer.6": 14447.8838, "encoder_q-layer.7": 16291.8945, "encoder_q-layer.8": 16870.3066, "encoder_q-layer.9": 16672.1094, "epoch": 0.38, "inbatch_neg_score": 45.2333, "inbatch_pos_score": 45.7188, "learning_rate": 3.366666666666667e-05, "loss": 2.3248, "norm_diff": 0.0317, "num_tokens_overlap": 5.5877, "num_tokens_union": 54.9405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26765.6481, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8129, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.722, "sent_len_1": 66.7052, "sent_len_max_0": 18.8588, "sent_len_max_1": 189.2875, "stdk": 0.0438, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 39400 }, { "accuracy": 51.8555, "doc_norm": 6.8433, "encoder_q-embeddings": 18389.1816, "encoder_q-layer.0": 13128.1689, "encoder_q-layer.1": 13635.3203, "encoder_q-layer.10": 25239.2227, "encoder_q-layer.11": 42444.7969, "encoder_q-layer.2": 14315.5459, "encoder_q-layer.3": 14332.0957, "encoder_q-layer.4": 14531.5947, "encoder_q-layer.5": 14062.2422, "encoder_q-layer.6": 16021.6123, "encoder_q-layer.7": 17082.3301, "encoder_q-layer.8": 19582.3516, "encoder_q-layer.9": 18266.666, "epoch": 0.39, "inbatch_neg_score": 45.1997, "inbatch_pos_score": 45.6875, "learning_rate": 3.3611111111111116e-05, "loss": 2.3273, "norm_diff": 0.0324, "num_tokens_overlap": 5.5645, "num_tokens_union": 55.0939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28953.2015, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.811, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7087, "sent_len_1": 66.9114, "sent_len_max_0": 18.8287, "sent_len_max_1": 190.865, "stdk": 0.0431, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 39500 }, { "accuracy": 55.4688, "doc_norm": 6.8454, "encoder_q-embeddings": 18484.8496, "encoder_q-layer.0": 13121.5498, "encoder_q-layer.1": 13523.3037, "encoder_q-layer.10": 23957.6816, "encoder_q-layer.11": 44191.4336, "encoder_q-layer.2": 14812.4434, "encoder_q-layer.3": 14884.7578, "encoder_q-layer.4": 14829.3643, "encoder_q-layer.5": 14453.3057, "encoder_q-layer.6": 15734.2754, "encoder_q-layer.7": 16718.7656, "encoder_q-layer.8": 18691.4453, "encoder_q-layer.9": 17804.7383, "epoch": 0.39, "inbatch_neg_score": 45.2306, "inbatch_pos_score": 45.75, "learning_rate": 3.355555555555556e-05, "loss": 2.3223, "norm_diff": 0.0289, "num_tokens_overlap": 5.5736, "num_tokens_union": 54.7974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28889.2713, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8165, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.436, "sent_len_max_0": 18.8125, "sent_len_max_1": 190.2375, "stdk": 0.0443, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 39600 }, { "accuracy": 54.4922, "doc_norm": 6.8468, "encoder_q-embeddings": 18467.2188, "encoder_q-layer.0": 12738.4521, "encoder_q-layer.1": 13147.6211, "encoder_q-layer.10": 22349.8438, "encoder_q-layer.11": 45583.6016, "encoder_q-layer.2": 14236.998, "encoder_q-layer.3": 14252.3584, "encoder_q-layer.4": 14767.4893, "encoder_q-layer.5": 14178.3184, "encoder_q-layer.6": 14874.9316, "encoder_q-layer.7": 15851.4141, "encoder_q-layer.8": 17763.6289, "encoder_q-layer.9": 17277.8867, "epoch": 0.39, "inbatch_neg_score": 45.217, "inbatch_pos_score": 45.7188, "learning_rate": 3.35e-05, "loss": 2.3586, "norm_diff": 0.033, "num_tokens_overlap": 5.5835, "num_tokens_union": 54.9873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28477.3964, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8139, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7284, "sent_len_1": 66.7465, "sent_len_max_0": 18.8938, "sent_len_max_1": 189.3438, "stdk": 0.0448, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 39700 }, { "accuracy": 54.6875, "doc_norm": 6.8458, "encoder_q-embeddings": 19177.3145, "encoder_q-layer.0": 13445.4922, "encoder_q-layer.1": 13926.0684, "encoder_q-layer.10": 21121.0879, "encoder_q-layer.11": 44980.8516, "encoder_q-layer.2": 14718.3848, "encoder_q-layer.3": 14928.4473, "encoder_q-layer.4": 15537.7227, "encoder_q-layer.5": 14731.3682, "encoder_q-layer.6": 15649.5654, "encoder_q-layer.7": 16550.6309, "encoder_q-layer.8": 18517.7539, "encoder_q-layer.9": 17020.2539, "epoch": 0.39, "inbatch_neg_score": 45.2264, "inbatch_pos_score": 45.7188, "learning_rate": 3.3444444444444443e-05, "loss": 2.3345, "norm_diff": 0.0318, "num_tokens_overlap": 5.5749, "num_tokens_union": 55.083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29187.4642, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.814, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7176, "sent_len_1": 66.8399, "sent_len_max_0": 18.9088, "sent_len_max_1": 189.28, "stdk": 0.0433, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 39800 }, { "accuracy": 56.3477, "doc_norm": 6.8383, "encoder_q-embeddings": 18036.3613, "encoder_q-layer.0": 12320.585, "encoder_q-layer.1": 12824.6357, "encoder_q-layer.10": 20571.8477, "encoder_q-layer.11": 38144.1172, "encoder_q-layer.2": 14165.8076, "encoder_q-layer.3": 14314.2549, "encoder_q-layer.4": 14862.2354, "encoder_q-layer.5": 14590.2539, "encoder_q-layer.6": 15118.7461, "encoder_q-layer.7": 17034.1914, "encoder_q-layer.8": 17904.7852, "encoder_q-layer.9": 16972.8965, "epoch": 0.39, "inbatch_neg_score": 45.1656, "inbatch_pos_score": 45.6562, "learning_rate": 3.338888888888889e-05, "loss": 2.3909, "norm_diff": 0.0291, "num_tokens_overlap": 5.5733, "num_tokens_union": 55.0594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27184.0349, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8093, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6935, "sent_len_1": 66.8893, "sent_len_max_0": 18.7725, "sent_len_max_1": 189.9837, "stdk": 0.0429, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 39900 }, { "accuracy": 56.0547, "doc_norm": 6.8417, "encoder_q-embeddings": 17916.2227, "encoder_q-layer.0": 12327.0566, "encoder_q-layer.1": 12423.9531, "encoder_q-layer.10": 23191.2578, "encoder_q-layer.11": 40537.6133, "encoder_q-layer.2": 13450.8877, "encoder_q-layer.3": 13732.9492, "encoder_q-layer.4": 14132.6035, "encoder_q-layer.5": 13774.9062, "encoder_q-layer.6": 14726.9844, "encoder_q-layer.7": 15189.1084, "encoder_q-layer.8": 18020.7344, "encoder_q-layer.9": 15997.0615, "epoch": 0.39, "inbatch_neg_score": 45.182, "inbatch_pos_score": 45.6875, "learning_rate": 3.3333333333333335e-05, "loss": 2.3524, "norm_diff": 0.0316, "num_tokens_overlap": 5.5597, "num_tokens_union": 54.878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27098.0542, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8101, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7214, "sent_len_1": 66.5832, "sent_len_max_0": 18.8912, "sent_len_max_1": 189.695, "stdk": 0.0424, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 40000 }, { "dev_runtime": 27.0096, "dev_samples_per_second": 2.37, "dev_steps_per_second": 0.037, "epoch": 0.39, "step": 40000, "test_accuracy": 8.7371826171875, "test_doc_norm": 6.83458137512207, "test_inbatch_neg_score": 45.685951232910156, "test_inbatch_pos_score": 46.41509246826172, "test_loss": 3.9236345291137695, "test_norm_diff": 0.001545920968055725, "test_query_norm": 6.834383010864258, "test_queue_k_norm": 0.0, "test_stdk": 0.03600190207362175, "test_stdq": 0.03596462309360504, "test_stdqueue_k": 0.0 }, { "dev_runtime": 27.0096, "dev_samples_per_second": 2.37, "dev_steps_per_second": 0.037, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.37753, "eval_beir-arguana_recall@10": 0.6707, "eval_beir-arguana_recall@100": 0.97226, "eval_beir-arguana_recall@20": 0.83357, "eval_beir-avg_ndcg@10": 0.3798328333333334, "eval_beir-avg_recall@10": 0.4512574166666667, "eval_beir-avg_recall@100": 0.6351440833333333, "eval_beir-avg_recall@20": 0.51742025, "eval_beir-cqadupstack_ndcg@10": 0.2902883333333333, "eval_beir-cqadupstack_recall@10": 0.3880841666666666, "eval_beir-cqadupstack_recall@100": 0.6214308333333333, "eval_beir-cqadupstack_recall@20": 0.45231250000000006, "eval_beir-fiqa_ndcg@10": 0.25669, "eval_beir-fiqa_recall@10": 0.32298, "eval_beir-fiqa_recall@100": 0.59302, "eval_beir-fiqa_recall@20": 0.40183, "eval_beir-nfcorpus_ndcg@10": 0.32866, "eval_beir-nfcorpus_recall@10": 0.15891, "eval_beir-nfcorpus_recall@100": 0.30339, "eval_beir-nfcorpus_recall@20": 0.18696, "eval_beir-nq_ndcg@10": 0.25729, "eval_beir-nq_recall@10": 0.42828, "eval_beir-nq_recall@100": 0.77136, "eval_beir-nq_recall@20": 0.55569, "eval_beir-quora_ndcg@10": 0.78967, "eval_beir-quora_recall@10": 0.89147, "eval_beir-quora_recall@100": 0.97776, "eval_beir-quora_recall@20": 0.93214, "eval_beir-scidocs_ndcg@10": 0.15305, "eval_beir-scidocs_recall@10": 0.16008, "eval_beir-scidocs_recall@100": 0.37847, "eval_beir-scidocs_recall@20": 0.22007, "eval_beir-scifact_ndcg@10": 0.60138, "eval_beir-scifact_recall@10": 0.76661, "eval_beir-scifact_recall@100": 0.89989, "eval_beir-scifact_recall@20": 0.82522, "eval_beir-trec-covid_ndcg@10": 0.56549, "eval_beir-trec-covid_recall@10": 0.6, "eval_beir-trec-covid_recall@100": 0.4234, "eval_beir-trec-covid_recall@20": 0.564, "eval_beir-webis-touche2020_ndcg@10": 0.17828, "eval_beir-webis-touche2020_recall@10": 0.12546, "eval_beir-webis-touche2020_recall@100": 0.41046, "eval_beir-webis-touche2020_recall@20": 0.20241, "eval_senteval-avg_sts": 0.7445611835939482, "eval_senteval-sickr_spearman": 0.7299440185074582, "eval_senteval-stsb_spearman": 0.7591783486804382, "step": 40000, "test_accuracy": 8.7371826171875, "test_doc_norm": 6.83458137512207, "test_inbatch_neg_score": 45.685951232910156, "test_inbatch_pos_score": 46.41509246826172, "test_loss": 3.9236345291137695, "test_norm_diff": 0.001545920968055725, "test_query_norm": 6.834383010864258, "test_queue_k_norm": 0.0, "test_stdk": 0.03600190207362175, "test_stdq": 0.03596462309360504, "test_stdqueue_k": 0.0 }, { "accuracy": 58.2031, "doc_norm": 6.8348, "encoder_q-embeddings": 17965.168, "encoder_q-layer.0": 12427.6816, "encoder_q-layer.1": 12552.5928, "encoder_q-layer.10": 24438.4922, "encoder_q-layer.11": 41481.375, "encoder_q-layer.2": 13888.2607, "encoder_q-layer.3": 13920.2217, "encoder_q-layer.4": 14200.9326, "encoder_q-layer.5": 14606.8184, "encoder_q-layer.6": 16091.1055, "encoder_q-layer.7": 16804.2891, "encoder_q-layer.8": 18583.2871, "encoder_q-layer.9": 17314.0469, "epoch": 0.39, "inbatch_neg_score": 45.1033, "inbatch_pos_score": 45.625, "learning_rate": 3.327777777777778e-05, "loss": 2.3304, "norm_diff": 0.0315, "num_tokens_overlap": 5.5825, "num_tokens_union": 54.8694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28381.8052, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8033, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7127, "sent_len_1": 66.551, "sent_len_max_0": 18.855, "sent_len_max_1": 190.1275, "stdk": 0.0429, "stdq": 0.039, "stdqueue_k": 0.0, "step": 40100 }, { "accuracy": 55.6641, "doc_norm": 6.8328, "encoder_q-embeddings": 19531.5957, "encoder_q-layer.0": 13098.3857, "encoder_q-layer.1": 13528.5, "encoder_q-layer.10": 21210.5098, "encoder_q-layer.11": 37897.9844, "encoder_q-layer.2": 14223.0742, "encoder_q-layer.3": 14717.5488, "encoder_q-layer.4": 14848.4404, "encoder_q-layer.5": 14032.6133, "encoder_q-layer.6": 15112.5723, "encoder_q-layer.7": 15777.1348, "encoder_q-layer.8": 18453.9004, "encoder_q-layer.9": 16498.8867, "epoch": 0.39, "inbatch_neg_score": 45.0762, "inbatch_pos_score": 45.5938, "learning_rate": 3.322222222222222e-05, "loss": 2.301, "norm_diff": 0.0299, "num_tokens_overlap": 5.5788, "num_tokens_union": 54.9397, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27462.2492, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8028, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7087, "sent_len_1": 66.7384, "sent_len_max_0": 18.9037, "sent_len_max_1": 192.1887, "stdk": 0.0429, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 40200 }, { "accuracy": 56.7383, "doc_norm": 6.8424, "encoder_q-embeddings": 18246.6055, "encoder_q-layer.0": 12709.8545, "encoder_q-layer.1": 13285.8857, "encoder_q-layer.10": 22181.8047, "encoder_q-layer.11": 42589.3633, "encoder_q-layer.2": 14385.3896, "encoder_q-layer.3": 13931.2656, "encoder_q-layer.4": 14446.9043, "encoder_q-layer.5": 13819.3154, "encoder_q-layer.6": 15004.4521, "encoder_q-layer.7": 15745.0342, "encoder_q-layer.8": 17562.6602, "encoder_q-layer.9": 16173.7881, "epoch": 0.39, "inbatch_neg_score": 45.1418, "inbatch_pos_score": 45.6562, "learning_rate": 3.316666666666667e-05, "loss": 2.3395, "norm_diff": 0.0357, "num_tokens_overlap": 5.5802, "num_tokens_union": 55.0773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27782.9997, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8068, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7118, "sent_len_1": 66.876, "sent_len_max_0": 18.8238, "sent_len_max_1": 190.3175, "stdk": 0.045, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 40300 }, { "accuracy": 55.957, "doc_norm": 6.8417, "encoder_q-embeddings": 17751.5547, "encoder_q-layer.0": 13055.1924, "encoder_q-layer.1": 13494.8896, "encoder_q-layer.10": 25609.4648, "encoder_q-layer.11": 44633.1328, "encoder_q-layer.2": 15117.1719, "encoder_q-layer.3": 14453.6143, "encoder_q-layer.4": 15126.7695, "encoder_q-layer.5": 14582.1475, "encoder_q-layer.6": 14861.835, "encoder_q-layer.7": 16992.0195, "encoder_q-layer.8": 18918.0215, "encoder_q-layer.9": 17933.7109, "epoch": 0.39, "inbatch_neg_score": 45.1336, "inbatch_pos_score": 45.6562, "learning_rate": 3.311111111111112e-05, "loss": 2.3574, "norm_diff": 0.0337, "num_tokens_overlap": 5.5787, "num_tokens_union": 55.0076, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28786.9196, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8081, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7, "sent_len_1": 66.7661, "sent_len_max_0": 18.8475, "sent_len_max_1": 190.6987, "stdk": 0.044, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 40400 }, { "accuracy": 58.2031, "doc_norm": 6.8364, "encoder_q-embeddings": 17799.3164, "encoder_q-layer.0": 12053.1631, "encoder_q-layer.1": 12411.5557, "encoder_q-layer.10": 19408.3184, "encoder_q-layer.11": 38355.8789, "encoder_q-layer.2": 13199.0635, "encoder_q-layer.3": 13220.3906, "encoder_q-layer.4": 13355.0625, "encoder_q-layer.5": 12992.8926, "encoder_q-layer.6": 13792.6797, "encoder_q-layer.7": 14245.3535, "encoder_q-layer.8": 16103.46, "encoder_q-layer.9": 15309.29, "epoch": 0.4, "inbatch_neg_score": 45.1186, "inbatch_pos_score": 45.625, "learning_rate": 3.3055555555555553e-05, "loss": 2.3493, "norm_diff": 0.0325, "num_tokens_overlap": 5.5715, "num_tokens_union": 55.028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25854.5367, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.8039, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7167, "sent_len_1": 66.8088, "sent_len_max_0": 18.8188, "sent_len_max_1": 191.965, "stdk": 0.0437, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 40500 }, { "accuracy": 57.1289, "doc_norm": 6.8333, "encoder_q-embeddings": 17822.1504, "encoder_q-layer.0": 12985.6553, "encoder_q-layer.1": 12908.832, "encoder_q-layer.10": 22806.6602, "encoder_q-layer.11": 43643.0312, "encoder_q-layer.2": 14080.2988, "encoder_q-layer.3": 13921.0352, "encoder_q-layer.4": 14141.3984, "encoder_q-layer.5": 13298.1592, "encoder_q-layer.6": 14942.1562, "encoder_q-layer.7": 17359.7637, "encoder_q-layer.8": 19804.0508, "encoder_q-layer.9": 17463.4199, "epoch": 0.4, "inbatch_neg_score": 45.0974, "inbatch_pos_score": 45.5938, "learning_rate": 3.3e-05, "loss": 2.3416, "norm_diff": 0.0312, "num_tokens_overlap": 5.583, "num_tokens_union": 54.9817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28081.382, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8021, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7133, "sent_len_1": 66.8075, "sent_len_max_0": 18.8438, "sent_len_max_1": 191.7375, "stdk": 0.0425, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 40600 }, { "accuracy": 53.8086, "doc_norm": 6.8335, "encoder_q-embeddings": 19808.5801, "encoder_q-layer.0": 13299.915, "encoder_q-layer.1": 13576.3672, "encoder_q-layer.10": 21665.4414, "encoder_q-layer.11": 41538.7109, "encoder_q-layer.2": 14356.8809, "encoder_q-layer.3": 14578.3232, "encoder_q-layer.4": 14419.1191, "encoder_q-layer.5": 14494.9512, "encoder_q-layer.6": 15600.9082, "encoder_q-layer.7": 16047.0332, "encoder_q-layer.8": 17915.293, "encoder_q-layer.9": 16196.8994, "epoch": 0.4, "inbatch_neg_score": 45.0968, "inbatch_pos_score": 45.5938, "learning_rate": 3.2944444444444445e-05, "loss": 2.3226, "norm_diff": 0.0303, "num_tokens_overlap": 5.5744, "num_tokens_union": 55.0363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27927.8506, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.8031, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7027, "sent_len_1": 66.7276, "sent_len_max_0": 18.785, "sent_len_max_1": 189.4075, "stdk": 0.044, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 40700 }, { "accuracy": 54.8828, "doc_norm": 6.8314, "encoder_q-embeddings": 18739.5879, "encoder_q-layer.0": 13006.6895, "encoder_q-layer.1": 13215.9805, "encoder_q-layer.10": 20240.6973, "encoder_q-layer.11": 42345.2656, "encoder_q-layer.2": 14503.8818, "encoder_q-layer.3": 14270.3154, "encoder_q-layer.4": 14638.6094, "encoder_q-layer.5": 14134.1602, "encoder_q-layer.6": 14853.4375, "encoder_q-layer.7": 15706.1084, "encoder_q-layer.8": 17801.1289, "encoder_q-layer.9": 16112.1328, "epoch": 0.4, "inbatch_neg_score": 45.0209, "inbatch_pos_score": 45.5312, "learning_rate": 3.2888888888888894e-05, "loss": 2.3411, "norm_diff": 0.0342, "num_tokens_overlap": 5.571, "num_tokens_union": 55.1103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27864.065, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7972, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7196, "sent_len_1": 66.9646, "sent_len_max_0": 18.8388, "sent_len_max_1": 187.8887, "stdk": 0.0432, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 40800 }, { "accuracy": 55.4688, "doc_norm": 6.8269, "encoder_q-embeddings": 17343.1582, "encoder_q-layer.0": 12153.7979, "encoder_q-layer.1": 12842.5732, "encoder_q-layer.10": 19700.2695, "encoder_q-layer.11": 38506.8242, "encoder_q-layer.2": 13829.1982, "encoder_q-layer.3": 13481.9141, "encoder_q-layer.4": 13924.5352, "encoder_q-layer.5": 13224.9365, "encoder_q-layer.6": 13946.8633, "encoder_q-layer.7": 15012.6211, "encoder_q-layer.8": 16590.293, "encoder_q-layer.9": 15632.9463, "epoch": 0.4, "inbatch_neg_score": 44.9644, "inbatch_pos_score": 45.4688, "learning_rate": 3.283333333333333e-05, "loss": 2.3037, "norm_diff": 0.0334, "num_tokens_overlap": 5.5712, "num_tokens_union": 54.9622, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25996.1094, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7935, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6929, "sent_len_1": 66.7202, "sent_len_max_0": 18.8212, "sent_len_max_1": 188.9425, "stdk": 0.0431, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 40900 }, { "accuracy": 56.3477, "doc_norm": 6.8222, "encoder_q-embeddings": 18090.5566, "encoder_q-layer.0": 12608.7939, "encoder_q-layer.1": 12936.3984, "encoder_q-layer.10": 26861.457, "encoder_q-layer.11": 47282.875, "encoder_q-layer.2": 14016.2715, "encoder_q-layer.3": 14017.3408, "encoder_q-layer.4": 14386.7969, "encoder_q-layer.5": 13691.6875, "encoder_q-layer.6": 15618.79, "encoder_q-layer.7": 16769.9766, "encoder_q-layer.8": 20932.4043, "encoder_q-layer.9": 19469.7441, "epoch": 0.4, "inbatch_neg_score": 44.8925, "inbatch_pos_score": 45.4062, "learning_rate": 3.277777777777778e-05, "loss": 2.3531, "norm_diff": 0.0314, "num_tokens_overlap": 5.5845, "num_tokens_union": 54.9582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29439.1866, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7908, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7427, "sent_len_1": 66.7141, "sent_len_max_0": 18.8625, "sent_len_max_1": 188.215, "stdk": 0.0431, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 41000 }, { "accuracy": 58.2031, "doc_norm": 6.8161, "encoder_q-embeddings": 17986.3516, "encoder_q-layer.0": 12610.5273, "encoder_q-layer.1": 13008.1758, "encoder_q-layer.10": 22373.7402, "encoder_q-layer.11": 39805.0469, "encoder_q-layer.2": 14343.1445, "encoder_q-layer.3": 14170.8213, "encoder_q-layer.4": 14691.9307, "encoder_q-layer.5": 14351.5537, "encoder_q-layer.6": 15237.8584, "encoder_q-layer.7": 15779.2188, "encoder_q-layer.8": 18846.6152, "encoder_q-layer.9": 17210.418, "epoch": 0.4, "inbatch_neg_score": 44.824, "inbatch_pos_score": 45.3438, "learning_rate": 3.272222222222223e-05, "loss": 2.3246, "norm_diff": 0.0304, "num_tokens_overlap": 5.5827, "num_tokens_union": 55.0138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27380.4125, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7857, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7423, "sent_len_1": 66.7961, "sent_len_max_0": 18.9187, "sent_len_max_1": 189.8063, "stdk": 0.0429, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 41100 }, { "accuracy": 55.2734, "doc_norm": 6.8191, "encoder_q-embeddings": 18061.875, "encoder_q-layer.0": 13008.3906, "encoder_q-layer.1": 13049.2852, "encoder_q-layer.10": 21143.7207, "encoder_q-layer.11": 41447.6055, "encoder_q-layer.2": 13599.9258, "encoder_q-layer.3": 13553.1602, "encoder_q-layer.4": 14206.21, "encoder_q-layer.5": 13768.499, "encoder_q-layer.6": 14649.915, "encoder_q-layer.7": 16718.4492, "encoder_q-layer.8": 18467.2695, "encoder_q-layer.9": 16602.8828, "epoch": 0.4, "inbatch_neg_score": 44.8559, "inbatch_pos_score": 45.3438, "learning_rate": 3.266666666666667e-05, "loss": 2.3557, "norm_diff": 0.0343, "num_tokens_overlap": 5.5769, "num_tokens_union": 55.086, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27324.6667, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7849, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7222, "sent_len_1": 66.9462, "sent_len_max_0": 18.8863, "sent_len_max_1": 191.9688, "stdk": 0.0441, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 41200 }, { "accuracy": 54.5898, "doc_norm": 6.8174, "encoder_q-embeddings": 18937.748, "encoder_q-layer.0": 12893.9551, "encoder_q-layer.1": 13574.6084, "encoder_q-layer.10": 23197.3867, "encoder_q-layer.11": 41969.3906, "encoder_q-layer.2": 14586.0508, "encoder_q-layer.3": 14823.7959, "encoder_q-layer.4": 14835.9258, "encoder_q-layer.5": 14974.0254, "encoder_q-layer.6": 16352.7861, "encoder_q-layer.7": 17597.4473, "encoder_q-layer.8": 19451.6152, "encoder_q-layer.9": 18044.0684, "epoch": 0.4, "inbatch_neg_score": 44.8483, "inbatch_pos_score": 45.375, "learning_rate": 3.261111111111111e-05, "loss": 2.3315, "norm_diff": 0.0303, "num_tokens_overlap": 5.5787, "num_tokens_union": 54.9923, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28760.559, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7871, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7286, "sent_len_1": 66.7413, "sent_len_max_0": 18.8087, "sent_len_max_1": 190.0163, "stdk": 0.0434, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 41300 }, { "accuracy": 56.6406, "doc_norm": 6.8177, "encoder_q-embeddings": 18475.7871, "encoder_q-layer.0": 12831.8613, "encoder_q-layer.1": 13176.2607, "encoder_q-layer.10": 25650.248, "encoder_q-layer.11": 43819.6562, "encoder_q-layer.2": 14901.877, "encoder_q-layer.3": 14960.7207, "encoder_q-layer.4": 15223.9229, "encoder_q-layer.5": 15166.7197, "encoder_q-layer.6": 16086.8379, "encoder_q-layer.7": 16932.2266, "encoder_q-layer.8": 19589.5352, "encoder_q-layer.9": 17721.0488, "epoch": 0.4, "inbatch_neg_score": 44.8028, "inbatch_pos_score": 45.3125, "learning_rate": 3.2555555555555555e-05, "loss": 2.3223, "norm_diff": 0.0324, "num_tokens_overlap": 5.5796, "num_tokens_union": 55.2031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29393.299, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7852, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7367, "sent_len_1": 66.9969, "sent_len_max_0": 18.815, "sent_len_max_1": 188.6225, "stdk": 0.0451, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 41400 }, { "accuracy": 55.8594, "doc_norm": 6.8099, "encoder_q-embeddings": 18709.7656, "encoder_q-layer.0": 12816.2178, "encoder_q-layer.1": 13020.4902, "encoder_q-layer.10": 23270.2363, "encoder_q-layer.11": 42048.8594, "encoder_q-layer.2": 14107.9346, "encoder_q-layer.3": 14066.668, "encoder_q-layer.4": 14995.1934, "encoder_q-layer.5": 14167.5947, "encoder_q-layer.6": 15463.6855, "encoder_q-layer.7": 16467.6328, "encoder_q-layer.8": 19861.2031, "encoder_q-layer.9": 17453.8809, "epoch": 0.41, "inbatch_neg_score": 44.7964, "inbatch_pos_score": 45.3125, "learning_rate": 3.2500000000000004e-05, "loss": 2.3599, "norm_diff": 0.0278, "num_tokens_overlap": 5.5769, "num_tokens_union": 54.9877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28541.1701, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7822, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7089, "sent_len_1": 66.7359, "sent_len_max_0": 18.8238, "sent_len_max_1": 188.05, "stdk": 0.0436, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 41500 }, { "accuracy": 56.543, "doc_norm": 6.8173, "encoder_q-embeddings": 18173.4121, "encoder_q-layer.0": 12416.5742, "encoder_q-layer.1": 13173.8203, "encoder_q-layer.10": 21753.3242, "encoder_q-layer.11": 46370.3672, "encoder_q-layer.2": 14279.6309, "encoder_q-layer.3": 14459.8896, "encoder_q-layer.4": 14582.2852, "encoder_q-layer.5": 14488.9932, "encoder_q-layer.6": 15299.8105, "encoder_q-layer.7": 17514.7715, "encoder_q-layer.8": 17917.0273, "encoder_q-layer.9": 16810.8184, "epoch": 0.41, "inbatch_neg_score": 44.8295, "inbatch_pos_score": 45.3125, "learning_rate": 3.2444444444444446e-05, "loss": 2.3262, "norm_diff": 0.0333, "num_tokens_overlap": 5.5859, "num_tokens_union": 55.1239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28722.1052, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7839, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7226, "sent_len_1": 67.0028, "sent_len_max_0": 18.8525, "sent_len_max_1": 188.3262, "stdk": 0.0439, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 41600 }, { "accuracy": 56.8359, "doc_norm": 6.8155, "encoder_q-embeddings": 18484.1641, "encoder_q-layer.0": 12695.0762, "encoder_q-layer.1": 13217.8662, "encoder_q-layer.10": 19629.9629, "encoder_q-layer.11": 40068.5156, "encoder_q-layer.2": 14339.3496, "encoder_q-layer.3": 14005.8916, "encoder_q-layer.4": 14125.9824, "encoder_q-layer.5": 13603.1475, "encoder_q-layer.6": 14350.4141, "encoder_q-layer.7": 15775.9707, "encoder_q-layer.8": 17152.873, "encoder_q-layer.9": 16431.3281, "epoch": 0.41, "inbatch_neg_score": 44.8387, "inbatch_pos_score": 45.3438, "learning_rate": 3.238888888888889e-05, "loss": 2.3143, "norm_diff": 0.0313, "num_tokens_overlap": 5.5717, "num_tokens_union": 54.8702, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26811.4875, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7842, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.73, "sent_len_1": 66.5419, "sent_len_max_0": 18.825, "sent_len_max_1": 187.2025, "stdk": 0.0434, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 41700 }, { "accuracy": 57.0312, "doc_norm": 6.8131, "encoder_q-embeddings": 18154.8809, "encoder_q-layer.0": 12610.9766, "encoder_q-layer.1": 13308.751, "encoder_q-layer.10": 20878.7695, "encoder_q-layer.11": 39417.0664, "encoder_q-layer.2": 14328.4102, "encoder_q-layer.3": 14133.0488, "encoder_q-layer.4": 14657.6113, "encoder_q-layer.5": 13884.8203, "encoder_q-layer.6": 14147.2412, "encoder_q-layer.7": 15644.2402, "encoder_q-layer.8": 16777.0293, "encoder_q-layer.9": 15713.8262, "epoch": 0.41, "inbatch_neg_score": 44.8212, "inbatch_pos_score": 45.3125, "learning_rate": 3.233333333333333e-05, "loss": 2.2933, "norm_diff": 0.03, "num_tokens_overlap": 5.5866, "num_tokens_union": 55.0735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26685.3615, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7832, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7272, "sent_len_1": 66.8193, "sent_len_max_0": 18.8813, "sent_len_max_1": 187.9162, "stdk": 0.0429, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 41800 }, { "accuracy": 55.957, "doc_norm": 6.8151, "encoder_q-embeddings": 17572.0078, "encoder_q-layer.0": 12681.0996, "encoder_q-layer.1": 12825.8418, "encoder_q-layer.10": 23285.4258, "encoder_q-layer.11": 43632.6484, "encoder_q-layer.2": 14188.1143, "encoder_q-layer.3": 14091.79, "encoder_q-layer.4": 14647.5723, "encoder_q-layer.5": 14127.458, "encoder_q-layer.6": 15260.4355, "encoder_q-layer.7": 16337.9199, "encoder_q-layer.8": 17996.6113, "encoder_q-layer.9": 16911.207, "epoch": 0.41, "inbatch_neg_score": 44.7832, "inbatch_pos_score": 45.3125, "learning_rate": 3.227777777777778e-05, "loss": 2.3586, "norm_diff": 0.0347, "num_tokens_overlap": 5.5929, "num_tokens_union": 55.1121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28088.2906, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7804, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.743, "sent_len_1": 66.9007, "sent_len_max_0": 18.8263, "sent_len_max_1": 189.3325, "stdk": 0.0439, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 41900 }, { "accuracy": 51.9531, "doc_norm": 6.8103, "encoder_q-embeddings": 18300.2949, "encoder_q-layer.0": 12605.9131, "encoder_q-layer.1": 13165.2002, "encoder_q-layer.10": 22439.832, "encoder_q-layer.11": 38606.0039, "encoder_q-layer.2": 14354.0713, "encoder_q-layer.3": 14649.2803, "encoder_q-layer.4": 15248.9443, "encoder_q-layer.5": 14613.9434, "encoder_q-layer.6": 16424.7539, "encoder_q-layer.7": 17403.7617, "encoder_q-layer.8": 18525.6484, "encoder_q-layer.9": 17370.0391, "epoch": 0.41, "inbatch_neg_score": 44.7489, "inbatch_pos_score": 45.25, "learning_rate": 3.222222222222223e-05, "loss": 2.3376, "norm_diff": 0.0331, "num_tokens_overlap": 5.5645, "num_tokens_union": 54.8977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27829.6782, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7772, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7043, "sent_len_1": 66.6012, "sent_len_max_0": 18.8137, "sent_len_max_1": 188.3613, "stdk": 0.0434, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 42000 }, { "accuracy": 54.8828, "doc_norm": 6.8096, "encoder_q-embeddings": 17794.2656, "encoder_q-layer.0": 12280.2139, "encoder_q-layer.1": 12977.1045, "encoder_q-layer.10": 20567.793, "encoder_q-layer.11": 39007.25, "encoder_q-layer.2": 13761.8848, "encoder_q-layer.3": 13775.4209, "encoder_q-layer.4": 14073.9365, "encoder_q-layer.5": 13551.3418, "encoder_q-layer.6": 14431.6504, "encoder_q-layer.7": 15616.3291, "encoder_q-layer.8": 17498.2598, "encoder_q-layer.9": 16684.8516, "epoch": 0.41, "inbatch_neg_score": 44.7233, "inbatch_pos_score": 45.2188, "learning_rate": 3.2166666666666665e-05, "loss": 2.2959, "norm_diff": 0.0333, "num_tokens_overlap": 5.5854, "num_tokens_union": 55.1101, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26797.1902, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7763, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7206, "sent_len_1": 66.9787, "sent_len_max_0": 18.8125, "sent_len_max_1": 188.3187, "stdk": 0.0431, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 42100 }, { "accuracy": 58.2031, "doc_norm": 6.8148, "encoder_q-embeddings": 17708.8477, "encoder_q-layer.0": 12254.8975, "encoder_q-layer.1": 12459.9834, "encoder_q-layer.10": 21221.2148, "encoder_q-layer.11": 45383.082, "encoder_q-layer.2": 13377.0381, "encoder_q-layer.3": 13831.0088, "encoder_q-layer.4": 13831.8125, "encoder_q-layer.5": 13574.4111, "encoder_q-layer.6": 14935.4492, "encoder_q-layer.7": 16270.4043, "encoder_q-layer.8": 19597.2539, "encoder_q-layer.9": 17345.3945, "epoch": 0.41, "inbatch_neg_score": 44.7681, "inbatch_pos_score": 45.3125, "learning_rate": 3.2111111111111114e-05, "loss": 2.367, "norm_diff": 0.0334, "num_tokens_overlap": 5.5763, "num_tokens_union": 54.9208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27960.1582, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7814, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7174, "sent_len_1": 66.6189, "sent_len_max_0": 18.7913, "sent_len_max_1": 185.9825, "stdk": 0.0438, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 42200 }, { "accuracy": 56.3477, "doc_norm": 6.8124, "encoder_q-embeddings": 18167.8867, "encoder_q-layer.0": 13022.5869, "encoder_q-layer.1": 13372.6523, "encoder_q-layer.10": 21590.9062, "encoder_q-layer.11": 41640.0391, "encoder_q-layer.2": 14356.1318, "encoder_q-layer.3": 14570.5957, "encoder_q-layer.4": 15520.873, "encoder_q-layer.5": 14768.3711, "encoder_q-layer.6": 15183.2275, "encoder_q-layer.7": 15749.8496, "encoder_q-layer.8": 18119.6484, "encoder_q-layer.9": 16940.4434, "epoch": 0.41, "inbatch_neg_score": 44.749, "inbatch_pos_score": 45.2812, "learning_rate": 3.2055555555555556e-05, "loss": 2.3014, "norm_diff": 0.0319, "num_tokens_overlap": 5.5479, "num_tokens_union": 54.8523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27525.5213, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7805, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6797, "sent_len_1": 66.502, "sent_len_max_0": 18.7925, "sent_len_max_1": 188.2975, "stdk": 0.0447, "stdq": 0.039, "stdqueue_k": 0.0, "step": 42300 }, { "accuracy": 55.3711, "doc_norm": 6.8059, "encoder_q-embeddings": 18177.9961, "encoder_q-layer.0": 12520.6221, "encoder_q-layer.1": 12964.5596, "encoder_q-layer.10": 23151.9668, "encoder_q-layer.11": 42653.5586, "encoder_q-layer.2": 13853.9043, "encoder_q-layer.3": 13834.0039, "encoder_q-layer.4": 14007.9951, "encoder_q-layer.5": 14473.0527, "encoder_q-layer.6": 14621.8535, "encoder_q-layer.7": 16176.2012, "encoder_q-layer.8": 17979.5215, "encoder_q-layer.9": 17052.6152, "epoch": 0.41, "inbatch_neg_score": 44.7409, "inbatch_pos_score": 45.2188, "learning_rate": 3.2000000000000005e-05, "loss": 2.3326, "norm_diff": 0.0302, "num_tokens_overlap": 5.5692, "num_tokens_union": 54.9946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28056.4464, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7758, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7294, "sent_len_1": 66.7338, "sent_len_max_0": 18.7387, "sent_len_max_1": 188.27, "stdk": 0.0438, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 42400 }, { "accuracy": 57.6172, "doc_norm": 6.806, "encoder_q-embeddings": 18135.1348, "encoder_q-layer.0": 12846.4502, "encoder_q-layer.1": 12805.3193, "encoder_q-layer.10": 20725.0586, "encoder_q-layer.11": 37722.3125, "encoder_q-layer.2": 13588.9404, "encoder_q-layer.3": 13776.9297, "encoder_q-layer.4": 14164.7334, "encoder_q-layer.5": 13848.8369, "encoder_q-layer.6": 14609.5332, "encoder_q-layer.7": 15482.624, "encoder_q-layer.8": 17146.0625, "encoder_q-layer.9": 15978.9238, "epoch": 0.41, "inbatch_neg_score": 44.6684, "inbatch_pos_score": 45.1875, "learning_rate": 3.194444444444444e-05, "loss": 2.2851, "norm_diff": 0.0328, "num_tokens_overlap": 5.5899, "num_tokens_union": 54.9704, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26285.8667, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7732, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7405, "sent_len_1": 66.703, "sent_len_max_0": 18.8938, "sent_len_max_1": 190.5888, "stdk": 0.0436, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 42500 }, { "accuracy": 57.0312, "doc_norm": 6.8031, "encoder_q-embeddings": 17479.3438, "encoder_q-layer.0": 12880.0186, "encoder_q-layer.1": 13394.0391, "encoder_q-layer.10": 19104.1074, "encoder_q-layer.11": 36090.1016, "encoder_q-layer.2": 14198.4375, "encoder_q-layer.3": 13712.7656, "encoder_q-layer.4": 13950.416, "encoder_q-layer.5": 13500.9473, "encoder_q-layer.6": 14237.4053, "encoder_q-layer.7": 14947.1416, "encoder_q-layer.8": 16622.875, "encoder_q-layer.9": 15197.5664, "epoch": 0.42, "inbatch_neg_score": 44.6361, "inbatch_pos_score": 45.125, "learning_rate": 3.188888888888889e-05, "loss": 2.3242, "norm_diff": 0.0336, "num_tokens_overlap": 5.5827, "num_tokens_union": 55.128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25999.8158, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7695, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7426, "sent_len_1": 66.9385, "sent_len_max_0": 18.8087, "sent_len_max_1": 190.9062, "stdk": 0.0439, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 42600 }, { "accuracy": 53.5156, "doc_norm": 6.8005, "encoder_q-embeddings": 18575.0078, "encoder_q-layer.0": 12926.6123, "encoder_q-layer.1": 13177.2842, "encoder_q-layer.10": 25857.6094, "encoder_q-layer.11": 44294.9961, "encoder_q-layer.2": 14201.9883, "encoder_q-layer.3": 14439.4727, "encoder_q-layer.4": 15322.1582, "encoder_q-layer.5": 14633.5635, "encoder_q-layer.6": 15773.4961, "encoder_q-layer.7": 16555.8047, "encoder_q-layer.8": 20608.625, "encoder_q-layer.9": 18625.4512, "epoch": 0.42, "inbatch_neg_score": 44.5796, "inbatch_pos_score": 45.0938, "learning_rate": 3.183333333333334e-05, "loss": 2.2987, "norm_diff": 0.0329, "num_tokens_overlap": 5.5801, "num_tokens_union": 54.973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29127.1517, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7675, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7305, "sent_len_1": 66.7069, "sent_len_max_0": 18.87, "sent_len_max_1": 189.1813, "stdk": 0.044, "stdq": 0.04, "stdqueue_k": 0.0, "step": 42700 }, { "accuracy": 56.1523, "doc_norm": 6.7965, "encoder_q-embeddings": 19462.0273, "encoder_q-layer.0": 13215.2588, "encoder_q-layer.1": 13644.0283, "encoder_q-layer.10": 21271.209, "encoder_q-layer.11": 41695.082, "encoder_q-layer.2": 14854.9971, "encoder_q-layer.3": 14641.2832, "encoder_q-layer.4": 15001.3936, "encoder_q-layer.5": 14536.6699, "encoder_q-layer.6": 15722.2715, "encoder_q-layer.7": 16227.4619, "encoder_q-layer.8": 17993.7891, "encoder_q-layer.9": 16504.0645, "epoch": 0.42, "inbatch_neg_score": 44.5229, "inbatch_pos_score": 45.0312, "learning_rate": 3.177777777777778e-05, "loss": 2.3255, "norm_diff": 0.0323, "num_tokens_overlap": 5.5758, "num_tokens_union": 55.0787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28010.5613, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7642, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7126, "sent_len_1": 66.8805, "sent_len_max_0": 18.8513, "sent_len_max_1": 188.465, "stdk": 0.0425, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 42800 }, { "accuracy": 56.7383, "doc_norm": 6.7939, "encoder_q-embeddings": 18013.2969, "encoder_q-layer.0": 12402.3887, "encoder_q-layer.1": 12886.2832, "encoder_q-layer.10": 20753.0801, "encoder_q-layer.11": 42158.0156, "encoder_q-layer.2": 14090.1084, "encoder_q-layer.3": 14139.2139, "encoder_q-layer.4": 14562.2168, "encoder_q-layer.5": 14732.4248, "encoder_q-layer.6": 15153.7686, "encoder_q-layer.7": 15810.2275, "encoder_q-layer.8": 17922.3027, "encoder_q-layer.9": 16829.1289, "epoch": 0.42, "inbatch_neg_score": 44.5035, "inbatch_pos_score": 45.0, "learning_rate": 3.1722222222222224e-05, "loss": 2.2859, "norm_diff": 0.0349, "num_tokens_overlap": 5.5806, "num_tokens_union": 55.0114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27776.6709, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.759, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.72, "sent_len_1": 66.7793, "sent_len_max_0": 18.6788, "sent_len_max_1": 187.5012, "stdk": 0.0441, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 42900 }, { "accuracy": 59.668, "doc_norm": 6.7911, "encoder_q-embeddings": 18076.5664, "encoder_q-layer.0": 12721.3682, "encoder_q-layer.1": 12903.6191, "encoder_q-layer.10": 22316.8008, "encoder_q-layer.11": 43206.3008, "encoder_q-layer.2": 14061.4775, "encoder_q-layer.3": 13866.6543, "encoder_q-layer.4": 14374.4072, "encoder_q-layer.5": 14521.5967, "encoder_q-layer.6": 15390.2842, "encoder_q-layer.7": 17349.4609, "encoder_q-layer.8": 20033.1445, "encoder_q-layer.9": 18212.0215, "epoch": 0.42, "inbatch_neg_score": 44.4511, "inbatch_pos_score": 44.9688, "learning_rate": 3.1666666666666666e-05, "loss": 2.3035, "norm_diff": 0.0344, "num_tokens_overlap": 5.5809, "num_tokens_union": 54.9414, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28145.5437, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7567, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7154, "sent_len_1": 66.6693, "sent_len_max_0": 18.7963, "sent_len_max_1": 190.2612, "stdk": 0.044, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 43000 }, { "accuracy": 54.0039, "doc_norm": 6.7919, "encoder_q-embeddings": 17432.5547, "encoder_q-layer.0": 12299.6982, "encoder_q-layer.1": 12368.3662, "encoder_q-layer.10": 21144.8301, "encoder_q-layer.11": 41571.9062, "encoder_q-layer.2": 13581.8398, "encoder_q-layer.3": 13646.1582, "encoder_q-layer.4": 14031.8545, "encoder_q-layer.5": 13821.8096, "encoder_q-layer.6": 14910.4502, "encoder_q-layer.7": 15912.7803, "encoder_q-layer.8": 17525.5352, "encoder_q-layer.9": 16646.5156, "epoch": 0.42, "inbatch_neg_score": 44.4816, "inbatch_pos_score": 44.9688, "learning_rate": 3.1611111111111115e-05, "loss": 2.3008, "norm_diff": 0.0345, "num_tokens_overlap": 5.5836, "num_tokens_union": 54.9929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27220.6878, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7574, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7393, "sent_len_1": 66.7756, "sent_len_max_0": 19.0087, "sent_len_max_1": 190.3162, "stdk": 0.0435, "stdq": 0.039, "stdqueue_k": 0.0, "step": 43100 }, { "accuracy": 56.8359, "doc_norm": 6.7958, "encoder_q-embeddings": 19074.082, "encoder_q-layer.0": 13242.3154, "encoder_q-layer.1": 13396.4951, "encoder_q-layer.10": 20565.709, "encoder_q-layer.11": 46678.7305, "encoder_q-layer.2": 14152.9795, "encoder_q-layer.3": 14323.168, "encoder_q-layer.4": 14920.5137, "encoder_q-layer.5": 14666.4541, "encoder_q-layer.6": 15699.082, "encoder_q-layer.7": 16285.2969, "encoder_q-layer.8": 19327.8398, "encoder_q-layer.9": 16747.127, "epoch": 0.42, "inbatch_neg_score": 44.5103, "inbatch_pos_score": 45.0312, "learning_rate": 3.155555555555556e-05, "loss": 2.3429, "norm_diff": 0.0355, "num_tokens_overlap": 5.5756, "num_tokens_union": 54.9975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28978.1329, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7602, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7179, "sent_len_1": 66.7742, "sent_len_max_0": 18.9288, "sent_len_max_1": 188.5275, "stdk": 0.0435, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 43200 }, { "accuracy": 55.4688, "doc_norm": 6.7905, "encoder_q-embeddings": 18913.7852, "encoder_q-layer.0": 13068.5557, "encoder_q-layer.1": 13105.6973, "encoder_q-layer.10": 27625.3555, "encoder_q-layer.11": 45639.3984, "encoder_q-layer.2": 14079.3955, "encoder_q-layer.3": 14323.458, "encoder_q-layer.4": 14939.7646, "encoder_q-layer.5": 15068.958, "encoder_q-layer.6": 15735.792, "encoder_q-layer.7": 15998.7598, "encoder_q-layer.8": 19182.7793, "encoder_q-layer.9": 18989.5781, "epoch": 0.42, "inbatch_neg_score": 44.4787, "inbatch_pos_score": 45.0, "learning_rate": 3.15e-05, "loss": 2.2945, "norm_diff": 0.0319, "num_tokens_overlap": 5.58, "num_tokens_union": 54.9551, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29390.2468, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7587, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7271, "sent_len_1": 66.6414, "sent_len_max_0": 18.8137, "sent_len_max_1": 187.9613, "stdk": 0.0438, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 43300 }, { "accuracy": 56.543, "doc_norm": 6.7859, "encoder_q-embeddings": 18621.6523, "encoder_q-layer.0": 12855.3604, "encoder_q-layer.1": 13545.082, "encoder_q-layer.10": 20416.8555, "encoder_q-layer.11": 40544.0234, "encoder_q-layer.2": 14213.1152, "encoder_q-layer.3": 13778.1543, "encoder_q-layer.4": 14522.8574, "encoder_q-layer.5": 13796.3027, "encoder_q-layer.6": 15027.833, "encoder_q-layer.7": 16572.3516, "encoder_q-layer.8": 17784.6699, "encoder_q-layer.9": 16563.248, "epoch": 0.42, "inbatch_neg_score": 44.4322, "inbatch_pos_score": 44.9375, "learning_rate": 3.144444444444445e-05, "loss": 2.3196, "norm_diff": 0.0307, "num_tokens_overlap": 5.5832, "num_tokens_union": 54.9736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27159.1548, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7552, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7379, "sent_len_1": 66.6501, "sent_len_max_0": 18.8238, "sent_len_max_1": 187.6275, "stdk": 0.044, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 43400 }, { "accuracy": 53.5156, "doc_norm": 6.7842, "encoder_q-embeddings": 18174.0293, "encoder_q-layer.0": 12703.7568, "encoder_q-layer.1": 13116.1328, "encoder_q-layer.10": 20393.7148, "encoder_q-layer.11": 38040.0352, "encoder_q-layer.2": 14238.4512, "encoder_q-layer.3": 14728.3203, "encoder_q-layer.4": 15134.6309, "encoder_q-layer.5": 14549.8164, "encoder_q-layer.6": 15124.1973, "encoder_q-layer.7": 16040.4941, "encoder_q-layer.8": 17709.1211, "encoder_q-layer.9": 16586.4863, "epoch": 0.42, "inbatch_neg_score": 44.3828, "inbatch_pos_score": 44.875, "learning_rate": 3.138888888888889e-05, "loss": 2.29, "norm_diff": 0.0332, "num_tokens_overlap": 5.5675, "num_tokens_union": 55.0097, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27281.0144, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.751, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7218, "sent_len_1": 66.7314, "sent_len_max_0": 18.935, "sent_len_max_1": 188.7063, "stdk": 0.0434, "stdq": 0.039, "stdqueue_k": 0.0, "step": 43500 }, { "accuracy": 55.0781, "doc_norm": 6.7885, "encoder_q-embeddings": 17836.6523, "encoder_q-layer.0": 12787.4033, "encoder_q-layer.1": 13063.3672, "encoder_q-layer.10": 26791.9121, "encoder_q-layer.11": 44686.5781, "encoder_q-layer.2": 14166.9492, "encoder_q-layer.3": 14183.2832, "encoder_q-layer.4": 15020.626, "encoder_q-layer.5": 14350.3262, "encoder_q-layer.6": 15278.9258, "encoder_q-layer.7": 16405.8652, "encoder_q-layer.8": 19837.5684, "encoder_q-layer.9": 18873.3203, "epoch": 0.43, "inbatch_neg_score": 44.4256, "inbatch_pos_score": 44.9375, "learning_rate": 3.1333333333333334e-05, "loss": 2.3021, "norm_diff": 0.0322, "num_tokens_overlap": 5.5654, "num_tokens_union": 54.7855, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29128.6036, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7563, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7209, "sent_len_1": 66.4416, "sent_len_max_0": 18.82, "sent_len_max_1": 187.5888, "stdk": 0.0445, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 43600 }, { "accuracy": 57.0312, "doc_norm": 6.7892, "encoder_q-embeddings": 18477.1348, "encoder_q-layer.0": 12961.5527, "encoder_q-layer.1": 13018.0977, "encoder_q-layer.10": 20303.7402, "encoder_q-layer.11": 39088.9844, "encoder_q-layer.2": 14176.6895, "encoder_q-layer.3": 14153.1514, "encoder_q-layer.4": 14572.5566, "encoder_q-layer.5": 14031.0293, "encoder_q-layer.6": 14956.29, "encoder_q-layer.7": 16924.8789, "encoder_q-layer.8": 17315.2285, "encoder_q-layer.9": 16008.2178, "epoch": 0.43, "inbatch_neg_score": 44.4478, "inbatch_pos_score": 44.9375, "learning_rate": 3.1277777777777776e-05, "loss": 2.2904, "norm_diff": 0.0338, "num_tokens_overlap": 5.5724, "num_tokens_union": 54.9587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27326.6049, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7554, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6903, "sent_len_1": 66.787, "sent_len_max_0": 18.79, "sent_len_max_1": 188.5575, "stdk": 0.0431, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 43700 }, { "accuracy": 57.9102, "doc_norm": 6.7881, "encoder_q-embeddings": 17694.627, "encoder_q-layer.0": 12094.0879, "encoder_q-layer.1": 12745.625, "encoder_q-layer.10": 26250.4707, "encoder_q-layer.11": 49914.2734, "encoder_q-layer.2": 14017.8965, "encoder_q-layer.3": 13557.5898, "encoder_q-layer.4": 13662.1426, "encoder_q-layer.5": 13631.9551, "encoder_q-layer.6": 14456.8408, "encoder_q-layer.7": 15474.5371, "encoder_q-layer.8": 17652.4648, "encoder_q-layer.9": 16873.0781, "epoch": 0.43, "inbatch_neg_score": 44.4235, "inbatch_pos_score": 44.9375, "learning_rate": 3.1222222222222225e-05, "loss": 2.2806, "norm_diff": 0.0334, "num_tokens_overlap": 5.5797, "num_tokens_union": 55.0205, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29019.8531, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7547, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6982, "sent_len_1": 66.8453, "sent_len_max_0": 18.855, "sent_len_max_1": 190.35, "stdk": 0.0444, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 43800 }, { "accuracy": 54.5898, "doc_norm": 6.7873, "encoder_q-embeddings": 18098.4648, "encoder_q-layer.0": 12946.249, "encoder_q-layer.1": 13383.6035, "encoder_q-layer.10": 20076.2695, "encoder_q-layer.11": 45536.7773, "encoder_q-layer.2": 14574.7988, "encoder_q-layer.3": 14627.9258, "encoder_q-layer.4": 14861.375, "encoder_q-layer.5": 14381.459, "encoder_q-layer.6": 15168.0557, "encoder_q-layer.7": 15807.2441, "encoder_q-layer.8": 17947.8164, "encoder_q-layer.9": 16702.5078, "epoch": 0.43, "inbatch_neg_score": 44.4263, "inbatch_pos_score": 44.9375, "learning_rate": 3.116666666666667e-05, "loss": 2.3428, "norm_diff": 0.0319, "num_tokens_overlap": 5.5769, "num_tokens_union": 55.0477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28441.3076, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7554, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7457, "sent_len_1": 66.8077, "sent_len_max_0": 18.905, "sent_len_max_1": 189.4263, "stdk": 0.0436, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 43900 }, { "accuracy": 58.1055, "doc_norm": 6.7879, "encoder_q-embeddings": 17180.2891, "encoder_q-layer.0": 12133.8271, "encoder_q-layer.1": 12758.2988, "encoder_q-layer.10": 22016.793, "encoder_q-layer.11": 41317.4688, "encoder_q-layer.2": 14089.6523, "encoder_q-layer.3": 14223.4893, "encoder_q-layer.4": 14025.9951, "encoder_q-layer.5": 13716.0645, "encoder_q-layer.6": 15362.5342, "encoder_q-layer.7": 16277.7988, "encoder_q-layer.8": 19309.3711, "encoder_q-layer.9": 17464.0332, "epoch": 0.43, "inbatch_neg_score": 44.3757, "inbatch_pos_score": 44.9062, "learning_rate": 3.111111111111111e-05, "loss": 2.3272, "norm_diff": 0.0383, "num_tokens_overlap": 5.5688, "num_tokens_union": 55.0973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27546.1946, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7496, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7118, "sent_len_1": 66.95, "sent_len_max_0": 18.715, "sent_len_max_1": 193.075, "stdk": 0.045, "stdq": 0.039, "stdqueue_k": 0.0, "step": 44000 }, { "accuracy": 54.1016, "doc_norm": 6.7805, "encoder_q-embeddings": 18909.2188, "encoder_q-layer.0": 12719.6045, "encoder_q-layer.1": 13337.9043, "encoder_q-layer.10": 19558.9355, "encoder_q-layer.11": 40306.9492, "encoder_q-layer.2": 14335.6787, "encoder_q-layer.3": 14489.666, "encoder_q-layer.4": 15397.2637, "encoder_q-layer.5": 14483.3701, "encoder_q-layer.6": 15373.2363, "encoder_q-layer.7": 16141.2344, "encoder_q-layer.8": 18376.2461, "encoder_q-layer.9": 16686.7188, "epoch": 0.43, "inbatch_neg_score": 44.391, "inbatch_pos_score": 44.875, "learning_rate": 3.105555555555555e-05, "loss": 2.3206, "norm_diff": 0.03, "num_tokens_overlap": 5.57, "num_tokens_union": 54.7921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27773.1269, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7505, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7074, "sent_len_1": 66.4291, "sent_len_max_0": 18.8438, "sent_len_max_1": 188.0962, "stdk": 0.0429, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 44100 }, { "accuracy": 55.957, "doc_norm": 6.7843, "encoder_q-embeddings": 17199.832, "encoder_q-layer.0": 12137.9541, "encoder_q-layer.1": 12543.5928, "encoder_q-layer.10": 21659.4551, "encoder_q-layer.11": 38387.6094, "encoder_q-layer.2": 13956.8828, "encoder_q-layer.3": 14162.2041, "encoder_q-layer.4": 14395.1504, "encoder_q-layer.5": 14439.3311, "encoder_q-layer.6": 15054.6494, "encoder_q-layer.7": 15869.7754, "encoder_q-layer.8": 17466.6914, "encoder_q-layer.9": 16701.0273, "epoch": 0.43, "inbatch_neg_score": 44.3927, "inbatch_pos_score": 44.9062, "learning_rate": 3.1e-05, "loss": 2.3392, "norm_diff": 0.0335, "num_tokens_overlap": 5.5865, "num_tokens_union": 55.0215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26774.7884, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7508, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7204, "sent_len_1": 66.7801, "sent_len_max_0": 18.9025, "sent_len_max_1": 189.8, "stdk": 0.0429, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 44200 }, { "accuracy": 59.4727, "doc_norm": 6.7846, "encoder_q-embeddings": 17184.9375, "encoder_q-layer.0": 12334.1328, "encoder_q-layer.1": 12699.5625, "encoder_q-layer.10": 22916.9863, "encoder_q-layer.11": 40506.8398, "encoder_q-layer.2": 14053.4277, "encoder_q-layer.3": 13787.9336, "encoder_q-layer.4": 14199.3486, "encoder_q-layer.5": 13663.7158, "encoder_q-layer.6": 14388.1152, "encoder_q-layer.7": 16475.0586, "encoder_q-layer.8": 17545.0098, "encoder_q-layer.9": 16325.1572, "epoch": 0.43, "inbatch_neg_score": 44.3693, "inbatch_pos_score": 44.875, "learning_rate": 3.094444444444445e-05, "loss": 2.3109, "norm_diff": 0.0339, "num_tokens_overlap": 5.5803, "num_tokens_union": 55.0308, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27108.4917, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7507, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.696, "sent_len_1": 66.839, "sent_len_max_0": 18.7688, "sent_len_max_1": 188.3775, "stdk": 0.0434, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 44300 }, { "accuracy": 54.8828, "doc_norm": 6.7818, "encoder_q-embeddings": 18217.9785, "encoder_q-layer.0": 12858.458, "encoder_q-layer.1": 13467.0889, "encoder_q-layer.10": 23158.9375, "encoder_q-layer.11": 42434.8398, "encoder_q-layer.2": 14084.957, "encoder_q-layer.3": 14209.6475, "encoder_q-layer.4": 15032.5752, "encoder_q-layer.5": 14516.2305, "encoder_q-layer.6": 15774.5352, "encoder_q-layer.7": 17622.5352, "encoder_q-layer.8": 20018.3828, "encoder_q-layer.9": 18447.6816, "epoch": 0.43, "inbatch_neg_score": 44.3536, "inbatch_pos_score": 44.8438, "learning_rate": 3.088888888888889e-05, "loss": 2.2857, "norm_diff": 0.0322, "num_tokens_overlap": 5.5786, "num_tokens_union": 55.0831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28183.9583, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7496, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7388, "sent_len_1": 66.8741, "sent_len_max_0": 18.935, "sent_len_max_1": 189.38, "stdk": 0.044, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 44400 }, { "accuracy": 54.5898, "doc_norm": 6.781, "encoder_q-embeddings": 17503.5586, "encoder_q-layer.0": 12685.2266, "encoder_q-layer.1": 13379.5684, "encoder_q-layer.10": 22609.4121, "encoder_q-layer.11": 43149.7812, "encoder_q-layer.2": 14314.1924, "encoder_q-layer.3": 14686.2754, "encoder_q-layer.4": 15086.4434, "encoder_q-layer.5": 14442.5059, "encoder_q-layer.6": 15515.8008, "encoder_q-layer.7": 16678.6562, "encoder_q-layer.8": 19168.2539, "encoder_q-layer.9": 17411.1016, "epoch": 0.43, "inbatch_neg_score": 44.2973, "inbatch_pos_score": 44.8125, "learning_rate": 3.0833333333333335e-05, "loss": 2.2757, "norm_diff": 0.0359, "num_tokens_overlap": 5.5893, "num_tokens_union": 55.0266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28013.6818, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7451, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7277, "sent_len_1": 66.8429, "sent_len_max_0": 18.8562, "sent_len_max_1": 190.2113, "stdk": 0.0456, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 44500 }, { "accuracy": 57.6172, "doc_norm": 6.7797, "encoder_q-embeddings": 18120.3828, "encoder_q-layer.0": 12793.5488, "encoder_q-layer.1": 12971.1787, "encoder_q-layer.10": 24500.1875, "encoder_q-layer.11": 44354.7891, "encoder_q-layer.2": 13933.7842, "encoder_q-layer.3": 13878.7783, "encoder_q-layer.4": 14276.6504, "encoder_q-layer.5": 13960.3057, "encoder_q-layer.6": 14665.5693, "encoder_q-layer.7": 15680.7598, "encoder_q-layer.8": 18595.8711, "encoder_q-layer.9": 17158.791, "epoch": 0.44, "inbatch_neg_score": 44.3311, "inbatch_pos_score": 44.8438, "learning_rate": 3.077777777777778e-05, "loss": 2.269, "norm_diff": 0.0324, "num_tokens_overlap": 5.5727, "num_tokens_union": 54.9411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28072.5398, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7473, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7217, "sent_len_1": 66.641, "sent_len_max_0": 18.8113, "sent_len_max_1": 187.655, "stdk": 0.0434, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 44600 }, { "accuracy": 55.8594, "doc_norm": 6.7765, "encoder_q-embeddings": 17325.5879, "encoder_q-layer.0": 12746.7461, "encoder_q-layer.1": 12912.542, "encoder_q-layer.10": 22747.6699, "encoder_q-layer.11": 42612.9805, "encoder_q-layer.2": 14143.4014, "encoder_q-layer.3": 14067.7656, "encoder_q-layer.4": 14427.3516, "encoder_q-layer.5": 13837.2178, "encoder_q-layer.6": 15894.8154, "encoder_q-layer.7": 17314.0645, "encoder_q-layer.8": 18603.6055, "encoder_q-layer.9": 16646.666, "epoch": 0.44, "inbatch_neg_score": 44.2889, "inbatch_pos_score": 44.7812, "learning_rate": 3.0722222222222227e-05, "loss": 2.363, "norm_diff": 0.0326, "num_tokens_overlap": 5.576, "num_tokens_union": 55.0843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27667.2441, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7439, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7386, "sent_len_1": 66.8659, "sent_len_max_0": 18.8487, "sent_len_max_1": 190.0362, "stdk": 0.0423, "stdq": 0.039, "stdqueue_k": 0.0, "step": 44700 }, { "accuracy": 56.543, "doc_norm": 6.7737, "encoder_q-embeddings": 17486.584, "encoder_q-layer.0": 12479.4551, "encoder_q-layer.1": 12902.1768, "encoder_q-layer.10": 19777.3145, "encoder_q-layer.11": 36727.0742, "encoder_q-layer.2": 13680.5303, "encoder_q-layer.3": 13984.6299, "encoder_q-layer.4": 14155.7549, "encoder_q-layer.5": 14038.9971, "encoder_q-layer.6": 15186.8066, "encoder_q-layer.7": 15992.999, "encoder_q-layer.8": 17616.4609, "encoder_q-layer.9": 16074.1943, "epoch": 0.44, "inbatch_neg_score": 44.2563, "inbatch_pos_score": 44.75, "learning_rate": 3.066666666666667e-05, "loss": 2.3262, "norm_diff": 0.0338, "num_tokens_overlap": 5.587, "num_tokens_union": 55.0613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26190.8631, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.74, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.727, "sent_len_1": 66.8491, "sent_len_max_0": 18.925, "sent_len_max_1": 190.7688, "stdk": 0.0434, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 44800 }, { "accuracy": 55.2734, "doc_norm": 6.7732, "encoder_q-embeddings": 18734.1445, "encoder_q-layer.0": 12947.8643, "encoder_q-layer.1": 13332.708, "encoder_q-layer.10": 21101.0371, "encoder_q-layer.11": 37380.9961, "encoder_q-layer.2": 14458.7236, "encoder_q-layer.3": 14504.8574, "encoder_q-layer.4": 14769.584, "encoder_q-layer.5": 14232.1064, "encoder_q-layer.6": 15067.25, "encoder_q-layer.7": 15546.9531, "encoder_q-layer.8": 17105.1621, "encoder_q-layer.9": 15899.168, "epoch": 0.44, "inbatch_neg_score": 44.2438, "inbatch_pos_score": 44.75, "learning_rate": 3.061111111111111e-05, "loss": 2.3187, "norm_diff": 0.0324, "num_tokens_overlap": 5.5874, "num_tokens_union": 54.8801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26656.1385, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7409, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7331, "sent_len_1": 66.6117, "sent_len_max_0": 18.905, "sent_len_max_1": 190.0737, "stdk": 0.0432, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 44900 }, { "accuracy": 57.2266, "doc_norm": 6.7726, "encoder_q-embeddings": 18436.832, "encoder_q-layer.0": 12542.1201, "encoder_q-layer.1": 13053.3779, "encoder_q-layer.10": 21697.2988, "encoder_q-layer.11": 47473.3594, "encoder_q-layer.2": 14132.1357, "encoder_q-layer.3": 13864.4893, "encoder_q-layer.4": 13639.4951, "encoder_q-layer.5": 13752.9346, "encoder_q-layer.6": 14548.3057, "encoder_q-layer.7": 15628.3262, "encoder_q-layer.8": 18289.0938, "encoder_q-layer.9": 16731.0977, "epoch": 0.44, "inbatch_neg_score": 44.1801, "inbatch_pos_score": 44.6875, "learning_rate": 3.055555555555556e-05, "loss": 2.3069, "norm_diff": 0.0357, "num_tokens_overlap": 5.5799, "num_tokens_union": 55.0387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28481.6224, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7369, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7051, "sent_len_1": 66.8073, "sent_len_max_0": 18.8637, "sent_len_max_1": 187.7138, "stdk": 0.0437, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 45000 }, { "accuracy": 55.957, "doc_norm": 6.7611, "encoder_q-embeddings": 18285.3574, "encoder_q-layer.0": 12264.0674, "encoder_q-layer.1": 12425.4922, "encoder_q-layer.10": 19216.0586, "encoder_q-layer.11": 39484.8242, "encoder_q-layer.2": 13510.8262, "encoder_q-layer.3": 13663.999, "encoder_q-layer.4": 13472.7363, "encoder_q-layer.5": 13291.2715, "encoder_q-layer.6": 14547.1387, "encoder_q-layer.7": 15775.5117, "encoder_q-layer.8": 17925.0137, "encoder_q-layer.9": 15790.2949, "epoch": 0.44, "inbatch_neg_score": 44.0952, "inbatch_pos_score": 44.5938, "learning_rate": 3.05e-05, "loss": 2.3265, "norm_diff": 0.0315, "num_tokens_overlap": 5.5608, "num_tokens_union": 54.8234, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26560.0931, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7296, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7204, "sent_len_1": 66.48, "sent_len_max_0": 18.7712, "sent_len_max_1": 188.0712, "stdk": 0.0433, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 45100 }, { "accuracy": 57.0312, "doc_norm": 6.7605, "encoder_q-embeddings": 17373.9395, "encoder_q-layer.0": 12131.2607, "encoder_q-layer.1": 12644.5732, "encoder_q-layer.10": 18949.4941, "encoder_q-layer.11": 38532.5703, "encoder_q-layer.2": 13528.9795, "encoder_q-layer.3": 13670.0566, "encoder_q-layer.4": 13985.8154, "encoder_q-layer.5": 13774.3955, "encoder_q-layer.6": 14976.1172, "encoder_q-layer.7": 15624.5938, "encoder_q-layer.8": 18034.8145, "encoder_q-layer.9": 15571.9863, "epoch": 0.44, "inbatch_neg_score": 44.0443, "inbatch_pos_score": 44.5625, "learning_rate": 3.044444444444445e-05, "loss": 2.3347, "norm_diff": 0.0355, "num_tokens_overlap": 5.577, "num_tokens_union": 55.0177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26288.3206, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.725, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.705, "sent_len_1": 66.8245, "sent_len_max_0": 18.8837, "sent_len_max_1": 188.9475, "stdk": 0.044, "stdq": 0.039, "stdqueue_k": 0.0, "step": 45200 }, { "accuracy": 56.4453, "doc_norm": 6.7665, "encoder_q-embeddings": 17989.7148, "encoder_q-layer.0": 12571.418, "encoder_q-layer.1": 13022.6289, "encoder_q-layer.10": 24930.2637, "encoder_q-layer.11": 42205.6016, "encoder_q-layer.2": 14191.4678, "encoder_q-layer.3": 14429.9072, "encoder_q-layer.4": 14787.0801, "encoder_q-layer.5": 14290.6416, "encoder_q-layer.6": 14910.7617, "encoder_q-layer.7": 15638.3281, "encoder_q-layer.8": 18929.9473, "encoder_q-layer.9": 17593.4062, "epoch": 0.44, "inbatch_neg_score": 44.0573, "inbatch_pos_score": 44.5938, "learning_rate": 3.0388888888888887e-05, "loss": 2.2611, "norm_diff": 0.0385, "num_tokens_overlap": 5.5811, "num_tokens_union": 55.1538, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28217.7058, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7279, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7151, "sent_len_1": 66.9874, "sent_len_max_0": 18.8137, "sent_len_max_1": 189.99, "stdk": 0.0453, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 45300 }, { "accuracy": 58.2031, "doc_norm": 6.765, "encoder_q-embeddings": 16778.7188, "encoder_q-layer.0": 12131.2334, "encoder_q-layer.1": 12644.5166, "encoder_q-layer.10": 21817.1016, "encoder_q-layer.11": 42584.8438, "encoder_q-layer.2": 13628.8535, "encoder_q-layer.3": 13900.4453, "encoder_q-layer.4": 14785.1211, "encoder_q-layer.5": 14426.7373, "encoder_q-layer.6": 15669.3193, "encoder_q-layer.7": 16797.5352, "encoder_q-layer.8": 18427.9004, "encoder_q-layer.9": 17316.9863, "epoch": 0.44, "inbatch_neg_score": 44.0855, "inbatch_pos_score": 44.5938, "learning_rate": 3.0333333333333337e-05, "loss": 2.2898, "norm_diff": 0.0359, "num_tokens_overlap": 5.5891, "num_tokens_union": 54.9501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27599.6438, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7291, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7143, "sent_len_1": 66.7137, "sent_len_max_0": 18.8388, "sent_len_max_1": 191.2575, "stdk": 0.0446, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 45400 }, { "accuracy": 57.8125, "doc_norm": 6.7676, "encoder_q-embeddings": 17802.1133, "encoder_q-layer.0": 12151.7861, "encoder_q-layer.1": 12443.2607, "encoder_q-layer.10": 20413.3789, "encoder_q-layer.11": 40439.5859, "encoder_q-layer.2": 13605.8193, "encoder_q-layer.3": 13782.7197, "encoder_q-layer.4": 13884.6973, "encoder_q-layer.5": 13927.7236, "encoder_q-layer.6": 14627.4932, "encoder_q-layer.7": 16178.7744, "encoder_q-layer.8": 18082.6504, "encoder_q-layer.9": 16470.457, "epoch": 0.44, "inbatch_neg_score": 44.1383, "inbatch_pos_score": 44.6562, "learning_rate": 3.0277777777777776e-05, "loss": 2.3179, "norm_diff": 0.0363, "num_tokens_overlap": 5.5888, "num_tokens_union": 54.9408, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26622.7288, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7313, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7305, "sent_len_1": 66.6165, "sent_len_max_0": 18.8538, "sent_len_max_1": 189.1838, "stdk": 0.0434, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 45500 }, { "accuracy": 56.3477, "doc_norm": 6.7649, "encoder_q-embeddings": 19641.5586, "encoder_q-layer.0": 13103.9336, "encoder_q-layer.1": 13145.1445, "encoder_q-layer.10": 20016.7129, "encoder_q-layer.11": 39740.25, "encoder_q-layer.2": 14243.1162, "encoder_q-layer.3": 14222.2568, "encoder_q-layer.4": 14571.7324, "encoder_q-layer.5": 14268.5781, "encoder_q-layer.6": 15271.6016, "encoder_q-layer.7": 15794.5254, "encoder_q-layer.8": 17823.6211, "encoder_q-layer.9": 16464.5215, "epoch": 0.45, "inbatch_neg_score": 44.0866, "inbatch_pos_score": 44.5938, "learning_rate": 3.0222222222222225e-05, "loss": 2.3392, "norm_diff": 0.0345, "num_tokens_overlap": 5.5633, "num_tokens_union": 54.9463, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27550.9276, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7304, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6966, "sent_len_1": 66.6825, "sent_len_max_0": 18.8337, "sent_len_max_1": 188.2163, "stdk": 0.0444, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 45600 }, { "accuracy": 56.1523, "doc_norm": 6.76, "encoder_q-embeddings": 17212.4883, "encoder_q-layer.0": 12489.4824, "encoder_q-layer.1": 13018.2715, "encoder_q-layer.10": 20666.7461, "encoder_q-layer.11": 41425.0391, "encoder_q-layer.2": 14173.3174, "encoder_q-layer.3": 13875.6367, "encoder_q-layer.4": 14359.6738, "encoder_q-layer.5": 14220.9629, "encoder_q-layer.6": 16271.5645, "encoder_q-layer.7": 16184.0244, "encoder_q-layer.8": 17752.9492, "encoder_q-layer.9": 16679.668, "epoch": 0.45, "inbatch_neg_score": 44.0823, "inbatch_pos_score": 44.5938, "learning_rate": 3.016666666666667e-05, "loss": 2.2983, "norm_diff": 0.0319, "num_tokens_overlap": 5.5756, "num_tokens_union": 55.0575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27630.0446, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7281, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7187, "sent_len_1": 66.855, "sent_len_max_0": 18.87, "sent_len_max_1": 189.6675, "stdk": 0.0436, "stdq": 0.039, "stdqueue_k": 0.0, "step": 45700 }, { "accuracy": 54.4922, "doc_norm": 6.7588, "encoder_q-embeddings": 18243.9922, "encoder_q-layer.0": 12465.0879, "encoder_q-layer.1": 12871.0156, "encoder_q-layer.10": 19671.9609, "encoder_q-layer.11": 43187.1875, "encoder_q-layer.2": 13978.7197, "encoder_q-layer.3": 14170.876, "encoder_q-layer.4": 14359.5674, "encoder_q-layer.5": 13402.2334, "encoder_q-layer.6": 14700.8232, "encoder_q-layer.7": 15713.9707, "encoder_q-layer.8": 17701.7012, "encoder_q-layer.9": 16103.6191, "epoch": 0.45, "inbatch_neg_score": 44.0622, "inbatch_pos_score": 44.5625, "learning_rate": 3.0111111111111113e-05, "loss": 2.2835, "norm_diff": 0.0333, "num_tokens_overlap": 5.5896, "num_tokens_union": 55.1046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27831.7234, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7255, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7303, "sent_len_1": 66.9361, "sent_len_max_0": 18.9013, "sent_len_max_1": 189.9475, "stdk": 0.0438, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 45800 }, { "accuracy": 58.8867, "doc_norm": 6.7544, "encoder_q-embeddings": 17072.5449, "encoder_q-layer.0": 12084.4492, "encoder_q-layer.1": 12661.1826, "encoder_q-layer.10": 25832.0664, "encoder_q-layer.11": 43689.0117, "encoder_q-layer.2": 13788.6162, "encoder_q-layer.3": 14286.5, "encoder_q-layer.4": 14690.9775, "encoder_q-layer.5": 14496.9482, "encoder_q-layer.6": 14698.0752, "encoder_q-layer.7": 15478.1562, "encoder_q-layer.8": 17372.4863, "encoder_q-layer.9": 17455.2773, "epoch": 0.45, "inbatch_neg_score": 44.0036, "inbatch_pos_score": 44.5, "learning_rate": 3.005555555555556e-05, "loss": 2.2991, "norm_diff": 0.0321, "num_tokens_overlap": 5.5788, "num_tokens_union": 54.9676, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28115.2662, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7223, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7097, "sent_len_1": 66.6827, "sent_len_max_0": 18.8275, "sent_len_max_1": 189.4025, "stdk": 0.0431, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 45900 }, { "accuracy": 54.9805, "doc_norm": 6.7465, "encoder_q-embeddings": 16821.0469, "encoder_q-layer.0": 11610.9287, "encoder_q-layer.1": 12279.0039, "encoder_q-layer.10": 19899.9551, "encoder_q-layer.11": 37730.582, "encoder_q-layer.2": 13450.6152, "encoder_q-layer.3": 13646.8457, "encoder_q-layer.4": 14190.1631, "encoder_q-layer.5": 13716.1934, "encoder_q-layer.6": 14650.5967, "encoder_q-layer.7": 15077.916, "encoder_q-layer.8": 16654.2891, "encoder_q-layer.9": 15513.207, "epoch": 0.45, "inbatch_neg_score": 43.9405, "inbatch_pos_score": 44.4375, "learning_rate": 3e-05, "loss": 2.316, "norm_diff": 0.0295, "num_tokens_overlap": 5.5926, "num_tokens_union": 55.1112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25839.2822, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.717, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7205, "sent_len_1": 66.9794, "sent_len_max_0": 18.925, "sent_len_max_1": 187.2862, "stdk": 0.0424, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 46000 }, { "accuracy": 55.5664, "doc_norm": 6.7482, "encoder_q-embeddings": 17228.5938, "encoder_q-layer.0": 12096.9365, "encoder_q-layer.1": 12433.6943, "encoder_q-layer.10": 21459.0527, "encoder_q-layer.11": 43017.4609, "encoder_q-layer.2": 13854.5713, "encoder_q-layer.3": 13870.1328, "encoder_q-layer.4": 14285.1182, "encoder_q-layer.5": 14129.0811, "encoder_q-layer.6": 15338.4961, "encoder_q-layer.7": 17257.8281, "encoder_q-layer.8": 17903.0, "encoder_q-layer.9": 16598.2012, "epoch": 0.45, "inbatch_neg_score": 43.9162, "inbatch_pos_score": 44.4062, "learning_rate": 2.9944444444444446e-05, "loss": 2.2717, "norm_diff": 0.0334, "num_tokens_overlap": 5.5869, "num_tokens_union": 55.0029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27429.6532, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7148, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.715, "sent_len_1": 66.7595, "sent_len_max_0": 18.85, "sent_len_max_1": 188.5462, "stdk": 0.0431, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 46100 }, { "accuracy": 58.0078, "doc_norm": 6.751, "encoder_q-embeddings": 17850.5156, "encoder_q-layer.0": 12272.3223, "encoder_q-layer.1": 12862.3867, "encoder_q-layer.10": 21201.9531, "encoder_q-layer.11": 38848.6211, "encoder_q-layer.2": 14251.168, "encoder_q-layer.3": 14366.6514, "encoder_q-layer.4": 14541.2852, "encoder_q-layer.5": 14021.0352, "encoder_q-layer.6": 15398.6035, "encoder_q-layer.7": 16089.0127, "encoder_q-layer.8": 17672.4707, "encoder_q-layer.9": 16571.6016, "epoch": 0.45, "inbatch_neg_score": 43.9023, "inbatch_pos_score": 44.4062, "learning_rate": 2.988888888888889e-05, "loss": 2.3227, "norm_diff": 0.0367, "num_tokens_overlap": 5.5728, "num_tokens_union": 54.8803, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26888.3442, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7143, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7069, "sent_len_1": 66.5937, "sent_len_max_0": 18.9213, "sent_len_max_1": 189.9162, "stdk": 0.0436, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 46200 }, { "accuracy": 54.3945, "doc_norm": 6.7504, "encoder_q-embeddings": 18124.002, "encoder_q-layer.0": 12828.3594, "encoder_q-layer.1": 12982.5371, "encoder_q-layer.10": 20669.5586, "encoder_q-layer.11": 42035.8438, "encoder_q-layer.2": 14401.4551, "encoder_q-layer.3": 14597.707, "encoder_q-layer.4": 15303.6533, "encoder_q-layer.5": 14709.584, "encoder_q-layer.6": 15787.0859, "encoder_q-layer.7": 16350.9961, "encoder_q-layer.8": 18934.957, "encoder_q-layer.9": 16395.375, "epoch": 0.45, "inbatch_neg_score": 43.8897, "inbatch_pos_score": 44.4062, "learning_rate": 2.9833333333333335e-05, "loss": 2.315, "norm_diff": 0.0352, "num_tokens_overlap": 5.5689, "num_tokens_union": 54.8917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27861.4871, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7151, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6984, "sent_len_1": 66.6057, "sent_len_max_0": 18.8875, "sent_len_max_1": 190.0712, "stdk": 0.0443, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 46300 }, { "accuracy": 57.2266, "doc_norm": 6.7538, "encoder_q-embeddings": 18505.5039, "encoder_q-layer.0": 12696.5107, "encoder_q-layer.1": 13241.2354, "encoder_q-layer.10": 23418.2012, "encoder_q-layer.11": 42999.8438, "encoder_q-layer.2": 13987.127, "encoder_q-layer.3": 14090.5234, "encoder_q-layer.4": 14631.1084, "encoder_q-layer.5": 14274.2939, "encoder_q-layer.6": 15144.4336, "encoder_q-layer.7": 15959.8779, "encoder_q-layer.8": 17494.0, "encoder_q-layer.9": 16869.9727, "epoch": 0.45, "inbatch_neg_score": 43.915, "inbatch_pos_score": 44.4375, "learning_rate": 2.9777777777777777e-05, "loss": 2.3224, "norm_diff": 0.0335, "num_tokens_overlap": 5.582, "num_tokens_union": 55.003, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28381.9685, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7203, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7357, "sent_len_1": 66.7286, "sent_len_max_0": 18.855, "sent_len_max_1": 189.065, "stdk": 0.0453, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 46400 }, { "accuracy": 55.2734, "doc_norm": 6.7475, "encoder_q-embeddings": 17953.002, "encoder_q-layer.0": 12908.2412, "encoder_q-layer.1": 13137.4961, "encoder_q-layer.10": 23564.793, "encoder_q-layer.11": 38087.8633, "encoder_q-layer.2": 13853.4268, "encoder_q-layer.3": 13669.6582, "encoder_q-layer.4": 14212.2803, "encoder_q-layer.5": 13266.7207, "encoder_q-layer.6": 15077.0098, "encoder_q-layer.7": 15669.1738, "encoder_q-layer.8": 18454.918, "encoder_q-layer.9": 17215.9062, "epoch": 0.45, "inbatch_neg_score": 43.8988, "inbatch_pos_score": 44.4062, "learning_rate": 2.9722222222222223e-05, "loss": 2.28, "norm_diff": 0.0325, "num_tokens_overlap": 5.5895, "num_tokens_union": 55.011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27163.4688, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7149, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7305, "sent_len_1": 66.7268, "sent_len_max_0": 18.9, "sent_len_max_1": 190.6875, "stdk": 0.0443, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 46500 }, { "accuracy": 56.0547, "doc_norm": 6.7447, "encoder_q-embeddings": 18329.8496, "encoder_q-layer.0": 12581.4053, "encoder_q-layer.1": 12841.1846, "encoder_q-layer.10": 21104.457, "encoder_q-layer.11": 39822.5312, "encoder_q-layer.2": 13626.5078, "encoder_q-layer.3": 13725.4668, "encoder_q-layer.4": 14058.4053, "encoder_q-layer.5": 14145.3945, "encoder_q-layer.6": 15250.2217, "encoder_q-layer.7": 15454.2344, "encoder_q-layer.8": 17766.2754, "encoder_q-layer.9": 16885.3066, "epoch": 0.45, "inbatch_neg_score": 43.8696, "inbatch_pos_score": 44.375, "learning_rate": 2.9666666666666672e-05, "loss": 2.2829, "norm_diff": 0.0311, "num_tokens_overlap": 5.5893, "num_tokens_union": 55.1375, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27213.0301, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7135, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7442, "sent_len_1": 67.0055, "sent_len_max_0": 18.8938, "sent_len_max_1": 189.95, "stdk": 0.0435, "stdq": 0.039, "stdqueue_k": 0.0, "step": 46600 }, { "accuracy": 55.8594, "doc_norm": 6.7473, "encoder_q-embeddings": 17966.8828, "encoder_q-layer.0": 12561.6514, "encoder_q-layer.1": 12821.8242, "encoder_q-layer.10": 22653.6191, "encoder_q-layer.11": 42457.918, "encoder_q-layer.2": 14206.8926, "encoder_q-layer.3": 14736.9121, "encoder_q-layer.4": 15295.7529, "encoder_q-layer.5": 14898.1543, "encoder_q-layer.6": 15995.2275, "encoder_q-layer.7": 16500.6523, "encoder_q-layer.8": 18654.4648, "encoder_q-layer.9": 18587.0586, "epoch": 0.46, "inbatch_neg_score": 43.8202, "inbatch_pos_score": 44.3438, "learning_rate": 2.961111111111111e-05, "loss": 2.2828, "norm_diff": 0.0385, "num_tokens_overlap": 5.5584, "num_tokens_union": 55.0379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28415.2744, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7087, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.696, "sent_len_1": 66.8128, "sent_len_max_0": 18.7688, "sent_len_max_1": 188.6825, "stdk": 0.0442, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 46700 }, { "accuracy": 59.1797, "doc_norm": 6.7391, "encoder_q-embeddings": 17373.1875, "encoder_q-layer.0": 12037.6367, "encoder_q-layer.1": 12611.875, "encoder_q-layer.10": 19857.8535, "encoder_q-layer.11": 39232.2422, "encoder_q-layer.2": 13557.2344, "encoder_q-layer.3": 13795.5879, "encoder_q-layer.4": 14221.1719, "encoder_q-layer.5": 13830.4775, "encoder_q-layer.6": 15127.9014, "encoder_q-layer.7": 15889.752, "encoder_q-layer.8": 18277.6426, "encoder_q-layer.9": 16321.1963, "epoch": 0.46, "inbatch_neg_score": 43.7546, "inbatch_pos_score": 44.2812, "learning_rate": 2.955555555555556e-05, "loss": 2.2899, "norm_diff": 0.0326, "num_tokens_overlap": 5.5754, "num_tokens_union": 55.0143, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26865.1186, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7065, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7112, "sent_len_1": 66.8254, "sent_len_max_0": 18.7875, "sent_len_max_1": 190.0425, "stdk": 0.0445, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 46800 }, { "accuracy": 57.1289, "doc_norm": 6.742, "encoder_q-embeddings": 16505.4688, "encoder_q-layer.0": 11861.0156, "encoder_q-layer.1": 12376.8184, "encoder_q-layer.10": 20172.6348, "encoder_q-layer.11": 40150.4023, "encoder_q-layer.2": 13384.1631, "encoder_q-layer.3": 13448.292, "encoder_q-layer.4": 13811.6152, "encoder_q-layer.5": 13727.2354, "encoder_q-layer.6": 14440.2725, "encoder_q-layer.7": 15277.9004, "encoder_q-layer.8": 17616.4297, "encoder_q-layer.9": 16505.543, "epoch": 0.46, "inbatch_neg_score": 43.7769, "inbatch_pos_score": 44.2812, "learning_rate": 2.95e-05, "loss": 2.3305, "norm_diff": 0.0353, "num_tokens_overlap": 5.5775, "num_tokens_union": 54.9038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26476.6911, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7067, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.6753, "sent_len_max_0": 18.8513, "sent_len_max_1": 191.4525, "stdk": 0.0432, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 46900 }, { "accuracy": 56.0547, "doc_norm": 6.7396, "encoder_q-embeddings": 18068.9824, "encoder_q-layer.0": 12742.6377, "encoder_q-layer.1": 12887.2559, "encoder_q-layer.10": 19698.2266, "encoder_q-layer.11": 39684.7734, "encoder_q-layer.2": 14009.5674, "encoder_q-layer.3": 13884.334, "encoder_q-layer.4": 14462.8965, "encoder_q-layer.5": 14293.0166, "encoder_q-layer.6": 14804.4336, "encoder_q-layer.7": 16029.7969, "encoder_q-layer.8": 17859.3301, "encoder_q-layer.9": 15914.082, "epoch": 0.46, "inbatch_neg_score": 43.7543, "inbatch_pos_score": 44.25, "learning_rate": 2.9444444444444448e-05, "loss": 2.3013, "norm_diff": 0.0352, "num_tokens_overlap": 5.5821, "num_tokens_union": 55.0645, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27202.7343, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7044, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7212, "sent_len_1": 66.8218, "sent_len_max_0": 18.705, "sent_len_max_1": 189.1175, "stdk": 0.044, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 47000 }, { "accuracy": 58.3984, "doc_norm": 6.7332, "encoder_q-embeddings": 17913.3184, "encoder_q-layer.0": 12066.5342, "encoder_q-layer.1": 12509.2471, "encoder_q-layer.10": 21221.9492, "encoder_q-layer.11": 36768.1211, "encoder_q-layer.2": 13668.21, "encoder_q-layer.3": 13557.8691, "encoder_q-layer.4": 14011.1807, "encoder_q-layer.5": 13872.0527, "encoder_q-layer.6": 14708.6191, "encoder_q-layer.7": 16043.0195, "encoder_q-layer.8": 18887.7422, "encoder_q-layer.9": 16640.252, "epoch": 0.46, "inbatch_neg_score": 43.6979, "inbatch_pos_score": 44.2188, "learning_rate": 2.9388888888888887e-05, "loss": 2.2903, "norm_diff": 0.0315, "num_tokens_overlap": 5.5916, "num_tokens_union": 55.0233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26463.8382, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7018, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7382, "sent_len_1": 66.7326, "sent_len_max_0": 18.835, "sent_len_max_1": 188.9875, "stdk": 0.0435, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 47100 }, { "accuracy": 58.8867, "doc_norm": 6.7353, "encoder_q-embeddings": 17735.3281, "encoder_q-layer.0": 12644.1846, "encoder_q-layer.1": 12989.5674, "encoder_q-layer.10": 20966.2051, "encoder_q-layer.11": 40740.1953, "encoder_q-layer.2": 13953.792, "encoder_q-layer.3": 13812.1514, "encoder_q-layer.4": 14076.1064, "encoder_q-layer.5": 14136.7754, "encoder_q-layer.6": 15442.2236, "encoder_q-layer.7": 15610.5977, "encoder_q-layer.8": 18891.1602, "encoder_q-layer.9": 16126.9619, "epoch": 0.46, "inbatch_neg_score": 43.715, "inbatch_pos_score": 44.2188, "learning_rate": 2.9333333333333336e-05, "loss": 2.3255, "norm_diff": 0.0349, "num_tokens_overlap": 5.5811, "num_tokens_union": 55.1508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26981.2239, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.7004, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7416, "sent_len_1": 66.927, "sent_len_max_0": 18.8525, "sent_len_max_1": 188.215, "stdk": 0.0439, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 47200 }, { "accuracy": 55.4688, "doc_norm": 6.7315, "encoder_q-embeddings": 18139.0547, "encoder_q-layer.0": 12428.9316, "encoder_q-layer.1": 12536.3467, "encoder_q-layer.10": 21189.7754, "encoder_q-layer.11": 40242.2773, "encoder_q-layer.2": 13607.5547, "encoder_q-layer.3": 13633.0498, "encoder_q-layer.4": 14089.3584, "encoder_q-layer.5": 14032.0645, "encoder_q-layer.6": 15596.3037, "encoder_q-layer.7": 16359.6377, "encoder_q-layer.8": 18891.5703, "encoder_q-layer.9": 16988.8711, "epoch": 0.46, "inbatch_neg_score": 43.7033, "inbatch_pos_score": 44.1875, "learning_rate": 2.927777777777778e-05, "loss": 2.3013, "norm_diff": 0.0322, "num_tokens_overlap": 5.5573, "num_tokens_union": 54.8607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27381.9594, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6993, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7036, "sent_len_1": 66.549, "sent_len_max_0": 18.815, "sent_len_max_1": 187.71, "stdk": 0.0432, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 47300 }, { "accuracy": 57.4219, "doc_norm": 6.7323, "encoder_q-embeddings": 17845.3574, "encoder_q-layer.0": 12662.3623, "encoder_q-layer.1": 13310.1416, "encoder_q-layer.10": 20436.8047, "encoder_q-layer.11": 40081.7812, "encoder_q-layer.2": 15046.3994, "encoder_q-layer.3": 14515.2373, "encoder_q-layer.4": 14806.6455, "encoder_q-layer.5": 14427.8457, "encoder_q-layer.6": 15692.1963, "encoder_q-layer.7": 17065.9668, "encoder_q-layer.8": 18481.6035, "encoder_q-layer.9": 16336.6904, "epoch": 0.46, "inbatch_neg_score": 43.6583, "inbatch_pos_score": 44.1562, "learning_rate": 2.9222222222222224e-05, "loss": 2.2896, "norm_diff": 0.0337, "num_tokens_overlap": 5.592, "num_tokens_union": 55.0833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27833.1622, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6986, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7393, "sent_len_1": 66.8679, "sent_len_max_0": 18.9163, "sent_len_max_1": 189.8663, "stdk": 0.0435, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 47400 }, { "accuracy": 53.8086, "doc_norm": 6.7374, "encoder_q-embeddings": 20353.3887, "encoder_q-layer.0": 13506.0713, "encoder_q-layer.1": 13861.0117, "encoder_q-layer.10": 24936.875, "encoder_q-layer.11": 46770.668, "encoder_q-layer.2": 15036.6289, "encoder_q-layer.3": 15603.0557, "encoder_q-layer.4": 15797.0303, "encoder_q-layer.5": 15651.1123, "encoder_q-layer.6": 16782.6211, "encoder_q-layer.7": 18049.2969, "encoder_q-layer.8": 19981.8926, "encoder_q-layer.9": 18285.7129, "epoch": 0.46, "inbatch_neg_score": 43.6634, "inbatch_pos_score": 44.1875, "learning_rate": 2.916666666666667e-05, "loss": 2.3026, "norm_diff": 0.0363, "num_tokens_overlap": 5.5835, "num_tokens_union": 55.0716, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30276.0868, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.7011, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7423, "sent_len_1": 66.824, "sent_len_max_0": 18.7763, "sent_len_max_1": 188.2488, "stdk": 0.0462, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 47500 }, { "accuracy": 54.6875, "doc_norm": 6.7309, "encoder_q-embeddings": 18767.877, "encoder_q-layer.0": 12802.7803, "encoder_q-layer.1": 13331.4678, "encoder_q-layer.10": 24080.4238, "encoder_q-layer.11": 42970.1641, "encoder_q-layer.2": 14480.7139, "encoder_q-layer.3": 14481.3174, "encoder_q-layer.4": 15092.0732, "encoder_q-layer.5": 15265.9639, "encoder_q-layer.6": 16227.2256, "encoder_q-layer.7": 18522.3379, "encoder_q-layer.8": 19948.0117, "encoder_q-layer.9": 18117.6738, "epoch": 0.46, "inbatch_neg_score": 43.6566, "inbatch_pos_score": 44.1562, "learning_rate": 2.9111111111111112e-05, "loss": 2.2835, "norm_diff": 0.0348, "num_tokens_overlap": 5.5817, "num_tokens_union": 55.1217, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29162.0821, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6961, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.732, "sent_len_1": 66.9134, "sent_len_max_0": 18.77, "sent_len_max_1": 189.2212, "stdk": 0.044, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 47600 }, { "accuracy": 54.9805, "doc_norm": 6.7258, "encoder_q-embeddings": 18858.7207, "encoder_q-layer.0": 12622.2979, "encoder_q-layer.1": 12808.4746, "encoder_q-layer.10": 22241.1719, "encoder_q-layer.11": 39233.2539, "encoder_q-layer.2": 14262.5469, "encoder_q-layer.3": 13827.9805, "encoder_q-layer.4": 14689.1875, "encoder_q-layer.5": 14157.9316, "encoder_q-layer.6": 14943.9316, "encoder_q-layer.7": 16067.1992, "encoder_q-layer.8": 18064.8105, "encoder_q-layer.9": 16532.8223, "epoch": 0.47, "inbatch_neg_score": 43.6248, "inbatch_pos_score": 44.125, "learning_rate": 2.9055555555555558e-05, "loss": 2.2889, "norm_diff": 0.0328, "num_tokens_overlap": 5.5828, "num_tokens_union": 54.9707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27312.7293, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.693, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7233, "sent_len_1": 66.6835, "sent_len_max_0": 18.8275, "sent_len_max_1": 186.8088, "stdk": 0.0436, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 47700 }, { "accuracy": 55.0781, "doc_norm": 6.7291, "encoder_q-embeddings": 18454.0, "encoder_q-layer.0": 13016.2266, "encoder_q-layer.1": 13339.7305, "encoder_q-layer.10": 22596.5156, "encoder_q-layer.11": 41945.9531, "encoder_q-layer.2": 14014.6719, "encoder_q-layer.3": 14286.3887, "encoder_q-layer.4": 14292.5664, "encoder_q-layer.5": 13929.7471, "encoder_q-layer.6": 15422.1484, "encoder_q-layer.7": 16085.2695, "encoder_q-layer.8": 18214.1016, "encoder_q-layer.9": 16566.1191, "epoch": 0.47, "inbatch_neg_score": 43.5681, "inbatch_pos_score": 44.0938, "learning_rate": 2.9e-05, "loss": 2.3007, "norm_diff": 0.036, "num_tokens_overlap": 5.5884, "num_tokens_union": 54.9866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28286.0698, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6931, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.747, "sent_len_1": 66.7392, "sent_len_max_0": 18.9288, "sent_len_max_1": 189.94, "stdk": 0.0443, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 47800 }, { "accuracy": 56.6406, "doc_norm": 6.7214, "encoder_q-embeddings": 17933.4824, "encoder_q-layer.0": 12202.793, "encoder_q-layer.1": 12890.3291, "encoder_q-layer.10": 21348.9727, "encoder_q-layer.11": 40559.5273, "encoder_q-layer.2": 14097.9873, "encoder_q-layer.3": 14368.6572, "encoder_q-layer.4": 14512.4863, "encoder_q-layer.5": 14092.9502, "encoder_q-layer.6": 14524.5273, "encoder_q-layer.7": 15289.9463, "encoder_q-layer.8": 17120.1562, "encoder_q-layer.9": 17037.3242, "epoch": 0.47, "inbatch_neg_score": 43.5456, "inbatch_pos_score": 44.0625, "learning_rate": 2.8944444444444446e-05, "loss": 2.2866, "norm_diff": 0.0318, "num_tokens_overlap": 5.5763, "num_tokens_union": 55.0382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27178.4506, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6895, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7389, "sent_len_1": 66.7842, "sent_len_max_0": 18.8012, "sent_len_max_1": 190.0462, "stdk": 0.0432, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 47900 }, { "accuracy": 54.8828, "doc_norm": 6.7265, "encoder_q-embeddings": 18404.7344, "encoder_q-layer.0": 12781.6572, "encoder_q-layer.1": 13006.082, "encoder_q-layer.10": 20343.2812, "encoder_q-layer.11": 38495.75, "encoder_q-layer.2": 13911.374, "encoder_q-layer.3": 14085.9697, "encoder_q-layer.4": 14374.123, "encoder_q-layer.5": 14040.3984, "encoder_q-layer.6": 14974.4102, "encoder_q-layer.7": 15576.8398, "encoder_q-layer.8": 17915.582, "encoder_q-layer.9": 16164.0459, "epoch": 0.47, "inbatch_neg_score": 43.5813, "inbatch_pos_score": 44.0625, "learning_rate": 2.8888888888888888e-05, "loss": 2.2841, "norm_diff": 0.0363, "num_tokens_overlap": 5.5805, "num_tokens_union": 54.9186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26949.0078, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6902, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7056, "sent_len_1": 66.692, "sent_len_max_0": 18.805, "sent_len_max_1": 191.465, "stdk": 0.0436, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 48000 }, { "accuracy": 54.3945, "doc_norm": 6.7218, "encoder_q-embeddings": 18470.4355, "encoder_q-layer.0": 13078.7383, "encoder_q-layer.1": 13558.5059, "encoder_q-layer.10": 25124.2148, "encoder_q-layer.11": 44938.1875, "encoder_q-layer.2": 14473.1445, "encoder_q-layer.3": 14507.3086, "encoder_q-layer.4": 15368.0293, "encoder_q-layer.5": 14463.9385, "encoder_q-layer.6": 15757.5996, "encoder_q-layer.7": 17044.3691, "encoder_q-layer.8": 18965.793, "encoder_q-layer.9": 18255.7266, "epoch": 0.47, "inbatch_neg_score": 43.4699, "inbatch_pos_score": 43.9688, "learning_rate": 2.8833333333333334e-05, "loss": 2.2651, "norm_diff": 0.0363, "num_tokens_overlap": 5.5682, "num_tokens_union": 54.9334, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29269.7035, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6856, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7089, "sent_len_1": 66.5505, "sent_len_max_0": 18.8337, "sent_len_max_1": 188.6475, "stdk": 0.0447, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 48100 }, { "accuracy": 55.3711, "doc_norm": 6.717, "encoder_q-embeddings": 32670.5625, "encoder_q-layer.0": 23317.2188, "encoder_q-layer.1": 24454.8164, "encoder_q-layer.10": 37367.8477, "encoder_q-layer.11": 71014.4375, "encoder_q-layer.2": 27280.9844, "encoder_q-layer.3": 27505.6758, "encoder_q-layer.4": 27361.3301, "encoder_q-layer.5": 27805.3672, "encoder_q-layer.6": 29904.1562, "encoder_q-layer.7": 30922.7051, "encoder_q-layer.8": 32916.8398, "encoder_q-layer.9": 30380.3652, "epoch": 0.47, "inbatch_neg_score": 43.4901, "inbatch_pos_score": 44.0, "learning_rate": 2.877777777777778e-05, "loss": 2.2729, "norm_diff": 0.035, "num_tokens_overlap": 5.5859, "num_tokens_union": 55.1491, "postclip_grad_norm": 1.0, "preclip_grad_norm": 50527.3719, "preclip_grad_norm_avg": 0.0005, "query_norm": 6.682, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.734, "sent_len_1": 66.9361, "sent_len_max_0": 18.9075, "sent_len_max_1": 189.1825, "stdk": 0.0437, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 48200 }, { "accuracy": 58.6914, "doc_norm": 6.7185, "encoder_q-embeddings": 16986.2793, "encoder_q-layer.0": 12017.0918, "encoder_q-layer.1": 12445.9443, "encoder_q-layer.10": 20377.7031, "encoder_q-layer.11": 36637.3828, "encoder_q-layer.2": 13421.2012, "encoder_q-layer.3": 13778.2285, "encoder_q-layer.4": 14518.9209, "encoder_q-layer.5": 13844.3818, "encoder_q-layer.6": 14749.626, "encoder_q-layer.7": 16467.3828, "encoder_q-layer.8": 19265.0664, "encoder_q-layer.9": 16366.1445, "epoch": 0.47, "inbatch_neg_score": 43.4578, "inbatch_pos_score": 43.9688, "learning_rate": 2.8722222222222222e-05, "loss": 2.3221, "norm_diff": 0.0371, "num_tokens_overlap": 5.59, "num_tokens_union": 55.0661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26303.2648, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6814, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7246, "sent_len_1": 66.8659, "sent_len_max_0": 18.8588, "sent_len_max_1": 191.115, "stdk": 0.0446, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 48300 }, { "accuracy": 55.5664, "doc_norm": 6.722, "encoder_q-embeddings": 18602.5527, "encoder_q-layer.0": 12890.3857, "encoder_q-layer.1": 13231.5566, "encoder_q-layer.10": 22710.2285, "encoder_q-layer.11": 40041.2383, "encoder_q-layer.2": 14273.002, "encoder_q-layer.3": 14219.5801, "encoder_q-layer.4": 14366.9648, "encoder_q-layer.5": 14180.6934, "encoder_q-layer.6": 14716.7197, "encoder_q-layer.7": 16148.7314, "encoder_q-layer.8": 18697.2656, "encoder_q-layer.9": 16592.4434, "epoch": 0.47, "inbatch_neg_score": 43.505, "inbatch_pos_score": 44.0312, "learning_rate": 2.8666666666666668e-05, "loss": 2.3083, "norm_diff": 0.0343, "num_tokens_overlap": 5.5933, "num_tokens_union": 55.1421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27824.8413, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6877, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7311, "sent_len_1": 66.9854, "sent_len_max_0": 18.935, "sent_len_max_1": 188.1637, "stdk": 0.0445, "stdq": 0.039, "stdqueue_k": 0.0, "step": 48400 }, { "accuracy": 56.4453, "doc_norm": 6.7235, "encoder_q-embeddings": 17788.5703, "encoder_q-layer.0": 12856.7393, "encoder_q-layer.1": 13176.9717, "encoder_q-layer.10": 20997.041, "encoder_q-layer.11": 39895.5977, "encoder_q-layer.2": 14112.0449, "encoder_q-layer.3": 14145.9502, "encoder_q-layer.4": 14355.3613, "encoder_q-layer.5": 14118.0088, "encoder_q-layer.6": 14987.8799, "encoder_q-layer.7": 16850.4531, "encoder_q-layer.8": 19810.2012, "encoder_q-layer.9": 17611.0156, "epoch": 0.47, "inbatch_neg_score": 43.4928, "inbatch_pos_score": 44.0, "learning_rate": 2.861111111111111e-05, "loss": 2.2925, "norm_diff": 0.0373, "num_tokens_overlap": 5.5865, "num_tokens_union": 54.9643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27523.5822, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6862, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7469, "sent_len_1": 66.6784, "sent_len_max_0": 18.8412, "sent_len_max_1": 189.4712, "stdk": 0.0451, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 48500 }, { "accuracy": 55.0781, "doc_norm": 6.7165, "encoder_q-embeddings": 17746.1055, "encoder_q-layer.0": 12824.3633, "encoder_q-layer.1": 12959.9648, "encoder_q-layer.10": 20479.8887, "encoder_q-layer.11": 39485.3672, "encoder_q-layer.2": 13971.2207, "encoder_q-layer.3": 13779.2344, "encoder_q-layer.4": 14057.8076, "encoder_q-layer.5": 13652.0693, "encoder_q-layer.6": 15399.4824, "encoder_q-layer.7": 16094.0947, "encoder_q-layer.8": 17603.3867, "encoder_q-layer.9": 16360.9951, "epoch": 0.47, "inbatch_neg_score": 43.4368, "inbatch_pos_score": 43.9375, "learning_rate": 2.855555555555556e-05, "loss": 2.2855, "norm_diff": 0.0368, "num_tokens_overlap": 5.582, "num_tokens_union": 55.1141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26870.1723, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6797, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7231, "sent_len_1": 66.9604, "sent_len_max_0": 18.8137, "sent_len_max_1": 190.535, "stdk": 0.0436, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 48600 }, { "accuracy": 55.5664, "doc_norm": 6.7149, "encoder_q-embeddings": 17647.7539, "encoder_q-layer.0": 12480.1992, "encoder_q-layer.1": 12711.1182, "encoder_q-layer.10": 20411.4902, "encoder_q-layer.11": 41216.6172, "encoder_q-layer.2": 13750.3965, "encoder_q-layer.3": 14306.6846, "encoder_q-layer.4": 14722.7217, "encoder_q-layer.5": 14338.4658, "encoder_q-layer.6": 15042.8633, "encoder_q-layer.7": 16290.8145, "encoder_q-layer.8": 18428.3574, "encoder_q-layer.9": 16526.5664, "epoch": 0.48, "inbatch_neg_score": 43.4286, "inbatch_pos_score": 43.9375, "learning_rate": 2.8499999999999998e-05, "loss": 2.2635, "norm_diff": 0.0334, "num_tokens_overlap": 5.5716, "num_tokens_union": 54.8711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27316.7826, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6815, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6979, "sent_len_1": 66.6499, "sent_len_max_0": 18.8975, "sent_len_max_1": 191.3013, "stdk": 0.0438, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 48700 }, { "accuracy": 57.3242, "doc_norm": 6.7159, "encoder_q-embeddings": 18388.0469, "encoder_q-layer.0": 12388.9346, "encoder_q-layer.1": 12609.6445, "encoder_q-layer.10": 19263.5684, "encoder_q-layer.11": 41543.9297, "encoder_q-layer.2": 13596.5039, "encoder_q-layer.3": 13939.3242, "encoder_q-layer.4": 14230.6201, "encoder_q-layer.5": 14278.9971, "encoder_q-layer.6": 15026.2539, "encoder_q-layer.7": 16443.9961, "encoder_q-layer.8": 17331.4375, "encoder_q-layer.9": 16244.8379, "epoch": 0.48, "inbatch_neg_score": 43.435, "inbatch_pos_score": 43.9375, "learning_rate": 2.8444444444444447e-05, "loss": 2.2771, "norm_diff": 0.0362, "num_tokens_overlap": 5.5908, "num_tokens_union": 55.0727, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27314.893, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6797, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7199, "sent_len_1": 66.8987, "sent_len_max_0": 18.9, "sent_len_max_1": 190.8675, "stdk": 0.0437, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 48800 }, { "accuracy": 57.4219, "doc_norm": 6.7143, "encoder_q-embeddings": 17214.8789, "encoder_q-layer.0": 12062.4629, "encoder_q-layer.1": 12522.1152, "encoder_q-layer.10": 20482.0488, "encoder_q-layer.11": 38782.3906, "encoder_q-layer.2": 13499.0488, "encoder_q-layer.3": 13493.5029, "encoder_q-layer.4": 13812.5879, "encoder_q-layer.5": 13801.0068, "encoder_q-layer.6": 14729.458, "encoder_q-layer.7": 15449.6523, "encoder_q-layer.8": 17192.3848, "encoder_q-layer.9": 15877.0742, "epoch": 0.48, "inbatch_neg_score": 43.4524, "inbatch_pos_score": 43.9688, "learning_rate": 2.8388888888888893e-05, "loss": 2.2865, "norm_diff": 0.0345, "num_tokens_overlap": 5.581, "num_tokens_union": 55.0411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26368.7985, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6798, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7208, "sent_len_1": 66.857, "sent_len_max_0": 18.8212, "sent_len_max_1": 189.4087, "stdk": 0.0434, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 48900 }, { "accuracy": 55.4688, "doc_norm": 6.7116, "encoder_q-embeddings": 18075.8848, "encoder_q-layer.0": 12872.873, "encoder_q-layer.1": 13363.3037, "encoder_q-layer.10": 23252.6992, "encoder_q-layer.11": 41245.7969, "encoder_q-layer.2": 14542.792, "encoder_q-layer.3": 14392.0723, "encoder_q-layer.4": 14617.3789, "encoder_q-layer.5": 14162.1025, "encoder_q-layer.6": 15472.0107, "encoder_q-layer.7": 15669.1074, "encoder_q-layer.8": 18451.7012, "encoder_q-layer.9": 17275.3301, "epoch": 0.48, "inbatch_neg_score": 43.3647, "inbatch_pos_score": 43.875, "learning_rate": 2.8333333333333335e-05, "loss": 2.3139, "norm_diff": 0.0359, "num_tokens_overlap": 5.5896, "num_tokens_union": 55.173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28097.4341, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6758, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7542, "sent_len_1": 67.0054, "sent_len_max_0": 18.94, "sent_len_max_1": 190.9913, "stdk": 0.0446, "stdq": 0.039, "stdqueue_k": 0.0, "step": 49000 }, { "accuracy": 59.9609, "doc_norm": 6.7132, "encoder_q-embeddings": 18652.793, "encoder_q-layer.0": 12627.2148, "encoder_q-layer.1": 12814.2871, "encoder_q-layer.10": 22759.293, "encoder_q-layer.11": 40815.5859, "encoder_q-layer.2": 14094.0498, "encoder_q-layer.3": 14081.4355, "encoder_q-layer.4": 14699.5195, "encoder_q-layer.5": 14862.0801, "encoder_q-layer.6": 16099.3555, "encoder_q-layer.7": 17098.2031, "encoder_q-layer.8": 19596.2168, "encoder_q-layer.9": 18197.6172, "epoch": 0.48, "inbatch_neg_score": 43.3297, "inbatch_pos_score": 43.875, "learning_rate": 2.827777777777778e-05, "loss": 2.2953, "norm_diff": 0.0401, "num_tokens_overlap": 5.5748, "num_tokens_union": 55.1067, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28336.0735, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6732, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7238, "sent_len_1": 66.9553, "sent_len_max_0": 18.8825, "sent_len_max_1": 189.7637, "stdk": 0.0451, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 49100 }, { "accuracy": 56.1523, "doc_norm": 6.7072, "encoder_q-embeddings": 16920.0312, "encoder_q-layer.0": 12205.2979, "encoder_q-layer.1": 12461.0811, "encoder_q-layer.10": 21140.0781, "encoder_q-layer.11": 39305.3789, "encoder_q-layer.2": 13550.3535, "encoder_q-layer.3": 13325.7256, "encoder_q-layer.4": 14067.6533, "encoder_q-layer.5": 13911.2676, "encoder_q-layer.6": 15114.8955, "encoder_q-layer.7": 16638.291, "encoder_q-layer.8": 18356.8613, "encoder_q-layer.9": 16439.0215, "epoch": 0.48, "inbatch_neg_score": 43.3322, "inbatch_pos_score": 43.8438, "learning_rate": 2.8222222222222223e-05, "loss": 2.3108, "norm_diff": 0.0346, "num_tokens_overlap": 5.5641, "num_tokens_union": 54.9988, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26515.8142, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6726, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7117, "sent_len_1": 66.7667, "sent_len_max_0": 18.74, "sent_len_max_1": 191.4487, "stdk": 0.0439, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 49200 }, { "accuracy": 55.8594, "doc_norm": 6.7082, "encoder_q-embeddings": 18144.2051, "encoder_q-layer.0": 13587.3779, "encoder_q-layer.1": 13562.5811, "encoder_q-layer.10": 23603.2695, "encoder_q-layer.11": 41608.9922, "encoder_q-layer.2": 14790.3545, "encoder_q-layer.3": 14760.4844, "encoder_q-layer.4": 15017.3145, "encoder_q-layer.5": 14407.6748, "encoder_q-layer.6": 15278.1406, "encoder_q-layer.7": 16589.1777, "encoder_q-layer.8": 19535.7363, "encoder_q-layer.9": 17568.3457, "epoch": 0.48, "inbatch_neg_score": 43.3249, "inbatch_pos_score": 43.8438, "learning_rate": 2.816666666666667e-05, "loss": 2.2611, "norm_diff": 0.0345, "num_tokens_overlap": 5.5741, "num_tokens_union": 54.9872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28688.4309, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6736, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7324, "sent_len_1": 66.7067, "sent_len_max_0": 18.7488, "sent_len_max_1": 189.6138, "stdk": 0.0432, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 49300 }, { "accuracy": 60.0586, "doc_norm": 6.7098, "encoder_q-embeddings": 16893.3848, "encoder_q-layer.0": 11924.1387, "encoder_q-layer.1": 12157.0166, "encoder_q-layer.10": 31423.4629, "encoder_q-layer.11": 48171.0078, "encoder_q-layer.2": 13077.3242, "encoder_q-layer.3": 13028.4805, "encoder_q-layer.4": 13191.7764, "encoder_q-layer.5": 13109.8389, "encoder_q-layer.6": 14549.6123, "encoder_q-layer.7": 15481.249, "encoder_q-layer.8": 19166.7383, "encoder_q-layer.9": 19299.2207, "epoch": 0.48, "inbatch_neg_score": 43.3851, "inbatch_pos_score": 43.9062, "learning_rate": 2.811111111111111e-05, "loss": 2.2659, "norm_diff": 0.0357, "num_tokens_overlap": 5.5803, "num_tokens_union": 54.9899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30033.5355, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6741, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7115, "sent_len_1": 66.7975, "sent_len_max_0": 18.825, "sent_len_max_1": 188.8512, "stdk": 0.0435, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 49400 }, { "accuracy": 54.3945, "doc_norm": 6.71, "encoder_q-embeddings": 19046.8125, "encoder_q-layer.0": 13191.4238, "encoder_q-layer.1": 13713.1328, "encoder_q-layer.10": 23678.543, "encoder_q-layer.11": 40875.9492, "encoder_q-layer.2": 14804.9277, "encoder_q-layer.3": 14679.7246, "encoder_q-layer.4": 15270.3027, "encoder_q-layer.5": 14983.2256, "encoder_q-layer.6": 16336.4414, "encoder_q-layer.7": 16368.666, "encoder_q-layer.8": 19151.8789, "encoder_q-layer.9": 17613.0352, "epoch": 0.48, "inbatch_neg_score": 43.341, "inbatch_pos_score": 43.8438, "learning_rate": 2.8055555555555557e-05, "loss": 2.2545, "norm_diff": 0.0329, "num_tokens_overlap": 5.5819, "num_tokens_union": 55.018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28633.9898, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6771, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7256, "sent_len_1": 66.7645, "sent_len_max_0": 18.87, "sent_len_max_1": 188.9563, "stdk": 0.0439, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 49500 }, { "accuracy": 57.9102, "doc_norm": 6.7027, "encoder_q-embeddings": 17647.4199, "encoder_q-layer.0": 12706.7324, "encoder_q-layer.1": 12861.7764, "encoder_q-layer.10": 21195.1426, "encoder_q-layer.11": 43483.0234, "encoder_q-layer.2": 13765.0781, "encoder_q-layer.3": 13697.6094, "encoder_q-layer.4": 14095.8486, "encoder_q-layer.5": 14248.2441, "encoder_q-layer.6": 14961.1094, "encoder_q-layer.7": 15498.4629, "encoder_q-layer.8": 17481.8027, "encoder_q-layer.9": 16862.1016, "epoch": 0.48, "inbatch_neg_score": 43.285, "inbatch_pos_score": 43.7812, "learning_rate": 2.8000000000000003e-05, "loss": 2.2954, "norm_diff": 0.0333, "num_tokens_overlap": 5.5757, "num_tokens_union": 55.0016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27796.9196, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6694, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7281, "sent_len_1": 66.7259, "sent_len_max_0": 18.9925, "sent_len_max_1": 189.4087, "stdk": 0.0443, "stdq": 0.039, "stdqueue_k": 0.0, "step": 49600 }, { "accuracy": 55.3711, "doc_norm": 6.7076, "encoder_q-embeddings": 19168.9863, "encoder_q-layer.0": 13227.5889, "encoder_q-layer.1": 13558.2939, "encoder_q-layer.10": 20113.3457, "encoder_q-layer.11": 41026.0391, "encoder_q-layer.2": 14725.3086, "encoder_q-layer.3": 14350.5371, "encoder_q-layer.4": 14585.8115, "encoder_q-layer.5": 14437.2754, "encoder_q-layer.6": 14893.252, "encoder_q-layer.7": 16347.8594, "encoder_q-layer.8": 17479.7129, "encoder_q-layer.9": 16661.7793, "epoch": 0.49, "inbatch_neg_score": 43.2757, "inbatch_pos_score": 43.7812, "learning_rate": 2.7944444444444445e-05, "loss": 2.2877, "norm_diff": 0.0391, "num_tokens_overlap": 5.5798, "num_tokens_union": 55.008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27620.2818, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6685, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7231, "sent_len_1": 66.7341, "sent_len_max_0": 18.7112, "sent_len_max_1": 188.1037, "stdk": 0.0441, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 49700 }, { "accuracy": 56.9336, "doc_norm": 6.7042, "encoder_q-embeddings": 17645.7734, "encoder_q-layer.0": 12288.8252, "encoder_q-layer.1": 12702.8027, "encoder_q-layer.10": 19306.2578, "encoder_q-layer.11": 38964.4922, "encoder_q-layer.2": 13771.1875, "encoder_q-layer.3": 13883.7471, "encoder_q-layer.4": 14161.4219, "encoder_q-layer.5": 13840.2627, "encoder_q-layer.6": 14645.7705, "encoder_q-layer.7": 16104.6162, "encoder_q-layer.8": 17271.2168, "encoder_q-layer.9": 15916.4912, "epoch": 0.49, "inbatch_neg_score": 43.3016, "inbatch_pos_score": 43.8125, "learning_rate": 2.788888888888889e-05, "loss": 2.2558, "norm_diff": 0.033, "num_tokens_overlap": 5.5771, "num_tokens_union": 54.9582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26759.3805, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6712, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7088, "sent_len_1": 66.7478, "sent_len_max_0": 18.8225, "sent_len_max_1": 189.785, "stdk": 0.0442, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 49800 }, { "accuracy": 57.1289, "doc_norm": 6.7053, "encoder_q-embeddings": 17647.4258, "encoder_q-layer.0": 12242.126, "encoder_q-layer.1": 12618.4893, "encoder_q-layer.10": 20031.5664, "encoder_q-layer.11": 39426.3945, "encoder_q-layer.2": 13742.2988, "encoder_q-layer.3": 13624.6162, "encoder_q-layer.4": 13768.623, "encoder_q-layer.5": 13720.3154, "encoder_q-layer.6": 14686.8008, "encoder_q-layer.7": 16078.8721, "encoder_q-layer.8": 17411.4883, "encoder_q-layer.9": 16044.4668, "epoch": 0.49, "inbatch_neg_score": 43.2911, "inbatch_pos_score": 43.7812, "learning_rate": 2.7833333333333333e-05, "loss": 2.2679, "norm_diff": 0.0356, "num_tokens_overlap": 5.5749, "num_tokens_union": 54.9888, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26896.8769, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6697, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.73, "sent_len_1": 66.7467, "sent_len_max_0": 18.8625, "sent_len_max_1": 189.9075, "stdk": 0.0444, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 49900 }, { "accuracy": 57.5195, "doc_norm": 6.7014, "encoder_q-embeddings": 18346.6172, "encoder_q-layer.0": 12905.8262, "encoder_q-layer.1": 13131.5488, "encoder_q-layer.10": 24284.2891, "encoder_q-layer.11": 38069.3398, "encoder_q-layer.2": 14037.8711, "encoder_q-layer.3": 13699.375, "encoder_q-layer.4": 14067.377, "encoder_q-layer.5": 13888.2441, "encoder_q-layer.6": 14482.1465, "encoder_q-layer.7": 15755.9639, "encoder_q-layer.8": 18225.8691, "encoder_q-layer.9": 17281.6953, "epoch": 0.49, "inbatch_neg_score": 43.2459, "inbatch_pos_score": 43.75, "learning_rate": 2.777777777777778e-05, "loss": 2.2975, "norm_diff": 0.0348, "num_tokens_overlap": 5.5662, "num_tokens_union": 54.8991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27111.908, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6667, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7092, "sent_len_1": 66.6035, "sent_len_max_0": 18.85, "sent_len_max_1": 190.0087, "stdk": 0.0446, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 50000 }, { "dev_runtime": 27.1597, "dev_samples_per_second": 2.356, "dev_steps_per_second": 0.037, "epoch": 0.49, "step": 50000, "test_accuracy": 8.642578125, "test_doc_norm": 6.6951141357421875, "test_inbatch_neg_score": 43.77082824707031, "test_inbatch_pos_score": 44.520591735839844, "test_loss": 4.020589351654053, "test_norm_diff": 0.0016355514526367188, "test_query_norm": 6.694849014282227, "test_queue_k_norm": 0.0, "test_stdk": 0.036487728357315063, "test_stdq": 0.03643035516142845, "test_stdqueue_k": 0.0 }, { "dev_runtime": 27.1597, "dev_samples_per_second": 2.356, "dev_steps_per_second": 0.037, "epoch": 0.49, "eval_beir-arguana_ndcg@10": 0.37186, "eval_beir-arguana_recall@10": 0.65789, "eval_beir-arguana_recall@100": 0.96799, "eval_beir-arguana_recall@20": 0.82575, "eval_beir-avg_ndcg@10": 0.3895358333333333, "eval_beir-avg_recall@10": 0.4586630833333333, "eval_beir-avg_recall@100": 0.64388925, "eval_beir-avg_recall@20": 0.5269959999999999, "eval_beir-cqadupstack_ndcg@10": 0.2944883333333333, "eval_beir-cqadupstack_recall@10": 0.39287083333333334, "eval_beir-cqadupstack_recall@100": 0.6264825, "eval_beir-cqadupstack_recall@20": 0.4621300000000001, "eval_beir-fiqa_ndcg@10": 0.26932, "eval_beir-fiqa_recall@10": 0.32411, "eval_beir-fiqa_recall@100": 0.59603, "eval_beir-fiqa_recall@20": 0.39597, "eval_beir-nfcorpus_ndcg@10": 0.33599, "eval_beir-nfcorpus_recall@10": 0.16348, "eval_beir-nfcorpus_recall@100": 0.30067, "eval_beir-nfcorpus_recall@20": 0.20002, "eval_beir-nq_ndcg@10": 0.27264, "eval_beir-nq_recall@10": 0.45309, "eval_beir-nq_recall@100": 0.79174, "eval_beir-nq_recall@20": 0.58184, "eval_beir-quora_ndcg@10": 0.78855, "eval_beir-quora_recall@10": 0.89062, "eval_beir-quora_recall@100": 0.97739, "eval_beir-quora_recall@20": 0.92902, "eval_beir-scidocs_ndcg@10": 0.16287, "eval_beir-scidocs_recall@10": 0.17138, "eval_beir-scidocs_recall@100": 0.38357, "eval_beir-scidocs_recall@20": 0.22732, "eval_beir-scifact_ndcg@10": 0.61831, "eval_beir-scifact_recall@10": 0.77133, "eval_beir-scifact_recall@100": 0.90933, "eval_beir-scifact_recall@20": 0.83689, "eval_beir-trec-covid_ndcg@10": 0.57949, "eval_beir-trec-covid_recall@10": 0.626, "eval_beir-trec-covid_recall@100": 0.4476, "eval_beir-trec-covid_recall@20": 0.59, "eval_beir-webis-touche2020_ndcg@10": 0.20184, "eval_beir-webis-touche2020_recall@10": 0.13586, "eval_beir-webis-touche2020_recall@100": 0.43809, "eval_beir-webis-touche2020_recall@20": 0.22102, "eval_senteval-avg_sts": 0.747406052704166, "eval_senteval-sickr_spearman": 0.7318210434843339, "eval_senteval-stsb_spearman": 0.7629910619239982, "step": 50000, "test_accuracy": 8.642578125, "test_doc_norm": 6.6951141357421875, "test_inbatch_neg_score": 43.77082824707031, "test_inbatch_pos_score": 44.520591735839844, "test_loss": 4.020589351654053, "test_norm_diff": 0.0016355514526367188, "test_query_norm": 6.694849014282227, "test_queue_k_norm": 0.0, "test_stdk": 0.036487728357315063, "test_stdq": 0.03643035516142845, "test_stdqueue_k": 0.0 }, { "accuracy": 55.4688, "doc_norm": 6.6982, "encoder_q-embeddings": 17360.5566, "encoder_q-layer.0": 12841.0596, "encoder_q-layer.1": 13385.2764, "encoder_q-layer.10": 20064.2988, "encoder_q-layer.11": 45209.5195, "encoder_q-layer.2": 14868.6113, "encoder_q-layer.3": 14462.1641, "encoder_q-layer.4": 14877.1602, "encoder_q-layer.5": 14492.1924, "encoder_q-layer.6": 15623.376, "encoder_q-layer.7": 16508.543, "encoder_q-layer.8": 18118.1973, "encoder_q-layer.9": 16771.7168, "epoch": 0.49, "inbatch_neg_score": 43.2011, "inbatch_pos_score": 43.7188, "learning_rate": 2.772222222222222e-05, "loss": 2.2696, "norm_diff": 0.0342, "num_tokens_overlap": 5.5731, "num_tokens_union": 55.0592, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28742.1319, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6639, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6984, "sent_len_1": 66.8799, "sent_len_max_0": 18.8325, "sent_len_max_1": 188.855, "stdk": 0.0439, "stdq": 0.039, "stdqueue_k": 0.0, "step": 50100 }, { "accuracy": 55.2734, "doc_norm": 6.698, "encoder_q-embeddings": 17598.3574, "encoder_q-layer.0": 12597.9141, "encoder_q-layer.1": 12659.1201, "encoder_q-layer.10": 20440.0352, "encoder_q-layer.11": 39612.0078, "encoder_q-layer.2": 13628.7324, "encoder_q-layer.3": 13753.9854, "encoder_q-layer.4": 14236.5615, "encoder_q-layer.5": 13873.2129, "encoder_q-layer.6": 14494.3584, "encoder_q-layer.7": 15180.084, "encoder_q-layer.8": 17893.0488, "encoder_q-layer.9": 16149.002, "epoch": 0.49, "inbatch_neg_score": 43.1826, "inbatch_pos_score": 43.7188, "learning_rate": 2.7666666666666667e-05, "loss": 2.3047, "norm_diff": 0.0357, "num_tokens_overlap": 5.573, "num_tokens_union": 55.0527, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26770.8704, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6623, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7112, "sent_len_1": 66.833, "sent_len_max_0": 18.7913, "sent_len_max_1": 190.2875, "stdk": 0.044, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 50200 }, { "accuracy": 58.0078, "doc_norm": 6.6967, "encoder_q-embeddings": 16597.7676, "encoder_q-layer.0": 11868.6416, "encoder_q-layer.1": 12397.6445, "encoder_q-layer.10": 19923.0566, "encoder_q-layer.11": 39623.4297, "encoder_q-layer.2": 13348.498, "encoder_q-layer.3": 13085.6387, "encoder_q-layer.4": 13896.7266, "encoder_q-layer.5": 13652.415, "encoder_q-layer.6": 14767.1426, "encoder_q-layer.7": 15501.2373, "encoder_q-layer.8": 17954.3613, "encoder_q-layer.9": 15941.1865, "epoch": 0.49, "inbatch_neg_score": 43.1962, "inbatch_pos_score": 43.7188, "learning_rate": 2.761111111111111e-05, "loss": 2.3055, "norm_diff": 0.0345, "num_tokens_overlap": 5.5759, "num_tokens_union": 55.0193, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26070.8327, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6623, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.693, "sent_len_1": 66.8353, "sent_len_max_0": 18.845, "sent_len_max_1": 188.6788, "stdk": 0.0437, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 50300 }, { "accuracy": 56.9336, "doc_norm": 6.6974, "encoder_q-embeddings": 16999.4727, "encoder_q-layer.0": 12241.5986, "encoder_q-layer.1": 12552.4141, "encoder_q-layer.10": 24511.9902, "encoder_q-layer.11": 45877.2422, "encoder_q-layer.2": 13602.5635, "encoder_q-layer.3": 13670.0498, "encoder_q-layer.4": 14407.8721, "encoder_q-layer.5": 14017.5615, "encoder_q-layer.6": 14959.6494, "encoder_q-layer.7": 15436.9785, "encoder_q-layer.8": 17679.1543, "encoder_q-layer.9": 16712.1875, "epoch": 0.49, "inbatch_neg_score": 43.1755, "inbatch_pos_score": 43.6875, "learning_rate": 2.7555555555555555e-05, "loss": 2.2654, "norm_diff": 0.0369, "num_tokens_overlap": 5.5806, "num_tokens_union": 55.0213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28058.8069, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6605, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7113, "sent_len_1": 66.7963, "sent_len_max_0": 18.7987, "sent_len_max_1": 190.5625, "stdk": 0.0445, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 50400 }, { "accuracy": 56.543, "doc_norm": 6.6992, "encoder_q-embeddings": 17697.6348, "encoder_q-layer.0": 12268.584, "encoder_q-layer.1": 12936.9102, "encoder_q-layer.10": 22127.4746, "encoder_q-layer.11": 39101.0469, "encoder_q-layer.2": 14204.4268, "encoder_q-layer.3": 14367.8955, "encoder_q-layer.4": 15207.002, "encoder_q-layer.5": 14678.8721, "encoder_q-layer.6": 15859.4277, "encoder_q-layer.7": 16901.3574, "encoder_q-layer.8": 18316.4785, "encoder_q-layer.9": 16831.1133, "epoch": 0.49, "inbatch_neg_score": 43.1612, "inbatch_pos_score": 43.6875, "learning_rate": 2.7500000000000004e-05, "loss": 2.3158, "norm_diff": 0.0373, "num_tokens_overlap": 5.5837, "num_tokens_union": 54.8869, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27670.5033, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6619, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7207, "sent_len_1": 66.6163, "sent_len_max_0": 18.91, "sent_len_max_1": 190.18, "stdk": 0.0444, "stdq": 0.0401, "stdqueue_k": 0.0, "step": 50500 }, { "accuracy": 55.957, "doc_norm": 6.6981, "encoder_q-embeddings": 17851.5977, "encoder_q-layer.0": 12534.1367, "encoder_q-layer.1": 12745.6357, "encoder_q-layer.10": 22543.959, "encoder_q-layer.11": 42368.6562, "encoder_q-layer.2": 13652.707, "encoder_q-layer.3": 13730.6553, "encoder_q-layer.4": 14260.9385, "encoder_q-layer.5": 13905.0674, "encoder_q-layer.6": 14725.0244, "encoder_q-layer.7": 16105.5811, "encoder_q-layer.8": 18742.4023, "encoder_q-layer.9": 17697.4336, "epoch": 0.49, "inbatch_neg_score": 43.1639, "inbatch_pos_score": 43.6875, "learning_rate": 2.7444444444444443e-05, "loss": 2.2972, "norm_diff": 0.0375, "num_tokens_overlap": 5.565, "num_tokens_union": 54.9929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27658.387, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6606, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7066, "sent_len_1": 66.764, "sent_len_max_0": 18.79, "sent_len_max_1": 189.73, "stdk": 0.044, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 50600 }, { "accuracy": 55.8594, "doc_norm": 6.6981, "encoder_q-embeddings": 18971.7715, "encoder_q-layer.0": 13191.417, "encoder_q-layer.1": 13646.2373, "encoder_q-layer.10": 21940.5645, "encoder_q-layer.11": 42128.5195, "encoder_q-layer.2": 14804.4355, "encoder_q-layer.3": 14799.7949, "encoder_q-layer.4": 15654.3271, "encoder_q-layer.5": 14747.3711, "encoder_q-layer.6": 15723.8613, "encoder_q-layer.7": 16255.6309, "encoder_q-layer.8": 17855.1992, "encoder_q-layer.9": 16866.7188, "epoch": 0.49, "inbatch_neg_score": 43.1577, "inbatch_pos_score": 43.6562, "learning_rate": 2.7388888888888892e-05, "loss": 2.287, "norm_diff": 0.038, "num_tokens_overlap": 5.579, "num_tokens_union": 55.0629, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28795.6283, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.66, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7085, "sent_len_1": 66.9567, "sent_len_max_0": 18.7775, "sent_len_max_1": 190.9288, "stdk": 0.0435, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 50700 }, { "accuracy": 55.957, "doc_norm": 6.6915, "encoder_q-embeddings": 19903.1855, "encoder_q-layer.0": 12968.4941, "encoder_q-layer.1": 13250.0098, "encoder_q-layer.10": 21933.1191, "encoder_q-layer.11": 37552.5391, "encoder_q-layer.2": 14760.2412, "encoder_q-layer.3": 14775.5537, "encoder_q-layer.4": 14681.3799, "encoder_q-layer.5": 13999.1064, "encoder_q-layer.6": 15151.2559, "encoder_q-layer.7": 16428.041, "encoder_q-layer.8": 18673.9023, "encoder_q-layer.9": 17086.9922, "epoch": 0.5, "inbatch_neg_score": 43.1086, "inbatch_pos_score": 43.5938, "learning_rate": 2.733333333333333e-05, "loss": 2.2331, "norm_diff": 0.0361, "num_tokens_overlap": 5.5778, "num_tokens_union": 55.0138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27674.6232, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6554, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7363, "sent_len_1": 66.7273, "sent_len_max_0": 18.8738, "sent_len_max_1": 188.885, "stdk": 0.0433, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 50800 }, { "accuracy": 55.8594, "doc_norm": 6.6863, "encoder_q-embeddings": 17235.377, "encoder_q-layer.0": 12471.0166, "encoder_q-layer.1": 12760.6162, "encoder_q-layer.10": 24410.0801, "encoder_q-layer.11": 44005.0078, "encoder_q-layer.2": 13824.1377, "encoder_q-layer.3": 13818.2129, "encoder_q-layer.4": 14345.9697, "encoder_q-layer.5": 13781.3838, "encoder_q-layer.6": 14844.3799, "encoder_q-layer.7": 15577.2773, "encoder_q-layer.8": 18174.5098, "encoder_q-layer.9": 17316.1172, "epoch": 0.5, "inbatch_neg_score": 43.055, "inbatch_pos_score": 43.5625, "learning_rate": 2.727777777777778e-05, "loss": 2.2542, "norm_diff": 0.0326, "num_tokens_overlap": 5.5767, "num_tokens_union": 55.1882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28275.8348, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6537, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.733, "sent_len_1": 67.0756, "sent_len_max_0": 18.7937, "sent_len_max_1": 189.1463, "stdk": 0.0441, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 50900 }, { "accuracy": 57.8125, "doc_norm": 6.6902, "encoder_q-embeddings": 16429.3027, "encoder_q-layer.0": 11879.2207, "encoder_q-layer.1": 12418.0762, "encoder_q-layer.10": 19908.625, "encoder_q-layer.11": 37282.6523, "encoder_q-layer.2": 13241.1973, "encoder_q-layer.3": 13465.0391, "encoder_q-layer.4": 13713.3164, "encoder_q-layer.5": 13169.3604, "encoder_q-layer.6": 14777.0566, "encoder_q-layer.7": 16710.7734, "encoder_q-layer.8": 17478.2891, "encoder_q-layer.9": 15916.25, "epoch": 0.5, "inbatch_neg_score": 43.0756, "inbatch_pos_score": 43.5938, "learning_rate": 2.7222222222222223e-05, "loss": 2.3252, "norm_diff": 0.0363, "num_tokens_overlap": 5.5909, "num_tokens_union": 55.0159, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25742.9868, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.654, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7117, "sent_len_1": 66.7944, "sent_len_max_0": 18.855, "sent_len_max_1": 189.1362, "stdk": 0.0437, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 51000 }, { "accuracy": 57.0312, "doc_norm": 6.6878, "encoder_q-embeddings": 19342.4336, "encoder_q-layer.0": 13209.0039, "encoder_q-layer.1": 13835.8672, "encoder_q-layer.10": 19963.9863, "encoder_q-layer.11": 40393.6953, "encoder_q-layer.2": 14882.9346, "encoder_q-layer.3": 14609.0869, "encoder_q-layer.4": 14903.4316, "encoder_q-layer.5": 14371.0791, "encoder_q-layer.6": 15173.5566, "encoder_q-layer.7": 16368.4033, "encoder_q-layer.8": 19593.2773, "encoder_q-layer.9": 16720.8828, "epoch": 0.5, "inbatch_neg_score": 42.995, "inbatch_pos_score": 43.5312, "learning_rate": 2.716666666666667e-05, "loss": 2.3085, "norm_diff": 0.0373, "num_tokens_overlap": 5.5717, "num_tokens_union": 54.8196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28359.1677, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6505, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6943, "sent_len_1": 66.5142, "sent_len_max_0": 18.9025, "sent_len_max_1": 189.435, "stdk": 0.0447, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 51100 }, { "accuracy": 58.3008, "doc_norm": 6.6882, "encoder_q-embeddings": 17852.5254, "encoder_q-layer.0": 12576.8125, "encoder_q-layer.1": 12738.6113, "encoder_q-layer.10": 20479.8809, "encoder_q-layer.11": 38742.4297, "encoder_q-layer.2": 13986.7646, "encoder_q-layer.3": 14105.5225, "encoder_q-layer.4": 14320.334, "encoder_q-layer.5": 14382.4102, "encoder_q-layer.6": 14608.4121, "encoder_q-layer.7": 15610.3936, "encoder_q-layer.8": 17878.1328, "encoder_q-layer.9": 16279.2197, "epoch": 0.5, "inbatch_neg_score": 42.9973, "inbatch_pos_score": 43.5, "learning_rate": 2.7111111111111114e-05, "loss": 2.2769, "norm_diff": 0.0399, "num_tokens_overlap": 5.5915, "num_tokens_union": 55.079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26963.491, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6483, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7358, "sent_len_1": 66.8787, "sent_len_max_0": 18.7375, "sent_len_max_1": 190.9437, "stdk": 0.0449, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 51200 }, { "accuracy": 58.4961, "doc_norm": 6.6876, "encoder_q-embeddings": 17204.707, "encoder_q-layer.0": 12037.3008, "encoder_q-layer.1": 12431.1943, "encoder_q-layer.10": 21207.1328, "encoder_q-layer.11": 37744.7812, "encoder_q-layer.2": 13695.0547, "encoder_q-layer.3": 13774.4248, "encoder_q-layer.4": 14126.8125, "encoder_q-layer.5": 13619.3242, "encoder_q-layer.6": 14356.0088, "encoder_q-layer.7": 15395.8887, "encoder_q-layer.8": 17004.5957, "encoder_q-layer.9": 15783.6406, "epoch": 0.5, "inbatch_neg_score": 42.9848, "inbatch_pos_score": 43.5312, "learning_rate": 2.7055555555555557e-05, "loss": 2.2527, "norm_diff": 0.0389, "num_tokens_overlap": 5.5899, "num_tokens_union": 55.1016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26163.8902, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6487, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7282, "sent_len_1": 66.9659, "sent_len_max_0": 18.8637, "sent_len_max_1": 190.3187, "stdk": 0.0443, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 51300 }, { "accuracy": 57.6172, "doc_norm": 6.6839, "encoder_q-embeddings": 17629.3594, "encoder_q-layer.0": 12150.3428, "encoder_q-layer.1": 12465.2861, "encoder_q-layer.10": 21212.9121, "encoder_q-layer.11": 41866.8711, "encoder_q-layer.2": 13629.1504, "encoder_q-layer.3": 13713.9238, "encoder_q-layer.4": 14230.1436, "encoder_q-layer.5": 13761.0674, "encoder_q-layer.6": 14239.9971, "encoder_q-layer.7": 15299.4258, "encoder_q-layer.8": 17093.5938, "encoder_q-layer.9": 16355.0127, "epoch": 0.5, "inbatch_neg_score": 42.9687, "inbatch_pos_score": 43.5, "learning_rate": 2.7000000000000002e-05, "loss": 2.2905, "norm_diff": 0.0377, "num_tokens_overlap": 5.5676, "num_tokens_union": 54.9922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27001.8701, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6461, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7148, "sent_len_1": 66.7382, "sent_len_max_0": 18.88, "sent_len_max_1": 188.0962, "stdk": 0.0433, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 51400 }, { "accuracy": 58.3984, "doc_norm": 6.6818, "encoder_q-embeddings": 16584.0371, "encoder_q-layer.0": 11631.4023, "encoder_q-layer.1": 12242.1787, "encoder_q-layer.10": 25495.6836, "encoder_q-layer.11": 42173.332, "encoder_q-layer.2": 13299.6016, "encoder_q-layer.3": 13162.0488, "encoder_q-layer.4": 13984.2246, "encoder_q-layer.5": 14410.0742, "encoder_q-layer.6": 14695.4736, "encoder_q-layer.7": 15305.8496, "encoder_q-layer.8": 18157.7578, "encoder_q-layer.9": 16616.916, "epoch": 0.5, "inbatch_neg_score": 42.976, "inbatch_pos_score": 43.4688, "learning_rate": 2.6944444444444445e-05, "loss": 2.2801, "norm_diff": 0.0389, "num_tokens_overlap": 5.5816, "num_tokens_union": 55.1535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27358.4026, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6429, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7208, "sent_len_1": 66.9741, "sent_len_max_0": 18.8238, "sent_len_max_1": 190.0875, "stdk": 0.0446, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 51500 }, { "accuracy": 58.2031, "doc_norm": 6.6807, "encoder_q-embeddings": 16923.373, "encoder_q-layer.0": 12202.0127, "encoder_q-layer.1": 12447.0996, "encoder_q-layer.10": 20675.084, "encoder_q-layer.11": 41689.6406, "encoder_q-layer.2": 14161.9404, "encoder_q-layer.3": 14102.2217, "encoder_q-layer.4": 14166.4961, "encoder_q-layer.5": 13884.3867, "encoder_q-layer.6": 15039.1055, "encoder_q-layer.7": 15447.9375, "encoder_q-layer.8": 17912.041, "encoder_q-layer.9": 16222.8164, "epoch": 0.5, "inbatch_neg_score": 42.9678, "inbatch_pos_score": 43.5, "learning_rate": 2.688888888888889e-05, "loss": 2.2981, "norm_diff": 0.0345, "num_tokens_overlap": 5.5723, "num_tokens_union": 54.8637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27152.5003, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6462, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7129, "sent_len_1": 66.6081, "sent_len_max_0": 18.8337, "sent_len_max_1": 190.2262, "stdk": 0.0441, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 51600 }, { "accuracy": 55.0781, "doc_norm": 6.6796, "encoder_q-embeddings": 19242.418, "encoder_q-layer.0": 12978.3975, "encoder_q-layer.1": 13371.9932, "encoder_q-layer.10": 23447.6387, "encoder_q-layer.11": 46453.625, "encoder_q-layer.2": 14658.335, "encoder_q-layer.3": 14969.9102, "encoder_q-layer.4": 15768.3115, "encoder_q-layer.5": 15683.959, "encoder_q-layer.6": 16291.3691, "encoder_q-layer.7": 16608.5938, "encoder_q-layer.8": 19657.959, "encoder_q-layer.9": 18124.6426, "epoch": 0.5, "inbatch_neg_score": 42.9283, "inbatch_pos_score": 43.4375, "learning_rate": 2.6833333333333333e-05, "loss": 2.2946, "norm_diff": 0.0363, "num_tokens_overlap": 5.5663, "num_tokens_union": 54.8488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30103.4973, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6432, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6916, "sent_len_1": 66.5812, "sent_len_max_0": 18.8388, "sent_len_max_1": 190.8162, "stdk": 0.0435, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 51700 }, { "accuracy": 56.7383, "doc_norm": 6.6811, "encoder_q-embeddings": 18008.0371, "encoder_q-layer.0": 12471.624, "encoder_q-layer.1": 13292.5254, "encoder_q-layer.10": 20176.123, "encoder_q-layer.11": 40914.9141, "encoder_q-layer.2": 13787.9619, "encoder_q-layer.3": 13947.3887, "encoder_q-layer.4": 14276.0098, "encoder_q-layer.5": 14209.9355, "encoder_q-layer.6": 14778.4609, "encoder_q-layer.7": 15577.7793, "encoder_q-layer.8": 17848.0391, "encoder_q-layer.9": 16578.5547, "epoch": 0.51, "inbatch_neg_score": 42.9665, "inbatch_pos_score": 43.4688, "learning_rate": 2.677777777777778e-05, "loss": 2.2478, "norm_diff": 0.0366, "num_tokens_overlap": 5.572, "num_tokens_union": 54.8856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27151.2357, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6445, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7025, "sent_len_1": 66.5538, "sent_len_max_0": 18.7862, "sent_len_max_1": 188.25, "stdk": 0.044, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 51800 }, { "accuracy": 55.2734, "doc_norm": 6.6759, "encoder_q-embeddings": 17542.7461, "encoder_q-layer.0": 12257.4082, "encoder_q-layer.1": 12490.9697, "encoder_q-layer.10": 20228.916, "encoder_q-layer.11": 43105.5547, "encoder_q-layer.2": 13913.6475, "encoder_q-layer.3": 13973.9766, "encoder_q-layer.4": 14157.1641, "encoder_q-layer.5": 14172.1191, "encoder_q-layer.6": 15469.9375, "encoder_q-layer.7": 16683.3457, "encoder_q-layer.8": 17403.5488, "encoder_q-layer.9": 16655.5566, "epoch": 0.51, "inbatch_neg_score": 42.903, "inbatch_pos_score": 43.4375, "learning_rate": 2.6722222222222228e-05, "loss": 2.3287, "norm_diff": 0.0341, "num_tokens_overlap": 5.5816, "num_tokens_union": 55.0231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27463.7098, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6419, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7122, "sent_len_1": 66.7812, "sent_len_max_0": 18.8362, "sent_len_max_1": 189.61, "stdk": 0.0439, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 51900 }, { "accuracy": 57.0312, "doc_norm": 6.6778, "encoder_q-embeddings": 16820.8477, "encoder_q-layer.0": 12040.0859, "encoder_q-layer.1": 12938.2979, "encoder_q-layer.10": 19378.8477, "encoder_q-layer.11": 37600.3242, "encoder_q-layer.2": 13328.2705, "encoder_q-layer.3": 13321.9727, "encoder_q-layer.4": 14243.0957, "encoder_q-layer.5": 13661.418, "encoder_q-layer.6": 14987.6074, "encoder_q-layer.7": 16093.8203, "encoder_q-layer.8": 19380.8516, "encoder_q-layer.9": 16564.373, "epoch": 0.51, "inbatch_neg_score": 42.9031, "inbatch_pos_score": 43.4062, "learning_rate": 2.6666666666666667e-05, "loss": 2.2716, "norm_diff": 0.0386, "num_tokens_overlap": 5.5817, "num_tokens_union": 55.0111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26355.9805, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6392, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7077, "sent_len_1": 66.782, "sent_len_max_0": 18.8337, "sent_len_max_1": 189.5513, "stdk": 0.0436, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 52000 }, { "accuracy": 54.8828, "doc_norm": 6.6848, "encoder_q-embeddings": 18683.5059, "encoder_q-layer.0": 13278.6836, "encoder_q-layer.1": 13207.1836, "encoder_q-layer.10": 22385.6055, "encoder_q-layer.11": 41693.4023, "encoder_q-layer.2": 14471.1221, "encoder_q-layer.3": 14772.9268, "encoder_q-layer.4": 15355.0293, "encoder_q-layer.5": 14894.376, "encoder_q-layer.6": 15137.6514, "encoder_q-layer.7": 16084.6084, "encoder_q-layer.8": 18484.3672, "encoder_q-layer.9": 17395.0879, "epoch": 0.51, "inbatch_neg_score": 42.9202, "inbatch_pos_score": 43.4375, "learning_rate": 2.6611111111111116e-05, "loss": 2.2633, "norm_diff": 0.0423, "num_tokens_overlap": 5.574, "num_tokens_union": 54.9976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28028.4132, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6426, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7238, "sent_len_1": 66.7864, "sent_len_max_0": 18.8025, "sent_len_max_1": 189.5025, "stdk": 0.0455, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 52100 }, { "accuracy": 54.4922, "doc_norm": 6.675, "encoder_q-embeddings": 18049.2383, "encoder_q-layer.0": 12420.9424, "encoder_q-layer.1": 12940.1729, "encoder_q-layer.10": 18468.4062, "encoder_q-layer.11": 37604.6914, "encoder_q-layer.2": 14144.0557, "encoder_q-layer.3": 14320.7227, "encoder_q-layer.4": 14559.3574, "encoder_q-layer.5": 13990.7939, "encoder_q-layer.6": 14632.5762, "encoder_q-layer.7": 15232.8076, "encoder_q-layer.8": 17361.1582, "encoder_q-layer.9": 15567.8809, "epoch": 0.51, "inbatch_neg_score": 42.8953, "inbatch_pos_score": 43.4062, "learning_rate": 2.6555555555555555e-05, "loss": 2.2903, "norm_diff": 0.0368, "num_tokens_overlap": 5.577, "num_tokens_union": 55.0939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26433.8108, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6382, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7183, "sent_len_1": 66.895, "sent_len_max_0": 18.7712, "sent_len_max_1": 190.5662, "stdk": 0.0432, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 52200 }, { "accuracy": 56.3477, "doc_norm": 6.677, "encoder_q-embeddings": 17012.0234, "encoder_q-layer.0": 12231.0811, "encoder_q-layer.1": 12681.3662, "encoder_q-layer.10": 22359.6934, "encoder_q-layer.11": 43478.7422, "encoder_q-layer.2": 13696.6279, "encoder_q-layer.3": 13456.1104, "encoder_q-layer.4": 13898.9424, "encoder_q-layer.5": 13867.875, "encoder_q-layer.6": 14937.7344, "encoder_q-layer.7": 15584.5693, "encoder_q-layer.8": 17446.373, "encoder_q-layer.9": 17361.4258, "epoch": 0.51, "inbatch_neg_score": 42.8649, "inbatch_pos_score": 43.375, "learning_rate": 2.6500000000000004e-05, "loss": 2.2433, "norm_diff": 0.0377, "num_tokens_overlap": 5.5859, "num_tokens_union": 55.0823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27937.1635, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6393, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.728, "sent_len_1": 66.8746, "sent_len_max_0": 19.0025, "sent_len_max_1": 189.2937, "stdk": 0.0455, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 52300 }, { "accuracy": 54.5898, "doc_norm": 6.6732, "encoder_q-embeddings": 18954.9609, "encoder_q-layer.0": 13081.3877, "encoder_q-layer.1": 13016.3467, "encoder_q-layer.10": 28207.543, "encoder_q-layer.11": 42380.9609, "encoder_q-layer.2": 13932.6943, "encoder_q-layer.3": 14126.4639, "encoder_q-layer.4": 14514.3896, "encoder_q-layer.5": 14276.2588, "encoder_q-layer.6": 15647.7422, "encoder_q-layer.7": 16970.4629, "encoder_q-layer.8": 20030.7148, "encoder_q-layer.9": 18657.0156, "epoch": 0.51, "inbatch_neg_score": 42.7947, "inbatch_pos_score": 43.3125, "learning_rate": 2.6444444444444443e-05, "loss": 2.261, "norm_diff": 0.039, "num_tokens_overlap": 5.5829, "num_tokens_union": 55.0656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29601.9558, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6342, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.738, "sent_len_1": 66.7881, "sent_len_max_0": 18.8113, "sent_len_max_1": 188.0362, "stdk": 0.0454, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 52400 }, { "accuracy": 54.8828, "doc_norm": 6.6674, "encoder_q-embeddings": 17793.7324, "encoder_q-layer.0": 12614.2129, "encoder_q-layer.1": 13146.5576, "encoder_q-layer.10": 23252.4102, "encoder_q-layer.11": 42696.5898, "encoder_q-layer.2": 13933.8164, "encoder_q-layer.3": 13769.7314, "encoder_q-layer.4": 14223.4033, "encoder_q-layer.5": 14040.5479, "encoder_q-layer.6": 15163.6387, "encoder_q-layer.7": 16326.4072, "encoder_q-layer.8": 19306.8359, "encoder_q-layer.9": 17834.9531, "epoch": 0.51, "inbatch_neg_score": 42.7419, "inbatch_pos_score": 43.25, "learning_rate": 2.6388888888888892e-05, "loss": 2.2555, "norm_diff": 0.0375, "num_tokens_overlap": 5.5815, "num_tokens_union": 55.1587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27734.791, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.63, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7064, "sent_len_1": 67.0224, "sent_len_max_0": 18.81, "sent_len_max_1": 189.1987, "stdk": 0.0438, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 52500 }, { "accuracy": 58.0078, "doc_norm": 6.6676, "encoder_q-embeddings": 17186.6367, "encoder_q-layer.0": 12253.3848, "encoder_q-layer.1": 12562.2959, "encoder_q-layer.10": 20311.0723, "encoder_q-layer.11": 44068.625, "encoder_q-layer.2": 13729.2402, "encoder_q-layer.3": 13823.3721, "encoder_q-layer.4": 14118.543, "encoder_q-layer.5": 13642.1279, "encoder_q-layer.6": 14952.126, "encoder_q-layer.7": 17014.2363, "encoder_q-layer.8": 18514.9512, "encoder_q-layer.9": 16633.2344, "epoch": 0.51, "inbatch_neg_score": 42.7327, "inbatch_pos_score": 43.25, "learning_rate": 2.633333333333333e-05, "loss": 2.2685, "norm_diff": 0.0385, "num_tokens_overlap": 5.5715, "num_tokens_union": 54.97, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27639.1972, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6291, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7293, "sent_len_1": 66.6904, "sent_len_max_0": 18.8087, "sent_len_max_1": 188.9125, "stdk": 0.0437, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 52600 }, { "accuracy": 54.7852, "doc_norm": 6.6698, "encoder_q-embeddings": 18648.5449, "encoder_q-layer.0": 13195.793, "encoder_q-layer.1": 13468.5117, "encoder_q-layer.10": 22785.207, "encoder_q-layer.11": 42937.8242, "encoder_q-layer.2": 14583.832, "encoder_q-layer.3": 14597.0527, "encoder_q-layer.4": 14933.458, "encoder_q-layer.5": 14693.2109, "encoder_q-layer.6": 15378.2627, "encoder_q-layer.7": 16966.5137, "encoder_q-layer.8": 19148.1875, "encoder_q-layer.9": 17971.752, "epoch": 0.51, "inbatch_neg_score": 42.756, "inbatch_pos_score": 43.2812, "learning_rate": 2.627777777777778e-05, "loss": 2.2393, "norm_diff": 0.0387, "num_tokens_overlap": 5.583, "num_tokens_union": 55.1272, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28643.4887, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6312, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7238, "sent_len_1": 66.9586, "sent_len_max_0": 18.765, "sent_len_max_1": 189.2237, "stdk": 0.0449, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 52700 }, { "accuracy": 58.2031, "doc_norm": 6.6669, "encoder_q-embeddings": 16765.1523, "encoder_q-layer.0": 11994.1797, "encoder_q-layer.1": 12621.7256, "encoder_q-layer.10": 22111.8047, "encoder_q-layer.11": 39868.3359, "encoder_q-layer.2": 13447.7939, "encoder_q-layer.3": 13642.9258, "encoder_q-layer.4": 14006.4365, "encoder_q-layer.5": 14066.9941, "encoder_q-layer.6": 14980.1816, "encoder_q-layer.7": 16081.625, "encoder_q-layer.8": 17975.8926, "encoder_q-layer.9": 16641.1797, "epoch": 0.52, "inbatch_neg_score": 42.7263, "inbatch_pos_score": 43.25, "learning_rate": 2.6222222222222226e-05, "loss": 2.2976, "norm_diff": 0.0386, "num_tokens_overlap": 5.5689, "num_tokens_union": 54.9583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26624.2543, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6282, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7057, "sent_len_1": 66.7304, "sent_len_max_0": 18.8725, "sent_len_max_1": 190.6425, "stdk": 0.0446, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 52800 }, { "accuracy": 59.2773, "doc_norm": 6.6617, "encoder_q-embeddings": 17016.6074, "encoder_q-layer.0": 11689.0479, "encoder_q-layer.1": 11988.6553, "encoder_q-layer.10": 21003.5234, "encoder_q-layer.11": 38872.4023, "encoder_q-layer.2": 13018.3643, "encoder_q-layer.3": 13304.8125, "encoder_q-layer.4": 13085.0498, "encoder_q-layer.5": 13068.5615, "encoder_q-layer.6": 14009.2725, "encoder_q-layer.7": 15235.8965, "encoder_q-layer.8": 17341.0254, "encoder_q-layer.9": 16082.5898, "epoch": 0.52, "inbatch_neg_score": 42.7094, "inbatch_pos_score": 43.2188, "learning_rate": 2.6166666666666668e-05, "loss": 2.2524, "norm_diff": 0.0382, "num_tokens_overlap": 5.5998, "num_tokens_union": 55.1004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25820.1729, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6235, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7156, "sent_len_1": 66.9466, "sent_len_max_0": 18.7975, "sent_len_max_1": 191.3388, "stdk": 0.0439, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 52900 }, { "accuracy": 54.9805, "doc_norm": 6.658, "encoder_q-embeddings": 17194.5, "encoder_q-layer.0": 12134.9453, "encoder_q-layer.1": 12567.7158, "encoder_q-layer.10": 19385.8027, "encoder_q-layer.11": 38668.4844, "encoder_q-layer.2": 13529.5967, "encoder_q-layer.3": 13765.6543, "encoder_q-layer.4": 14093.2988, "encoder_q-layer.5": 14303.8066, "encoder_q-layer.6": 14742.9365, "encoder_q-layer.7": 16745.3555, "encoder_q-layer.8": 17075.4355, "encoder_q-layer.9": 15721.1641, "epoch": 0.52, "inbatch_neg_score": 42.6799, "inbatch_pos_score": 43.1875, "learning_rate": 2.6111111111111114e-05, "loss": 2.3037, "norm_diff": 0.0351, "num_tokens_overlap": 5.5676, "num_tokens_union": 54.9887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26368.2454, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6229, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6995, "sent_len_1": 66.7863, "sent_len_max_0": 18.8375, "sent_len_max_1": 191.2688, "stdk": 0.0435, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 53000 }, { "accuracy": 57.2266, "doc_norm": 6.658, "encoder_q-embeddings": 17516.3359, "encoder_q-layer.0": 12391.1514, "encoder_q-layer.1": 12979.8818, "encoder_q-layer.10": 30461.998, "encoder_q-layer.11": 46051.0781, "encoder_q-layer.2": 13628.5117, "encoder_q-layer.3": 13698.1162, "encoder_q-layer.4": 14313.3906, "encoder_q-layer.5": 14706.8818, "encoder_q-layer.6": 16141.3105, "encoder_q-layer.7": 16939.2598, "encoder_q-layer.8": 19829.4219, "encoder_q-layer.9": 20049.9746, "epoch": 0.52, "inbatch_neg_score": 42.688, "inbatch_pos_score": 43.1875, "learning_rate": 2.6055555555555556e-05, "loss": 2.287, "norm_diff": 0.0372, "num_tokens_overlap": 5.5819, "num_tokens_union": 54.973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30192.504, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6208, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.739, "sent_len_1": 66.7361, "sent_len_max_0": 19.03, "sent_len_max_1": 188.3575, "stdk": 0.0431, "stdq": 0.038, "stdqueue_k": 0.0, "step": 53100 }, { "accuracy": 57.8125, "doc_norm": 6.6626, "encoder_q-embeddings": 16899.1055, "encoder_q-layer.0": 12198.4561, "encoder_q-layer.1": 12830.3037, "encoder_q-layer.10": 19816.3809, "encoder_q-layer.11": 39465.2578, "encoder_q-layer.2": 13994.8047, "encoder_q-layer.3": 13819.5186, "encoder_q-layer.4": 14692.3252, "encoder_q-layer.5": 13837.8809, "encoder_q-layer.6": 14584.5488, "encoder_q-layer.7": 15675.5, "encoder_q-layer.8": 17367.8633, "encoder_q-layer.9": 15833.7041, "epoch": 0.52, "inbatch_neg_score": 42.6851, "inbatch_pos_score": 43.1875, "learning_rate": 2.6000000000000002e-05, "loss": 2.2642, "norm_diff": 0.0375, "num_tokens_overlap": 5.5729, "num_tokens_union": 54.9951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26389.06, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6251, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7024, "sent_len_1": 66.7659, "sent_len_max_0": 18.9262, "sent_len_max_1": 190.2375, "stdk": 0.0448, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 53200 }, { "accuracy": 56.0547, "doc_norm": 6.6622, "encoder_q-embeddings": 17954.3496, "encoder_q-layer.0": 12385.6572, "encoder_q-layer.1": 13250.4922, "encoder_q-layer.10": 20396.5312, "encoder_q-layer.11": 39326.3398, "encoder_q-layer.2": 14094.8994, "encoder_q-layer.3": 14242.9502, "encoder_q-layer.4": 14947.2441, "encoder_q-layer.5": 14750.8506, "encoder_q-layer.6": 15666.04, "encoder_q-layer.7": 16943.1719, "encoder_q-layer.8": 17856.0898, "encoder_q-layer.9": 16437.6602, "epoch": 0.52, "inbatch_neg_score": 42.6804, "inbatch_pos_score": 43.1875, "learning_rate": 2.5944444444444444e-05, "loss": 2.2482, "norm_diff": 0.0395, "num_tokens_overlap": 5.5817, "num_tokens_union": 54.9985, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27308.5644, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6228, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7155, "sent_len_1": 66.79, "sent_len_max_0": 18.7638, "sent_len_max_1": 190.4187, "stdk": 0.0445, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 53300 }, { "accuracy": 56.6406, "doc_norm": 6.6682, "encoder_q-embeddings": 17425.8359, "encoder_q-layer.0": 12476.9756, "encoder_q-layer.1": 12676.8633, "encoder_q-layer.10": 23949.7168, "encoder_q-layer.11": 41110.6289, "encoder_q-layer.2": 13391.0215, "encoder_q-layer.3": 13644.6445, "encoder_q-layer.4": 14674.8623, "encoder_q-layer.5": 15088.6094, "encoder_q-layer.6": 16020.6738, "encoder_q-layer.7": 17190.2109, "encoder_q-layer.8": 19397.7539, "encoder_q-layer.9": 18377.4199, "epoch": 0.52, "inbatch_neg_score": 42.6902, "inbatch_pos_score": 43.2188, "learning_rate": 2.588888888888889e-05, "loss": 2.2864, "norm_diff": 0.0416, "num_tokens_overlap": 5.5832, "num_tokens_union": 54.9842, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28046.138, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6266, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7164, "sent_len_1": 66.7614, "sent_len_max_0": 18.7825, "sent_len_max_1": 188.7738, "stdk": 0.0447, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 53400 }, { "accuracy": 55.7617, "doc_norm": 6.6651, "encoder_q-embeddings": 18506.0898, "encoder_q-layer.0": 13458.0156, "encoder_q-layer.1": 13780.3467, "encoder_q-layer.10": 21310.9922, "encoder_q-layer.11": 41930.7148, "encoder_q-layer.2": 14964.4639, "encoder_q-layer.3": 14662.9463, "encoder_q-layer.4": 14821.4189, "encoder_q-layer.5": 14663.71, "encoder_q-layer.6": 15395.5303, "encoder_q-layer.7": 17081.8887, "encoder_q-layer.8": 18681.7715, "encoder_q-layer.9": 18463.7246, "epoch": 0.52, "inbatch_neg_score": 42.6937, "inbatch_pos_score": 43.2188, "learning_rate": 2.5833333333333336e-05, "loss": 2.2686, "norm_diff": 0.0396, "num_tokens_overlap": 5.5682, "num_tokens_union": 54.8952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28475.6921, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6255, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.702, "sent_len_1": 66.5553, "sent_len_max_0": 18.8337, "sent_len_max_1": 189.7075, "stdk": 0.0438, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 53500 }, { "accuracy": 56.6406, "doc_norm": 6.664, "encoder_q-embeddings": 18427.2754, "encoder_q-layer.0": 12998.4844, "encoder_q-layer.1": 13319.8613, "encoder_q-layer.10": 26707.7266, "encoder_q-layer.11": 44729.4844, "encoder_q-layer.2": 14234.2314, "encoder_q-layer.3": 14263.9229, "encoder_q-layer.4": 14871.9814, "encoder_q-layer.5": 14403.1748, "encoder_q-layer.6": 15265.9951, "encoder_q-layer.7": 17552.9297, "encoder_q-layer.8": 20104.3262, "encoder_q-layer.9": 18052.5898, "epoch": 0.52, "inbatch_neg_score": 42.6718, "inbatch_pos_score": 43.1875, "learning_rate": 2.5777777777777778e-05, "loss": 2.2949, "norm_diff": 0.0398, "num_tokens_overlap": 5.5746, "num_tokens_union": 55.1169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29366.8338, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6242, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7098, "sent_len_1": 66.9205, "sent_len_max_0": 18.7975, "sent_len_max_1": 188.4512, "stdk": 0.0442, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 53600 }, { "accuracy": 56.8359, "doc_norm": 6.6556, "encoder_q-embeddings": 17346.5254, "encoder_q-layer.0": 12350.8008, "encoder_q-layer.1": 12733.7344, "encoder_q-layer.10": 22547.3047, "encoder_q-layer.11": 41640.8086, "encoder_q-layer.2": 13765.373, "encoder_q-layer.3": 13867.8018, "encoder_q-layer.4": 14243.8428, "encoder_q-layer.5": 14105.542, "encoder_q-layer.6": 15078.2344, "encoder_q-layer.7": 16967.1738, "encoder_q-layer.8": 18884.5664, "encoder_q-layer.9": 16913.6719, "epoch": 0.52, "inbatch_neg_score": 42.5891, "inbatch_pos_score": 43.0938, "learning_rate": 2.5722222222222224e-05, "loss": 2.3123, "norm_diff": 0.0386, "num_tokens_overlap": 5.5683, "num_tokens_union": 54.9779, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27557.9467, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.617, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7003, "sent_len_1": 66.7533, "sent_len_max_0": 18.855, "sent_len_max_1": 191.545, "stdk": 0.0449, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 53700 }, { "accuracy": 56.3477, "doc_norm": 6.6519, "encoder_q-embeddings": 17592.0664, "encoder_q-layer.0": 12294.6553, "encoder_q-layer.1": 12561.0264, "encoder_q-layer.10": 21139.7305, "encoder_q-layer.11": 39270.3242, "encoder_q-layer.2": 13594.6963, "encoder_q-layer.3": 13726.6885, "encoder_q-layer.4": 14161.6348, "encoder_q-layer.5": 13575.7578, "encoder_q-layer.6": 14614.6943, "encoder_q-layer.7": 16381.5781, "encoder_q-layer.8": 17763.6484, "encoder_q-layer.9": 16658.1934, "epoch": 0.53, "inbatch_neg_score": 42.5576, "inbatch_pos_score": 43.0625, "learning_rate": 2.5666666666666666e-05, "loss": 2.2777, "norm_diff": 0.0376, "num_tokens_overlap": 5.5648, "num_tokens_union": 55.0372, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26823.1509, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6143, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6961, "sent_len_1": 66.8161, "sent_len_max_0": 18.7437, "sent_len_max_1": 192.0362, "stdk": 0.0441, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 53800 }, { "accuracy": 57.5195, "doc_norm": 6.6549, "encoder_q-embeddings": 18560.1641, "encoder_q-layer.0": 13128.832, "encoder_q-layer.1": 13492.6562, "encoder_q-layer.10": 26693.9883, "encoder_q-layer.11": 44577.0625, "encoder_q-layer.2": 14681.8418, "encoder_q-layer.3": 14355.6416, "encoder_q-layer.4": 14824.3984, "encoder_q-layer.5": 14326.7627, "encoder_q-layer.6": 15615.8232, "encoder_q-layer.7": 17287.9043, "encoder_q-layer.8": 20179.4141, "encoder_q-layer.9": 18135.2012, "epoch": 0.53, "inbatch_neg_score": 42.5659, "inbatch_pos_score": 43.0938, "learning_rate": 2.5611111111111115e-05, "loss": 2.2716, "norm_diff": 0.0391, "num_tokens_overlap": 5.5798, "num_tokens_union": 54.9613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29324.1361, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6158, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7185, "sent_len_1": 66.6822, "sent_len_max_0": 18.8188, "sent_len_max_1": 190.5288, "stdk": 0.0454, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 53900 }, { "accuracy": 57.2266, "doc_norm": 6.6583, "encoder_q-embeddings": 18320.9492, "encoder_q-layer.0": 12552.291, "encoder_q-layer.1": 13634.9004, "encoder_q-layer.10": 24517.2324, "encoder_q-layer.11": 46684.9219, "encoder_q-layer.2": 14558.9658, "encoder_q-layer.3": 14258.6953, "encoder_q-layer.4": 15455.9902, "encoder_q-layer.5": 15411.0693, "encoder_q-layer.6": 16153.4004, "encoder_q-layer.7": 16961.25, "encoder_q-layer.8": 19227.5273, "encoder_q-layer.9": 18100.5977, "epoch": 0.53, "inbatch_neg_score": 42.5887, "inbatch_pos_score": 43.0938, "learning_rate": 2.5555555555555554e-05, "loss": 2.2503, "norm_diff": 0.042, "num_tokens_overlap": 5.5735, "num_tokens_union": 54.9167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29391.8874, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6163, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7031, "sent_len_1": 66.6486, "sent_len_max_0": 18.84, "sent_len_max_1": 190.4563, "stdk": 0.0456, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 54000 }, { "accuracy": 56.6406, "doc_norm": 6.6532, "encoder_q-embeddings": 17909.6836, "encoder_q-layer.0": 12760.8994, "encoder_q-layer.1": 13037.9199, "encoder_q-layer.10": 21233.3066, "encoder_q-layer.11": 37897.957, "encoder_q-layer.2": 13960.4121, "encoder_q-layer.3": 13696.876, "encoder_q-layer.4": 14170.3984, "encoder_q-layer.5": 13854.0781, "encoder_q-layer.6": 15119.2539, "encoder_q-layer.7": 16951.6953, "encoder_q-layer.8": 18288.3691, "encoder_q-layer.9": 17104.8848, "epoch": 0.53, "inbatch_neg_score": 42.589, "inbatch_pos_score": 43.125, "learning_rate": 2.5500000000000003e-05, "loss": 2.2581, "norm_diff": 0.0356, "num_tokens_overlap": 5.5689, "num_tokens_union": 54.9838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27109.8146, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6175, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6996, "sent_len_1": 66.6842, "sent_len_max_0": 18.8163, "sent_len_max_1": 188.5938, "stdk": 0.0451, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 54100 }, { "accuracy": 58.1055, "doc_norm": 6.6464, "encoder_q-embeddings": 17499.2871, "encoder_q-layer.0": 12106.1152, "encoder_q-layer.1": 12400.3252, "encoder_q-layer.10": 22300.1719, "encoder_q-layer.11": 46145.5703, "encoder_q-layer.2": 13192.3457, "encoder_q-layer.3": 13409.4346, "encoder_q-layer.4": 13870.291, "encoder_q-layer.5": 13187.2568, "encoder_q-layer.6": 14199.4043, "encoder_q-layer.7": 15514.0859, "encoder_q-layer.8": 18156.6172, "encoder_q-layer.9": 17439.1484, "epoch": 0.53, "inbatch_neg_score": 42.5279, "inbatch_pos_score": 43.0312, "learning_rate": 2.5444444444444442e-05, "loss": 2.2588, "norm_diff": 0.034, "num_tokens_overlap": 5.5827, "num_tokens_union": 55.0386, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28032.2815, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6124, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7263, "sent_len_1": 66.8547, "sent_len_max_0": 18.895, "sent_len_max_1": 189.7575, "stdk": 0.0446, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 54200 }, { "accuracy": 57.6172, "doc_norm": 6.654, "encoder_q-embeddings": 17836.8008, "encoder_q-layer.0": 12467.0127, "encoder_q-layer.1": 12926.7402, "encoder_q-layer.10": 21447.1211, "encoder_q-layer.11": 47660.0742, "encoder_q-layer.2": 14137.2139, "encoder_q-layer.3": 14115.2354, "encoder_q-layer.4": 14728.3994, "encoder_q-layer.5": 14608.6963, "encoder_q-layer.6": 15827.0547, "encoder_q-layer.7": 16917.5, "encoder_q-layer.8": 18750.9746, "encoder_q-layer.9": 16892.0762, "epoch": 0.53, "inbatch_neg_score": 42.5243, "inbatch_pos_score": 43.0625, "learning_rate": 2.538888888888889e-05, "loss": 2.276, "norm_diff": 0.0393, "num_tokens_overlap": 5.5792, "num_tokens_union": 54.8671, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28908.1874, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6147, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7137, "sent_len_1": 66.5717, "sent_len_max_0": 18.7425, "sent_len_max_1": 190.7438, "stdk": 0.0454, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 54300 }, { "accuracy": 58.2031, "doc_norm": 6.6483, "encoder_q-embeddings": 17521.6797, "encoder_q-layer.0": 12464.1973, "encoder_q-layer.1": 12926.1982, "encoder_q-layer.10": 23118.9609, "encoder_q-layer.11": 39198.9844, "encoder_q-layer.2": 13665.8047, "encoder_q-layer.3": 13313.4775, "encoder_q-layer.4": 13661.3418, "encoder_q-layer.5": 13541.1279, "encoder_q-layer.6": 14244.5674, "encoder_q-layer.7": 15437.5811, "encoder_q-layer.8": 18686.8594, "encoder_q-layer.9": 17052.3789, "epoch": 0.53, "inbatch_neg_score": 42.5295, "inbatch_pos_score": 43.0312, "learning_rate": 2.5333333333333337e-05, "loss": 2.3506, "norm_diff": 0.0385, "num_tokens_overlap": 5.5863, "num_tokens_union": 54.9748, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26789.8041, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.6098, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7339, "sent_len_1": 66.6861, "sent_len_max_0": 18.8163, "sent_len_max_1": 189.9062, "stdk": 0.0434, "stdq": 0.038, "stdqueue_k": 0.0, "step": 54400 }, { "accuracy": 58.5938, "doc_norm": 6.6419, "encoder_q-embeddings": 18129.5234, "encoder_q-layer.0": 12438.2578, "encoder_q-layer.1": 12681.2334, "encoder_q-layer.10": 21474.2109, "encoder_q-layer.11": 42552.0195, "encoder_q-layer.2": 14064.5781, "encoder_q-layer.3": 13991.3223, "encoder_q-layer.4": 14689.5723, "encoder_q-layer.5": 14302.0234, "encoder_q-layer.6": 15814.0869, "encoder_q-layer.7": 16757.3945, "encoder_q-layer.8": 19177.0371, "encoder_q-layer.9": 17104.1914, "epoch": 0.53, "inbatch_neg_score": 42.3849, "inbatch_pos_score": 42.9375, "learning_rate": 2.527777777777778e-05, "loss": 2.2637, "norm_diff": 0.0386, "num_tokens_overlap": 5.5872, "num_tokens_union": 55.0704, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27889.2536, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.6033, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7251, "sent_len_1": 66.8677, "sent_len_max_0": 18.7662, "sent_len_max_1": 191.28, "stdk": 0.0439, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 54500 }, { "accuracy": 58.3984, "doc_norm": 6.6338, "encoder_q-embeddings": 16785.1973, "encoder_q-layer.0": 12111.2305, "encoder_q-layer.1": 12728.2275, "encoder_q-layer.10": 18912.4023, "encoder_q-layer.11": 36862.0352, "encoder_q-layer.2": 13478.2314, "encoder_q-layer.3": 13906.0156, "encoder_q-layer.4": 13791.2646, "encoder_q-layer.5": 13605.9766, "encoder_q-layer.6": 14327.7021, "encoder_q-layer.7": 15039.9033, "encoder_q-layer.8": 17282.8652, "encoder_q-layer.9": 15599.1426, "epoch": 0.53, "inbatch_neg_score": 42.3336, "inbatch_pos_score": 42.8438, "learning_rate": 2.5222222222222225e-05, "loss": 2.295, "norm_diff": 0.0339, "num_tokens_overlap": 5.5766, "num_tokens_union": 55.2256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25787.3784, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5999, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7164, "sent_len_1": 67.1379, "sent_len_max_0": 18.8975, "sent_len_max_1": 190.2312, "stdk": 0.0434, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 54600 }, { "accuracy": 56.543, "doc_norm": 6.6341, "encoder_q-embeddings": 17288.0332, "encoder_q-layer.0": 12533.0547, "encoder_q-layer.1": 13172.335, "encoder_q-layer.10": 21908.9648, "encoder_q-layer.11": 39519.0703, "encoder_q-layer.2": 14227.5488, "encoder_q-layer.3": 14376.3145, "encoder_q-layer.4": 14906.2178, "encoder_q-layer.5": 14296.9082, "encoder_q-layer.6": 15195.1973, "encoder_q-layer.7": 17372.5957, "encoder_q-layer.8": 17974.1094, "encoder_q-layer.9": 17351.0703, "epoch": 0.53, "inbatch_neg_score": 42.3002, "inbatch_pos_score": 42.8125, "learning_rate": 2.5166666666666667e-05, "loss": 2.2458, "norm_diff": 0.0394, "num_tokens_overlap": 5.5706, "num_tokens_union": 54.7514, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27527.0323, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5947, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7051, "sent_len_1": 66.3964, "sent_len_max_0": 18.7925, "sent_len_max_1": 191.0275, "stdk": 0.0441, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 54700 }, { "accuracy": 56.7383, "doc_norm": 6.6294, "encoder_q-embeddings": 17302.6973, "encoder_q-layer.0": 12024.2031, "encoder_q-layer.1": 13079.1787, "encoder_q-layer.10": 20528.1973, "encoder_q-layer.11": 42257.5898, "encoder_q-layer.2": 13985.0938, "encoder_q-layer.3": 13865.0693, "encoder_q-layer.4": 14311.8242, "encoder_q-layer.5": 13925.6025, "encoder_q-layer.6": 15159.1211, "encoder_q-layer.7": 15595.0107, "encoder_q-layer.8": 17551.3574, "encoder_q-layer.9": 16161.6738, "epoch": 0.54, "inbatch_neg_score": 42.2833, "inbatch_pos_score": 42.8125, "learning_rate": 2.5111111111111113e-05, "loss": 2.2636, "norm_diff": 0.0365, "num_tokens_overlap": 5.5833, "num_tokens_union": 55.0291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27267.5296, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5929, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7422, "sent_len_1": 66.7752, "sent_len_max_0": 18.7625, "sent_len_max_1": 189.3, "stdk": 0.044, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 54800 }, { "accuracy": 59.082, "doc_norm": 6.632, "encoder_q-embeddings": 17950.2363, "encoder_q-layer.0": 12906.6787, "encoder_q-layer.1": 13148.8037, "encoder_q-layer.10": 23573.1504, "encoder_q-layer.11": 39699.9297, "encoder_q-layer.2": 13781.6514, "encoder_q-layer.3": 13841.8848, "encoder_q-layer.4": 14394.9814, "encoder_q-layer.5": 14302.7383, "encoder_q-layer.6": 14970.623, "encoder_q-layer.7": 16113.0928, "encoder_q-layer.8": 19442.793, "encoder_q-layer.9": 17285.5762, "epoch": 0.54, "inbatch_neg_score": 42.2804, "inbatch_pos_score": 42.8125, "learning_rate": 2.5055555555555555e-05, "loss": 2.2051, "norm_diff": 0.0365, "num_tokens_overlap": 5.5946, "num_tokens_union": 54.9957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27785.2794, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5956, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7467, "sent_len_1": 66.7071, "sent_len_max_0": 18.7688, "sent_len_max_1": 189.2825, "stdk": 0.0444, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 54900 }, { "accuracy": 57.7148, "doc_norm": 6.6343, "encoder_q-embeddings": 18720.2168, "encoder_q-layer.0": 13146.1631, "encoder_q-layer.1": 13363.2285, "encoder_q-layer.10": 20737.7324, "encoder_q-layer.11": 43910.1875, "encoder_q-layer.2": 14179.6865, "encoder_q-layer.3": 14005.5908, "encoder_q-layer.4": 14898.3408, "encoder_q-layer.5": 14574.9541, "encoder_q-layer.6": 15636.2646, "encoder_q-layer.7": 16262.7627, "encoder_q-layer.8": 18970.8809, "encoder_q-layer.9": 16981.9902, "epoch": 0.54, "inbatch_neg_score": 42.2316, "inbatch_pos_score": 42.7812, "learning_rate": 2.5e-05, "loss": 2.2967, "norm_diff": 0.0433, "num_tokens_overlap": 5.5711, "num_tokens_union": 54.9856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28267.5907, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.591, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7231, "sent_len_1": 66.6571, "sent_len_max_0": 18.7838, "sent_len_max_1": 189.075, "stdk": 0.0456, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 55000 }, { "accuracy": 53.8086, "doc_norm": 6.6313, "encoder_q-embeddings": 18042.2559, "encoder_q-layer.0": 12819.0615, "encoder_q-layer.1": 13132.501, "encoder_q-layer.10": 21398.8066, "encoder_q-layer.11": 42979.2539, "encoder_q-layer.2": 14401.3438, "encoder_q-layer.3": 14389.1953, "encoder_q-layer.4": 14636.3262, "encoder_q-layer.5": 14529.3154, "encoder_q-layer.6": 15413.9951, "encoder_q-layer.7": 16189.1016, "encoder_q-layer.8": 17703.9688, "encoder_q-layer.9": 16760.8984, "epoch": 0.54, "inbatch_neg_score": 42.2437, "inbatch_pos_score": 42.75, "learning_rate": 2.4944444444444447e-05, "loss": 2.2453, "norm_diff": 0.0394, "num_tokens_overlap": 5.5773, "num_tokens_union": 55.1344, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28192.0918, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5918, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7178, "sent_len_1": 66.9986, "sent_len_max_0": 18.79, "sent_len_max_1": 189.5425, "stdk": 0.0448, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 55100 }, { "accuracy": 59.1797, "doc_norm": 6.626, "encoder_q-embeddings": 17167.875, "encoder_q-layer.0": 12343.9551, "encoder_q-layer.1": 12514.0928, "encoder_q-layer.10": 21528.3223, "encoder_q-layer.11": 38489.3438, "encoder_q-layer.2": 13939.4023, "encoder_q-layer.3": 13676.2881, "encoder_q-layer.4": 13889.5234, "encoder_q-layer.5": 13917.2988, "encoder_q-layer.6": 14979.5059, "encoder_q-layer.7": 16276.8359, "encoder_q-layer.8": 18572.6504, "encoder_q-layer.9": 17227.1035, "epoch": 0.54, "inbatch_neg_score": 42.2196, "inbatch_pos_score": 42.75, "learning_rate": 2.488888888888889e-05, "loss": 2.2715, "norm_diff": 0.0375, "num_tokens_overlap": 5.5791, "num_tokens_union": 55.0827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26956.871, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5885, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7318, "sent_len_1": 66.9047, "sent_len_max_0": 18.8275, "sent_len_max_1": 191.3063, "stdk": 0.0443, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 55200 }, { "accuracy": 56.4453, "doc_norm": 6.6268, "encoder_q-embeddings": 17689.957, "encoder_q-layer.0": 12531.0752, "encoder_q-layer.1": 13324.8574, "encoder_q-layer.10": 24372.4941, "encoder_q-layer.11": 43297.2422, "encoder_q-layer.2": 14549.085, "encoder_q-layer.3": 14110.1797, "encoder_q-layer.4": 14638.1709, "encoder_q-layer.5": 14643.417, "encoder_q-layer.6": 16250.9619, "encoder_q-layer.7": 16567.2891, "encoder_q-layer.8": 17918.4863, "encoder_q-layer.9": 17252.2891, "epoch": 0.54, "inbatch_neg_score": 42.181, "inbatch_pos_score": 42.6875, "learning_rate": 2.4833333333333335e-05, "loss": 2.3173, "norm_diff": 0.0402, "num_tokens_overlap": 5.5642, "num_tokens_union": 54.9513, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28192.528, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5866, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7177, "sent_len_1": 66.6827, "sent_len_max_0": 18.7638, "sent_len_max_1": 191.3075, "stdk": 0.0458, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 55300 }, { "accuracy": 58.0078, "doc_norm": 6.6224, "encoder_q-embeddings": 16687.582, "encoder_q-layer.0": 12271.4863, "encoder_q-layer.1": 12757.2432, "encoder_q-layer.10": 19752.4336, "encoder_q-layer.11": 39135.6797, "encoder_q-layer.2": 13861.7832, "encoder_q-layer.3": 13614.8574, "encoder_q-layer.4": 13918.8428, "encoder_q-layer.5": 13612.8418, "encoder_q-layer.6": 14478.0498, "encoder_q-layer.7": 15212.165, "encoder_q-layer.8": 17614.9102, "encoder_q-layer.9": 15945.6045, "epoch": 0.54, "inbatch_neg_score": 42.1128, "inbatch_pos_score": 42.6562, "learning_rate": 2.477777777777778e-05, "loss": 2.2501, "norm_diff": 0.0409, "num_tokens_overlap": 5.5696, "num_tokens_union": 54.9531, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26258.3752, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5814, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6942, "sent_len_1": 66.7033, "sent_len_max_0": 18.8375, "sent_len_max_1": 189.2537, "stdk": 0.0454, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 55400 }, { "accuracy": 56.25, "doc_norm": 6.6166, "encoder_q-embeddings": 18574.2402, "encoder_q-layer.0": 12661.8086, "encoder_q-layer.1": 12799.8076, "encoder_q-layer.10": 21356.082, "encoder_q-layer.11": 38599.7695, "encoder_q-layer.2": 13932.2793, "encoder_q-layer.3": 13938.1699, "encoder_q-layer.4": 14505.4717, "encoder_q-layer.5": 14208.1494, "encoder_q-layer.6": 15549.9912, "encoder_q-layer.7": 15846.7568, "encoder_q-layer.8": 17455.4902, "encoder_q-layer.9": 16633.2051, "epoch": 0.54, "inbatch_neg_score": 42.0793, "inbatch_pos_score": 42.5938, "learning_rate": 2.4722222222222223e-05, "loss": 2.2373, "norm_diff": 0.0385, "num_tokens_overlap": 5.5799, "num_tokens_union": 54.9446, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27312.1626, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5781, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7142, "sent_len_1": 66.6424, "sent_len_max_0": 18.9088, "sent_len_max_1": 188.1675, "stdk": 0.0438, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 55500 }, { "accuracy": 55.1758, "doc_norm": 6.6124, "encoder_q-embeddings": 17120.4316, "encoder_q-layer.0": 12345.5479, "encoder_q-layer.1": 12627.0518, "encoder_q-layer.10": 20677.8398, "encoder_q-layer.11": 40405.25, "encoder_q-layer.2": 13635.2832, "encoder_q-layer.3": 13571.3242, "encoder_q-layer.4": 13766.7432, "encoder_q-layer.5": 14041.2607, "encoder_q-layer.6": 14672.1133, "encoder_q-layer.7": 16280.4346, "encoder_q-layer.8": 17843.0605, "encoder_q-layer.9": 16538.0996, "epoch": 0.54, "inbatch_neg_score": 42.094, "inbatch_pos_score": 42.5938, "learning_rate": 2.466666666666667e-05, "loss": 2.2637, "norm_diff": 0.0342, "num_tokens_overlap": 5.5659, "num_tokens_union": 54.9823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26973.5875, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5782, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7153, "sent_len_1": 66.7513, "sent_len_max_0": 18.9413, "sent_len_max_1": 190.26, "stdk": 0.0435, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 55600 }, { "accuracy": 57.4219, "doc_norm": 6.6133, "encoder_q-embeddings": 17518.5469, "encoder_q-layer.0": 12284.8701, "encoder_q-layer.1": 12781.4189, "encoder_q-layer.10": 21632.1133, "encoder_q-layer.11": 42824.1562, "encoder_q-layer.2": 13948.8877, "encoder_q-layer.3": 13880.5068, "encoder_q-layer.4": 14026.0596, "encoder_q-layer.5": 14339.2559, "encoder_q-layer.6": 14899.8379, "encoder_q-layer.7": 15200.9678, "encoder_q-layer.8": 17340.2598, "encoder_q-layer.9": 16449.043, "epoch": 0.54, "inbatch_neg_score": 42.0448, "inbatch_pos_score": 42.5312, "learning_rate": 2.461111111111111e-05, "loss": 2.2278, "norm_diff": 0.0376, "num_tokens_overlap": 5.5836, "num_tokens_union": 54.8817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27594.6379, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5757, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7389, "sent_len_1": 66.5175, "sent_len_max_0": 18.7625, "sent_len_max_1": 187.8275, "stdk": 0.0436, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 55700 }, { "accuracy": 57.1289, "doc_norm": 6.6142, "encoder_q-embeddings": 17874.3789, "encoder_q-layer.0": 12802.373, "encoder_q-layer.1": 13165.5371, "encoder_q-layer.10": 20078.7539, "encoder_q-layer.11": 38501.8164, "encoder_q-layer.2": 14270.0234, "encoder_q-layer.3": 13711.7705, "encoder_q-layer.4": 14019.6689, "encoder_q-layer.5": 14143.0586, "encoder_q-layer.6": 15387.1133, "encoder_q-layer.7": 16705.5098, "encoder_q-layer.8": 18758.2793, "encoder_q-layer.9": 16644.8789, "epoch": 0.54, "inbatch_neg_score": 42.0053, "inbatch_pos_score": 42.5312, "learning_rate": 2.4555555555555557e-05, "loss": 2.3009, "norm_diff": 0.039, "num_tokens_overlap": 5.5792, "num_tokens_union": 55.0014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27067.2575, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5751, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.734, "sent_len_1": 66.7456, "sent_len_max_0": 18.8938, "sent_len_max_1": 190.61, "stdk": 0.0442, "stdq": 0.039, "stdqueue_k": 0.0, "step": 55800 }, { "accuracy": 54.4922, "doc_norm": 6.61, "encoder_q-embeddings": 18361.4531, "encoder_q-layer.0": 12796.2783, "encoder_q-layer.1": 12816.7158, "encoder_q-layer.10": 18549.8066, "encoder_q-layer.11": 37206.2812, "encoder_q-layer.2": 14557.3643, "encoder_q-layer.3": 14398.0488, "encoder_q-layer.4": 14576.9443, "encoder_q-layer.5": 14282.4326, "encoder_q-layer.6": 15167.5566, "encoder_q-layer.7": 15819.6826, "encoder_q-layer.8": 16797.373, "encoder_q-layer.9": 16282.1562, "epoch": 0.55, "inbatch_neg_score": 41.9969, "inbatch_pos_score": 42.5, "learning_rate": 2.45e-05, "loss": 2.2323, "norm_diff": 0.0398, "num_tokens_overlap": 5.5797, "num_tokens_union": 54.9829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26539.616, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5702, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7217, "sent_len_1": 66.8387, "sent_len_max_0": 18.8263, "sent_len_max_1": 191.8113, "stdk": 0.0433, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 55900 }, { "accuracy": 55.0781, "doc_norm": 6.6046, "encoder_q-embeddings": 17573.5469, "encoder_q-layer.0": 12572.1816, "encoder_q-layer.1": 12726.1924, "encoder_q-layer.10": 20304.6309, "encoder_q-layer.11": 38398.0547, "encoder_q-layer.2": 13769.1689, "encoder_q-layer.3": 13825.5225, "encoder_q-layer.4": 14039.3955, "encoder_q-layer.5": 13741.4785, "encoder_q-layer.6": 14677.7031, "encoder_q-layer.7": 15386.2793, "encoder_q-layer.8": 17500.4062, "encoder_q-layer.9": 16142.3643, "epoch": 0.55, "inbatch_neg_score": 41.9504, "inbatch_pos_score": 42.4688, "learning_rate": 2.4444444444444445e-05, "loss": 2.2329, "norm_diff": 0.0363, "num_tokens_overlap": 5.5773, "num_tokens_union": 55.0693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26339.9081, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5683, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7236, "sent_len_1": 66.8353, "sent_len_max_0": 18.7188, "sent_len_max_1": 189.8388, "stdk": 0.0449, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 56000 }, { "accuracy": 56.8359, "doc_norm": 6.6093, "encoder_q-embeddings": 17570.5117, "encoder_q-layer.0": 12438.3164, "encoder_q-layer.1": 12738.0352, "encoder_q-layer.10": 20596.7305, "encoder_q-layer.11": 41301.2695, "encoder_q-layer.2": 14144.2334, "encoder_q-layer.3": 14186.1094, "encoder_q-layer.4": 14139.9414, "encoder_q-layer.5": 14401.2178, "encoder_q-layer.6": 15356.5186, "encoder_q-layer.7": 16542.7207, "encoder_q-layer.8": 17552.1465, "encoder_q-layer.9": 16823.6035, "epoch": 0.55, "inbatch_neg_score": 41.9508, "inbatch_pos_score": 42.4688, "learning_rate": 2.4388888888888887e-05, "loss": 2.2709, "norm_diff": 0.04, "num_tokens_overlap": 5.5653, "num_tokens_union": 55.0205, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27524.6004, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5693, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7212, "sent_len_1": 66.746, "sent_len_max_0": 18.8212, "sent_len_max_1": 189.9975, "stdk": 0.045, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 56100 }, { "accuracy": 57.2266, "doc_norm": 6.6048, "encoder_q-embeddings": 16443.7051, "encoder_q-layer.0": 11586.9414, "encoder_q-layer.1": 11902.5039, "encoder_q-layer.10": 19573.2578, "encoder_q-layer.11": 37157.1719, "encoder_q-layer.2": 13160.7324, "encoder_q-layer.3": 13281.4043, "encoder_q-layer.4": 13534.4385, "encoder_q-layer.5": 13504.5088, "encoder_q-layer.6": 14365.5957, "encoder_q-layer.7": 15397.0625, "encoder_q-layer.8": 16769.7773, "encoder_q-layer.9": 16509.3105, "epoch": 0.55, "inbatch_neg_score": 41.9468, "inbatch_pos_score": 42.4688, "learning_rate": 2.4333333333333336e-05, "loss": 2.2851, "norm_diff": 0.039, "num_tokens_overlap": 5.5693, "num_tokens_union": 54.9931, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25431.8336, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5659, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7204, "sent_len_1": 66.7629, "sent_len_max_0": 18.8825, "sent_len_max_1": 188.3313, "stdk": 0.0446, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 56200 }, { "accuracy": 55.6641, "doc_norm": 6.6042, "encoder_q-embeddings": 18452.7559, "encoder_q-layer.0": 12633.6123, "encoder_q-layer.1": 13429.9287, "encoder_q-layer.10": 21344.7793, "encoder_q-layer.11": 42019.2031, "encoder_q-layer.2": 14428.2559, "encoder_q-layer.3": 14380.4229, "encoder_q-layer.4": 15021.0703, "encoder_q-layer.5": 14188.4199, "encoder_q-layer.6": 15456.6201, "encoder_q-layer.7": 16157.6875, "encoder_q-layer.8": 17499.4043, "encoder_q-layer.9": 16649.4648, "epoch": 0.55, "inbatch_neg_score": 41.9209, "inbatch_pos_score": 42.4375, "learning_rate": 2.427777777777778e-05, "loss": 2.2409, "norm_diff": 0.0388, "num_tokens_overlap": 5.5807, "num_tokens_union": 55.1206, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28055.472, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5655, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7322, "sent_len_1": 66.9756, "sent_len_max_0": 18.8438, "sent_len_max_1": 189.54, "stdk": 0.0438, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 56300 }, { "accuracy": 57.1289, "doc_norm": 6.6021, "encoder_q-embeddings": 17348.6875, "encoder_q-layer.0": 12434.3662, "encoder_q-layer.1": 12921.5557, "encoder_q-layer.10": 20565.1602, "encoder_q-layer.11": 38904.8945, "encoder_q-layer.2": 13929.3291, "encoder_q-layer.3": 14067.1738, "encoder_q-layer.4": 14398.0967, "encoder_q-layer.5": 13697.499, "encoder_q-layer.6": 14640.7373, "encoder_q-layer.7": 15727.1855, "encoder_q-layer.8": 17598.0215, "encoder_q-layer.9": 16288.5674, "epoch": 0.55, "inbatch_neg_score": 41.9027, "inbatch_pos_score": 42.4375, "learning_rate": 2.4222222222222224e-05, "loss": 2.2518, "norm_diff": 0.0356, "num_tokens_overlap": 5.5834, "num_tokens_union": 55.0059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26696.0535, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5665, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7477, "sent_len_1": 66.7346, "sent_len_max_0": 18.89, "sent_len_max_1": 189.4988, "stdk": 0.0446, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 56400 }, { "accuracy": 60.8398, "doc_norm": 6.6045, "encoder_q-embeddings": 17888.0508, "encoder_q-layer.0": 12442.4609, "encoder_q-layer.1": 12777.8369, "encoder_q-layer.10": 20418.3848, "encoder_q-layer.11": 40387.3984, "encoder_q-layer.2": 14229.5898, "encoder_q-layer.3": 14163.623, "encoder_q-layer.4": 14676.542, "encoder_q-layer.5": 14650.5996, "encoder_q-layer.6": 15155.0684, "encoder_q-layer.7": 16396.0703, "encoder_q-layer.8": 19060.2012, "encoder_q-layer.9": 16941.8457, "epoch": 0.55, "inbatch_neg_score": 41.8651, "inbatch_pos_score": 42.4062, "learning_rate": 2.4166666666666667e-05, "loss": 2.2, "norm_diff": 0.0398, "num_tokens_overlap": 5.5949, "num_tokens_union": 55.0684, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27692.9912, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5647, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7272, "sent_len_1": 66.9114, "sent_len_max_0": 18.8188, "sent_len_max_1": 188.29, "stdk": 0.0454, "stdq": 0.04, "stdqueue_k": 0.0, "step": 56500 }, { "accuracy": 56.8359, "doc_norm": 6.6006, "encoder_q-embeddings": 18010.8516, "encoder_q-layer.0": 12358.7158, "encoder_q-layer.1": 12872.9316, "encoder_q-layer.10": 22897.2969, "encoder_q-layer.11": 44654.9414, "encoder_q-layer.2": 13987.9971, "encoder_q-layer.3": 14200.5508, "encoder_q-layer.4": 14922.3184, "encoder_q-layer.5": 15130.791, "encoder_q-layer.6": 16449.8926, "encoder_q-layer.7": 16897.9395, "encoder_q-layer.8": 20718.2871, "encoder_q-layer.9": 17924.6797, "epoch": 0.55, "inbatch_neg_score": 41.8784, "inbatch_pos_score": 42.375, "learning_rate": 2.4111111111111113e-05, "loss": 2.236, "norm_diff": 0.0378, "num_tokens_overlap": 5.6059, "num_tokens_union": 55.1181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29191.1265, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5628, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7426, "sent_len_1": 66.9802, "sent_len_max_0": 18.78, "sent_len_max_1": 190.86, "stdk": 0.0434, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 56600 }, { "accuracy": 56.3477, "doc_norm": 6.6018, "encoder_q-embeddings": 18449.3281, "encoder_q-layer.0": 12868.0391, "encoder_q-layer.1": 13614.3809, "encoder_q-layer.10": 23418.7617, "encoder_q-layer.11": 42398.4453, "encoder_q-layer.2": 14359.3701, "encoder_q-layer.3": 14607.5137, "encoder_q-layer.4": 14998.5156, "encoder_q-layer.5": 14970.624, "encoder_q-layer.6": 15999.4561, "encoder_q-layer.7": 17094.834, "encoder_q-layer.8": 20550.3633, "encoder_q-layer.9": 18661.1328, "epoch": 0.55, "inbatch_neg_score": 41.8743, "inbatch_pos_score": 42.375, "learning_rate": 2.4055555555555555e-05, "loss": 2.2596, "norm_diff": 0.0367, "num_tokens_overlap": 5.5787, "num_tokens_union": 55.0602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29198.0123, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.565, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7327, "sent_len_1": 66.8753, "sent_len_max_0": 18.9013, "sent_len_max_1": 190.3075, "stdk": 0.0443, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 56700 }, { "accuracy": 54.7852, "doc_norm": 6.6028, "encoder_q-embeddings": 18581.5762, "encoder_q-layer.0": 12623.9814, "encoder_q-layer.1": 13192.625, "encoder_q-layer.10": 20887.6855, "encoder_q-layer.11": 36718.1094, "encoder_q-layer.2": 14420.8115, "encoder_q-layer.3": 14575.5869, "encoder_q-layer.4": 15332.3203, "encoder_q-layer.5": 14657.9727, "encoder_q-layer.6": 15639.2773, "encoder_q-layer.7": 16185.8418, "encoder_q-layer.8": 18873.1719, "encoder_q-layer.9": 16624.3984, "epoch": 0.55, "inbatch_neg_score": 41.8578, "inbatch_pos_score": 42.375, "learning_rate": 2.4e-05, "loss": 2.2927, "norm_diff": 0.0411, "num_tokens_overlap": 5.5886, "num_tokens_union": 55.0004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27182.7811, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5617, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.7475, "sent_len_max_0": 18.9213, "sent_len_max_1": 188.7637, "stdk": 0.0439, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 56800 }, { "accuracy": 55.1758, "doc_norm": 6.6067, "encoder_q-embeddings": 18644.6973, "encoder_q-layer.0": 13551.7422, "encoder_q-layer.1": 14161.0029, "encoder_q-layer.10": 24988.875, "encoder_q-layer.11": 47669.7031, "encoder_q-layer.2": 15331.1367, "encoder_q-layer.3": 15247.8291, "encoder_q-layer.4": 15342.8691, "encoder_q-layer.5": 15402.3066, "encoder_q-layer.6": 16350.1787, "encoder_q-layer.7": 16993.668, "encoder_q-layer.8": 20326.4297, "encoder_q-layer.9": 19243.582, "epoch": 0.56, "inbatch_neg_score": 41.8968, "inbatch_pos_score": 42.4062, "learning_rate": 2.3944444444444443e-05, "loss": 2.2372, "norm_diff": 0.0402, "num_tokens_overlap": 5.5751, "num_tokens_union": 55.0327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30322.8394, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5664, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7112, "sent_len_1": 66.8362, "sent_len_max_0": 18.8175, "sent_len_max_1": 191.3375, "stdk": 0.044, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 56900 }, { "accuracy": 58.7891, "doc_norm": 6.603, "encoder_q-embeddings": 16762.0156, "encoder_q-layer.0": 11814.2881, "encoder_q-layer.1": 12438.71, "encoder_q-layer.10": 23631.2266, "encoder_q-layer.11": 40884.707, "encoder_q-layer.2": 13820.7646, "encoder_q-layer.3": 13851.1436, "encoder_q-layer.4": 14188.1758, "encoder_q-layer.5": 13955.5947, "encoder_q-layer.6": 15200.209, "encoder_q-layer.7": 15686.2148, "encoder_q-layer.8": 17735.7734, "encoder_q-layer.9": 16704.1836, "epoch": 0.56, "inbatch_neg_score": 41.9007, "inbatch_pos_score": 42.4062, "learning_rate": 2.3888888888888892e-05, "loss": 2.3211, "norm_diff": 0.0398, "num_tokens_overlap": 5.5687, "num_tokens_union": 55.0163, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27360.8315, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5633, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6968, "sent_len_1": 66.8539, "sent_len_max_0": 18.765, "sent_len_max_1": 188.94, "stdk": 0.0445, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 57000 }, { "accuracy": 56.25, "doc_norm": 6.596, "encoder_q-embeddings": 18476.4941, "encoder_q-layer.0": 12421.1465, "encoder_q-layer.1": 12947.5557, "encoder_q-layer.10": 23710.3359, "encoder_q-layer.11": 43706.0742, "encoder_q-layer.2": 13679.4307, "encoder_q-layer.3": 13831.5059, "encoder_q-layer.4": 14374.3779, "encoder_q-layer.5": 14227.208, "encoder_q-layer.6": 15053.1768, "encoder_q-layer.7": 16004.1836, "encoder_q-layer.8": 19654.2363, "encoder_q-layer.9": 17968.4004, "epoch": 0.56, "inbatch_neg_score": 41.8323, "inbatch_pos_score": 42.3438, "learning_rate": 2.3833333333333334e-05, "loss": 2.2286, "norm_diff": 0.0375, "num_tokens_overlap": 5.5903, "num_tokens_union": 55.1926, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28415.5416, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5585, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7396, "sent_len_1": 67.0729, "sent_len_max_0": 18.7787, "sent_len_max_1": 191.0012, "stdk": 0.0431, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 57100 }, { "accuracy": 57.1289, "doc_norm": 6.5987, "encoder_q-embeddings": 18063.8301, "encoder_q-layer.0": 12762.3379, "encoder_q-layer.1": 13222.5332, "encoder_q-layer.10": 21078.0996, "encoder_q-layer.11": 40963.8047, "encoder_q-layer.2": 14569.1016, "encoder_q-layer.3": 14249.5752, "encoder_q-layer.4": 14507.4834, "encoder_q-layer.5": 14266.2119, "encoder_q-layer.6": 15551.042, "encoder_q-layer.7": 16463.3418, "encoder_q-layer.8": 17686.4102, "encoder_q-layer.9": 17173.1367, "epoch": 0.56, "inbatch_neg_score": 41.7781, "inbatch_pos_score": 42.3125, "learning_rate": 2.377777777777778e-05, "loss": 2.2865, "norm_diff": 0.042, "num_tokens_overlap": 5.5871, "num_tokens_union": 55.0342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27841.6057, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5567, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7521, "sent_len_1": 66.8166, "sent_len_max_0": 18.9213, "sent_len_max_1": 191.2038, "stdk": 0.0451, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 57200 }, { "accuracy": 55.8594, "doc_norm": 6.5961, "encoder_q-embeddings": 17922.752, "encoder_q-layer.0": 12425.6885, "encoder_q-layer.1": 12844.9688, "encoder_q-layer.10": 21023.2598, "encoder_q-layer.11": 39779.0273, "encoder_q-layer.2": 13927.166, "encoder_q-layer.3": 13820.752, "encoder_q-layer.4": 14150.999, "encoder_q-layer.5": 13818.1934, "encoder_q-layer.6": 14479.6055, "encoder_q-layer.7": 16334.7939, "encoder_q-layer.8": 17491.834, "encoder_q-layer.9": 16690.8359, "epoch": 0.56, "inbatch_neg_score": 41.8122, "inbatch_pos_score": 42.3125, "learning_rate": 2.3722222222222222e-05, "loss": 2.2181, "norm_diff": 0.0364, "num_tokens_overlap": 5.588, "num_tokens_union": 55.0406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26907.0412, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5597, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7149, "sent_len_1": 66.823, "sent_len_max_0": 18.8612, "sent_len_max_1": 188.4938, "stdk": 0.0437, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 57300 }, { "accuracy": 60.2539, "doc_norm": 6.6009, "encoder_q-embeddings": 18022.2461, "encoder_q-layer.0": 12818.8379, "encoder_q-layer.1": 13586.9814, "encoder_q-layer.10": 20011.7344, "encoder_q-layer.11": 37907.7383, "encoder_q-layer.2": 14384.2842, "encoder_q-layer.3": 14716.1562, "encoder_q-layer.4": 15185.1787, "encoder_q-layer.5": 14458.5293, "encoder_q-layer.6": 15391.5703, "encoder_q-layer.7": 17232.8203, "encoder_q-layer.8": 18277.8691, "encoder_q-layer.9": 16400.8906, "epoch": 0.56, "inbatch_neg_score": 41.7972, "inbatch_pos_score": 42.3438, "learning_rate": 2.3666666666666668e-05, "loss": 2.2643, "norm_diff": 0.0408, "num_tokens_overlap": 5.5674, "num_tokens_union": 54.8398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27169.5895, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5601, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6922, "sent_len_1": 66.5403, "sent_len_max_0": 18.9675, "sent_len_max_1": 189.5387, "stdk": 0.0437, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 57400 }, { "accuracy": 55.2734, "doc_norm": 6.5954, "encoder_q-embeddings": 17818.3945, "encoder_q-layer.0": 12502.582, "encoder_q-layer.1": 12698.1904, "encoder_q-layer.10": 20549.6973, "encoder_q-layer.11": 37222.1914, "encoder_q-layer.2": 13917.6777, "encoder_q-layer.3": 14101.7939, "encoder_q-layer.4": 14827.8965, "encoder_q-layer.5": 14670.0918, "encoder_q-layer.6": 14679.8398, "encoder_q-layer.7": 15594.2793, "encoder_q-layer.8": 16925.9609, "encoder_q-layer.9": 16235.9268, "epoch": 0.56, "inbatch_neg_score": 41.8041, "inbatch_pos_score": 42.3125, "learning_rate": 2.361111111111111e-05, "loss": 2.247, "norm_diff": 0.0387, "num_tokens_overlap": 5.5703, "num_tokens_union": 54.945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26509.9701, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5567, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.694, "sent_len_1": 66.7138, "sent_len_max_0": 18.8238, "sent_len_max_1": 190.3875, "stdk": 0.0442, "stdq": 0.039, "stdqueue_k": 0.0, "step": 57500 }, { "accuracy": 54.4922, "doc_norm": 6.5966, "encoder_q-embeddings": 17864.832, "encoder_q-layer.0": 12426.3945, "encoder_q-layer.1": 12609.0801, "encoder_q-layer.10": 22452.9941, "encoder_q-layer.11": 43167.25, "encoder_q-layer.2": 13656.6348, "encoder_q-layer.3": 13755.7061, "encoder_q-layer.4": 14647.1543, "encoder_q-layer.5": 14308.9141, "encoder_q-layer.6": 15225.1611, "encoder_q-layer.7": 16039.084, "encoder_q-layer.8": 18772.0547, "encoder_q-layer.9": 17617.8691, "epoch": 0.56, "inbatch_neg_score": 41.7998, "inbatch_pos_score": 42.3125, "learning_rate": 2.3555555555555556e-05, "loss": 2.2985, "norm_diff": 0.0406, "num_tokens_overlap": 5.5744, "num_tokens_union": 54.9553, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28390.9955, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.556, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7238, "sent_len_1": 66.6625, "sent_len_max_0": 18.8225, "sent_len_max_1": 188.7837, "stdk": 0.0444, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 57600 }, { "accuracy": 55.8594, "doc_norm": 6.5937, "encoder_q-embeddings": 18425.7031, "encoder_q-layer.0": 12744.8721, "encoder_q-layer.1": 13221.917, "encoder_q-layer.10": 23006.5312, "encoder_q-layer.11": 42845.2266, "encoder_q-layer.2": 14267.0439, "encoder_q-layer.3": 14395.0352, "encoder_q-layer.4": 15341.2314, "encoder_q-layer.5": 15079.1064, "encoder_q-layer.6": 15720.3184, "encoder_q-layer.7": 16620.1973, "encoder_q-layer.8": 18194.6914, "encoder_q-layer.9": 16926.0566, "epoch": 0.56, "inbatch_neg_score": 41.7631, "inbatch_pos_score": 42.2812, "learning_rate": 2.35e-05, "loss": 2.2162, "norm_diff": 0.0385, "num_tokens_overlap": 5.5791, "num_tokens_union": 54.8965, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28472.804, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5552, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7275, "sent_len_1": 66.611, "sent_len_max_0": 18.8938, "sent_len_max_1": 189.6375, "stdk": 0.0455, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 57700 }, { "accuracy": 56.8359, "doc_norm": 6.594, "encoder_q-embeddings": 16866.6289, "encoder_q-layer.0": 12063.0781, "encoder_q-layer.1": 12614.1787, "encoder_q-layer.10": 21485.3613, "encoder_q-layer.11": 40909.0898, "encoder_q-layer.2": 13850.0762, "encoder_q-layer.3": 13657.8555, "encoder_q-layer.4": 14295.8242, "encoder_q-layer.5": 14207.5078, "encoder_q-layer.6": 15209.6152, "encoder_q-layer.7": 16039.2988, "encoder_q-layer.8": 18527.4434, "encoder_q-layer.9": 16836.3945, "epoch": 0.56, "inbatch_neg_score": 41.7757, "inbatch_pos_score": 42.2812, "learning_rate": 2.3444444444444448e-05, "loss": 2.2464, "norm_diff": 0.0385, "num_tokens_overlap": 5.5836, "num_tokens_union": 55.0541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27134.184, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5555, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7224, "sent_len_1": 66.8935, "sent_len_max_0": 18.8663, "sent_len_max_1": 190.3088, "stdk": 0.0443, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 57800 }, { "accuracy": 55.957, "doc_norm": 6.5933, "encoder_q-embeddings": 17997.4473, "encoder_q-layer.0": 12630.252, "encoder_q-layer.1": 12962.3438, "encoder_q-layer.10": 21390.9688, "encoder_q-layer.11": 46649.8633, "encoder_q-layer.2": 14665.4814, "encoder_q-layer.3": 14380.375, "encoder_q-layer.4": 15149.2764, "encoder_q-layer.5": 15094.6846, "encoder_q-layer.6": 15578.4619, "encoder_q-layer.7": 16513.4531, "encoder_q-layer.8": 18823.4375, "encoder_q-layer.9": 17162.3574, "epoch": 0.57, "inbatch_neg_score": 41.7681, "inbatch_pos_score": 42.2812, "learning_rate": 2.338888888888889e-05, "loss": 2.232, "norm_diff": 0.0373, "num_tokens_overlap": 5.5803, "num_tokens_union": 55.0312, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29068.9924, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.556, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7165, "sent_len_1": 66.793, "sent_len_max_0": 18.6962, "sent_len_max_1": 189.21, "stdk": 0.0451, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 57900 }, { "accuracy": 58.3008, "doc_norm": 6.5923, "encoder_q-embeddings": 16305.3135, "encoder_q-layer.0": 11811.9072, "encoder_q-layer.1": 12064.5049, "encoder_q-layer.10": 19843.4219, "encoder_q-layer.11": 37265.9023, "encoder_q-layer.2": 13111.3779, "encoder_q-layer.3": 13357.0938, "encoder_q-layer.4": 13844.4902, "encoder_q-layer.5": 13977.2852, "encoder_q-layer.6": 14843.375, "encoder_q-layer.7": 15685.3203, "encoder_q-layer.8": 17690.084, "encoder_q-layer.9": 16124.8936, "epoch": 0.57, "inbatch_neg_score": 41.789, "inbatch_pos_score": 42.3125, "learning_rate": 2.3333333333333336e-05, "loss": 2.252, "norm_diff": 0.0346, "num_tokens_overlap": 5.6004, "num_tokens_union": 55.1777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25669.8753, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5577, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7204, "sent_len_1": 67.0765, "sent_len_max_0": 18.8412, "sent_len_max_1": 190.3775, "stdk": 0.0434, "stdq": 0.039, "stdqueue_k": 0.0, "step": 58000 }, { "accuracy": 56.8359, "doc_norm": 6.5931, "encoder_q-embeddings": 18034.6719, "encoder_q-layer.0": 12698.9336, "encoder_q-layer.1": 12928.2891, "encoder_q-layer.10": 19172.873, "encoder_q-layer.11": 38007.9219, "encoder_q-layer.2": 14124.1865, "encoder_q-layer.3": 14006.0537, "encoder_q-layer.4": 14529.3818, "encoder_q-layer.5": 14096.7617, "encoder_q-layer.6": 14929.2451, "encoder_q-layer.7": 15250.4658, "encoder_q-layer.8": 16879.0215, "encoder_q-layer.9": 15814.6191, "epoch": 0.57, "inbatch_neg_score": 41.7727, "inbatch_pos_score": 42.3125, "learning_rate": 2.3277777777777778e-05, "loss": 2.2389, "norm_diff": 0.0389, "num_tokens_overlap": 5.5899, "num_tokens_union": 54.9936, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26777.6299, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5542, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7305, "sent_len_1": 66.7403, "sent_len_max_0": 18.9037, "sent_len_max_1": 188.3862, "stdk": 0.0461, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 58100 }, { "accuracy": 55.1758, "doc_norm": 6.5914, "encoder_q-embeddings": 18393.3438, "encoder_q-layer.0": 12778.5977, "encoder_q-layer.1": 13254.3896, "encoder_q-layer.10": 21983.8242, "encoder_q-layer.11": 42867.707, "encoder_q-layer.2": 13910.9072, "encoder_q-layer.3": 14329.4248, "encoder_q-layer.4": 14741.9434, "encoder_q-layer.5": 14444.6318, "encoder_q-layer.6": 15427.0596, "encoder_q-layer.7": 16602.7461, "encoder_q-layer.8": 18898.7344, "encoder_q-layer.9": 17772.8613, "epoch": 0.57, "inbatch_neg_score": 41.7352, "inbatch_pos_score": 42.25, "learning_rate": 2.3222222222222224e-05, "loss": 2.2658, "norm_diff": 0.0388, "num_tokens_overlap": 5.5529, "num_tokens_union": 54.924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28553.3856, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5526, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6647, "sent_len_1": 66.6706, "sent_len_max_0": 18.9013, "sent_len_max_1": 188.6863, "stdk": 0.0438, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 58200 }, { "accuracy": 57.2266, "doc_norm": 6.5961, "encoder_q-embeddings": 18153.1895, "encoder_q-layer.0": 12569.4141, "encoder_q-layer.1": 13291.2666, "encoder_q-layer.10": 20713.6777, "encoder_q-layer.11": 40177.4805, "encoder_q-layer.2": 14355.3154, "encoder_q-layer.3": 14080.4033, "encoder_q-layer.4": 14339.248, "encoder_q-layer.5": 14103.6504, "encoder_q-layer.6": 14691.6875, "encoder_q-layer.7": 15832.1406, "encoder_q-layer.8": 18048.2891, "encoder_q-layer.9": 16444.7656, "epoch": 0.57, "inbatch_neg_score": 41.7159, "inbatch_pos_score": 42.25, "learning_rate": 2.3166666666666666e-05, "loss": 2.2518, "norm_diff": 0.043, "num_tokens_overlap": 5.5741, "num_tokens_union": 54.9066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27128.5967, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5531, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7201, "sent_len_1": 66.6043, "sent_len_max_0": 18.8538, "sent_len_max_1": 189.9275, "stdk": 0.0447, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 58300 }, { "accuracy": 58.3984, "doc_norm": 6.5967, "encoder_q-embeddings": 18199.3594, "encoder_q-layer.0": 12665.2822, "encoder_q-layer.1": 13323.2207, "encoder_q-layer.10": 19492.002, "encoder_q-layer.11": 37547.5977, "encoder_q-layer.2": 14376.6543, "encoder_q-layer.3": 14106.0479, "encoder_q-layer.4": 14280.3691, "encoder_q-layer.5": 14172.6582, "encoder_q-layer.6": 15599.7773, "encoder_q-layer.7": 15974.5342, "encoder_q-layer.8": 17000.4395, "encoder_q-layer.9": 16257.3135, "epoch": 0.57, "inbatch_neg_score": 41.7507, "inbatch_pos_score": 42.2812, "learning_rate": 2.3111111111111112e-05, "loss": 2.266, "norm_diff": 0.0409, "num_tokens_overlap": 5.5854, "num_tokens_union": 54.8659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26919.0709, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5558, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7327, "sent_len_1": 66.5125, "sent_len_max_0": 18.8025, "sent_len_max_1": 189.5337, "stdk": 0.045, "stdq": 0.04, "stdqueue_k": 0.0, "step": 58400 }, { "accuracy": 57.4219, "doc_norm": 6.5913, "encoder_q-embeddings": 17112.0547, "encoder_q-layer.0": 12104.9785, "encoder_q-layer.1": 12610.5938, "encoder_q-layer.10": 20978.75, "encoder_q-layer.11": 39277.957, "encoder_q-layer.2": 13601.6377, "encoder_q-layer.3": 13878.7158, "encoder_q-layer.4": 14137.625, "encoder_q-layer.5": 13800.3711, "encoder_q-layer.6": 15048.8428, "encoder_q-layer.7": 15891.833, "encoder_q-layer.8": 17344.7637, "encoder_q-layer.9": 16845.1445, "epoch": 0.57, "inbatch_neg_score": 41.7409, "inbatch_pos_score": 42.25, "learning_rate": 2.3055555555555558e-05, "loss": 2.2665, "norm_diff": 0.0406, "num_tokens_overlap": 5.5697, "num_tokens_union": 54.991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26502.4223, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5507, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7158, "sent_len_1": 66.7121, "sent_len_max_0": 18.8775, "sent_len_max_1": 188.9038, "stdk": 0.0448, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 58500 }, { "accuracy": 55.5664, "doc_norm": 6.5852, "encoder_q-embeddings": 16918.9707, "encoder_q-layer.0": 11296.1865, "encoder_q-layer.1": 11909.4102, "encoder_q-layer.10": 19416.9648, "encoder_q-layer.11": 37920.5938, "encoder_q-layer.2": 12894.9941, "encoder_q-layer.3": 13341.7305, "encoder_q-layer.4": 13576.0459, "encoder_q-layer.5": 14098.8623, "encoder_q-layer.6": 14930.6523, "encoder_q-layer.7": 15961.3496, "encoder_q-layer.8": 17098.3516, "encoder_q-layer.9": 15952.4707, "epoch": 0.57, "inbatch_neg_score": 41.7162, "inbatch_pos_score": 42.2188, "learning_rate": 2.3000000000000003e-05, "loss": 2.2029, "norm_diff": 0.0358, "num_tokens_overlap": 5.5802, "num_tokens_union": 54.908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26393.3606, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5494, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7181, "sent_len_1": 66.6048, "sent_len_max_0": 18.8137, "sent_len_max_1": 188.1075, "stdk": 0.0427, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 58600 }, { "accuracy": 56.3477, "doc_norm": 6.5808, "encoder_q-embeddings": 17784.1836, "encoder_q-layer.0": 12415.4482, "encoder_q-layer.1": 12758.5117, "encoder_q-layer.10": 20388.6777, "encoder_q-layer.11": 40856.2422, "encoder_q-layer.2": 13561.9785, "encoder_q-layer.3": 13366.457, "encoder_q-layer.4": 14024.4473, "encoder_q-layer.5": 13753.5938, "encoder_q-layer.6": 14925.6992, "encoder_q-layer.7": 15806.4082, "encoder_q-layer.8": 18346.207, "encoder_q-layer.9": 16334.9131, "epoch": 0.57, "inbatch_neg_score": 41.6889, "inbatch_pos_score": 42.1875, "learning_rate": 2.2944444444444446e-05, "loss": 2.2667, "norm_diff": 0.0352, "num_tokens_overlap": 5.584, "num_tokens_union": 55.0054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26910.5086, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5457, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7691, "sent_len_1": 66.6594, "sent_len_max_0": 18.8313, "sent_len_max_1": 189.305, "stdk": 0.0431, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 58700 }, { "accuracy": 60.3516, "doc_norm": 6.5881, "encoder_q-embeddings": 17021.3906, "encoder_q-layer.0": 12060.2998, "encoder_q-layer.1": 12932.4307, "encoder_q-layer.10": 21957.3418, "encoder_q-layer.11": 45791.1016, "encoder_q-layer.2": 13976.3975, "encoder_q-layer.3": 13979.4404, "encoder_q-layer.4": 14396.3398, "encoder_q-layer.5": 14570.8633, "encoder_q-layer.6": 15056.0127, "encoder_q-layer.7": 15992.6992, "encoder_q-layer.8": 17711.0312, "encoder_q-layer.9": 16892.6855, "epoch": 0.57, "inbatch_neg_score": 41.6365, "inbatch_pos_score": 42.1562, "learning_rate": 2.288888888888889e-05, "loss": 2.2564, "norm_diff": 0.043, "num_tokens_overlap": 5.5702, "num_tokens_union": 55.0575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28192.1203, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5451, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7117, "sent_len_1": 66.8313, "sent_len_max_0": 18.84, "sent_len_max_1": 190.2113, "stdk": 0.045, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 58800 }, { "accuracy": 59.375, "doc_norm": 6.5825, "encoder_q-embeddings": 18539.4551, "encoder_q-layer.0": 12686.835, "encoder_q-layer.1": 12900.2812, "encoder_q-layer.10": 22458.4062, "encoder_q-layer.11": 39458.6406, "encoder_q-layer.2": 14585.3438, "encoder_q-layer.3": 14777.126, "encoder_q-layer.4": 14888.3135, "encoder_q-layer.5": 14330.9004, "encoder_q-layer.6": 15220.1963, "encoder_q-layer.7": 16236.0186, "encoder_q-layer.8": 17922.1191, "encoder_q-layer.9": 16411.2852, "epoch": 0.58, "inbatch_neg_score": 41.6207, "inbatch_pos_score": 42.125, "learning_rate": 2.2833333333333334e-05, "loss": 2.2493, "norm_diff": 0.0389, "num_tokens_overlap": 5.5832, "num_tokens_union": 55.0801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27619.3517, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5436, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7394, "sent_len_1": 66.818, "sent_len_max_0": 18.83, "sent_len_max_1": 189.105, "stdk": 0.044, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 58900 }, { "accuracy": 59.5703, "doc_norm": 6.5849, "encoder_q-embeddings": 17449.123, "encoder_q-layer.0": 12005.1943, "encoder_q-layer.1": 12664.6748, "encoder_q-layer.10": 19831.5117, "encoder_q-layer.11": 37149.5078, "encoder_q-layer.2": 13635.3828, "encoder_q-layer.3": 14172.7148, "encoder_q-layer.4": 14138.2832, "encoder_q-layer.5": 13910.6777, "encoder_q-layer.6": 14530.3564, "encoder_q-layer.7": 15910.54, "encoder_q-layer.8": 17508.1758, "encoder_q-layer.9": 16105.3691, "epoch": 0.58, "inbatch_neg_score": 41.613, "inbatch_pos_score": 42.125, "learning_rate": 2.277777777777778e-05, "loss": 2.2507, "norm_diff": 0.0424, "num_tokens_overlap": 5.5751, "num_tokens_union": 54.9634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26244.8523, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5425, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7004, "sent_len_1": 66.729, "sent_len_max_0": 18.815, "sent_len_max_1": 189.525, "stdk": 0.0442, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 59000 }, { "accuracy": 57.5195, "doc_norm": 6.5781, "encoder_q-embeddings": 16635.1309, "encoder_q-layer.0": 11720.6992, "encoder_q-layer.1": 11953.1025, "encoder_q-layer.10": 19533.8281, "encoder_q-layer.11": 36961.3945, "encoder_q-layer.2": 13248.1621, "encoder_q-layer.3": 13562.9717, "encoder_q-layer.4": 13792.6553, "encoder_q-layer.5": 13562.7012, "encoder_q-layer.6": 14252.4424, "encoder_q-layer.7": 14418.6914, "encoder_q-layer.8": 16621.3633, "encoder_q-layer.9": 15949.1875, "epoch": 0.58, "inbatch_neg_score": 41.5533, "inbatch_pos_score": 42.0625, "learning_rate": 2.2722222222222222e-05, "loss": 2.2547, "norm_diff": 0.041, "num_tokens_overlap": 5.5683, "num_tokens_union": 54.9646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25557.8249, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5371, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7022, "sent_len_1": 66.6978, "sent_len_max_0": 18.8163, "sent_len_max_1": 188.8913, "stdk": 0.0455, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 59100 }, { "accuracy": 58.1055, "doc_norm": 6.5802, "encoder_q-embeddings": 16883.9434, "encoder_q-layer.0": 12113.0771, "encoder_q-layer.1": 12297.5947, "encoder_q-layer.10": 18876.0723, "encoder_q-layer.11": 38336.0234, "encoder_q-layer.2": 13469.6221, "encoder_q-layer.3": 13637.6504, "encoder_q-layer.4": 14145.8467, "encoder_q-layer.5": 13868.4883, "encoder_q-layer.6": 14707.0781, "encoder_q-layer.7": 15764.3633, "encoder_q-layer.8": 17180.8066, "encoder_q-layer.9": 16227.292, "epoch": 0.58, "inbatch_neg_score": 41.5616, "inbatch_pos_score": 42.0938, "learning_rate": 2.2666666666666668e-05, "loss": 2.25, "norm_diff": 0.0418, "num_tokens_overlap": 5.5664, "num_tokens_union": 54.9637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26212.6268, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5383, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7218, "sent_len_1": 66.6846, "sent_len_max_0": 18.7387, "sent_len_max_1": 190.415, "stdk": 0.0442, "stdq": 0.038, "stdqueue_k": 0.0, "step": 59200 }, { "accuracy": 57.9102, "doc_norm": 6.581, "encoder_q-embeddings": 17881.2441, "encoder_q-layer.0": 12765.3926, "encoder_q-layer.1": 13471.6709, "encoder_q-layer.10": 21681.8652, "encoder_q-layer.11": 41549.0312, "encoder_q-layer.2": 14574.0225, "encoder_q-layer.3": 14615.5449, "encoder_q-layer.4": 14927.1025, "encoder_q-layer.5": 14460.1016, "encoder_q-layer.6": 15586.3828, "encoder_q-layer.7": 16966.8711, "encoder_q-layer.8": 18647.0977, "encoder_q-layer.9": 17326.2109, "epoch": 0.58, "inbatch_neg_score": 41.5132, "inbatch_pos_score": 42.0312, "learning_rate": 2.2611111111111113e-05, "loss": 2.2362, "norm_diff": 0.0451, "num_tokens_overlap": 5.5826, "num_tokens_union": 54.9323, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28034.6845, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5359, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7173, "sent_len_1": 66.6979, "sent_len_max_0": 18.9062, "sent_len_max_1": 190.7738, "stdk": 0.0463, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 59300 }, { "accuracy": 56.6406, "doc_norm": 6.5777, "encoder_q-embeddings": 18367.5547, "encoder_q-layer.0": 13320.917, "encoder_q-layer.1": 13878.251, "encoder_q-layer.10": 21128.7695, "encoder_q-layer.11": 40601.8203, "encoder_q-layer.2": 14914.2002, "encoder_q-layer.3": 14540.9004, "encoder_q-layer.4": 15102.8584, "encoder_q-layer.5": 14503.7275, "encoder_q-layer.6": 15946.958, "encoder_q-layer.7": 17727.2637, "encoder_q-layer.8": 18063.2461, "encoder_q-layer.9": 17334.0547, "epoch": 0.58, "inbatch_neg_score": 41.5452, "inbatch_pos_score": 42.0625, "learning_rate": 2.255555555555556e-05, "loss": 2.2888, "norm_diff": 0.0389, "num_tokens_overlap": 5.5726, "num_tokens_union": 55.156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27993.6786, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5387, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7082, "sent_len_1": 67.0519, "sent_len_max_0": 18.75, "sent_len_max_1": 191.175, "stdk": 0.0455, "stdq": 0.039, "stdqueue_k": 0.0, "step": 59400 }, { "accuracy": 55.8594, "doc_norm": 6.5735, "encoder_q-embeddings": 17026.2793, "encoder_q-layer.0": 11884.7773, "encoder_q-layer.1": 13060.2695, "encoder_q-layer.10": 19879.0273, "encoder_q-layer.11": 40923.1836, "encoder_q-layer.2": 14452.208, "encoder_q-layer.3": 14506.7646, "encoder_q-layer.4": 14701.2939, "encoder_q-layer.5": 14462.7314, "encoder_q-layer.6": 15486.8623, "encoder_q-layer.7": 16080.4355, "encoder_q-layer.8": 18042.8457, "encoder_q-layer.9": 16841.3184, "epoch": 0.58, "inbatch_neg_score": 41.5239, "inbatch_pos_score": 42.0312, "learning_rate": 2.25e-05, "loss": 2.2229, "norm_diff": 0.0384, "num_tokens_overlap": 5.5766, "num_tokens_union": 55.0496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27192.7392, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.535, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7174, "sent_len_1": 66.851, "sent_len_max_0": 18.7963, "sent_len_max_1": 191.2775, "stdk": 0.0439, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 59500 }, { "accuracy": 54.6875, "doc_norm": 6.574, "encoder_q-embeddings": 18330.3574, "encoder_q-layer.0": 12878.6602, "encoder_q-layer.1": 12858.9824, "encoder_q-layer.10": 20719.957, "encoder_q-layer.11": 42680.6055, "encoder_q-layer.2": 14008.2002, "encoder_q-layer.3": 14116.3223, "encoder_q-layer.4": 14764.7344, "encoder_q-layer.5": 14451.459, "encoder_q-layer.6": 15533.9131, "encoder_q-layer.7": 16298.5205, "encoder_q-layer.8": 17198.6934, "encoder_q-layer.9": 16863.5098, "epoch": 0.58, "inbatch_neg_score": 41.5054, "inbatch_pos_score": 42.0, "learning_rate": 2.2444444444444447e-05, "loss": 2.2444, "norm_diff": 0.0393, "num_tokens_overlap": 5.5683, "num_tokens_union": 54.9313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27784.9005, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5347, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7147, "sent_len_1": 66.7043, "sent_len_max_0": 18.8313, "sent_len_max_1": 190.33, "stdk": 0.045, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 59600 }, { "accuracy": 56.0547, "doc_norm": 6.5707, "encoder_q-embeddings": 16905.4395, "encoder_q-layer.0": 12326.042, "encoder_q-layer.1": 12805.8115, "encoder_q-layer.10": 19672.0078, "encoder_q-layer.11": 38289.1055, "encoder_q-layer.2": 13927.3193, "encoder_q-layer.3": 13992.9688, "encoder_q-layer.4": 14166.8242, "encoder_q-layer.5": 13818.7295, "encoder_q-layer.6": 14638.4102, "encoder_q-layer.7": 14989.6562, "encoder_q-layer.8": 16990.084, "encoder_q-layer.9": 15909.6533, "epoch": 0.58, "inbatch_neg_score": 41.4773, "inbatch_pos_score": 41.9688, "learning_rate": 2.238888888888889e-05, "loss": 2.2634, "norm_diff": 0.0392, "num_tokens_overlap": 5.5795, "num_tokens_union": 54.8722, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26255.6946, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5315, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7425, "sent_len_1": 66.5245, "sent_len_max_0": 18.82, "sent_len_max_1": 189.3388, "stdk": 0.0447, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 59700 }, { "accuracy": 58.1055, "doc_norm": 6.5684, "encoder_q-embeddings": 17273.459, "encoder_q-layer.0": 12274.6152, "encoder_q-layer.1": 12482.6982, "encoder_q-layer.10": 20118.4492, "encoder_q-layer.11": 38508.2031, "encoder_q-layer.2": 13759.0107, "encoder_q-layer.3": 13585.4287, "encoder_q-layer.4": 14170.7666, "encoder_q-layer.5": 13389.5068, "encoder_q-layer.6": 14756.6133, "encoder_q-layer.7": 15579.8408, "encoder_q-layer.8": 17196.6387, "encoder_q-layer.9": 16596.1035, "epoch": 0.58, "inbatch_neg_score": 41.4586, "inbatch_pos_score": 41.9688, "learning_rate": 2.2333333333333335e-05, "loss": 2.2384, "norm_diff": 0.0416, "num_tokens_overlap": 5.59, "num_tokens_union": 54.926, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26447.6318, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5269, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7396, "sent_len_1": 66.5777, "sent_len_max_0": 18.8875, "sent_len_max_1": 186.8812, "stdk": 0.0441, "stdq": 0.0376, "stdqueue_k": 0.0, "step": 59800 }, { "accuracy": 58.3984, "doc_norm": 6.5741, "encoder_q-embeddings": 17202.4902, "encoder_q-layer.0": 12078.0752, "encoder_q-layer.1": 12438.5049, "encoder_q-layer.10": 23615.1484, "encoder_q-layer.11": 42374.2734, "encoder_q-layer.2": 13812.79, "encoder_q-layer.3": 14152.7266, "encoder_q-layer.4": 14292.6562, "encoder_q-layer.5": 13958.4561, "encoder_q-layer.6": 15504.6279, "encoder_q-layer.7": 16601.6113, "encoder_q-layer.8": 19680.627, "encoder_q-layer.9": 17064.6016, "epoch": 0.58, "inbatch_neg_score": 41.5135, "inbatch_pos_score": 42.0312, "learning_rate": 2.2277777777777778e-05, "loss": 2.2841, "norm_diff": 0.0384, "num_tokens_overlap": 5.5722, "num_tokens_union": 55.0261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27839.877, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5357, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.743, "sent_len_1": 66.7562, "sent_len_max_0": 18.885, "sent_len_max_1": 189.3137, "stdk": 0.0432, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 59900 }, { "accuracy": 56.0547, "doc_norm": 6.5767, "encoder_q-embeddings": 16971.2539, "encoder_q-layer.0": 12105.8643, "encoder_q-layer.1": 12708.4795, "encoder_q-layer.10": 20354.1699, "encoder_q-layer.11": 39964.3359, "encoder_q-layer.2": 13548.959, "encoder_q-layer.3": 13706.5684, "encoder_q-layer.4": 13703.7256, "encoder_q-layer.5": 13605.4189, "encoder_q-layer.6": 14224.6377, "encoder_q-layer.7": 15216.6182, "encoder_q-layer.8": 17481.1348, "encoder_q-layer.9": 15991.1152, "epoch": 0.59, "inbatch_neg_score": 41.5106, "inbatch_pos_score": 42.0312, "learning_rate": 2.2222222222222223e-05, "loss": 2.2181, "norm_diff": 0.0419, "num_tokens_overlap": 5.5815, "num_tokens_union": 55.0725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26838.8857, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5348, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7455, "sent_len_1": 66.8258, "sent_len_max_0": 18.7963, "sent_len_max_1": 191.9787, "stdk": 0.0445, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 60000 }, { "dev_runtime": 27.9162, "dev_samples_per_second": 2.293, "dev_steps_per_second": 0.036, "epoch": 0.59, "step": 60000, "test_accuracy": 8.6639404296875, "test_doc_norm": 6.572542190551758, "test_inbatch_neg_score": 42.063636779785156, "test_inbatch_pos_score": 42.86922073364258, "test_loss": 4.123045444488525, "test_norm_diff": 0.0020381808280944824, "test_query_norm": 6.572222709655762, "test_queue_k_norm": 0.0, "test_stdk": 0.03787185251712799, "test_stdq": 0.03780949115753174, "test_stdqueue_k": 0.0 }, { "dev_runtime": 27.9162, "dev_samples_per_second": 2.293, "dev_steps_per_second": 0.036, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.3641, "eval_beir-arguana_recall@10": 0.65292, "eval_beir-arguana_recall@100": 0.96728, "eval_beir-arguana_recall@20": 0.82432, "eval_beir-avg_ndcg@10": 0.38819641666666665, "eval_beir-avg_recall@10": 0.4594866666666667, "eval_beir-avg_recall@100": 0.6459811666666667, "eval_beir-avg_recall@20": 0.5259551666666666, "eval_beir-cqadupstack_ndcg@10": 0.29545416666666663, "eval_beir-cqadupstack_recall@10": 0.39577666666666667, "eval_beir-cqadupstack_recall@100": 0.6279316666666667, "eval_beir-cqadupstack_recall@20": 0.46224166666666666, "eval_beir-fiqa_ndcg@10": 0.27838, "eval_beir-fiqa_recall@10": 0.34512, "eval_beir-fiqa_recall@100": 0.62005, "eval_beir-fiqa_recall@20": 0.42414, "eval_beir-nfcorpus_ndcg@10": 0.33961, "eval_beir-nfcorpus_recall@10": 0.16862, "eval_beir-nfcorpus_recall@100": 0.30697, "eval_beir-nfcorpus_recall@20": 0.20145, "eval_beir-nq_ndcg@10": 0.26714, "eval_beir-nq_recall@10": 0.44771, "eval_beir-nq_recall@100": 0.77711, "eval_beir-nq_recall@20": 0.56187, "eval_beir-quora_ndcg@10": 0.78585, "eval_beir-quora_recall@10": 0.88721, "eval_beir-quora_recall@100": 0.97687, "eval_beir-quora_recall@20": 0.92741, "eval_beir-scidocs_ndcg@10": 0.15938, "eval_beir-scidocs_recall@10": 0.16993, "eval_beir-scidocs_recall@100": 0.38248, "eval_beir-scidocs_recall@20": 0.22857, "eval_beir-scifact_ndcg@10": 0.61455, "eval_beir-scifact_recall@10": 0.77494, "eval_beir-scifact_recall@100": 0.916, "eval_beir-scifact_recall@20": 0.82244, "eval_beir-trec-covid_ndcg@10": 0.57967, "eval_beir-trec-covid_recall@10": 0.614, "eval_beir-trec-covid_recall@100": 0.4576, "eval_beir-trec-covid_recall@20": 0.59, "eval_beir-webis-touche2020_ndcg@10": 0.19783, "eval_beir-webis-touche2020_recall@10": 0.13864, "eval_beir-webis-touche2020_recall@100": 0.42752, "eval_beir-webis-touche2020_recall@20": 0.21711, "eval_senteval-avg_sts": 0.7414615073989015, "eval_senteval-sickr_spearman": 0.7333399614724372, "eval_senteval-stsb_spearman": 0.7495830533253659, "step": 60000, "test_accuracy": 8.6639404296875, "test_doc_norm": 6.572542190551758, "test_inbatch_neg_score": 42.063636779785156, "test_inbatch_pos_score": 42.86922073364258, "test_loss": 4.123045444488525, "test_norm_diff": 0.0020381808280944824, "test_query_norm": 6.572222709655762, "test_queue_k_norm": 0.0, "test_stdk": 0.03787185251712799, "test_stdq": 0.03780949115753174, "test_stdqueue_k": 0.0 }, { "accuracy": 56.9336, "doc_norm": 6.5717, "encoder_q-embeddings": 16706.3535, "encoder_q-layer.0": 11862.292, "encoder_q-layer.1": 12563.2881, "encoder_q-layer.10": 20255.6504, "encoder_q-layer.11": 40730.5195, "encoder_q-layer.2": 13738.9092, "encoder_q-layer.3": 13685.6729, "encoder_q-layer.4": 13886.7861, "encoder_q-layer.5": 13672.4385, "encoder_q-layer.6": 14641.3047, "encoder_q-layer.7": 15236.2314, "encoder_q-layer.8": 17949.1738, "encoder_q-layer.9": 16015.6475, "epoch": 0.59, "inbatch_neg_score": 41.4891, "inbatch_pos_score": 42.0, "learning_rate": 2.216666666666667e-05, "loss": 2.2483, "norm_diff": 0.0398, "num_tokens_overlap": 5.5752, "num_tokens_union": 55.0167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26530.5105, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5319, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7104, "sent_len_1": 66.8103, "sent_len_max_0": 18.8313, "sent_len_max_1": 189.2775, "stdk": 0.0427, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 60100 }, { "accuracy": 56.25, "doc_norm": 6.5773, "encoder_q-embeddings": 18103.9668, "encoder_q-layer.0": 12714.0918, "encoder_q-layer.1": 13283.4814, "encoder_q-layer.10": 22915.5508, "encoder_q-layer.11": 41781.5352, "encoder_q-layer.2": 14484.5303, "encoder_q-layer.3": 14259.8984, "encoder_q-layer.4": 15046.0098, "encoder_q-layer.5": 14724.043, "encoder_q-layer.6": 15560.46, "encoder_q-layer.7": 17065.582, "encoder_q-layer.8": 18406.9004, "encoder_q-layer.9": 17233.1426, "epoch": 0.59, "inbatch_neg_score": 41.5015, "inbatch_pos_score": 42.0312, "learning_rate": 2.211111111111111e-05, "loss": 2.2839, "norm_diff": 0.0413, "num_tokens_overlap": 5.5851, "num_tokens_union": 55.1572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28349.564, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.536, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7203, "sent_len_1": 67.0239, "sent_len_max_0": 18.9875, "sent_len_max_1": 192.825, "stdk": 0.0439, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 60200 }, { "accuracy": 55.6641, "doc_norm": 6.5705, "encoder_q-embeddings": 38038.4531, "encoder_q-layer.0": 26168.2168, "encoder_q-layer.1": 26743.0391, "encoder_q-layer.10": 41309.5312, "encoder_q-layer.11": 83608.625, "encoder_q-layer.2": 29003.7715, "encoder_q-layer.3": 28567.1211, "encoder_q-layer.4": 29114.3906, "encoder_q-layer.5": 28601.7891, "encoder_q-layer.6": 32396.123, "encoder_q-layer.7": 33966.1992, "encoder_q-layer.8": 36558.1562, "encoder_q-layer.9": 33040.0234, "epoch": 0.59, "inbatch_neg_score": 41.4821, "inbatch_pos_score": 41.9688, "learning_rate": 2.2055555555555557e-05, "loss": 2.244, "norm_diff": 0.0392, "num_tokens_overlap": 5.5684, "num_tokens_union": 54.9684, "postclip_grad_norm": 1.0, "preclip_grad_norm": 55702.3454, "preclip_grad_norm_avg": 0.0005, "query_norm": 6.5313, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7397, "sent_len_1": 66.7341, "sent_len_max_0": 18.8263, "sent_len_max_1": 191.0462, "stdk": 0.0443, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 60300 }, { "accuracy": 58.0078, "doc_norm": 6.5723, "encoder_q-embeddings": 18282.5742, "encoder_q-layer.0": 12909.709, "encoder_q-layer.1": 13460.6006, "encoder_q-layer.10": 21348.5488, "encoder_q-layer.11": 42553.7578, "encoder_q-layer.2": 14574.29, "encoder_q-layer.3": 14080.9277, "encoder_q-layer.4": 14673.2002, "encoder_q-layer.5": 14165.667, "encoder_q-layer.6": 15573.8135, "encoder_q-layer.7": 16662.7422, "encoder_q-layer.8": 18465.3398, "encoder_q-layer.9": 17491.3906, "epoch": 0.59, "inbatch_neg_score": 41.467, "inbatch_pos_score": 41.9688, "learning_rate": 2.2000000000000003e-05, "loss": 2.2567, "norm_diff": 0.0403, "num_tokens_overlap": 5.5938, "num_tokens_union": 55.0321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28036.9201, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.532, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7301, "sent_len_1": 66.8285, "sent_len_max_0": 18.9, "sent_len_max_1": 189.31, "stdk": 0.0446, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 60400 }, { "accuracy": 60.3516, "doc_norm": 6.575, "encoder_q-embeddings": 17074.1934, "encoder_q-layer.0": 12192.8018, "encoder_q-layer.1": 12655.5811, "encoder_q-layer.10": 19996.5723, "encoder_q-layer.11": 39725.8672, "encoder_q-layer.2": 13701.6006, "encoder_q-layer.3": 13490.5166, "encoder_q-layer.4": 14073.5635, "encoder_q-layer.5": 13843.5547, "encoder_q-layer.6": 14818.4531, "encoder_q-layer.7": 15860.1523, "encoder_q-layer.8": 18245.9277, "encoder_q-layer.9": 16580.3398, "epoch": 0.59, "inbatch_neg_score": 41.4377, "inbatch_pos_score": 42.0, "learning_rate": 2.1944444444444445e-05, "loss": 2.2553, "norm_diff": 0.0433, "num_tokens_overlap": 5.5849, "num_tokens_union": 55.0877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26381.8555, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5317, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7312, "sent_len_1": 66.8453, "sent_len_max_0": 18.7775, "sent_len_max_1": 188.8063, "stdk": 0.0469, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 60500 }, { "accuracy": 57.1289, "doc_norm": 6.5671, "encoder_q-embeddings": 18740.1465, "encoder_q-layer.0": 12800.8799, "encoder_q-layer.1": 13066.4355, "encoder_q-layer.10": 19814.3184, "encoder_q-layer.11": 41241.3867, "encoder_q-layer.2": 14560.9648, "encoder_q-layer.3": 14574.6748, "encoder_q-layer.4": 14754.668, "encoder_q-layer.5": 14154.0127, "encoder_q-layer.6": 14953.2793, "encoder_q-layer.7": 15900.6309, "encoder_q-layer.8": 18382.3008, "encoder_q-layer.9": 17003.5391, "epoch": 0.59, "inbatch_neg_score": 41.418, "inbatch_pos_score": 41.9375, "learning_rate": 2.188888888888889e-05, "loss": 2.2301, "norm_diff": 0.0381, "num_tokens_overlap": 5.5755, "num_tokens_union": 55.0887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27555.2975, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.529, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7093, "sent_len_1": 66.9024, "sent_len_max_0": 18.8938, "sent_len_max_1": 189.49, "stdk": 0.044, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 60600 }, { "accuracy": 57.7148, "doc_norm": 6.5677, "encoder_q-embeddings": 17131.0332, "encoder_q-layer.0": 12347.29, "encoder_q-layer.1": 12766.9072, "encoder_q-layer.10": 21090.3086, "encoder_q-layer.11": 41024.2305, "encoder_q-layer.2": 13992.332, "encoder_q-layer.3": 14141.9971, "encoder_q-layer.4": 14590.0811, "encoder_q-layer.5": 14043.2637, "encoder_q-layer.6": 15114.8213, "encoder_q-layer.7": 15316.3926, "encoder_q-layer.8": 17972.5703, "encoder_q-layer.9": 16928.0879, "epoch": 0.59, "inbatch_neg_score": 41.3841, "inbatch_pos_score": 41.9062, "learning_rate": 2.1833333333333333e-05, "loss": 2.2703, "norm_diff": 0.0418, "num_tokens_overlap": 5.5699, "num_tokens_union": 54.987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27042.8423, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5259, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6954, "sent_len_1": 66.7715, "sent_len_max_0": 18.85, "sent_len_max_1": 188.7525, "stdk": 0.0455, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 60700 }, { "accuracy": 57.1289, "doc_norm": 6.5648, "encoder_q-embeddings": 16022.0732, "encoder_q-layer.0": 11857.9912, "encoder_q-layer.1": 11867.8389, "encoder_q-layer.10": 20346.5859, "encoder_q-layer.11": 40749.9531, "encoder_q-layer.2": 13082.9111, "encoder_q-layer.3": 13526.9375, "encoder_q-layer.4": 14172.6533, "encoder_q-layer.5": 13813.8496, "encoder_q-layer.6": 14344.8926, "encoder_q-layer.7": 15248.8496, "encoder_q-layer.8": 17322.2676, "encoder_q-layer.9": 16508.166, "epoch": 0.59, "inbatch_neg_score": 41.3796, "inbatch_pos_score": 41.9062, "learning_rate": 2.177777777777778e-05, "loss": 2.2238, "norm_diff": 0.0414, "num_tokens_overlap": 5.5936, "num_tokens_union": 54.9686, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26342.6487, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5233, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7369, "sent_len_1": 66.7381, "sent_len_max_0": 18.9462, "sent_len_max_1": 190.0987, "stdk": 0.0445, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 60800 }, { "accuracy": 56.0547, "doc_norm": 6.5674, "encoder_q-embeddings": 17154.9395, "encoder_q-layer.0": 12264.1855, "encoder_q-layer.1": 12775.2383, "encoder_q-layer.10": 19798.8789, "encoder_q-layer.11": 40256.375, "encoder_q-layer.2": 14480.5029, "encoder_q-layer.3": 14015.6855, "encoder_q-layer.4": 14223.2598, "encoder_q-layer.5": 13812.8418, "encoder_q-layer.6": 15042.5967, "encoder_q-layer.7": 16283.1338, "encoder_q-layer.8": 18487.7402, "encoder_q-layer.9": 16748.3809, "epoch": 0.59, "inbatch_neg_score": 41.4103, "inbatch_pos_score": 41.9375, "learning_rate": 2.1722222222222225e-05, "loss": 2.2786, "norm_diff": 0.0402, "num_tokens_overlap": 5.5756, "num_tokens_union": 54.973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27280.4104, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5273, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7169, "sent_len_1": 66.7291, "sent_len_max_0": 18.8463, "sent_len_max_1": 190.4563, "stdk": 0.0439, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 60900 }, { "accuracy": 56.9336, "doc_norm": 6.5652, "encoder_q-embeddings": 17352.0039, "encoder_q-layer.0": 11956.6943, "encoder_q-layer.1": 12723.6943, "encoder_q-layer.10": 20689.7891, "encoder_q-layer.11": 36908.5898, "encoder_q-layer.2": 13639.7832, "encoder_q-layer.3": 13788.0361, "encoder_q-layer.4": 14455.8486, "encoder_q-layer.5": 14284.2236, "encoder_q-layer.6": 15034.377, "encoder_q-layer.7": 15669.5107, "encoder_q-layer.8": 17156.5332, "encoder_q-layer.9": 16059.9316, "epoch": 0.6, "inbatch_neg_score": 41.3716, "inbatch_pos_score": 41.875, "learning_rate": 2.1666666666666667e-05, "loss": 2.2473, "norm_diff": 0.0413, "num_tokens_overlap": 5.5815, "num_tokens_union": 55.0374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26431.6435, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5239, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7055, "sent_len_1": 66.8654, "sent_len_max_0": 18.8075, "sent_len_max_1": 189.725, "stdk": 0.0449, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 61000 }, { "accuracy": 55.3711, "doc_norm": 6.5684, "encoder_q-embeddings": 17916.2773, "encoder_q-layer.0": 12393.8496, "encoder_q-layer.1": 12803.9414, "encoder_q-layer.10": 21572.0801, "encoder_q-layer.11": 43281.6094, "encoder_q-layer.2": 13784.3037, "encoder_q-layer.3": 14068.9375, "encoder_q-layer.4": 14889.1484, "encoder_q-layer.5": 14464.29, "encoder_q-layer.6": 14885.4541, "encoder_q-layer.7": 16909.125, "encoder_q-layer.8": 19527.0488, "encoder_q-layer.9": 16946.5508, "epoch": 0.6, "inbatch_neg_score": 41.3902, "inbatch_pos_score": 41.9062, "learning_rate": 2.1611111111111113e-05, "loss": 2.2394, "norm_diff": 0.0441, "num_tokens_overlap": 5.5908, "num_tokens_union": 55.0033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27789.8455, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5244, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7426, "sent_len_1": 66.8128, "sent_len_max_0": 18.8025, "sent_len_max_1": 189.7912, "stdk": 0.0446, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 61100 }, { "accuracy": 56.9336, "doc_norm": 6.5608, "encoder_q-embeddings": 17107.8906, "encoder_q-layer.0": 12376.79, "encoder_q-layer.1": 12740.875, "encoder_q-layer.10": 19898.1855, "encoder_q-layer.11": 40591.8867, "encoder_q-layer.2": 13745.0303, "encoder_q-layer.3": 13865.9756, "encoder_q-layer.4": 14389.1953, "encoder_q-layer.5": 13371.2676, "encoder_q-layer.6": 14545.4834, "encoder_q-layer.7": 15513.9824, "encoder_q-layer.8": 17628.8945, "encoder_q-layer.9": 16389.5742, "epoch": 0.6, "inbatch_neg_score": 41.3637, "inbatch_pos_score": 41.875, "learning_rate": 2.1555555555555555e-05, "loss": 2.2417, "norm_diff": 0.0383, "num_tokens_overlap": 5.582, "num_tokens_union": 54.9499, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26525.8129, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5225, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7182, "sent_len_1": 66.6778, "sent_len_max_0": 18.8788, "sent_len_max_1": 187.4275, "stdk": 0.0442, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 61200 }, { "accuracy": 57.2266, "doc_norm": 6.5607, "encoder_q-embeddings": 16446.3027, "encoder_q-layer.0": 11700.9863, "encoder_q-layer.1": 12174.4346, "encoder_q-layer.10": 24886.0039, "encoder_q-layer.11": 39030.5469, "encoder_q-layer.2": 13602.667, "encoder_q-layer.3": 13721.0361, "encoder_q-layer.4": 13987.9541, "encoder_q-layer.5": 14117.2305, "encoder_q-layer.6": 14760.1289, "encoder_q-layer.7": 16053.5557, "encoder_q-layer.8": 17937.7402, "encoder_q-layer.9": 17137.3242, "epoch": 0.6, "inbatch_neg_score": 41.36, "inbatch_pos_score": 41.875, "learning_rate": 2.15e-05, "loss": 2.2289, "norm_diff": 0.0389, "num_tokens_overlap": 5.5646, "num_tokens_union": 55.0093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26961.2965, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5218, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7005, "sent_len_1": 66.7496, "sent_len_max_0": 18.8825, "sent_len_max_1": 187.9425, "stdk": 0.0431, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 61300 }, { "accuracy": 55.7617, "doc_norm": 6.5611, "encoder_q-embeddings": 18144.4512, "encoder_q-layer.0": 12716.8223, "encoder_q-layer.1": 12969.0645, "encoder_q-layer.10": 21616.4902, "encoder_q-layer.11": 38940.1875, "encoder_q-layer.2": 14486.7061, "encoder_q-layer.3": 14325.2178, "encoder_q-layer.4": 14345.915, "encoder_q-layer.5": 14637.9258, "encoder_q-layer.6": 15363.9355, "encoder_q-layer.7": 16523.1465, "encoder_q-layer.8": 18881.4043, "encoder_q-layer.9": 17366.7734, "epoch": 0.6, "inbatch_neg_score": 41.31, "inbatch_pos_score": 41.8125, "learning_rate": 2.1444444444444443e-05, "loss": 2.2433, "norm_diff": 0.0422, "num_tokens_overlap": 5.5789, "num_tokens_union": 54.9594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27619.075, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5189, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7318, "sent_len_1": 66.6402, "sent_len_max_0": 18.8362, "sent_len_max_1": 187.3338, "stdk": 0.0447, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 61400 }, { "accuracy": 56.0547, "doc_norm": 6.5619, "encoder_q-embeddings": 18145.0137, "encoder_q-layer.0": 12575.2061, "encoder_q-layer.1": 13157.873, "encoder_q-layer.10": 21078.793, "encoder_q-layer.11": 40312.043, "encoder_q-layer.2": 14380.7207, "encoder_q-layer.3": 14544.4229, "encoder_q-layer.4": 14851.8037, "encoder_q-layer.5": 14400.7021, "encoder_q-layer.6": 15427.8242, "encoder_q-layer.7": 16709.1172, "encoder_q-layer.8": 18648.709, "encoder_q-layer.9": 17740.3789, "epoch": 0.6, "inbatch_neg_score": 41.3068, "inbatch_pos_score": 41.8125, "learning_rate": 2.138888888888889e-05, "loss": 2.2556, "norm_diff": 0.0412, "num_tokens_overlap": 5.5678, "num_tokens_union": 54.9809, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28097.9335, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5206, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7194, "sent_len_1": 66.6526, "sent_len_max_0": 18.9937, "sent_len_max_1": 189.1262, "stdk": 0.0444, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 61500 }, { "accuracy": 57.2266, "doc_norm": 6.5583, "encoder_q-embeddings": 17872.4141, "encoder_q-layer.0": 12106.4404, "encoder_q-layer.1": 12589.3477, "encoder_q-layer.10": 21291.957, "encoder_q-layer.11": 40911.7656, "encoder_q-layer.2": 13713.8418, "encoder_q-layer.3": 14155.8711, "encoder_q-layer.4": 14737.4453, "encoder_q-layer.5": 14581.625, "encoder_q-layer.6": 15649.4639, "encoder_q-layer.7": 17413.0996, "encoder_q-layer.8": 19619.334, "encoder_q-layer.9": 17710.1738, "epoch": 0.6, "inbatch_neg_score": 41.2814, "inbatch_pos_score": 41.8125, "learning_rate": 2.1333333333333335e-05, "loss": 2.2477, "norm_diff": 0.0395, "num_tokens_overlap": 5.5809, "num_tokens_union": 54.9667, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27680.4721, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5188, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7127, "sent_len_1": 66.7211, "sent_len_max_0": 18.8487, "sent_len_max_1": 188.9437, "stdk": 0.0444, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 61600 }, { "accuracy": 57.6172, "doc_norm": 6.5549, "encoder_q-embeddings": 17265.6465, "encoder_q-layer.0": 12441.0625, "encoder_q-layer.1": 13582.1592, "encoder_q-layer.10": 21661.0078, "encoder_q-layer.11": 39095.1953, "encoder_q-layer.2": 14131.7168, "encoder_q-layer.3": 14445.2715, "encoder_q-layer.4": 14969.1709, "encoder_q-layer.5": 14585.4238, "encoder_q-layer.6": 15347.1904, "encoder_q-layer.7": 16331.0371, "encoder_q-layer.8": 18251.0449, "encoder_q-layer.9": 16943.1875, "epoch": 0.6, "inbatch_neg_score": 41.2453, "inbatch_pos_score": 41.75, "learning_rate": 2.127777777777778e-05, "loss": 2.2606, "norm_diff": 0.0416, "num_tokens_overlap": 5.5752, "num_tokens_union": 55.0291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27271.7333, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5133, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6914, "sent_len_1": 66.8583, "sent_len_max_0": 18.845, "sent_len_max_1": 188.9825, "stdk": 0.0439, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 61700 }, { "accuracy": 57.6172, "doc_norm": 6.5505, "encoder_q-embeddings": 17862.0156, "encoder_q-layer.0": 12331.6846, "encoder_q-layer.1": 12835.1758, "encoder_q-layer.10": 20220.457, "encoder_q-layer.11": 41592.4141, "encoder_q-layer.2": 14289.1523, "encoder_q-layer.3": 14474.6377, "encoder_q-layer.4": 15020.0781, "encoder_q-layer.5": 14429.4609, "encoder_q-layer.6": 15447.8379, "encoder_q-layer.7": 16235.6592, "encoder_q-layer.8": 18051.3516, "encoder_q-layer.9": 16929.873, "epoch": 0.6, "inbatch_neg_score": 41.2006, "inbatch_pos_score": 41.7188, "learning_rate": 2.1222222222222223e-05, "loss": 2.2828, "norm_diff": 0.0415, "num_tokens_overlap": 5.5764, "num_tokens_union": 55.0364, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27737.1411, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.509, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.713, "sent_len_1": 66.7694, "sent_len_max_0": 18.7987, "sent_len_max_1": 187.985, "stdk": 0.044, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 61800 }, { "accuracy": 61.0352, "doc_norm": 6.5588, "encoder_q-embeddings": 16395.291, "encoder_q-layer.0": 11699.2178, "encoder_q-layer.1": 12278.9814, "encoder_q-layer.10": 21583.3438, "encoder_q-layer.11": 43935.2344, "encoder_q-layer.2": 13319.0371, "encoder_q-layer.3": 13523.5459, "encoder_q-layer.4": 13890.4814, "encoder_q-layer.5": 13973.2949, "encoder_q-layer.6": 14847.2666, "encoder_q-layer.7": 16165.1113, "encoder_q-layer.8": 18280.2812, "encoder_q-layer.9": 16210.9014, "epoch": 0.6, "inbatch_neg_score": 41.2018, "inbatch_pos_score": 41.75, "learning_rate": 2.116666666666667e-05, "loss": 2.2483, "norm_diff": 0.045, "num_tokens_overlap": 5.5826, "num_tokens_union": 55.1026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27220.2019, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5138, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7343, "sent_len_1": 66.8689, "sent_len_max_0": 18.8313, "sent_len_max_1": 187.9075, "stdk": 0.0461, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 61900 }, { "accuracy": 56.25, "doc_norm": 6.5514, "encoder_q-embeddings": 18066.748, "encoder_q-layer.0": 12538.3281, "encoder_q-layer.1": 12946.2852, "encoder_q-layer.10": 20339.2715, "encoder_q-layer.11": 40407.3867, "encoder_q-layer.2": 14095.2881, "encoder_q-layer.3": 13934.3125, "encoder_q-layer.4": 14866.6689, "encoder_q-layer.5": 14245.5039, "encoder_q-layer.6": 15014.0508, "encoder_q-layer.7": 15847.2695, "encoder_q-layer.8": 17575.4844, "encoder_q-layer.9": 16680.4844, "epoch": 0.61, "inbatch_neg_score": 41.195, "inbatch_pos_score": 41.7188, "learning_rate": 2.111111111111111e-05, "loss": 2.2428, "norm_diff": 0.0417, "num_tokens_overlap": 5.5763, "num_tokens_union": 55.0769, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27321.3923, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5097, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7154, "sent_len_1": 66.854, "sent_len_max_0": 18.88, "sent_len_max_1": 189.765, "stdk": 0.0451, "stdq": 0.039, "stdqueue_k": 0.0, "step": 62000 }, { "accuracy": 57.2266, "doc_norm": 6.5506, "encoder_q-embeddings": 17708.8887, "encoder_q-layer.0": 12350.7002, "encoder_q-layer.1": 12722.2285, "encoder_q-layer.10": 19721.209, "encoder_q-layer.11": 37452.7188, "encoder_q-layer.2": 13936.5479, "encoder_q-layer.3": 14005.8018, "encoder_q-layer.4": 14293.5498, "encoder_q-layer.5": 13815.4053, "encoder_q-layer.6": 14503.3145, "encoder_q-layer.7": 15672.6543, "encoder_q-layer.8": 17137.7676, "encoder_q-layer.9": 16122.8926, "epoch": 0.61, "inbatch_neg_score": 41.1832, "inbatch_pos_score": 41.6875, "learning_rate": 2.1055555555555556e-05, "loss": 2.2404, "norm_diff": 0.0414, "num_tokens_overlap": 5.5792, "num_tokens_union": 54.8796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26729.3522, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5092, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7298, "sent_len_1": 66.5733, "sent_len_max_0": 18.7925, "sent_len_max_1": 186.9525, "stdk": 0.0443, "stdq": 0.039, "stdqueue_k": 0.0, "step": 62100 }, { "accuracy": 56.8359, "doc_norm": 6.5449, "encoder_q-embeddings": 18089.7227, "encoder_q-layer.0": 12797.8965, "encoder_q-layer.1": 13037.2412, "encoder_q-layer.10": 20872.3789, "encoder_q-layer.11": 39414.9336, "encoder_q-layer.2": 13983.2715, "encoder_q-layer.3": 13979.6045, "encoder_q-layer.4": 14498.2158, "encoder_q-layer.5": 13947.1016, "encoder_q-layer.6": 14510.9355, "encoder_q-layer.7": 15568.7627, "encoder_q-layer.8": 17710.3555, "encoder_q-layer.9": 16696.4961, "epoch": 0.61, "inbatch_neg_score": 41.1662, "inbatch_pos_score": 41.6562, "learning_rate": 2.1e-05, "loss": 2.225, "norm_diff": 0.0374, "num_tokens_overlap": 5.5756, "num_tokens_union": 55.1038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27281.6846, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5075, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7259, "sent_len_1": 66.9567, "sent_len_max_0": 18.8375, "sent_len_max_1": 191.4512, "stdk": 0.0438, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 62200 }, { "accuracy": 59.375, "doc_norm": 6.5458, "encoder_q-embeddings": 17761.2227, "encoder_q-layer.0": 12444.8428, "encoder_q-layer.1": 12434.5967, "encoder_q-layer.10": 21927.5039, "encoder_q-layer.11": 42463.2266, "encoder_q-layer.2": 13789.207, "encoder_q-layer.3": 14005.21, "encoder_q-layer.4": 14516.7031, "encoder_q-layer.5": 14040.7529, "encoder_q-layer.6": 14934.7598, "encoder_q-layer.7": 16290.0664, "encoder_q-layer.8": 18260.0234, "encoder_q-layer.9": 16877.5527, "epoch": 0.61, "inbatch_neg_score": 41.1296, "inbatch_pos_score": 41.6562, "learning_rate": 2.0944444444444445e-05, "loss": 2.2538, "norm_diff": 0.0396, "num_tokens_overlap": 5.5699, "num_tokens_union": 55.039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27505.5283, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5062, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7067, "sent_len_1": 66.8193, "sent_len_max_0": 18.7763, "sent_len_max_1": 188.6575, "stdk": 0.0448, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 62300 }, { "accuracy": 55.957, "doc_norm": 6.5478, "encoder_q-embeddings": 17220.9258, "encoder_q-layer.0": 12092.3672, "encoder_q-layer.1": 12602.8076, "encoder_q-layer.10": 20305.4512, "encoder_q-layer.11": 39309.6094, "encoder_q-layer.2": 14020.8506, "encoder_q-layer.3": 14371.8164, "encoder_q-layer.4": 14288.0469, "encoder_q-layer.5": 14031.4775, "encoder_q-layer.6": 14663.4834, "encoder_q-layer.7": 15531.9365, "encoder_q-layer.8": 17740.2754, "encoder_q-layer.9": 16098.9482, "epoch": 0.61, "inbatch_neg_score": 41.1352, "inbatch_pos_score": 41.6562, "learning_rate": 2.088888888888889e-05, "loss": 2.2348, "norm_diff": 0.0416, "num_tokens_overlap": 5.5782, "num_tokens_union": 55.0278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26632.9673, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5062, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.702, "sent_len_1": 66.917, "sent_len_max_0": 18.8325, "sent_len_max_1": 190.6113, "stdk": 0.045, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 62400 }, { "accuracy": 58.5938, "doc_norm": 6.5476, "encoder_q-embeddings": 16651.9922, "encoder_q-layer.0": 11957.127, "encoder_q-layer.1": 12440.9561, "encoder_q-layer.10": 20827.459, "encoder_q-layer.11": 41436.0391, "encoder_q-layer.2": 13612.4521, "encoder_q-layer.3": 13399.1426, "encoder_q-layer.4": 13718.3145, "encoder_q-layer.5": 13861.0371, "encoder_q-layer.6": 14962.7637, "encoder_q-layer.7": 15796.2969, "encoder_q-layer.8": 17383.6035, "encoder_q-layer.9": 15941.7822, "epoch": 0.61, "inbatch_neg_score": 41.144, "inbatch_pos_score": 41.6562, "learning_rate": 2.0833333333333336e-05, "loss": 2.2294, "norm_diff": 0.0393, "num_tokens_overlap": 5.5888, "num_tokens_union": 55.0627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26976.8447, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5082, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7612, "sent_len_1": 66.7895, "sent_len_max_0": 18.8513, "sent_len_max_1": 190.195, "stdk": 0.044, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 62500 }, { "accuracy": 58.8867, "doc_norm": 6.543, "encoder_q-embeddings": 16731.75, "encoder_q-layer.0": 11425.0098, "encoder_q-layer.1": 11985.7051, "encoder_q-layer.10": 21206.8281, "encoder_q-layer.11": 39186.3633, "encoder_q-layer.2": 13236.666, "encoder_q-layer.3": 13341.9336, "encoder_q-layer.4": 13710.2129, "encoder_q-layer.5": 13502.9482, "encoder_q-layer.6": 14481.3154, "encoder_q-layer.7": 15477.2324, "encoder_q-layer.8": 17584.1582, "encoder_q-layer.9": 16156.0039, "epoch": 0.61, "inbatch_neg_score": 41.088, "inbatch_pos_score": 41.5938, "learning_rate": 2.077777777777778e-05, "loss": 2.2296, "norm_diff": 0.0414, "num_tokens_overlap": 5.5809, "num_tokens_union": 54.9626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26217.5034, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5016, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7189, "sent_len_1": 66.6886, "sent_len_max_0": 18.9175, "sent_len_max_1": 189.0625, "stdk": 0.0439, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 62600 }, { "accuracy": 57.2266, "doc_norm": 6.5468, "encoder_q-embeddings": 18171.1777, "encoder_q-layer.0": 12201.3594, "encoder_q-layer.1": 12373.79, "encoder_q-layer.10": 19628.4609, "encoder_q-layer.11": 36849.8789, "encoder_q-layer.2": 13935.1436, "encoder_q-layer.3": 13714.5352, "encoder_q-layer.4": 14350.1328, "encoder_q-layer.5": 14198.0176, "encoder_q-layer.6": 14928.2461, "encoder_q-layer.7": 15416.3574, "encoder_q-layer.8": 16705.6621, "encoder_q-layer.9": 16169.7051, "epoch": 0.61, "inbatch_neg_score": 41.1033, "inbatch_pos_score": 41.625, "learning_rate": 2.0722222222222224e-05, "loss": 2.2592, "norm_diff": 0.0432, "num_tokens_overlap": 5.5733, "num_tokens_union": 54.9352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26210.3855, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5036, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7048, "sent_len_1": 66.6907, "sent_len_max_0": 18.7175, "sent_len_max_1": 188.7962, "stdk": 0.0457, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 62700 }, { "accuracy": 58.7891, "doc_norm": 6.547, "encoder_q-embeddings": 17564.3105, "encoder_q-layer.0": 11980.001, "encoder_q-layer.1": 12415.2363, "encoder_q-layer.10": 20226.7637, "encoder_q-layer.11": 38714.2031, "encoder_q-layer.2": 13361.7275, "encoder_q-layer.3": 13586.8096, "encoder_q-layer.4": 14263.2354, "encoder_q-layer.5": 13809.7158, "encoder_q-layer.6": 14700.1533, "encoder_q-layer.7": 15513.5918, "encoder_q-layer.8": 17400.2363, "encoder_q-layer.9": 15980.2168, "epoch": 0.61, "inbatch_neg_score": 41.095, "inbatch_pos_score": 41.625, "learning_rate": 2.0666666666666666e-05, "loss": 2.2473, "norm_diff": 0.0458, "num_tokens_overlap": 5.5745, "num_tokens_union": 54.8659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26524.7204, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5012, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6902, "sent_len_1": 66.5958, "sent_len_max_0": 18.8438, "sent_len_max_1": 189.4275, "stdk": 0.0445, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 62800 }, { "accuracy": 55.7617, "doc_norm": 6.5389, "encoder_q-embeddings": 17894.9277, "encoder_q-layer.0": 12494.708, "encoder_q-layer.1": 13064.2373, "encoder_q-layer.10": 19311.6133, "encoder_q-layer.11": 37049.9883, "encoder_q-layer.2": 13966.0605, "encoder_q-layer.3": 14015.4648, "encoder_q-layer.4": 14747.0723, "encoder_q-layer.5": 13885.7148, "encoder_q-layer.6": 14703.7139, "encoder_q-layer.7": 16004.3037, "encoder_q-layer.8": 18079.25, "encoder_q-layer.9": 16318.1318, "epoch": 0.61, "inbatch_neg_score": 41.0695, "inbatch_pos_score": 41.5938, "learning_rate": 2.0611111111111112e-05, "loss": 2.2535, "norm_diff": 0.0371, "num_tokens_overlap": 5.5727, "num_tokens_union": 55.048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26726.5602, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5018, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7157, "sent_len_1": 66.8442, "sent_len_max_0": 18.8562, "sent_len_max_1": 189.9512, "stdk": 0.0442, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 62900 }, { "accuracy": 60.2539, "doc_norm": 6.5399, "encoder_q-embeddings": 17417.2285, "encoder_q-layer.0": 12367.0264, "encoder_q-layer.1": 12706.1025, "encoder_q-layer.10": 22609.1484, "encoder_q-layer.11": 40024.1758, "encoder_q-layer.2": 14048.8008, "encoder_q-layer.3": 13695.6133, "encoder_q-layer.4": 14098.4297, "encoder_q-layer.5": 14309.3154, "encoder_q-layer.6": 14793.4785, "encoder_q-layer.7": 16211.4971, "encoder_q-layer.8": 18546.0117, "encoder_q-layer.9": 17164.6016, "epoch": 0.62, "inbatch_neg_score": 41.0575, "inbatch_pos_score": 41.5938, "learning_rate": 2.0555555555555555e-05, "loss": 2.2152, "norm_diff": 0.0397, "num_tokens_overlap": 5.5821, "num_tokens_union": 55.0738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27255.5961, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5003, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7333, "sent_len_1": 66.8389, "sent_len_max_0": 18.82, "sent_len_max_1": 190.4588, "stdk": 0.044, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 63000 }, { "accuracy": 55.3711, "doc_norm": 6.5408, "encoder_q-embeddings": 17183.2188, "encoder_q-layer.0": 12247.5381, "encoder_q-layer.1": 12388.3086, "encoder_q-layer.10": 22179.5742, "encoder_q-layer.11": 41579.4805, "encoder_q-layer.2": 13634.0322, "encoder_q-layer.3": 13405.6992, "encoder_q-layer.4": 14065.2188, "encoder_q-layer.5": 13709.8359, "encoder_q-layer.6": 14509.374, "encoder_q-layer.7": 16382.1387, "encoder_q-layer.8": 17893.9062, "encoder_q-layer.9": 17145.4395, "epoch": 0.62, "inbatch_neg_score": 41.0988, "inbatch_pos_score": 41.5938, "learning_rate": 2.05e-05, "loss": 2.2341, "norm_diff": 0.0399, "num_tokens_overlap": 5.5798, "num_tokens_union": 54.9533, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27159.4526, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5009, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7267, "sent_len_1": 66.6721, "sent_len_max_0": 18.6863, "sent_len_max_1": 190.265, "stdk": 0.0444, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 63100 }, { "accuracy": 59.082, "doc_norm": 6.5426, "encoder_q-embeddings": 16134.5234, "encoder_q-layer.0": 11368.2715, "encoder_q-layer.1": 11933.2334, "encoder_q-layer.10": 20613.332, "encoder_q-layer.11": 38846.5352, "encoder_q-layer.2": 13540.6289, "encoder_q-layer.3": 13544.4043, "encoder_q-layer.4": 14268.4697, "encoder_q-layer.5": 13622.1016, "encoder_q-layer.6": 14391.8584, "encoder_q-layer.7": 15843.0342, "encoder_q-layer.8": 17536.0957, "encoder_q-layer.9": 16911.0605, "epoch": 0.62, "inbatch_neg_score": 41.0476, "inbatch_pos_score": 41.5625, "learning_rate": 2.0444444444444446e-05, "loss": 2.1996, "norm_diff": 0.0424, "num_tokens_overlap": 5.5636, "num_tokens_union": 55.0578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26079.9558, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.5002, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6994, "sent_len_1": 66.8926, "sent_len_max_0": 18.855, "sent_len_max_1": 189.4638, "stdk": 0.0447, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 63200 }, { "accuracy": 57.3242, "doc_norm": 6.5426, "encoder_q-embeddings": 18031.918, "encoder_q-layer.0": 12453.7764, "encoder_q-layer.1": 13530.2109, "encoder_q-layer.10": 19912.5801, "encoder_q-layer.11": 42197.0703, "encoder_q-layer.2": 14502.7695, "encoder_q-layer.3": 14226.1045, "encoder_q-layer.4": 14880.1084, "encoder_q-layer.5": 14629.2324, "encoder_q-layer.6": 16002.7979, "encoder_q-layer.7": 16432.4082, "encoder_q-layer.8": 18192.9375, "encoder_q-layer.9": 16579.127, "epoch": 0.62, "inbatch_neg_score": 41.0493, "inbatch_pos_score": 41.5625, "learning_rate": 2.0388888888888892e-05, "loss": 2.2156, "norm_diff": 0.0406, "num_tokens_overlap": 5.5759, "num_tokens_union": 54.9838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27990.1522, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5021, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7323, "sent_len_1": 66.75, "sent_len_max_0": 18.8287, "sent_len_max_1": 191.1037, "stdk": 0.0452, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 63300 }, { "accuracy": 56.9336, "doc_norm": 6.5421, "encoder_q-embeddings": 16554.3574, "encoder_q-layer.0": 12009.3779, "encoder_q-layer.1": 12414.3018, "encoder_q-layer.10": 23359.9375, "encoder_q-layer.11": 41110.9883, "encoder_q-layer.2": 13791.7002, "encoder_q-layer.3": 13548.6533, "encoder_q-layer.4": 13774.5059, "encoder_q-layer.5": 13649.5625, "encoder_q-layer.6": 14851.0068, "encoder_q-layer.7": 16178.8184, "encoder_q-layer.8": 18455.5254, "encoder_q-layer.9": 17664.5156, "epoch": 0.62, "inbatch_neg_score": 41.0573, "inbatch_pos_score": 41.5625, "learning_rate": 2.0333333333333334e-05, "loss": 2.2706, "norm_diff": 0.0412, "num_tokens_overlap": 5.5633, "num_tokens_union": 54.8374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27587.6537, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.5009, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6886, "sent_len_1": 66.5165, "sent_len_max_0": 18.8625, "sent_len_max_1": 187.4863, "stdk": 0.0441, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 63400 }, { "accuracy": 55.0781, "doc_norm": 6.5399, "encoder_q-embeddings": 17378.0332, "encoder_q-layer.0": 12220.0645, "encoder_q-layer.1": 12713.4443, "encoder_q-layer.10": 19434.8145, "encoder_q-layer.11": 37426.7031, "encoder_q-layer.2": 13761.4219, "encoder_q-layer.3": 13407.7578, "encoder_q-layer.4": 13889.9297, "encoder_q-layer.5": 13805.9229, "encoder_q-layer.6": 14789.3467, "encoder_q-layer.7": 15532.1729, "encoder_q-layer.8": 17016.0215, "encoder_q-layer.9": 15940.1904, "epoch": 0.62, "inbatch_neg_score": 41.0502, "inbatch_pos_score": 41.5625, "learning_rate": 2.027777777777778e-05, "loss": 2.2392, "norm_diff": 0.0414, "num_tokens_overlap": 5.5739, "num_tokens_union": 54.9175, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26182.0386, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4984, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7124, "sent_len_1": 66.62, "sent_len_max_0": 18.875, "sent_len_max_1": 188.2725, "stdk": 0.0445, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 63500 }, { "accuracy": 57.9102, "doc_norm": 6.5443, "encoder_q-embeddings": 17280.4551, "encoder_q-layer.0": 12314.4893, "encoder_q-layer.1": 12698.2705, "encoder_q-layer.10": 20115.2285, "encoder_q-layer.11": 38063.6211, "encoder_q-layer.2": 13538.2266, "encoder_q-layer.3": 13737.1035, "encoder_q-layer.4": 14075.9014, "encoder_q-layer.5": 13882.7617, "encoder_q-layer.6": 15313.0264, "encoder_q-layer.7": 17494.9375, "encoder_q-layer.8": 18441.0176, "encoder_q-layer.9": 16924.2012, "epoch": 0.62, "inbatch_neg_score": 41.055, "inbatch_pos_score": 41.5625, "learning_rate": 2.0222222222222222e-05, "loss": 2.2201, "norm_diff": 0.0447, "num_tokens_overlap": 5.5768, "num_tokens_union": 55.0506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26872.2313, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4996, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7089, "sent_len_1": 66.8265, "sent_len_max_0": 18.8525, "sent_len_max_1": 188.3388, "stdk": 0.0454, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 63600 }, { "accuracy": 58.3984, "doc_norm": 6.5397, "encoder_q-embeddings": 16547.7871, "encoder_q-layer.0": 12177.1064, "encoder_q-layer.1": 12491.5811, "encoder_q-layer.10": 20940.2246, "encoder_q-layer.11": 40856.0781, "encoder_q-layer.2": 13593.9375, "encoder_q-layer.3": 13838.1172, "encoder_q-layer.4": 14274.1582, "encoder_q-layer.5": 14453.5508, "encoder_q-layer.6": 15454.2871, "encoder_q-layer.7": 16414.7852, "encoder_q-layer.8": 18450.1875, "encoder_q-layer.9": 16296.2734, "epoch": 0.62, "inbatch_neg_score": 41.0209, "inbatch_pos_score": 41.5625, "learning_rate": 2.0166666666666668e-05, "loss": 2.2454, "norm_diff": 0.0409, "num_tokens_overlap": 5.567, "num_tokens_union": 55.0452, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27103.3703, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4988, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7196, "sent_len_1": 66.7937, "sent_len_max_0": 18.825, "sent_len_max_1": 189.2375, "stdk": 0.0447, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 63700 }, { "accuracy": 55.2734, "doc_norm": 6.5379, "encoder_q-embeddings": 17298.2598, "encoder_q-layer.0": 12336.2168, "encoder_q-layer.1": 12402.3584, "encoder_q-layer.10": 19252.293, "encoder_q-layer.11": 37523.2695, "encoder_q-layer.2": 13691.8965, "encoder_q-layer.3": 13613.9854, "encoder_q-layer.4": 14180.4453, "encoder_q-layer.5": 14206.8496, "encoder_q-layer.6": 15133.4678, "encoder_q-layer.7": 16005.2705, "encoder_q-layer.8": 17358.9863, "encoder_q-layer.9": 16149.2871, "epoch": 0.62, "inbatch_neg_score": 41.014, "inbatch_pos_score": 41.5312, "learning_rate": 2.011111111111111e-05, "loss": 2.2128, "norm_diff": 0.0437, "num_tokens_overlap": 5.5768, "num_tokens_union": 54.9563, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26330.9647, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4942, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7072, "sent_len_1": 66.7276, "sent_len_max_0": 18.7437, "sent_len_max_1": 189.075, "stdk": 0.0449, "stdq": 0.0373, "stdqueue_k": 0.0, "step": 63800 }, { "accuracy": 54.6875, "doc_norm": 6.5379, "encoder_q-embeddings": 17609.3984, "encoder_q-layer.0": 12545.0918, "encoder_q-layer.1": 12877.7871, "encoder_q-layer.10": 23453.3672, "encoder_q-layer.11": 41967.7109, "encoder_q-layer.2": 13983.1699, "encoder_q-layer.3": 13979.7012, "encoder_q-layer.4": 14132.0361, "encoder_q-layer.5": 14326.7803, "encoder_q-layer.6": 15241.6289, "encoder_q-layer.7": 16416.8516, "encoder_q-layer.8": 18868.3789, "encoder_q-layer.9": 17842.2598, "epoch": 0.62, "inbatch_neg_score": 40.9973, "inbatch_pos_score": 41.5, "learning_rate": 2.0055555555555556e-05, "loss": 2.2241, "norm_diff": 0.0412, "num_tokens_overlap": 5.5832, "num_tokens_union": 55.0384, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27820.4347, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4968, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7127, "sent_len_1": 66.8171, "sent_len_max_0": 18.91, "sent_len_max_1": 188.9187, "stdk": 0.0449, "stdq": 0.039, "stdqueue_k": 0.0, "step": 63900 }, { "accuracy": 56.0547, "doc_norm": 6.5367, "encoder_q-embeddings": 17246.791, "encoder_q-layer.0": 12554.9805, "encoder_q-layer.1": 12707.0635, "encoder_q-layer.10": 19875.707, "encoder_q-layer.11": 39636.1914, "encoder_q-layer.2": 13944.3447, "encoder_q-layer.3": 14429.9932, "encoder_q-layer.4": 14467.3154, "encoder_q-layer.5": 14109.9932, "encoder_q-layer.6": 15089.2598, "encoder_q-layer.7": 16241.0293, "encoder_q-layer.8": 18834.5137, "encoder_q-layer.9": 16373.4717, "epoch": 0.62, "inbatch_neg_score": 40.9628, "inbatch_pos_score": 41.5, "learning_rate": 2e-05, "loss": 2.2523, "norm_diff": 0.044, "num_tokens_overlap": 5.5725, "num_tokens_union": 54.9269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27164.0411, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4927, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6953, "sent_len_1": 66.6869, "sent_len_max_0": 18.9125, "sent_len_max_1": 188.9013, "stdk": 0.0445, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 64000 }, { "accuracy": 52.3438, "doc_norm": 6.5356, "encoder_q-embeddings": 18446.8574, "encoder_q-layer.0": 12950.2959, "encoder_q-layer.1": 13240.1562, "encoder_q-layer.10": 24835.8594, "encoder_q-layer.11": 45475.0547, "encoder_q-layer.2": 14407.0156, "encoder_q-layer.3": 14644.3242, "encoder_q-layer.4": 15080.0947, "encoder_q-layer.5": 15169.668, "encoder_q-layer.6": 16028.6064, "encoder_q-layer.7": 17461.8945, "encoder_q-layer.8": 20020.6074, "encoder_q-layer.9": 18429.9102, "epoch": 0.63, "inbatch_neg_score": 40.9915, "inbatch_pos_score": 41.5, "learning_rate": 1.9944444444444447e-05, "loss": 2.2356, "norm_diff": 0.0418, "num_tokens_overlap": 5.5735, "num_tokens_union": 54.9826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29353.3032, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4938, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6999, "sent_len_1": 66.7033, "sent_len_max_0": 18.8238, "sent_len_max_1": 189.375, "stdk": 0.0441, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 64100 }, { "accuracy": 58.4961, "doc_norm": 6.538, "encoder_q-embeddings": 17353.1465, "encoder_q-layer.0": 12123.6084, "encoder_q-layer.1": 12828.0322, "encoder_q-layer.10": 20938.998, "encoder_q-layer.11": 38386.6289, "encoder_q-layer.2": 13875.0742, "encoder_q-layer.3": 13634.9443, "encoder_q-layer.4": 14415.7822, "encoder_q-layer.5": 14210.5234, "encoder_q-layer.6": 14850.1719, "encoder_q-layer.7": 15622.9746, "encoder_q-layer.8": 18021.4707, "encoder_q-layer.9": 16532.1055, "epoch": 0.63, "inbatch_neg_score": 40.9629, "inbatch_pos_score": 41.5, "learning_rate": 1.988888888888889e-05, "loss": 2.2497, "norm_diff": 0.0454, "num_tokens_overlap": 5.5645, "num_tokens_union": 54.8593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26788.681, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4926, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7061, "sent_len_1": 66.5066, "sent_len_max_0": 18.8212, "sent_len_max_1": 189.2512, "stdk": 0.0457, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 64200 }, { "accuracy": 58.4961, "doc_norm": 6.5386, "encoder_q-embeddings": 18030.8652, "encoder_q-layer.0": 12442.8418, "encoder_q-layer.1": 12820.7344, "encoder_q-layer.10": 21326.5332, "encoder_q-layer.11": 39250.8789, "encoder_q-layer.2": 14107.0117, "encoder_q-layer.3": 13931.8232, "encoder_q-layer.4": 14249.3457, "encoder_q-layer.5": 14089.1787, "encoder_q-layer.6": 15030.0938, "encoder_q-layer.7": 16316.0, "encoder_q-layer.8": 17714.9453, "encoder_q-layer.9": 16405.9434, "epoch": 0.63, "inbatch_neg_score": 40.9738, "inbatch_pos_score": 41.5, "learning_rate": 1.9833333333333335e-05, "loss": 2.2132, "norm_diff": 0.0432, "num_tokens_overlap": 5.5929, "num_tokens_union": 54.9735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27195.5885, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4954, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7265, "sent_len_1": 66.7567, "sent_len_max_0": 18.8263, "sent_len_max_1": 188.8212, "stdk": 0.0461, "stdq": 0.0395, "stdqueue_k": 0.0, "step": 64300 }, { "accuracy": 61.1328, "doc_norm": 6.534, "encoder_q-embeddings": 17406.416, "encoder_q-layer.0": 12500.6504, "encoder_q-layer.1": 12569.6865, "encoder_q-layer.10": 25829.8828, "encoder_q-layer.11": 41886.4531, "encoder_q-layer.2": 13645.9092, "encoder_q-layer.3": 13881.7617, "encoder_q-layer.4": 14394.9004, "encoder_q-layer.5": 14552.1006, "encoder_q-layer.6": 15836.8867, "encoder_q-layer.7": 18168.5312, "encoder_q-layer.8": 19385.5449, "encoder_q-layer.9": 17834.791, "epoch": 0.63, "inbatch_neg_score": 40.9584, "inbatch_pos_score": 41.5, "learning_rate": 1.9777777777777778e-05, "loss": 2.208, "norm_diff": 0.042, "num_tokens_overlap": 5.5822, "num_tokens_union": 55.0951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28628.759, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.492, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7147, "sent_len_1": 66.9664, "sent_len_max_0": 18.8575, "sent_len_max_1": 190.4062, "stdk": 0.0448, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 64400 }, { "accuracy": 56.9336, "doc_norm": 6.5328, "encoder_q-embeddings": 18085.7363, "encoder_q-layer.0": 12446.0508, "encoder_q-layer.1": 12931.0273, "encoder_q-layer.10": 21113.0078, "encoder_q-layer.11": 39704.293, "encoder_q-layer.2": 13820.2021, "encoder_q-layer.3": 14429.9609, "encoder_q-layer.4": 15041.1348, "encoder_q-layer.5": 15198.8213, "encoder_q-layer.6": 15987.2061, "encoder_q-layer.7": 16096.0293, "encoder_q-layer.8": 18245.8574, "encoder_q-layer.9": 16957.3457, "epoch": 0.63, "inbatch_neg_score": 40.978, "inbatch_pos_score": 41.5, "learning_rate": 1.9722222222222224e-05, "loss": 2.246, "norm_diff": 0.0405, "num_tokens_overlap": 5.5916, "num_tokens_union": 54.9574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27584.7541, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4923, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7309, "sent_len_1": 66.7564, "sent_len_max_0": 18.9213, "sent_len_max_1": 188.6275, "stdk": 0.0449, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 64500 }, { "accuracy": 56.4453, "doc_norm": 6.5334, "encoder_q-embeddings": 17675.8613, "encoder_q-layer.0": 12331.8916, "encoder_q-layer.1": 12890.3711, "encoder_q-layer.10": 19149.3438, "encoder_q-layer.11": 36769.0195, "encoder_q-layer.2": 13879.5703, "encoder_q-layer.3": 13786.9609, "encoder_q-layer.4": 14294.9658, "encoder_q-layer.5": 13704.4199, "encoder_q-layer.6": 14614.9072, "encoder_q-layer.7": 15718.9092, "encoder_q-layer.8": 17200.877, "encoder_q-layer.9": 16264.5283, "epoch": 0.63, "inbatch_neg_score": 40.9389, "inbatch_pos_score": 41.4375, "learning_rate": 1.9666666666666666e-05, "loss": 2.2417, "norm_diff": 0.0444, "num_tokens_overlap": 5.5619, "num_tokens_union": 54.9698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26226.277, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.489, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6932, "sent_len_1": 66.7428, "sent_len_max_0": 18.785, "sent_len_max_1": 189.8825, "stdk": 0.0453, "stdq": 0.0377, "stdqueue_k": 0.0, "step": 64600 }, { "accuracy": 59.375, "doc_norm": 6.5306, "encoder_q-embeddings": 16902.8711, "encoder_q-layer.0": 11615.3359, "encoder_q-layer.1": 12103.1963, "encoder_q-layer.10": 21230.0078, "encoder_q-layer.11": 42053.5703, "encoder_q-layer.2": 13265.0244, "encoder_q-layer.3": 13391.4551, "encoder_q-layer.4": 14000.8604, "encoder_q-layer.5": 13645.5371, "encoder_q-layer.6": 14297.332, "encoder_q-layer.7": 14906.5068, "encoder_q-layer.8": 17220.5117, "encoder_q-layer.9": 15703.5918, "epoch": 0.63, "inbatch_neg_score": 40.9155, "inbatch_pos_score": 41.4375, "learning_rate": 1.9611111111111115e-05, "loss": 2.1839, "norm_diff": 0.0423, "num_tokens_overlap": 5.5796, "num_tokens_union": 55.161, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26709.8011, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4882, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7155, "sent_len_1": 67.022, "sent_len_max_0": 18.8712, "sent_len_max_1": 190.8725, "stdk": 0.0448, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 64700 }, { "accuracy": 56.3477, "doc_norm": 6.5295, "encoder_q-embeddings": 17509.7715, "encoder_q-layer.0": 12367.5215, "encoder_q-layer.1": 12965.123, "encoder_q-layer.10": 20424.1055, "encoder_q-layer.11": 38123.5508, "encoder_q-layer.2": 14063.2061, "encoder_q-layer.3": 13946.2207, "encoder_q-layer.4": 14772.2715, "encoder_q-layer.5": 14130.0547, "encoder_q-layer.6": 15620.0312, "encoder_q-layer.7": 17130.7051, "encoder_q-layer.8": 18643.6797, "encoder_q-layer.9": 16455.7617, "epoch": 0.63, "inbatch_neg_score": 40.8923, "inbatch_pos_score": 41.4062, "learning_rate": 1.9555555555555557e-05, "loss": 2.2391, "norm_diff": 0.0428, "num_tokens_overlap": 5.5833, "num_tokens_union": 54.9343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27090.5052, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4867, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7283, "sent_len_1": 66.6384, "sent_len_max_0": 18.8062, "sent_len_max_1": 187.8787, "stdk": 0.045, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 64800 }, { "accuracy": 58.3008, "doc_norm": 6.5258, "encoder_q-embeddings": 17149.1953, "encoder_q-layer.0": 12024.7627, "encoder_q-layer.1": 12254.7256, "encoder_q-layer.10": 23454.666, "encoder_q-layer.11": 41674.6602, "encoder_q-layer.2": 13323.2852, "encoder_q-layer.3": 13329.4736, "encoder_q-layer.4": 13789.4561, "encoder_q-layer.5": 13795.0732, "encoder_q-layer.6": 15613.0771, "encoder_q-layer.7": 17278.7168, "encoder_q-layer.8": 18167.4688, "encoder_q-layer.9": 16419.8613, "epoch": 0.63, "inbatch_neg_score": 40.8598, "inbatch_pos_score": 41.375, "learning_rate": 1.9500000000000003e-05, "loss": 2.1914, "norm_diff": 0.0422, "num_tokens_overlap": 5.5592, "num_tokens_union": 54.9815, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27401.78, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4836, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6924, "sent_len_1": 66.7664, "sent_len_max_0": 18.8513, "sent_len_max_1": 191.1513, "stdk": 0.045, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 64900 }, { "accuracy": 55.0781, "doc_norm": 6.5231, "encoder_q-embeddings": 18682.4648, "encoder_q-layer.0": 12610.0908, "encoder_q-layer.1": 13258.4238, "encoder_q-layer.10": 20762.957, "encoder_q-layer.11": 42824.625, "encoder_q-layer.2": 14372.0293, "encoder_q-layer.3": 14375.0684, "encoder_q-layer.4": 14572.5977, "encoder_q-layer.5": 14531.5957, "encoder_q-layer.6": 16011.6719, "encoder_q-layer.7": 16904.8477, "encoder_q-layer.8": 18592.918, "encoder_q-layer.9": 17570.8203, "epoch": 0.63, "inbatch_neg_score": 40.8676, "inbatch_pos_score": 41.375, "learning_rate": 1.9444444444444445e-05, "loss": 2.2347, "norm_diff": 0.0398, "num_tokens_overlap": 5.5732, "num_tokens_union": 55.1226, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28437.8991, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4833, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7209, "sent_len_1": 66.8962, "sent_len_max_0": 18.7925, "sent_len_max_1": 190.3262, "stdk": 0.0427, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 65000 }, { "accuracy": 60.9375, "doc_norm": 6.527, "encoder_q-embeddings": 16748.5371, "encoder_q-layer.0": 12214.5938, "encoder_q-layer.1": 12830.7471, "encoder_q-layer.10": 18982.5469, "encoder_q-layer.11": 38230.3359, "encoder_q-layer.2": 13479.9375, "encoder_q-layer.3": 13433.0898, "encoder_q-layer.4": 14230.8975, "encoder_q-layer.5": 13317.2031, "encoder_q-layer.6": 14113.2178, "encoder_q-layer.7": 14931.9805, "encoder_q-layer.8": 16388.4863, "encoder_q-layer.9": 15385.7227, "epoch": 0.64, "inbatch_neg_score": 40.8528, "inbatch_pos_score": 41.375, "learning_rate": 1.938888888888889e-05, "loss": 2.2127, "norm_diff": 0.0415, "num_tokens_overlap": 5.5737, "num_tokens_union": 55.0705, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25641.1425, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4855, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6852, "sent_len_1": 66.8888, "sent_len_max_0": 18.8463, "sent_len_max_1": 188.705, "stdk": 0.0453, "stdq": 0.039, "stdqueue_k": 0.0, "step": 65100 }, { "accuracy": 58.2031, "doc_norm": 6.5298, "encoder_q-embeddings": 17910.0156, "encoder_q-layer.0": 12281.6191, "encoder_q-layer.1": 13013.084, "encoder_q-layer.10": 20496.3711, "encoder_q-layer.11": 41589.3359, "encoder_q-layer.2": 14136.502, "encoder_q-layer.3": 14198.8994, "encoder_q-layer.4": 14662.6338, "encoder_q-layer.5": 15156.7881, "encoder_q-layer.6": 15213.7451, "encoder_q-layer.7": 15656.5684, "encoder_q-layer.8": 17945.918, "encoder_q-layer.9": 16503.252, "epoch": 0.64, "inbatch_neg_score": 40.8615, "inbatch_pos_score": 41.375, "learning_rate": 1.9333333333333333e-05, "loss": 2.2424, "norm_diff": 0.0446, "num_tokens_overlap": 5.5857, "num_tokens_union": 55.1806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27705.4028, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4852, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7383, "sent_len_1": 67.0746, "sent_len_max_0": 18.815, "sent_len_max_1": 190.7025, "stdk": 0.0453, "stdq": 0.039, "stdqueue_k": 0.0, "step": 65200 }, { "accuracy": 55.5664, "doc_norm": 6.5252, "encoder_q-embeddings": 18765.1816, "encoder_q-layer.0": 13377.6299, "encoder_q-layer.1": 13522.5117, "encoder_q-layer.10": 22841.6289, "encoder_q-layer.11": 44112.7305, "encoder_q-layer.2": 14371.1602, "encoder_q-layer.3": 14291.9219, "encoder_q-layer.4": 14410.2695, "encoder_q-layer.5": 14011.1064, "encoder_q-layer.6": 14983.1055, "encoder_q-layer.7": 15725.1182, "encoder_q-layer.8": 17831.1738, "encoder_q-layer.9": 17481.1699, "epoch": 0.64, "inbatch_neg_score": 40.8143, "inbatch_pos_score": 41.3438, "learning_rate": 1.927777777777778e-05, "loss": 2.2373, "norm_diff": 0.0431, "num_tokens_overlap": 5.5783, "num_tokens_union": 54.9754, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29018.4362, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4821, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7099, "sent_len_1": 66.7669, "sent_len_max_0": 18.9337, "sent_len_max_1": 190.65, "stdk": 0.0449, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 65300 }, { "accuracy": 56.0547, "doc_norm": 6.5264, "encoder_q-embeddings": 17952.9609, "encoder_q-layer.0": 13087.293, "encoder_q-layer.1": 13341.5596, "encoder_q-layer.10": 20198.6973, "encoder_q-layer.11": 37252.9062, "encoder_q-layer.2": 14467.7305, "encoder_q-layer.3": 15122.4883, "encoder_q-layer.4": 15221.8271, "encoder_q-layer.5": 14861.9746, "encoder_q-layer.6": 15772.5762, "encoder_q-layer.7": 16412.9941, "encoder_q-layer.8": 18606.8398, "encoder_q-layer.9": 17133.4902, "epoch": 0.64, "inbatch_neg_score": 40.8065, "inbatch_pos_score": 41.3125, "learning_rate": 1.922222222222222e-05, "loss": 2.2526, "norm_diff": 0.0435, "num_tokens_overlap": 5.5699, "num_tokens_union": 54.9976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27488.5472, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4829, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7288, "sent_len_1": 66.7313, "sent_len_max_0": 18.9475, "sent_len_max_1": 188.605, "stdk": 0.0462, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 65400 }, { "accuracy": 57.8125, "doc_norm": 6.5226, "encoder_q-embeddings": 16658.9805, "encoder_q-layer.0": 11979.8867, "encoder_q-layer.1": 12508.5127, "encoder_q-layer.10": 21356.4082, "encoder_q-layer.11": 39540.9766, "encoder_q-layer.2": 13625.8662, "encoder_q-layer.3": 13783.3369, "encoder_q-layer.4": 14262.1455, "encoder_q-layer.5": 14130.3262, "encoder_q-layer.6": 15321.583, "encoder_q-layer.7": 17100.3906, "encoder_q-layer.8": 18204.2402, "encoder_q-layer.9": 16852.7773, "epoch": 0.64, "inbatch_neg_score": 40.8275, "inbatch_pos_score": 41.3438, "learning_rate": 1.9166666666666667e-05, "loss": 2.2384, "norm_diff": 0.042, "num_tokens_overlap": 5.5736, "num_tokens_union": 55.0706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26906.3902, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4807, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7136, "sent_len_1": 66.8483, "sent_len_max_0": 18.7725, "sent_len_max_1": 187.2775, "stdk": 0.0431, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 65500 }, { "accuracy": 58.0078, "doc_norm": 6.5258, "encoder_q-embeddings": 17353.7891, "encoder_q-layer.0": 12201.3867, "encoder_q-layer.1": 13026.8301, "encoder_q-layer.10": 22862.1641, "encoder_q-layer.11": 39987.8047, "encoder_q-layer.2": 14003.2305, "encoder_q-layer.3": 13806.915, "encoder_q-layer.4": 14530.873, "encoder_q-layer.5": 14817.5225, "encoder_q-layer.6": 15072.6191, "encoder_q-layer.7": 17248.5859, "encoder_q-layer.8": 17633.127, "encoder_q-layer.9": 16677.8984, "epoch": 0.64, "inbatch_neg_score": 40.8459, "inbatch_pos_score": 41.375, "learning_rate": 1.9111111111111113e-05, "loss": 2.1857, "norm_diff": 0.0423, "num_tokens_overlap": 5.5828, "num_tokens_union": 54.9914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27074.4278, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4836, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7192, "sent_len_1": 66.7316, "sent_len_max_0": 18.92, "sent_len_max_1": 188.495, "stdk": 0.0455, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 65600 }, { "accuracy": 55.6641, "doc_norm": 6.5236, "encoder_q-embeddings": 17624.748, "encoder_q-layer.0": 12286.71, "encoder_q-layer.1": 12827.3125, "encoder_q-layer.10": 21005.252, "encoder_q-layer.11": 37489.1953, "encoder_q-layer.2": 14005.2363, "encoder_q-layer.3": 13914.7129, "encoder_q-layer.4": 14134.8623, "encoder_q-layer.5": 14053.1904, "encoder_q-layer.6": 14721.7207, "encoder_q-layer.7": 15962.8525, "encoder_q-layer.8": 17786.0957, "encoder_q-layer.9": 16692.5156, "epoch": 0.64, "inbatch_neg_score": 40.8356, "inbatch_pos_score": 41.3438, "learning_rate": 1.905555555555556e-05, "loss": 2.2381, "norm_diff": 0.0416, "num_tokens_overlap": 5.5833, "num_tokens_union": 54.9882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26664.8498, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4821, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7282, "sent_len_1": 66.7794, "sent_len_max_0": 18.7688, "sent_len_max_1": 192.2225, "stdk": 0.0457, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 65700 }, { "accuracy": 58.0078, "doc_norm": 6.5224, "encoder_q-embeddings": 16765.1621, "encoder_q-layer.0": 12278.2139, "encoder_q-layer.1": 12669.71, "encoder_q-layer.10": 19027.2305, "encoder_q-layer.11": 38122.6367, "encoder_q-layer.2": 13611.1367, "encoder_q-layer.3": 13745.4355, "encoder_q-layer.4": 14706.6016, "encoder_q-layer.5": 13966.5439, "encoder_q-layer.6": 14907.3574, "encoder_q-layer.7": 16312.6748, "encoder_q-layer.8": 18073.1562, "encoder_q-layer.9": 16582.2598, "epoch": 0.64, "inbatch_neg_score": 40.7868, "inbatch_pos_score": 41.3125, "learning_rate": 1.9e-05, "loss": 2.2189, "norm_diff": 0.0434, "num_tokens_overlap": 5.5749, "num_tokens_union": 55.0374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26412.5904, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.479, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7303, "sent_len_1": 66.7722, "sent_len_max_0": 18.8762, "sent_len_max_1": 190.8562, "stdk": 0.0446, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 65800 }, { "accuracy": 55.2734, "doc_norm": 6.5216, "encoder_q-embeddings": 18165.0762, "encoder_q-layer.0": 12685.3301, "encoder_q-layer.1": 13184.1553, "encoder_q-layer.10": 22874.0898, "encoder_q-layer.11": 40454.7969, "encoder_q-layer.2": 14556.0469, "encoder_q-layer.3": 14204.1152, "encoder_q-layer.4": 14668.2842, "encoder_q-layer.5": 14965.1602, "encoder_q-layer.6": 15819.4014, "encoder_q-layer.7": 16764.2598, "encoder_q-layer.8": 18660.8633, "encoder_q-layer.9": 18037.2188, "epoch": 0.64, "inbatch_neg_score": 40.7916, "inbatch_pos_score": 41.2812, "learning_rate": 1.8944444444444447e-05, "loss": 2.2466, "norm_diff": 0.0445, "num_tokens_overlap": 5.5702, "num_tokens_union": 55.0317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28047.0174, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4771, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.723, "sent_len_1": 66.7462, "sent_len_max_0": 18.815, "sent_len_max_1": 187.56, "stdk": 0.0456, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 65900 }, { "accuracy": 60.5469, "doc_norm": 6.5226, "encoder_q-embeddings": 16543.2715, "encoder_q-layer.0": 11891.0371, "encoder_q-layer.1": 12448.5596, "encoder_q-layer.10": 19982.2324, "encoder_q-layer.11": 41761.1719, "encoder_q-layer.2": 13644.2871, "encoder_q-layer.3": 13468.3408, "encoder_q-layer.4": 14360.541, "encoder_q-layer.5": 14308.7988, "encoder_q-layer.6": 15448.7188, "encoder_q-layer.7": 16300.0723, "encoder_q-layer.8": 18313.0137, "encoder_q-layer.9": 17016.8574, "epoch": 0.64, "inbatch_neg_score": 40.7701, "inbatch_pos_score": 41.2812, "learning_rate": 1.888888888888889e-05, "loss": 2.2522, "norm_diff": 0.0437, "num_tokens_overlap": 5.5914, "num_tokens_union": 55.0748, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26951.1874, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4788, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7413, "sent_len_1": 66.8817, "sent_len_max_0": 18.8412, "sent_len_max_1": 190.005, "stdk": 0.0442, "stdq": 0.039, "stdqueue_k": 0.0, "step": 66000 }, { "accuracy": 56.6406, "doc_norm": 6.5176, "encoder_q-embeddings": 17788.9004, "encoder_q-layer.0": 12659.5439, "encoder_q-layer.1": 13449.4648, "encoder_q-layer.10": 21966.6602, "encoder_q-layer.11": 40545.8789, "encoder_q-layer.2": 14357.4629, "encoder_q-layer.3": 14481.21, "encoder_q-layer.4": 14427.0352, "encoder_q-layer.5": 14294.6514, "encoder_q-layer.6": 15051.8486, "encoder_q-layer.7": 16342.9443, "encoder_q-layer.8": 18313.6699, "encoder_q-layer.9": 17256.293, "epoch": 0.65, "inbatch_neg_score": 40.7707, "inbatch_pos_score": 41.2812, "learning_rate": 1.8833333333333335e-05, "loss": 2.2202, "norm_diff": 0.0401, "num_tokens_overlap": 5.5838, "num_tokens_union": 54.9208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27895.7931, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4774, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7374, "sent_len_1": 66.6657, "sent_len_max_0": 18.8687, "sent_len_max_1": 189.94, "stdk": 0.0435, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 66100 }, { "accuracy": 57.6172, "doc_norm": 6.5248, "encoder_q-embeddings": 17684.8613, "encoder_q-layer.0": 12437.6836, "encoder_q-layer.1": 13119.4805, "encoder_q-layer.10": 21263.0254, "encoder_q-layer.11": 41445.4219, "encoder_q-layer.2": 14348.0186, "encoder_q-layer.3": 14229.3359, "encoder_q-layer.4": 14299.0654, "encoder_q-layer.5": 14228.7119, "encoder_q-layer.6": 15195.999, "encoder_q-layer.7": 16381.9141, "encoder_q-layer.8": 18565.6582, "encoder_q-layer.9": 16980.834, "epoch": 0.65, "inbatch_neg_score": 40.7471, "inbatch_pos_score": 41.2812, "learning_rate": 1.8777777777777777e-05, "loss": 2.2156, "norm_diff": 0.0471, "num_tokens_overlap": 5.5696, "num_tokens_union": 55.0685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27755.2664, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4778, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7147, "sent_len_1": 66.8827, "sent_len_max_0": 18.8487, "sent_len_max_1": 189.9487, "stdk": 0.0473, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 66200 }, { "accuracy": 57.4219, "doc_norm": 6.5191, "encoder_q-embeddings": 16359.5039, "encoder_q-layer.0": 11845.4199, "encoder_q-layer.1": 12327.5547, "encoder_q-layer.10": 20368.377, "encoder_q-layer.11": 40173.1172, "encoder_q-layer.2": 13271.0088, "encoder_q-layer.3": 13132.2148, "encoder_q-layer.4": 13817.2783, "encoder_q-layer.5": 13755.7773, "encoder_q-layer.6": 14642.9424, "encoder_q-layer.7": 15452.6211, "encoder_q-layer.8": 17522.1094, "encoder_q-layer.9": 15908.8564, "epoch": 0.65, "inbatch_neg_score": 40.7244, "inbatch_pos_score": 41.25, "learning_rate": 1.8722222222222223e-05, "loss": 2.216, "norm_diff": 0.0466, "num_tokens_overlap": 5.5755, "num_tokens_union": 55.0883, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26488.1419, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4725, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7115, "sent_len_1": 66.9354, "sent_len_max_0": 18.98, "sent_len_max_1": 190.7337, "stdk": 0.0455, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 66300 }, { "accuracy": 56.6406, "doc_norm": 6.5168, "encoder_q-embeddings": 16620.8125, "encoder_q-layer.0": 12146.9863, "encoder_q-layer.1": 12163.4756, "encoder_q-layer.10": 19921.5684, "encoder_q-layer.11": 39952.4219, "encoder_q-layer.2": 13162.4414, "encoder_q-layer.3": 13358.8965, "encoder_q-layer.4": 13756.4072, "encoder_q-layer.5": 13442.7451, "encoder_q-layer.6": 14235.834, "encoder_q-layer.7": 15320.9238, "encoder_q-layer.8": 18214.2246, "encoder_q-layer.9": 15728.0488, "epoch": 0.65, "inbatch_neg_score": 40.734, "inbatch_pos_score": 41.25, "learning_rate": 1.866666666666667e-05, "loss": 2.2332, "norm_diff": 0.0439, "num_tokens_overlap": 5.5901, "num_tokens_union": 55.0032, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26417.5207, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4729, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7248, "sent_len_1": 66.7407, "sent_len_max_0": 18.8925, "sent_len_max_1": 189.4462, "stdk": 0.045, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 66400 }, { "accuracy": 57.7148, "doc_norm": 6.512, "encoder_q-embeddings": 17520.2559, "encoder_q-layer.0": 12379.2363, "encoder_q-layer.1": 12832.1377, "encoder_q-layer.10": 20043.2676, "encoder_q-layer.11": 38065.0078, "encoder_q-layer.2": 14096.1016, "encoder_q-layer.3": 13868.8652, "encoder_q-layer.4": 14505.1211, "encoder_q-layer.5": 14299.9443, "encoder_q-layer.6": 15155.4043, "encoder_q-layer.7": 15747.4551, "encoder_q-layer.8": 18050.4336, "encoder_q-layer.9": 16243.9355, "epoch": 0.65, "inbatch_neg_score": 40.6976, "inbatch_pos_score": 41.2188, "learning_rate": 1.861111111111111e-05, "loss": 2.2397, "norm_diff": 0.0405, "num_tokens_overlap": 5.5736, "num_tokens_union": 54.942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26605.6909, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4716, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7015, "sent_len_1": 66.7164, "sent_len_max_0": 18.8925, "sent_len_max_1": 191.9025, "stdk": 0.044, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 66500 }, { "accuracy": 56.543, "doc_norm": 6.5136, "encoder_q-embeddings": 17956.3242, "encoder_q-layer.0": 12568.7783, "encoder_q-layer.1": 13359.9326, "encoder_q-layer.10": 20579.0117, "encoder_q-layer.11": 42791.125, "encoder_q-layer.2": 14336.5645, "encoder_q-layer.3": 14514.3086, "encoder_q-layer.4": 14864.8857, "encoder_q-layer.5": 14124.3213, "encoder_q-layer.6": 15688.4775, "encoder_q-layer.7": 16080.9971, "encoder_q-layer.8": 17939.3398, "encoder_q-layer.9": 16105.9678, "epoch": 0.65, "inbatch_neg_score": 40.6903, "inbatch_pos_score": 41.1875, "learning_rate": 1.8555555555555557e-05, "loss": 2.2378, "norm_diff": 0.0427, "num_tokens_overlap": 5.5647, "num_tokens_union": 54.9794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27988.8834, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4709, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6901, "sent_len_1": 66.8065, "sent_len_max_0": 18.815, "sent_len_max_1": 190.3675, "stdk": 0.0453, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 66600 }, { "accuracy": 59.082, "doc_norm": 6.5155, "encoder_q-embeddings": 16421.4434, "encoder_q-layer.0": 11880.8701, "encoder_q-layer.1": 12282.8408, "encoder_q-layer.10": 22831.2363, "encoder_q-layer.11": 38125.9414, "encoder_q-layer.2": 13383.4541, "encoder_q-layer.3": 13616.6924, "encoder_q-layer.4": 13869.2998, "encoder_q-layer.5": 13655.585, "encoder_q-layer.6": 15016.5527, "encoder_q-layer.7": 15809.8936, "encoder_q-layer.8": 18065.918, "encoder_q-layer.9": 18085.7441, "epoch": 0.65, "inbatch_neg_score": 40.7258, "inbatch_pos_score": 41.25, "learning_rate": 1.85e-05, "loss": 2.2261, "norm_diff": 0.0421, "num_tokens_overlap": 5.5673, "num_tokens_union": 55.0424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26696.8311, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4734, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7071, "sent_len_1": 66.8928, "sent_len_max_0": 18.8687, "sent_len_max_1": 191.4825, "stdk": 0.0436, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 66700 }, { "accuracy": 56.543, "doc_norm": 6.5161, "encoder_q-embeddings": 16990.293, "encoder_q-layer.0": 12260.7891, "encoder_q-layer.1": 12892.9912, "encoder_q-layer.10": 19836.0312, "encoder_q-layer.11": 38623.5039, "encoder_q-layer.2": 13740.3535, "encoder_q-layer.3": 13977.0654, "encoder_q-layer.4": 14538.8662, "encoder_q-layer.5": 14408.4092, "encoder_q-layer.6": 15238.6934, "encoder_q-layer.7": 16574.4336, "encoder_q-layer.8": 18023.8535, "encoder_q-layer.9": 16424.998, "epoch": 0.65, "inbatch_neg_score": 40.6838, "inbatch_pos_score": 41.2188, "learning_rate": 1.8444444444444445e-05, "loss": 2.2434, "norm_diff": 0.0435, "num_tokens_overlap": 5.5937, "num_tokens_union": 55.0409, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26974.4937, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4726, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7346, "sent_len_1": 66.82, "sent_len_max_0": 18.8075, "sent_len_max_1": 191.24, "stdk": 0.0455, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 66800 }, { "accuracy": 56.25, "doc_norm": 6.5154, "encoder_q-embeddings": 17556.6172, "encoder_q-layer.0": 12338.7002, "encoder_q-layer.1": 12735.3623, "encoder_q-layer.10": 21963.0645, "encoder_q-layer.11": 46558.668, "encoder_q-layer.2": 13685.5801, "encoder_q-layer.3": 14231.75, "encoder_q-layer.4": 14430.2803, "encoder_q-layer.5": 14049.6289, "encoder_q-layer.6": 15090.9746, "encoder_q-layer.7": 16645.0938, "encoder_q-layer.8": 18341.2168, "encoder_q-layer.9": 16953.7793, "epoch": 0.65, "inbatch_neg_score": 40.7263, "inbatch_pos_score": 41.2188, "learning_rate": 1.838888888888889e-05, "loss": 2.2449, "norm_diff": 0.042, "num_tokens_overlap": 5.5702, "num_tokens_union": 54.9757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28578.5878, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4734, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7227, "sent_len_1": 66.8258, "sent_len_max_0": 18.7625, "sent_len_max_1": 192.44, "stdk": 0.0448, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 66900 }, { "accuracy": 58.7891, "doc_norm": 6.5191, "encoder_q-embeddings": 17750.3789, "encoder_q-layer.0": 12650.6592, "encoder_q-layer.1": 13224.2539, "encoder_q-layer.10": 20323.6875, "encoder_q-layer.11": 41859.5508, "encoder_q-layer.2": 14161.0098, "encoder_q-layer.3": 14002.0625, "encoder_q-layer.4": 14196.7852, "encoder_q-layer.5": 13819.6641, "encoder_q-layer.6": 14723.1816, "encoder_q-layer.7": 16102.1309, "encoder_q-layer.8": 18096.8047, "encoder_q-layer.9": 16480.8906, "epoch": 0.65, "inbatch_neg_score": 40.701, "inbatch_pos_score": 41.2188, "learning_rate": 1.8333333333333333e-05, "loss": 2.2657, "norm_diff": 0.0456, "num_tokens_overlap": 5.5725, "num_tokens_union": 54.9458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27530.7622, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4736, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7089, "sent_len_1": 66.7202, "sent_len_max_0": 18.8175, "sent_len_max_1": 189.2862, "stdk": 0.0461, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 67000 }, { "accuracy": 56.6406, "doc_norm": 6.5162, "encoder_q-embeddings": 17334.6484, "encoder_q-layer.0": 12390.3574, "encoder_q-layer.1": 13025.0391, "encoder_q-layer.10": 20472.6934, "encoder_q-layer.11": 41437.207, "encoder_q-layer.2": 14154.3213, "encoder_q-layer.3": 14134.5322, "encoder_q-layer.4": 14691.8955, "encoder_q-layer.5": 14531.4756, "encoder_q-layer.6": 15515.0215, "encoder_q-layer.7": 16484.4785, "encoder_q-layer.8": 18187.1367, "encoder_q-layer.9": 16021.666, "epoch": 0.66, "inbatch_neg_score": 40.7013, "inbatch_pos_score": 41.2188, "learning_rate": 1.827777777777778e-05, "loss": 2.2451, "norm_diff": 0.0432, "num_tokens_overlap": 5.5848, "num_tokens_union": 55.1353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27727.1641, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4731, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7176, "sent_len_1": 67.0032, "sent_len_max_0": 18.785, "sent_len_max_1": 190.9812, "stdk": 0.0457, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 67100 }, { "accuracy": 56.8359, "doc_norm": 6.5145, "encoder_q-embeddings": 17925.1855, "encoder_q-layer.0": 12854.9082, "encoder_q-layer.1": 13304.0752, "encoder_q-layer.10": 23823.2344, "encoder_q-layer.11": 43467.7188, "encoder_q-layer.2": 14201.1182, "encoder_q-layer.3": 14068.7354, "encoder_q-layer.4": 14466.8877, "encoder_q-layer.5": 14344.0879, "encoder_q-layer.6": 15316.6475, "encoder_q-layer.7": 16806.4922, "encoder_q-layer.8": 19517.5586, "encoder_q-layer.9": 17189.8652, "epoch": 0.66, "inbatch_neg_score": 40.6661, "inbatch_pos_score": 41.1875, "learning_rate": 1.8222222222222224e-05, "loss": 2.1918, "norm_diff": 0.0443, "num_tokens_overlap": 5.5851, "num_tokens_union": 54.9346, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28460.8649, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4703, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7077, "sent_len_1": 66.6759, "sent_len_max_0": 18.8962, "sent_len_max_1": 188.615, "stdk": 0.0452, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 67200 }, { "accuracy": 58.3984, "doc_norm": 6.5166, "encoder_q-embeddings": 17787.9766, "encoder_q-layer.0": 12537.6982, "encoder_q-layer.1": 13000.5391, "encoder_q-layer.10": 21902.9531, "encoder_q-layer.11": 43438.2891, "encoder_q-layer.2": 14055.2705, "encoder_q-layer.3": 14259.0361, "encoder_q-layer.4": 14822.0615, "encoder_q-layer.5": 14181.1963, "encoder_q-layer.6": 15219.0312, "encoder_q-layer.7": 16187.5713, "encoder_q-layer.8": 17937.1602, "encoder_q-layer.9": 17230.1621, "epoch": 0.66, "inbatch_neg_score": 40.6586, "inbatch_pos_score": 41.1875, "learning_rate": 1.8166666666666667e-05, "loss": 2.2181, "norm_diff": 0.0447, "num_tokens_overlap": 5.5787, "num_tokens_union": 54.9338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27854.2864, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4719, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6952, "sent_len_1": 66.7492, "sent_len_max_0": 18.86, "sent_len_max_1": 191.1062, "stdk": 0.0458, "stdq": 0.039, "stdqueue_k": 0.0, "step": 67300 }, { "accuracy": 57.0312, "doc_norm": 6.5128, "encoder_q-embeddings": 18582.9238, "encoder_q-layer.0": 12790.5996, "encoder_q-layer.1": 13260.835, "encoder_q-layer.10": 23422.7188, "encoder_q-layer.11": 47082.2031, "encoder_q-layer.2": 14274.0449, "encoder_q-layer.3": 14011.1016, "encoder_q-layer.4": 14309.1904, "encoder_q-layer.5": 14255.6777, "encoder_q-layer.6": 15066.9551, "encoder_q-layer.7": 16686.0469, "encoder_q-layer.8": 18645.2285, "encoder_q-layer.9": 17492.7188, "epoch": 0.66, "inbatch_neg_score": 40.6451, "inbatch_pos_score": 41.1562, "learning_rate": 1.8111111111111112e-05, "loss": 2.2202, "norm_diff": 0.0441, "num_tokens_overlap": 5.5758, "num_tokens_union": 55.0073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29638.2397, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4688, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7162, "sent_len_1": 66.7722, "sent_len_max_0": 18.8738, "sent_len_max_1": 190.805, "stdk": 0.0445, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 67400 }, { "accuracy": 55.0781, "doc_norm": 6.5079, "encoder_q-embeddings": 17125.9355, "encoder_q-layer.0": 12199.1895, "encoder_q-layer.1": 12815.6523, "encoder_q-layer.10": 20193.5566, "encoder_q-layer.11": 41502.8633, "encoder_q-layer.2": 13976.9238, "encoder_q-layer.3": 13873.4434, "encoder_q-layer.4": 14193.1348, "encoder_q-layer.5": 14598.0, "encoder_q-layer.6": 15986.457, "encoder_q-layer.7": 16730.291, "encoder_q-layer.8": 19272.1133, "encoder_q-layer.9": 17421.0762, "epoch": 0.66, "inbatch_neg_score": 40.6321, "inbatch_pos_score": 41.125, "learning_rate": 1.8055555555555555e-05, "loss": 2.2398, "norm_diff": 0.0431, "num_tokens_overlap": 5.5796, "num_tokens_union": 55.1792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27508.7627, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4648, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.716, "sent_len_1": 67.1319, "sent_len_max_0": 18.7825, "sent_len_max_1": 191.3775, "stdk": 0.0441, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 67500 }, { "accuracy": 57.9102, "doc_norm": 6.509, "encoder_q-embeddings": 17936.3652, "encoder_q-layer.0": 12213.7949, "encoder_q-layer.1": 12918.4824, "encoder_q-layer.10": 19411.0605, "encoder_q-layer.11": 42471.4688, "encoder_q-layer.2": 14274.7891, "encoder_q-layer.3": 13902.3037, "encoder_q-layer.4": 14561.3672, "encoder_q-layer.5": 14308.5312, "encoder_q-layer.6": 14716.1064, "encoder_q-layer.7": 15113.0527, "encoder_q-layer.8": 17172.3457, "encoder_q-layer.9": 15911.5, "epoch": 0.66, "inbatch_neg_score": 40.6053, "inbatch_pos_score": 41.125, "learning_rate": 1.8e-05, "loss": 2.2135, "norm_diff": 0.045, "num_tokens_overlap": 5.5785, "num_tokens_union": 54.9725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27156.3136, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.464, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7172, "sent_len_1": 66.7584, "sent_len_max_0": 18.8588, "sent_len_max_1": 189.065, "stdk": 0.0446, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 67600 }, { "accuracy": 56.0547, "doc_norm": 6.5068, "encoder_q-embeddings": 18368.3906, "encoder_q-layer.0": 12837.043, "encoder_q-layer.1": 13150.2998, "encoder_q-layer.10": 19868.3145, "encoder_q-layer.11": 38133.8945, "encoder_q-layer.2": 14235.4277, "encoder_q-layer.3": 14080.2412, "encoder_q-layer.4": 14372.1562, "encoder_q-layer.5": 14533.4414, "encoder_q-layer.6": 15653.3516, "encoder_q-layer.7": 15997.0273, "encoder_q-layer.8": 17717.8477, "encoder_q-layer.9": 16758.1504, "epoch": 0.66, "inbatch_neg_score": 40.5829, "inbatch_pos_score": 41.0938, "learning_rate": 1.7944444444444443e-05, "loss": 2.2119, "norm_diff": 0.0434, "num_tokens_overlap": 5.5679, "num_tokens_union": 54.9293, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27299.597, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4634, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7281, "sent_len_1": 66.6204, "sent_len_max_0": 18.895, "sent_len_max_1": 187.4462, "stdk": 0.0447, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 67700 }, { "accuracy": 58.2031, "doc_norm": 6.5045, "encoder_q-embeddings": 16967.9375, "encoder_q-layer.0": 12023.5146, "encoder_q-layer.1": 12687.5244, "encoder_q-layer.10": 20456.4219, "encoder_q-layer.11": 38521.25, "encoder_q-layer.2": 13569.3037, "encoder_q-layer.3": 13745.3291, "encoder_q-layer.4": 14055.3799, "encoder_q-layer.5": 13656.5332, "encoder_q-layer.6": 14627.9648, "encoder_q-layer.7": 15313.3369, "encoder_q-layer.8": 18695.8301, "encoder_q-layer.9": 16595.9863, "epoch": 0.66, "inbatch_neg_score": 40.5956, "inbatch_pos_score": 41.0938, "learning_rate": 1.788888888888889e-05, "loss": 2.2014, "norm_diff": 0.041, "num_tokens_overlap": 5.5899, "num_tokens_union": 55.128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26560.8592, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4634, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7228, "sent_len_1": 67.0147, "sent_len_max_0": 18.8663, "sent_len_max_1": 188.1525, "stdk": 0.0439, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 67800 }, { "accuracy": 57.4219, "doc_norm": 6.5012, "encoder_q-embeddings": 17510.8379, "encoder_q-layer.0": 12096.6416, "encoder_q-layer.1": 12822.6543, "encoder_q-layer.10": 18968.8984, "encoder_q-layer.11": 37544.3711, "encoder_q-layer.2": 13867.1836, "encoder_q-layer.3": 13768.8691, "encoder_q-layer.4": 14155.1504, "encoder_q-layer.5": 14014.2578, "encoder_q-layer.6": 15032.8525, "encoder_q-layer.7": 16925.9512, "encoder_q-layer.8": 18467.834, "encoder_q-layer.9": 16002.958, "epoch": 0.66, "inbatch_neg_score": 40.5477, "inbatch_pos_score": 41.0625, "learning_rate": 1.7833333333333334e-05, "loss": 2.2539, "norm_diff": 0.0419, "num_tokens_overlap": 5.5678, "num_tokens_union": 54.8831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26720.6736, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4593, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7079, "sent_len_1": 66.6076, "sent_len_max_0": 18.7013, "sent_len_max_1": 190.73, "stdk": 0.0438, "stdq": 0.038, "stdqueue_k": 0.0, "step": 67900 }, { "accuracy": 57.4219, "doc_norm": 6.5017, "encoder_q-embeddings": 16896.9238, "encoder_q-layer.0": 12266.0293, "encoder_q-layer.1": 12958.8516, "encoder_q-layer.10": 21684.5723, "encoder_q-layer.11": 39351.6562, "encoder_q-layer.2": 14205.5049, "encoder_q-layer.3": 14203.6523, "encoder_q-layer.4": 15212.3838, "encoder_q-layer.5": 14286.7881, "encoder_q-layer.6": 14942.3086, "encoder_q-layer.7": 16149.8184, "encoder_q-layer.8": 18637.4688, "encoder_q-layer.9": 16288.0908, "epoch": 0.66, "inbatch_neg_score": 40.5267, "inbatch_pos_score": 41.0312, "learning_rate": 1.777777777777778e-05, "loss": 2.1887, "norm_diff": 0.0439, "num_tokens_overlap": 5.5889, "num_tokens_union": 55.0266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26972.6289, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4578, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7395, "sent_len_1": 66.812, "sent_len_max_0": 18.94, "sent_len_max_1": 188.9013, "stdk": 0.0452, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 68000 }, { "accuracy": 58.4961, "doc_norm": 6.503, "encoder_q-embeddings": 17400.6562, "encoder_q-layer.0": 12158.583, "encoder_q-layer.1": 12624.1172, "encoder_q-layer.10": 21511.5391, "encoder_q-layer.11": 41144.8984, "encoder_q-layer.2": 14133.1182, "encoder_q-layer.3": 14304.6045, "encoder_q-layer.4": 14478.252, "encoder_q-layer.5": 13980.6826, "encoder_q-layer.6": 15260.5908, "encoder_q-layer.7": 16130.5156, "encoder_q-layer.8": 18105.9512, "encoder_q-layer.9": 16856.7012, "epoch": 0.66, "inbatch_neg_score": 40.5004, "inbatch_pos_score": 41.0312, "learning_rate": 1.7722222222222222e-05, "loss": 2.2303, "norm_diff": 0.046, "num_tokens_overlap": 5.5761, "num_tokens_union": 54.9685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27725.2061, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.457, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7189, "sent_len_1": 66.7483, "sent_len_max_0": 18.8813, "sent_len_max_1": 190.5513, "stdk": 0.0454, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 68100 }, { "accuracy": 58.2031, "doc_norm": 6.4976, "encoder_q-embeddings": 17142.9004, "encoder_q-layer.0": 12038.5547, "encoder_q-layer.1": 12729.6045, "encoder_q-layer.10": 19232.4688, "encoder_q-layer.11": 38385.5938, "encoder_q-layer.2": 13865.4316, "encoder_q-layer.3": 13768.4268, "encoder_q-layer.4": 13733.498, "encoder_q-layer.5": 13457.0234, "encoder_q-layer.6": 14247.8896, "encoder_q-layer.7": 15486.1201, "encoder_q-layer.8": 16733.4922, "encoder_q-layer.9": 15834.8848, "epoch": 0.67, "inbatch_neg_score": 40.4862, "inbatch_pos_score": 41.0, "learning_rate": 1.7666666666666668e-05, "loss": 2.2657, "norm_diff": 0.0419, "num_tokens_overlap": 5.5751, "num_tokens_union": 54.9208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26208.2608, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4558, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7141, "sent_len_1": 66.658, "sent_len_max_0": 18.795, "sent_len_max_1": 190.3537, "stdk": 0.0441, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 68200 }, { "accuracy": 58.3008, "doc_norm": 6.5014, "encoder_q-embeddings": 18239.5625, "encoder_q-layer.0": 11921.0898, "encoder_q-layer.1": 12405.0742, "encoder_q-layer.10": 20942.1602, "encoder_q-layer.11": 39430.2812, "encoder_q-layer.2": 13298.2969, "encoder_q-layer.3": 13281.2676, "encoder_q-layer.4": 13611.0557, "encoder_q-layer.5": 13780.4229, "encoder_q-layer.6": 15060.8535, "encoder_q-layer.7": 16521.7734, "encoder_q-layer.8": 19223.1426, "encoder_q-layer.9": 16733.5527, "epoch": 0.67, "inbatch_neg_score": 40.4909, "inbatch_pos_score": 41.0312, "learning_rate": 1.761111111111111e-05, "loss": 2.2175, "norm_diff": 0.0443, "num_tokens_overlap": 5.5799, "num_tokens_union": 55.047, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26974.2503, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.457, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7169, "sent_len_1": 66.8283, "sent_len_max_0": 18.7538, "sent_len_max_1": 188.6575, "stdk": 0.0451, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 68300 }, { "accuracy": 57.5195, "doc_norm": 6.498, "encoder_q-embeddings": 17427.7539, "encoder_q-layer.0": 12028.96, "encoder_q-layer.1": 12232.792, "encoder_q-layer.10": 19391.8691, "encoder_q-layer.11": 38229.1523, "encoder_q-layer.2": 13374.0088, "encoder_q-layer.3": 13011.9473, "encoder_q-layer.4": 13489.6875, "encoder_q-layer.5": 13365.54, "encoder_q-layer.6": 14541.25, "encoder_q-layer.7": 15961.7324, "encoder_q-layer.8": 17521.0039, "encoder_q-layer.9": 15925.6758, "epoch": 0.67, "inbatch_neg_score": 40.45, "inbatch_pos_score": 40.9688, "learning_rate": 1.7555555555555556e-05, "loss": 2.2037, "norm_diff": 0.0452, "num_tokens_overlap": 5.5788, "num_tokens_union": 55.1136, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26369.981, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4528, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7385, "sent_len_1": 66.9434, "sent_len_max_0": 18.8625, "sent_len_max_1": 188.2488, "stdk": 0.0457, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 68400 }, { "accuracy": 56.0547, "doc_norm": 6.4957, "encoder_q-embeddings": 16281.3008, "encoder_q-layer.0": 11947.502, "encoder_q-layer.1": 12737.7627, "encoder_q-layer.10": 18863.5879, "encoder_q-layer.11": 37823.207, "encoder_q-layer.2": 13613.5361, "encoder_q-layer.3": 13762.4219, "encoder_q-layer.4": 14816.0342, "encoder_q-layer.5": 14297.3359, "encoder_q-layer.6": 14722.2607, "encoder_q-layer.7": 16179.6904, "encoder_q-layer.8": 17627.6562, "encoder_q-layer.9": 16014.834, "epoch": 0.67, "inbatch_neg_score": 40.4411, "inbatch_pos_score": 40.9375, "learning_rate": 1.75e-05, "loss": 2.2118, "norm_diff": 0.0457, "num_tokens_overlap": 5.5715, "num_tokens_union": 55.0939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26184.798, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.45, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7114, "sent_len_1": 66.9595, "sent_len_max_0": 18.8325, "sent_len_max_1": 190.5788, "stdk": 0.0453, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 68500 }, { "accuracy": 57.5195, "doc_norm": 6.5001, "encoder_q-embeddings": 16937.7324, "encoder_q-layer.0": 12271.043, "encoder_q-layer.1": 12741.0508, "encoder_q-layer.10": 19282.0449, "encoder_q-layer.11": 40558.2734, "encoder_q-layer.2": 14339.6318, "encoder_q-layer.3": 14118.7812, "encoder_q-layer.4": 14159.2949, "encoder_q-layer.5": 13805.1914, "encoder_q-layer.6": 15003.0557, "encoder_q-layer.7": 15613.6113, "encoder_q-layer.8": 17151.4902, "encoder_q-layer.9": 15933.8105, "epoch": 0.67, "inbatch_neg_score": 40.4122, "inbatch_pos_score": 40.9375, "learning_rate": 1.7444444444444448e-05, "loss": 2.2267, "norm_diff": 0.0503, "num_tokens_overlap": 5.5844, "num_tokens_union": 55.1244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26796.7238, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4498, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7213, "sent_len_1": 66.8863, "sent_len_max_0": 18.7275, "sent_len_max_1": 188.7075, "stdk": 0.046, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 68600 }, { "accuracy": 56.9336, "doc_norm": 6.4939, "encoder_q-embeddings": 17868.2363, "encoder_q-layer.0": 12994.8457, "encoder_q-layer.1": 13151.541, "encoder_q-layer.10": 20686.623, "encoder_q-layer.11": 39690.9883, "encoder_q-layer.2": 14161.4746, "encoder_q-layer.3": 14368.0645, "encoder_q-layer.4": 15401.0723, "encoder_q-layer.5": 14586.8291, "encoder_q-layer.6": 15556.7031, "encoder_q-layer.7": 16456.7305, "encoder_q-layer.8": 17736.4102, "encoder_q-layer.9": 16447.7051, "epoch": 0.67, "inbatch_neg_score": 40.3977, "inbatch_pos_score": 40.9062, "learning_rate": 1.738888888888889e-05, "loss": 2.1955, "norm_diff": 0.0443, "num_tokens_overlap": 5.5793, "num_tokens_union": 54.9536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27536.934, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4496, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7235, "sent_len_1": 66.6858, "sent_len_max_0": 18.815, "sent_len_max_1": 187.6175, "stdk": 0.0459, "stdq": 0.039, "stdqueue_k": 0.0, "step": 68700 }, { "accuracy": 57.9102, "doc_norm": 6.4994, "encoder_q-embeddings": 17994.6309, "encoder_q-layer.0": 12387.7754, "encoder_q-layer.1": 13146.3291, "encoder_q-layer.10": 23306.3711, "encoder_q-layer.11": 48755.6953, "encoder_q-layer.2": 14293.8906, "encoder_q-layer.3": 14082.8359, "encoder_q-layer.4": 14741.5918, "encoder_q-layer.5": 14555.8887, "encoder_q-layer.6": 15776.1338, "encoder_q-layer.7": 16573.6973, "encoder_q-layer.8": 18967.9824, "encoder_q-layer.9": 17939.293, "epoch": 0.67, "inbatch_neg_score": 40.393, "inbatch_pos_score": 40.9375, "learning_rate": 1.7333333333333336e-05, "loss": 2.1999, "norm_diff": 0.0484, "num_tokens_overlap": 5.5721, "num_tokens_union": 54.9644, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29337.0741, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4509, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7241, "sent_len_1": 66.7103, "sent_len_max_0": 18.8987, "sent_len_max_1": 190.1925, "stdk": 0.0461, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 68800 }, { "accuracy": 57.2266, "doc_norm": 6.4921, "encoder_q-embeddings": 17403.2051, "encoder_q-layer.0": 12208.3418, "encoder_q-layer.1": 12912.498, "encoder_q-layer.10": 20182.8223, "encoder_q-layer.11": 40530.0195, "encoder_q-layer.2": 14243.4336, "encoder_q-layer.3": 13995.3428, "encoder_q-layer.4": 14404.2324, "encoder_q-layer.5": 14487.9824, "encoder_q-layer.6": 15408.1133, "encoder_q-layer.7": 15713.1309, "encoder_q-layer.8": 17384.4199, "encoder_q-layer.9": 16206.7676, "epoch": 0.67, "inbatch_neg_score": 40.3889, "inbatch_pos_score": 40.9062, "learning_rate": 1.7277777777777778e-05, "loss": 2.2184, "norm_diff": 0.0438, "num_tokens_overlap": 5.5708, "num_tokens_union": 55.0227, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27261.1463, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4484, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6989, "sent_len_1": 66.8166, "sent_len_max_0": 18.765, "sent_len_max_1": 189.6887, "stdk": 0.0451, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 68900 }, { "accuracy": 58.1055, "doc_norm": 6.4909, "encoder_q-embeddings": 17934.3984, "encoder_q-layer.0": 12225.7754, "encoder_q-layer.1": 12655.1426, "encoder_q-layer.10": 20423.8613, "encoder_q-layer.11": 38421.1484, "encoder_q-layer.2": 14120.9629, "encoder_q-layer.3": 13703.959, "encoder_q-layer.4": 14044.542, "encoder_q-layer.5": 13820.0225, "encoder_q-layer.6": 15237.3311, "encoder_q-layer.7": 15658.29, "encoder_q-layer.8": 17571.5664, "encoder_q-layer.9": 16193.8008, "epoch": 0.67, "inbatch_neg_score": 40.3523, "inbatch_pos_score": 40.875, "learning_rate": 1.7222222222222224e-05, "loss": 2.214, "norm_diff": 0.0458, "num_tokens_overlap": 5.5802, "num_tokens_union": 55.0381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26905.6911, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4451, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7359, "sent_len_1": 66.8281, "sent_len_max_0": 18.7525, "sent_len_max_1": 191.4975, "stdk": 0.0442, "stdq": 0.038, "stdqueue_k": 0.0, "step": 69000 }, { "accuracy": 57.5195, "doc_norm": 6.4913, "encoder_q-embeddings": 17561.291, "encoder_q-layer.0": 12570.3379, "encoder_q-layer.1": 13474.3193, "encoder_q-layer.10": 23571.5449, "encoder_q-layer.11": 44539.1133, "encoder_q-layer.2": 14197.3838, "encoder_q-layer.3": 14072.2559, "encoder_q-layer.4": 14409.874, "encoder_q-layer.5": 14180.707, "encoder_q-layer.6": 14990.9707, "encoder_q-layer.7": 15991.1641, "encoder_q-layer.8": 18559.0254, "encoder_q-layer.9": 17795.709, "epoch": 0.67, "inbatch_neg_score": 40.3308, "inbatch_pos_score": 40.8438, "learning_rate": 1.7166666666666666e-05, "loss": 2.2053, "norm_diff": 0.0474, "num_tokens_overlap": 5.5811, "num_tokens_union": 55.1105, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29007.9397, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4439, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7315, "sent_len_1": 66.9427, "sent_len_max_0": 18.8612, "sent_len_max_1": 191.455, "stdk": 0.0456, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 69100 }, { "accuracy": 58.5938, "doc_norm": 6.491, "encoder_q-embeddings": 17234.1992, "encoder_q-layer.0": 12033.4395, "encoder_q-layer.1": 12186.0723, "encoder_q-layer.10": 20244.2812, "encoder_q-layer.11": 37756.4961, "encoder_q-layer.2": 13529.3223, "encoder_q-layer.3": 13605.7822, "encoder_q-layer.4": 14185.751, "encoder_q-layer.5": 14537.3799, "encoder_q-layer.6": 15348.0029, "encoder_q-layer.7": 16341.1055, "encoder_q-layer.8": 18068.2266, "encoder_q-layer.9": 16692.7305, "epoch": 0.68, "inbatch_neg_score": 40.3392, "inbatch_pos_score": 40.875, "learning_rate": 1.7111111111111112e-05, "loss": 2.1993, "norm_diff": 0.0462, "num_tokens_overlap": 5.5876, "num_tokens_union": 55.0124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26831.8179, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4448, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7242, "sent_len_1": 66.7646, "sent_len_max_0": 18.8425, "sent_len_max_1": 188.0888, "stdk": 0.0448, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 69200 }, { "accuracy": 58.4961, "doc_norm": 6.492, "encoder_q-embeddings": 17632.9844, "encoder_q-layer.0": 12402.7236, "encoder_q-layer.1": 12739.1357, "encoder_q-layer.10": 20738.9414, "encoder_q-layer.11": 40677.5, "encoder_q-layer.2": 13867.1846, "encoder_q-layer.3": 14101.21, "encoder_q-layer.4": 14398.9424, "encoder_q-layer.5": 13931.0, "encoder_q-layer.6": 14830.1143, "encoder_q-layer.7": 16736.2188, "encoder_q-layer.8": 18297.0176, "encoder_q-layer.9": 17053.6699, "epoch": 0.68, "inbatch_neg_score": 40.3267, "inbatch_pos_score": 40.875, "learning_rate": 1.7055555555555554e-05, "loss": 2.2606, "norm_diff": 0.047, "num_tokens_overlap": 5.5783, "num_tokens_union": 55.0122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27307.9572, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.445, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7149, "sent_len_1": 66.8192, "sent_len_max_0": 18.85, "sent_len_max_1": 189.6587, "stdk": 0.045, "stdq": 0.0399, "stdqueue_k": 0.0, "step": 69300 }, { "accuracy": 56.7383, "doc_norm": 6.4857, "encoder_q-embeddings": 18827.4727, "encoder_q-layer.0": 12979.0156, "encoder_q-layer.1": 13963.3262, "encoder_q-layer.10": 19174.0527, "encoder_q-layer.11": 37474.9102, "encoder_q-layer.2": 14883.6475, "encoder_q-layer.3": 14501.4785, "encoder_q-layer.4": 15047.2441, "encoder_q-layer.5": 14745.3652, "encoder_q-layer.6": 15688.5811, "encoder_q-layer.7": 17467.9434, "encoder_q-layer.8": 18299.1816, "encoder_q-layer.9": 16766.2031, "epoch": 0.68, "inbatch_neg_score": 40.328, "inbatch_pos_score": 40.8438, "learning_rate": 1.7000000000000003e-05, "loss": 2.2192, "norm_diff": 0.0427, "num_tokens_overlap": 5.5881, "num_tokens_union": 55.0228, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27264.3575, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4429, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7139, "sent_len_1": 66.8224, "sent_len_max_0": 18.78, "sent_len_max_1": 187.7475, "stdk": 0.0442, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 69400 }, { "accuracy": 54.7852, "doc_norm": 6.4864, "encoder_q-embeddings": 18813.959, "encoder_q-layer.0": 13343.6973, "encoder_q-layer.1": 13719.5469, "encoder_q-layer.10": 21327.8926, "encoder_q-layer.11": 40833.3242, "encoder_q-layer.2": 14696.0234, "encoder_q-layer.3": 14735.2334, "encoder_q-layer.4": 15038.9307, "encoder_q-layer.5": 14822.9248, "encoder_q-layer.6": 15672.0107, "encoder_q-layer.7": 16603.1113, "encoder_q-layer.8": 19995.0977, "encoder_q-layer.9": 17551.3848, "epoch": 0.68, "inbatch_neg_score": 40.3257, "inbatch_pos_score": 40.8438, "learning_rate": 1.6944444444444446e-05, "loss": 2.2019, "norm_diff": 0.0437, "num_tokens_overlap": 5.5822, "num_tokens_union": 55.0651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28426.1197, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4426, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7134, "sent_len_1": 66.8631, "sent_len_max_0": 18.8438, "sent_len_max_1": 189.235, "stdk": 0.0453, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 69500 }, { "accuracy": 58.1055, "doc_norm": 6.4853, "encoder_q-embeddings": 17802.5879, "encoder_q-layer.0": 12335.0342, "encoder_q-layer.1": 12979.8486, "encoder_q-layer.10": 18896.2402, "encoder_q-layer.11": 36186.6328, "encoder_q-layer.2": 13778.6475, "encoder_q-layer.3": 13827.5039, "encoder_q-layer.4": 14238.9434, "encoder_q-layer.5": 13758.8379, "encoder_q-layer.6": 14612.0117, "encoder_q-layer.7": 16137.2715, "encoder_q-layer.8": 17178.4023, "encoder_q-layer.9": 16027.0156, "epoch": 0.68, "inbatch_neg_score": 40.2731, "inbatch_pos_score": 40.8125, "learning_rate": 1.688888888888889e-05, "loss": 2.1969, "norm_diff": 0.0455, "num_tokens_overlap": 5.5674, "num_tokens_union": 54.9258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26200.4031, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4398, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7187, "sent_len_1": 66.6161, "sent_len_max_0": 18.7213, "sent_len_max_1": 191.8512, "stdk": 0.0459, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 69600 }, { "accuracy": 60.5469, "doc_norm": 6.4838, "encoder_q-embeddings": 17408.6543, "encoder_q-layer.0": 12198.7979, "encoder_q-layer.1": 12844.3027, "encoder_q-layer.10": 20844.0215, "encoder_q-layer.11": 44445.5859, "encoder_q-layer.2": 14529.1865, "encoder_q-layer.3": 14095.124, "encoder_q-layer.4": 14856.7402, "encoder_q-layer.5": 13752.0039, "encoder_q-layer.6": 15327.584, "encoder_q-layer.7": 15693.9756, "encoder_q-layer.8": 17648.6738, "encoder_q-layer.9": 16009.0947, "epoch": 0.68, "inbatch_neg_score": 40.288, "inbatch_pos_score": 40.8125, "learning_rate": 1.6833333333333334e-05, "loss": 2.2128, "norm_diff": 0.0447, "num_tokens_overlap": 5.5604, "num_tokens_union": 54.9918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27762.9079, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4391, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7017, "sent_len_1": 66.7495, "sent_len_max_0": 18.7838, "sent_len_max_1": 188.6213, "stdk": 0.046, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 69700 }, { "accuracy": 57.2266, "doc_norm": 6.4865, "encoder_q-embeddings": 18116.6621, "encoder_q-layer.0": 12476.8457, "encoder_q-layer.1": 12682.4609, "encoder_q-layer.10": 21387.5938, "encoder_q-layer.11": 38850.1797, "encoder_q-layer.2": 13943.2959, "encoder_q-layer.3": 13858.6484, "encoder_q-layer.4": 14466.79, "encoder_q-layer.5": 14112.6709, "encoder_q-layer.6": 14928.9316, "encoder_q-layer.7": 15337.4678, "encoder_q-layer.8": 18950.248, "encoder_q-layer.9": 17004.8711, "epoch": 0.68, "inbatch_neg_score": 40.3014, "inbatch_pos_score": 40.8125, "learning_rate": 1.677777777777778e-05, "loss": 2.169, "norm_diff": 0.0445, "num_tokens_overlap": 5.5828, "num_tokens_union": 55.0132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27336.1325, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.442, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7158, "sent_len_1": 66.83, "sent_len_max_0": 18.8287, "sent_len_max_1": 188.8787, "stdk": 0.046, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 69800 }, { "accuracy": 60.9375, "doc_norm": 6.486, "encoder_q-embeddings": 16989.709, "encoder_q-layer.0": 11844.7256, "encoder_q-layer.1": 12484.7734, "encoder_q-layer.10": 18450.8477, "encoder_q-layer.11": 36927.0547, "encoder_q-layer.2": 13669.0479, "encoder_q-layer.3": 13531.7432, "encoder_q-layer.4": 13886.7861, "encoder_q-layer.5": 13598.9697, "encoder_q-layer.6": 14240.1162, "encoder_q-layer.7": 16081.7676, "encoder_q-layer.8": 16744.6777, "encoder_q-layer.9": 15652.0615, "epoch": 0.68, "inbatch_neg_score": 40.2855, "inbatch_pos_score": 40.8125, "learning_rate": 1.6722222222222222e-05, "loss": 2.2315, "norm_diff": 0.0444, "num_tokens_overlap": 5.5736, "num_tokens_union": 54.9636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25727.7594, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4415, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7142, "sent_len_1": 66.712, "sent_len_max_0": 18.745, "sent_len_max_1": 190.5238, "stdk": 0.0447, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 69900 }, { "accuracy": 56.543, "doc_norm": 6.4807, "encoder_q-embeddings": 17434.5176, "encoder_q-layer.0": 12425.9746, "encoder_q-layer.1": 12787.2695, "encoder_q-layer.10": 21712.3516, "encoder_q-layer.11": 42166.7852, "encoder_q-layer.2": 14145.1318, "encoder_q-layer.3": 14456.2637, "encoder_q-layer.4": 14813.6426, "encoder_q-layer.5": 14255.9268, "encoder_q-layer.6": 15410.2041, "encoder_q-layer.7": 16009.6992, "encoder_q-layer.8": 18132.4062, "encoder_q-layer.9": 16314.4043, "epoch": 0.68, "inbatch_neg_score": 40.2548, "inbatch_pos_score": 40.7812, "learning_rate": 1.6666666666666667e-05, "loss": 2.2204, "norm_diff": 0.0433, "num_tokens_overlap": 5.5625, "num_tokens_union": 54.9734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28089.2216, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4374, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7187, "sent_len_1": 66.6895, "sent_len_max_0": 18.7975, "sent_len_max_1": 189.5175, "stdk": 0.0446, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 70000 }, { "dev_runtime": 26.9888, "dev_samples_per_second": 2.371, "dev_steps_per_second": 0.037, "epoch": 0.68, "step": 70000, "test_accuracy": 8.7158203125, "test_doc_norm": 6.473352909088135, "test_inbatch_neg_score": 40.80248260498047, "test_inbatch_pos_score": 41.582881927490234, "test_loss": 4.0471720695495605, "test_norm_diff": 0.0018200278282165527, "test_query_norm": 6.4731364250183105, "test_queue_k_norm": 0.0, "test_stdk": 0.037328317761421204, "test_stdq": 0.03727255016565323, "test_stdqueue_k": 0.0 }, { "dev_runtime": 26.9888, "dev_samples_per_second": 2.371, "dev_steps_per_second": 0.037, "epoch": 0.68, "eval_beir-arguana_ndcg@10": 0.3755, "eval_beir-arguana_recall@10": 0.66856, "eval_beir-arguana_recall@100": 0.97013, "eval_beir-arguana_recall@20": 0.83001, "eval_beir-avg_ndcg@10": 0.382946, "eval_beir-avg_recall@10": 0.45665100000000003, "eval_beir-avg_recall@100": 0.6426909166666667, "eval_beir-avg_recall@20": 0.5226464166666667, "eval_beir-cqadupstack_ndcg@10": 0.29533, "eval_beir-cqadupstack_recall@10": 0.39774, "eval_beir-cqadupstack_recall@100": 0.6294991666666667, "eval_beir-cqadupstack_recall@20": 0.4623441666666666, "eval_beir-fiqa_ndcg@10": 0.26661, "eval_beir-fiqa_recall@10": 0.33839, "eval_beir-fiqa_recall@100": 0.59851, "eval_beir-fiqa_recall@20": 0.41287, "eval_beir-nfcorpus_ndcg@10": 0.33622, "eval_beir-nfcorpus_recall@10": 0.16615, "eval_beir-nfcorpus_recall@100": 0.3152, "eval_beir-nfcorpus_recall@20": 0.2051, "eval_beir-nq_ndcg@10": 0.26947, "eval_beir-nq_recall@10": 0.44988, "eval_beir-nq_recall@100": 0.78351, "eval_beir-nq_recall@20": 0.57551, "eval_beir-quora_ndcg@10": 0.78559, "eval_beir-quora_recall@10": 0.88888, "eval_beir-quora_recall@100": 0.97719, "eval_beir-quora_recall@20": 0.92937, "eval_beir-scidocs_ndcg@10": 0.15963, "eval_beir-scidocs_recall@10": 0.17003, "eval_beir-scidocs_recall@100": 0.38525, "eval_beir-scidocs_recall@20": 0.22543, "eval_beir-scifact_ndcg@10": 0.61636, "eval_beir-scifact_recall@10": 0.76817, "eval_beir-scifact_recall@100": 0.91767, "eval_beir-scifact_recall@20": 0.83856, "eval_beir-trec-covid_ndcg@10": 0.55037, "eval_beir-trec-covid_recall@10": 0.588, "eval_beir-trec-covid_recall@100": 0.4196, "eval_beir-trec-covid_recall@20": 0.55, "eval_beir-webis-touche2020_ndcg@10": 0.17438, "eval_beir-webis-touche2020_recall@10": 0.13071, "eval_beir-webis-touche2020_recall@100": 0.43035, "eval_beir-webis-touche2020_recall@20": 0.19727, "eval_senteval-avg_sts": 0.741355314564999, "eval_senteval-sickr_spearman": 0.7311469574429758, "eval_senteval-stsb_spearman": 0.7515636716870223, "step": 70000, "test_accuracy": 8.7158203125, "test_doc_norm": 6.473352909088135, "test_inbatch_neg_score": 40.80248260498047, "test_inbatch_pos_score": 41.582881927490234, "test_loss": 4.0471720695495605, "test_norm_diff": 0.0018200278282165527, "test_query_norm": 6.4731364250183105, "test_queue_k_norm": 0.0, "test_stdk": 0.037328317761421204, "test_stdq": 0.03727255016565323, "test_stdqueue_k": 0.0 }, { "accuracy": 60.0586, "doc_norm": 6.4814, "encoder_q-embeddings": 16422.2305, "encoder_q-layer.0": 11619.9814, "encoder_q-layer.1": 12104.8467, "encoder_q-layer.10": 22470.4277, "encoder_q-layer.11": 42237.5117, "encoder_q-layer.2": 12990.3291, "encoder_q-layer.3": 13448.9346, "encoder_q-layer.4": 13668.2227, "encoder_q-layer.5": 13501.5859, "encoder_q-layer.6": 14430.8281, "encoder_q-layer.7": 15441.6562, "encoder_q-layer.8": 16995.2754, "encoder_q-layer.9": 16505.7188, "epoch": 0.68, "inbatch_neg_score": 40.2397, "inbatch_pos_score": 40.7812, "learning_rate": 1.661111111111111e-05, "loss": 2.1553, "norm_diff": 0.0454, "num_tokens_overlap": 5.5796, "num_tokens_union": 55.1284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26817.746, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.436, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7102, "sent_len_1": 66.9575, "sent_len_max_0": 18.735, "sent_len_max_1": 190.51, "stdk": 0.0457, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 70100 }, { "accuracy": 56.543, "doc_norm": 6.4801, "encoder_q-embeddings": 18167.1582, "encoder_q-layer.0": 12750.3828, "encoder_q-layer.1": 13030.3115, "encoder_q-layer.10": 21753.957, "encoder_q-layer.11": 39239.2344, "encoder_q-layer.2": 14264.9873, "encoder_q-layer.3": 14399.3613, "encoder_q-layer.4": 14534.0156, "encoder_q-layer.5": 14585.9609, "encoder_q-layer.6": 14903.6123, "encoder_q-layer.7": 15986.0479, "encoder_q-layer.8": 19260.3711, "encoder_q-layer.9": 17297.1133, "epoch": 0.69, "inbatch_neg_score": 40.2218, "inbatch_pos_score": 40.75, "learning_rate": 1.655555555555556e-05, "loss": 2.1975, "norm_diff": 0.0465, "num_tokens_overlap": 5.5732, "num_tokens_union": 55.0394, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27338.4295, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4336, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6969, "sent_len_1": 66.8484, "sent_len_max_0": 18.805, "sent_len_max_1": 190.1738, "stdk": 0.0451, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 70200 }, { "accuracy": 55.957, "doc_norm": 6.4808, "encoder_q-embeddings": 18417.7676, "encoder_q-layer.0": 13160.5938, "encoder_q-layer.1": 13785.0664, "encoder_q-layer.10": 21158.7012, "encoder_q-layer.11": 40519.75, "encoder_q-layer.2": 14351.9209, "encoder_q-layer.3": 14942.0938, "encoder_q-layer.4": 15204.4268, "encoder_q-layer.5": 14819.9883, "encoder_q-layer.6": 15937.8506, "encoder_q-layer.7": 17181.498, "encoder_q-layer.8": 19719.9727, "encoder_q-layer.9": 17659.8418, "epoch": 0.69, "inbatch_neg_score": 40.2316, "inbatch_pos_score": 40.75, "learning_rate": 1.65e-05, "loss": 2.2267, "norm_diff": 0.0446, "num_tokens_overlap": 5.5878, "num_tokens_union": 55.1078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28591.8637, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4362, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7449, "sent_len_1": 66.9376, "sent_len_max_0": 18.86, "sent_len_max_1": 189.5188, "stdk": 0.0456, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 70300 }, { "accuracy": 56.3477, "doc_norm": 6.4838, "encoder_q-embeddings": 17628.6777, "encoder_q-layer.0": 12297.6191, "encoder_q-layer.1": 13046.8057, "encoder_q-layer.10": 21782.3652, "encoder_q-layer.11": 40925.3359, "encoder_q-layer.2": 14127.3506, "encoder_q-layer.3": 14055.0635, "encoder_q-layer.4": 14744.9863, "encoder_q-layer.5": 14549.2539, "encoder_q-layer.6": 15828.5254, "encoder_q-layer.7": 16937.5449, "encoder_q-layer.8": 18751.1621, "encoder_q-layer.9": 16981.1758, "epoch": 0.69, "inbatch_neg_score": 40.2372, "inbatch_pos_score": 40.75, "learning_rate": 1.6444444444444447e-05, "loss": 2.2214, "norm_diff": 0.047, "num_tokens_overlap": 5.5894, "num_tokens_union": 55.0574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27645.1767, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4368, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7387, "sent_len_1": 66.8692, "sent_len_max_0": 18.9438, "sent_len_max_1": 188.1875, "stdk": 0.0467, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 70400 }, { "accuracy": 58.3984, "doc_norm": 6.4821, "encoder_q-embeddings": 17444.748, "encoder_q-layer.0": 12244.9883, "encoder_q-layer.1": 12655.0752, "encoder_q-layer.10": 20507.4414, "encoder_q-layer.11": 40846.457, "encoder_q-layer.2": 13690.6279, "encoder_q-layer.3": 13542.0879, "encoder_q-layer.4": 13968.0225, "encoder_q-layer.5": 13932.4297, "encoder_q-layer.6": 14608.8711, "encoder_q-layer.7": 15448.584, "encoder_q-layer.8": 18267.6621, "encoder_q-layer.9": 16454.4258, "epoch": 0.69, "inbatch_neg_score": 40.218, "inbatch_pos_score": 40.75, "learning_rate": 1.638888888888889e-05, "loss": 2.2311, "norm_diff": 0.0467, "num_tokens_overlap": 5.5692, "num_tokens_union": 55.0295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26856.2648, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4355, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7262, "sent_len_1": 66.7162, "sent_len_max_0": 18.7975, "sent_len_max_1": 187.9688, "stdk": 0.0448, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 70500 }, { "accuracy": 58.2031, "doc_norm": 6.4827, "encoder_q-embeddings": 18438.5039, "encoder_q-layer.0": 13121.6777, "encoder_q-layer.1": 13296.4004, "encoder_q-layer.10": 19680.4355, "encoder_q-layer.11": 39382.7422, "encoder_q-layer.2": 14160.1035, "encoder_q-layer.3": 14041.8545, "encoder_q-layer.4": 14507.6074, "encoder_q-layer.5": 14481.3271, "encoder_q-layer.6": 15244.5264, "encoder_q-layer.7": 15986.9365, "encoder_q-layer.8": 17370.5039, "encoder_q-layer.9": 15886.0537, "epoch": 0.69, "inbatch_neg_score": 40.22, "inbatch_pos_score": 40.75, "learning_rate": 1.6333333333333335e-05, "loss": 2.2274, "norm_diff": 0.0473, "num_tokens_overlap": 5.5771, "num_tokens_union": 54.9236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26982.8476, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4353, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7093, "sent_len_1": 66.6607, "sent_len_max_0": 18.9662, "sent_len_max_1": 191.0875, "stdk": 0.0455, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 70600 }, { "accuracy": 55.8594, "doc_norm": 6.4795, "encoder_q-embeddings": 17190.4727, "encoder_q-layer.0": 12383.5117, "encoder_q-layer.1": 12709.9814, "encoder_q-layer.10": 22801.7676, "encoder_q-layer.11": 44690.8828, "encoder_q-layer.2": 13887.7314, "encoder_q-layer.3": 13843.2412, "encoder_q-layer.4": 14484.0127, "encoder_q-layer.5": 14269.8877, "encoder_q-layer.6": 15428.5889, "encoder_q-layer.7": 16970.0898, "encoder_q-layer.8": 19237.1523, "encoder_q-layer.9": 17758.6504, "epoch": 0.69, "inbatch_neg_score": 40.1737, "inbatch_pos_score": 40.6875, "learning_rate": 1.6277777777777777e-05, "loss": 2.2102, "norm_diff": 0.0479, "num_tokens_overlap": 5.5816, "num_tokens_union": 55.1978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28218.4754, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4317, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7431, "sent_len_1": 67.0779, "sent_len_max_0": 18.8788, "sent_len_max_1": 190.8325, "stdk": 0.0453, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 70700 }, { "accuracy": 58.4961, "doc_norm": 6.4744, "encoder_q-embeddings": 17050.7227, "encoder_q-layer.0": 12151.4004, "encoder_q-layer.1": 12775.8311, "encoder_q-layer.10": 19335.0039, "encoder_q-layer.11": 38103.5742, "encoder_q-layer.2": 14012.5059, "encoder_q-layer.3": 14101.8613, "encoder_q-layer.4": 14839.7568, "encoder_q-layer.5": 14562.2959, "encoder_q-layer.6": 15165.0264, "encoder_q-layer.7": 15612.6709, "encoder_q-layer.8": 17459.8828, "encoder_q-layer.9": 15993.0771, "epoch": 0.69, "inbatch_neg_score": 40.1397, "inbatch_pos_score": 40.6562, "learning_rate": 1.6222222222222223e-05, "loss": 2.201, "norm_diff": 0.045, "num_tokens_overlap": 5.586, "num_tokens_union": 55.0011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26446.2638, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4294, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7108, "sent_len_1": 66.8277, "sent_len_max_0": 18.9037, "sent_len_max_1": 188.7113, "stdk": 0.0448, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 70800 }, { "accuracy": 56.7383, "doc_norm": 6.4805, "encoder_q-embeddings": 18753.1719, "encoder_q-layer.0": 13192.5732, "encoder_q-layer.1": 13598.208, "encoder_q-layer.10": 20420.5332, "encoder_q-layer.11": 39619.7617, "encoder_q-layer.2": 14665.9248, "encoder_q-layer.3": 14515.7803, "encoder_q-layer.4": 14482.5361, "encoder_q-layer.5": 14859.7666, "encoder_q-layer.6": 15472.6406, "encoder_q-layer.7": 16114.5869, "encoder_q-layer.8": 17244.1855, "encoder_q-layer.9": 16465.8047, "epoch": 0.69, "inbatch_neg_score": 40.1957, "inbatch_pos_score": 40.7188, "learning_rate": 1.6166666666666665e-05, "loss": 2.1905, "norm_diff": 0.05, "num_tokens_overlap": 5.5926, "num_tokens_union": 55.1383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27756.7116, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4305, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7495, "sent_len_1": 66.9929, "sent_len_max_0": 18.845, "sent_len_max_1": 189.775, "stdk": 0.0467, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 70900 }, { "accuracy": 55.7617, "doc_norm": 6.4752, "encoder_q-embeddings": 17562.2617, "encoder_q-layer.0": 12673.5391, "encoder_q-layer.1": 12675.7432, "encoder_q-layer.10": 19851.7207, "encoder_q-layer.11": 41285.9062, "encoder_q-layer.2": 13633.6426, "encoder_q-layer.3": 13770.8604, "encoder_q-layer.4": 14359.4307, "encoder_q-layer.5": 14372.0303, "encoder_q-layer.6": 16118.9307, "encoder_q-layer.7": 17733.6055, "encoder_q-layer.8": 17803.1699, "encoder_q-layer.9": 16649.4258, "epoch": 0.69, "inbatch_neg_score": 40.1684, "inbatch_pos_score": 40.6875, "learning_rate": 1.6111111111111115e-05, "loss": 2.2059, "norm_diff": 0.0457, "num_tokens_overlap": 5.571, "num_tokens_union": 55.0843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27634.8542, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4295, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6878, "sent_len_1": 66.9562, "sent_len_max_0": 18.715, "sent_len_max_1": 190.055, "stdk": 0.0452, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 71000 }, { "accuracy": 57.7148, "doc_norm": 6.475, "encoder_q-embeddings": 17631.7363, "encoder_q-layer.0": 12378.8877, "encoder_q-layer.1": 12717.0625, "encoder_q-layer.10": 20461.707, "encoder_q-layer.11": 37816.418, "encoder_q-layer.2": 13881.9102, "encoder_q-layer.3": 14240.8555, "encoder_q-layer.4": 14737.4043, "encoder_q-layer.5": 14629.6348, "encoder_q-layer.6": 15407.3301, "encoder_q-layer.7": 15740.9453, "encoder_q-layer.8": 18272.8262, "encoder_q-layer.9": 16521.7734, "epoch": 0.69, "inbatch_neg_score": 40.1457, "inbatch_pos_score": 40.6562, "learning_rate": 1.6055555555555557e-05, "loss": 2.2045, "norm_diff": 0.0461, "num_tokens_overlap": 5.5726, "num_tokens_union": 54.9131, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27063.4694, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.429, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7108, "sent_len_1": 66.6662, "sent_len_max_0": 18.9088, "sent_len_max_1": 191.9863, "stdk": 0.0449, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 71100 }, { "accuracy": 56.543, "doc_norm": 6.4729, "encoder_q-embeddings": 19236.9121, "encoder_q-layer.0": 13674.3379, "encoder_q-layer.1": 13731.6299, "encoder_q-layer.10": 23008.3887, "encoder_q-layer.11": 40100.582, "encoder_q-layer.2": 14590.8379, "encoder_q-layer.3": 14551.0166, "encoder_q-layer.4": 15513.6221, "encoder_q-layer.5": 14267.4668, "encoder_q-layer.6": 15504.7861, "encoder_q-layer.7": 15984.0586, "encoder_q-layer.8": 17264.1543, "encoder_q-layer.9": 16732.168, "epoch": 0.7, "inbatch_neg_score": 40.1298, "inbatch_pos_score": 40.6562, "learning_rate": 1.6000000000000003e-05, "loss": 2.2326, "norm_diff": 0.0457, "num_tokens_overlap": 5.5759, "num_tokens_union": 54.9315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28251.8414, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4272, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.71, "sent_len_1": 66.6997, "sent_len_max_0": 18.9088, "sent_len_max_1": 189.3113, "stdk": 0.0452, "stdq": 0.038, "stdqueue_k": 0.0, "step": 71200 }, { "accuracy": 60.0586, "doc_norm": 6.4726, "encoder_q-embeddings": 17027.6797, "encoder_q-layer.0": 12340.0605, "encoder_q-layer.1": 12575.0283, "encoder_q-layer.10": 24582.4219, "encoder_q-layer.11": 42090.2344, "encoder_q-layer.2": 13585.2148, "encoder_q-layer.3": 13647.6914, "encoder_q-layer.4": 14199.4902, "encoder_q-layer.5": 13983.791, "encoder_q-layer.6": 15368.2607, "encoder_q-layer.7": 16452.1758, "encoder_q-layer.8": 17846.1387, "encoder_q-layer.9": 18038.9766, "epoch": 0.7, "inbatch_neg_score": 40.1064, "inbatch_pos_score": 40.625, "learning_rate": 1.5944444444444445e-05, "loss": 2.2038, "norm_diff": 0.0471, "num_tokens_overlap": 5.5889, "num_tokens_union": 55.092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27698.5892, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4255, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7293, "sent_len_1": 66.8873, "sent_len_max_0": 18.9413, "sent_len_max_1": 190.7537, "stdk": 0.0458, "stdq": 0.039, "stdqueue_k": 0.0, "step": 71300 }, { "accuracy": 58.9844, "doc_norm": 6.4683, "encoder_q-embeddings": 16918.5176, "encoder_q-layer.0": 12234.1924, "encoder_q-layer.1": 12810.0215, "encoder_q-layer.10": 21173.6211, "encoder_q-layer.11": 40561.5273, "encoder_q-layer.2": 14144.9395, "encoder_q-layer.3": 14020.541, "encoder_q-layer.4": 14359.1875, "encoder_q-layer.5": 13990.8262, "encoder_q-layer.6": 15005.6211, "encoder_q-layer.7": 17300.5234, "encoder_q-layer.8": 18376.4766, "encoder_q-layer.9": 16521.1113, "epoch": 0.7, "inbatch_neg_score": 40.0733, "inbatch_pos_score": 40.5938, "learning_rate": 1.588888888888889e-05, "loss": 2.1921, "norm_diff": 0.0436, "num_tokens_overlap": 5.5814, "num_tokens_union": 55.1768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27227.5527, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4248, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7271, "sent_len_1": 67.0062, "sent_len_max_0": 18.77, "sent_len_max_1": 188.2788, "stdk": 0.0453, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 71400 }, { "accuracy": 59.5703, "doc_norm": 6.4721, "encoder_q-embeddings": 16868.0645, "encoder_q-layer.0": 11944.1475, "encoder_q-layer.1": 12362.9141, "encoder_q-layer.10": 20795.3691, "encoder_q-layer.11": 41619.3047, "encoder_q-layer.2": 13627.8711, "encoder_q-layer.3": 13472.2832, "encoder_q-layer.4": 13750.4336, "encoder_q-layer.5": 13662.9014, "encoder_q-layer.6": 14679.6445, "encoder_q-layer.7": 16037.623, "encoder_q-layer.8": 17773.9883, "encoder_q-layer.9": 16653.7109, "epoch": 0.7, "inbatch_neg_score": 40.1001, "inbatch_pos_score": 40.625, "learning_rate": 1.5833333333333333e-05, "loss": 2.2324, "norm_diff": 0.0472, "num_tokens_overlap": 5.5782, "num_tokens_union": 54.9886, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27026.8088, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4249, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7439, "sent_len_1": 66.6417, "sent_len_max_0": 18.7175, "sent_len_max_1": 189.875, "stdk": 0.0455, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 71500 }, { "accuracy": 59.082, "doc_norm": 6.4701, "encoder_q-embeddings": 17038.9453, "encoder_q-layer.0": 11852.3594, "encoder_q-layer.1": 12410.4307, "encoder_q-layer.10": 19763.8359, "encoder_q-layer.11": 39001.3477, "encoder_q-layer.2": 13457.0234, "encoder_q-layer.3": 13427.1895, "encoder_q-layer.4": 14053.3779, "encoder_q-layer.5": 13377.3877, "encoder_q-layer.6": 14464.6152, "encoder_q-layer.7": 15716.8545, "encoder_q-layer.8": 18315.0195, "encoder_q-layer.9": 16186.1357, "epoch": 0.7, "inbatch_neg_score": 40.0744, "inbatch_pos_score": 40.5938, "learning_rate": 1.577777777777778e-05, "loss": 2.1965, "norm_diff": 0.0457, "num_tokens_overlap": 5.5869, "num_tokens_union": 54.977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26645.7857, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4243, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7368, "sent_len_1": 66.6943, "sent_len_max_0": 18.7763, "sent_len_max_1": 190.7463, "stdk": 0.0451, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 71600 }, { "accuracy": 59.668, "doc_norm": 6.4669, "encoder_q-embeddings": 17219.7422, "encoder_q-layer.0": 12224.8545, "encoder_q-layer.1": 13206.4795, "encoder_q-layer.10": 23542.9199, "encoder_q-layer.11": 41002.4883, "encoder_q-layer.2": 14173.9004, "encoder_q-layer.3": 14126.6973, "encoder_q-layer.4": 14981.6504, "encoder_q-layer.5": 14801.4199, "encoder_q-layer.6": 16364.7266, "encoder_q-layer.7": 16907.7871, "encoder_q-layer.8": 19035.9355, "encoder_q-layer.9": 17911.5645, "epoch": 0.7, "inbatch_neg_score": 40.0315, "inbatch_pos_score": 40.5625, "learning_rate": 1.5722222222222225e-05, "loss": 2.1931, "norm_diff": 0.0446, "num_tokens_overlap": 5.5768, "num_tokens_union": 55.0768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28245.1545, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4222, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7255, "sent_len_1": 66.8609, "sent_len_max_0": 18.8438, "sent_len_max_1": 188.7488, "stdk": 0.0456, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 71700 }, { "accuracy": 55.7617, "doc_norm": 6.4594, "encoder_q-embeddings": 18301.3145, "encoder_q-layer.0": 12929.7598, "encoder_q-layer.1": 13487.2031, "encoder_q-layer.10": 20451.6816, "encoder_q-layer.11": 38952.9102, "encoder_q-layer.2": 14380.3555, "encoder_q-layer.3": 14759.8203, "encoder_q-layer.4": 15107.0469, "encoder_q-layer.5": 14647.7412, "encoder_q-layer.6": 15249.5303, "encoder_q-layer.7": 16871.668, "encoder_q-layer.8": 18708.0254, "encoder_q-layer.9": 16619.9258, "epoch": 0.7, "inbatch_neg_score": 40.0154, "inbatch_pos_score": 40.5312, "learning_rate": 1.5666666666666667e-05, "loss": 2.1809, "norm_diff": 0.0409, "num_tokens_overlap": 5.5704, "num_tokens_union": 55.0695, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27669.0246, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4185, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7062, "sent_len_1": 66.8842, "sent_len_max_0": 18.8, "sent_len_max_1": 190.145, "stdk": 0.0447, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 71800 }, { "accuracy": 56.543, "doc_norm": 6.4653, "encoder_q-embeddings": 18140.7754, "encoder_q-layer.0": 12806.707, "encoder_q-layer.1": 13654.2656, "encoder_q-layer.10": 21284.7715, "encoder_q-layer.11": 44260.0508, "encoder_q-layer.2": 14933.1494, "encoder_q-layer.3": 15146.4766, "encoder_q-layer.4": 15491.0635, "encoder_q-layer.5": 15153.04, "encoder_q-layer.6": 16432.3438, "encoder_q-layer.7": 17657.0234, "encoder_q-layer.8": 19578.3984, "encoder_q-layer.9": 17833.7695, "epoch": 0.7, "inbatch_neg_score": 40.0074, "inbatch_pos_score": 40.5312, "learning_rate": 1.5611111111111113e-05, "loss": 2.1951, "norm_diff": 0.0461, "num_tokens_overlap": 5.5921, "num_tokens_union": 54.985, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29478.9833, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4192, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7412, "sent_len_1": 66.7872, "sent_len_max_0": 18.9625, "sent_len_max_1": 189.8963, "stdk": 0.0463, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 71900 }, { "accuracy": 55.4688, "doc_norm": 6.4673, "encoder_q-embeddings": 18432.3555, "encoder_q-layer.0": 12881.8779, "encoder_q-layer.1": 13607.5801, "encoder_q-layer.10": 21604.1016, "encoder_q-layer.11": 43300.5469, "encoder_q-layer.2": 14767.1953, "encoder_q-layer.3": 15085.5381, "encoder_q-layer.4": 15201.1729, "encoder_q-layer.5": 14552.2666, "encoder_q-layer.6": 15639.9795, "encoder_q-layer.7": 16186.5459, "encoder_q-layer.8": 19214.9902, "encoder_q-layer.9": 18225.5703, "epoch": 0.7, "inbatch_neg_score": 40.0068, "inbatch_pos_score": 40.5312, "learning_rate": 1.5555555555555555e-05, "loss": 2.1979, "norm_diff": 0.0481, "num_tokens_overlap": 5.5844, "num_tokens_union": 54.9181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28852.5894, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4191, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7363, "sent_len_1": 66.5855, "sent_len_max_0": 18.7975, "sent_len_max_1": 191.2637, "stdk": 0.0455, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 72000 }, { "accuracy": 54.1992, "doc_norm": 6.4626, "encoder_q-embeddings": 18275.6758, "encoder_q-layer.0": 12691.9971, "encoder_q-layer.1": 13020.0908, "encoder_q-layer.10": 22188.4238, "encoder_q-layer.11": 39879.7383, "encoder_q-layer.2": 14396.8965, "encoder_q-layer.3": 14486.2559, "encoder_q-layer.4": 15104.1758, "encoder_q-layer.5": 15260.2793, "encoder_q-layer.6": 15777.6631, "encoder_q-layer.7": 17736.4316, "encoder_q-layer.8": 18713.6562, "encoder_q-layer.9": 16860.3848, "epoch": 0.7, "inbatch_neg_score": 40.0047, "inbatch_pos_score": 40.5, "learning_rate": 1.55e-05, "loss": 2.2266, "norm_diff": 0.0449, "num_tokens_overlap": 5.579, "num_tokens_union": 55.1566, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27760.8662, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4176, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7329, "sent_len_1": 67.0076, "sent_len_max_0": 18.8475, "sent_len_max_1": 191.5213, "stdk": 0.0456, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 72100 }, { "accuracy": 59.4727, "doc_norm": 6.4617, "encoder_q-embeddings": 18349.7305, "encoder_q-layer.0": 12529.7119, "encoder_q-layer.1": 12875.4941, "encoder_q-layer.10": 22114.4082, "encoder_q-layer.11": 38951.2891, "encoder_q-layer.2": 13990.0225, "encoder_q-layer.3": 14033.0801, "encoder_q-layer.4": 14705.0908, "encoder_q-layer.5": 14124.9707, "encoder_q-layer.6": 15848.9834, "encoder_q-layer.7": 16042.4727, "encoder_q-layer.8": 19141.0078, "encoder_q-layer.9": 16951.9629, "epoch": 0.7, "inbatch_neg_score": 39.9878, "inbatch_pos_score": 40.5, "learning_rate": 1.5444444444444446e-05, "loss": 2.2212, "norm_diff": 0.0445, "num_tokens_overlap": 5.574, "num_tokens_union": 55.0908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27653.3248, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4172, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7129, "sent_len_1": 66.927, "sent_len_max_0": 18.8962, "sent_len_max_1": 189.6138, "stdk": 0.0457, "stdq": 0.039, "stdqueue_k": 0.0, "step": 72200 }, { "accuracy": 53.418, "doc_norm": 6.4614, "encoder_q-embeddings": 17373.8887, "encoder_q-layer.0": 12531.6055, "encoder_q-layer.1": 12997.2715, "encoder_q-layer.10": 29628.3203, "encoder_q-layer.11": 46754.6055, "encoder_q-layer.2": 14472.6855, "encoder_q-layer.3": 14593.0752, "encoder_q-layer.4": 15270.0049, "encoder_q-layer.5": 14961.9902, "encoder_q-layer.6": 16362.9004, "encoder_q-layer.7": 18913.8652, "encoder_q-layer.8": 21199.0312, "encoder_q-layer.9": 19586.2246, "epoch": 0.71, "inbatch_neg_score": 39.9619, "inbatch_pos_score": 40.4688, "learning_rate": 1.538888888888889e-05, "loss": 2.2134, "norm_diff": 0.0452, "num_tokens_overlap": 5.5829, "num_tokens_union": 54.8507, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30794.1868, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4162, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7235, "sent_len_1": 66.5013, "sent_len_max_0": 18.8575, "sent_len_max_1": 188.3975, "stdk": 0.0466, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 72300 }, { "accuracy": 59.2773, "doc_norm": 6.4608, "encoder_q-embeddings": 34745.3047, "encoder_q-layer.0": 25022.7832, "encoder_q-layer.1": 25988.3789, "encoder_q-layer.10": 42500.3359, "encoder_q-layer.11": 87126.5078, "encoder_q-layer.2": 28023.7617, "encoder_q-layer.3": 27652.8496, "encoder_q-layer.4": 28679.9844, "encoder_q-layer.5": 28244.4121, "encoder_q-layer.6": 29866.6465, "encoder_q-layer.7": 31471.0566, "encoder_q-layer.8": 35712.8672, "encoder_q-layer.9": 32321.7227, "epoch": 0.71, "inbatch_neg_score": 39.9519, "inbatch_pos_score": 40.5, "learning_rate": 1.5333333333333334e-05, "loss": 2.218, "norm_diff": 0.0458, "num_tokens_overlap": 5.5914, "num_tokens_union": 55.1461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 55183.4134, "preclip_grad_norm_avg": 0.0005, "query_norm": 6.415, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7245, "sent_len_1": 67.028, "sent_len_max_0": 19.0425, "sent_len_max_1": 192.315, "stdk": 0.0443, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 72400 }, { "accuracy": 58.7891, "doc_norm": 6.4594, "encoder_q-embeddings": 16680.4727, "encoder_q-layer.0": 11988.6348, "encoder_q-layer.1": 12533.0752, "encoder_q-layer.10": 19112.8965, "encoder_q-layer.11": 38507.668, "encoder_q-layer.2": 13547.5898, "encoder_q-layer.3": 13411.6924, "encoder_q-layer.4": 13659.1055, "encoder_q-layer.5": 13503.2246, "encoder_q-layer.6": 14487.3594, "encoder_q-layer.7": 15549.6299, "encoder_q-layer.8": 17251.7793, "encoder_q-layer.9": 15637.4639, "epoch": 0.71, "inbatch_neg_score": 39.9518, "inbatch_pos_score": 40.4688, "learning_rate": 1.527777777777778e-05, "loss": 2.2196, "norm_diff": 0.0458, "num_tokens_overlap": 5.5804, "num_tokens_union": 55.0582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26294.6201, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4136, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7089, "sent_len_1": 66.8822, "sent_len_max_0": 18.8975, "sent_len_max_1": 189.7012, "stdk": 0.0454, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 72500 }, { "accuracy": 58.6914, "doc_norm": 6.4581, "encoder_q-embeddings": 17433.7012, "encoder_q-layer.0": 12680.2471, "encoder_q-layer.1": 13020.585, "encoder_q-layer.10": 19489.1602, "encoder_q-layer.11": 40493.6289, "encoder_q-layer.2": 14443.0508, "encoder_q-layer.3": 14016.752, "encoder_q-layer.4": 14560.8672, "encoder_q-layer.5": 14022.4697, "encoder_q-layer.6": 14971.1836, "encoder_q-layer.7": 15709.9561, "encoder_q-layer.8": 17222.2969, "encoder_q-layer.9": 15967.2861, "epoch": 0.71, "inbatch_neg_score": 39.9158, "inbatch_pos_score": 40.4375, "learning_rate": 1.5222222222222224e-05, "loss": 2.2357, "norm_diff": 0.0449, "num_tokens_overlap": 5.5881, "num_tokens_union": 54.9491, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26840.7048, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4132, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7252, "sent_len_1": 66.6324, "sent_len_max_0": 18.7937, "sent_len_max_1": 187.2075, "stdk": 0.0455, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 72600 }, { "accuracy": 58.5938, "doc_norm": 6.4577, "encoder_q-embeddings": 17518.2715, "encoder_q-layer.0": 11845.7734, "encoder_q-layer.1": 12288.9473, "encoder_q-layer.10": 21935.9844, "encoder_q-layer.11": 40300.293, "encoder_q-layer.2": 13462.5117, "encoder_q-layer.3": 13726.5791, "encoder_q-layer.4": 14013.0039, "encoder_q-layer.5": 13358.3428, "encoder_q-layer.6": 15316.5146, "encoder_q-layer.7": 15779.2422, "encoder_q-layer.8": 18396.7852, "encoder_q-layer.9": 16432.2324, "epoch": 0.71, "inbatch_neg_score": 39.8712, "inbatch_pos_score": 40.4062, "learning_rate": 1.5166666666666668e-05, "loss": 2.2249, "norm_diff": 0.0491, "num_tokens_overlap": 5.5779, "num_tokens_union": 55.0227, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27070.8987, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4086, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7154, "sent_len_1": 66.8268, "sent_len_max_0": 18.8113, "sent_len_max_1": 191.35, "stdk": 0.046, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 72700 }, { "accuracy": 56.9336, "doc_norm": 6.4533, "encoder_q-embeddings": 16914.7207, "encoder_q-layer.0": 11888.416, "encoder_q-layer.1": 12506.9883, "encoder_q-layer.10": 19433.0215, "encoder_q-layer.11": 37669.3555, "encoder_q-layer.2": 13865.6055, "encoder_q-layer.3": 14014.7139, "encoder_q-layer.4": 14159.7871, "encoder_q-layer.5": 14029.0391, "encoder_q-layer.6": 15059.6699, "encoder_q-layer.7": 15804.0723, "encoder_q-layer.8": 17522.1992, "encoder_q-layer.9": 16292.3008, "epoch": 0.71, "inbatch_neg_score": 39.8841, "inbatch_pos_score": 40.4062, "learning_rate": 1.5111111111111112e-05, "loss": 2.1925, "norm_diff": 0.0444, "num_tokens_overlap": 5.5929, "num_tokens_union": 55.1333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26486.0313, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.409, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.754, "sent_len_1": 66.8731, "sent_len_max_0": 18.9325, "sent_len_max_1": 188.6662, "stdk": 0.0449, "stdq": 0.039, "stdqueue_k": 0.0, "step": 72800 }, { "accuracy": 57.1289, "doc_norm": 6.4543, "encoder_q-embeddings": 17338.832, "encoder_q-layer.0": 12684.7295, "encoder_q-layer.1": 12886.9375, "encoder_q-layer.10": 23108.0059, "encoder_q-layer.11": 45788.2695, "encoder_q-layer.2": 13877.5186, "encoder_q-layer.3": 14237.2383, "encoder_q-layer.4": 14654.8809, "encoder_q-layer.5": 14156.3789, "encoder_q-layer.6": 15287.7207, "encoder_q-layer.7": 16059.9326, "encoder_q-layer.8": 19145.2578, "encoder_q-layer.9": 16905.8984, "epoch": 0.71, "inbatch_neg_score": 39.8751, "inbatch_pos_score": 40.4062, "learning_rate": 1.5055555555555556e-05, "loss": 2.1687, "norm_diff": 0.0455, "num_tokens_overlap": 5.5819, "num_tokens_union": 55.1325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28549.5919, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4087, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7082, "sent_len_1": 67.0218, "sent_len_max_0": 18.87, "sent_len_max_1": 191.61, "stdk": 0.0457, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 72900 }, { "accuracy": 56.543, "doc_norm": 6.4551, "encoder_q-embeddings": 17398.5352, "encoder_q-layer.0": 12277.7578, "encoder_q-layer.1": 12784.1465, "encoder_q-layer.10": 21085.4492, "encoder_q-layer.11": 36703.8906, "encoder_q-layer.2": 14194.8379, "encoder_q-layer.3": 13983.7998, "encoder_q-layer.4": 14161.3379, "encoder_q-layer.5": 14205.9492, "encoder_q-layer.6": 15248.5273, "encoder_q-layer.7": 16774.2227, "encoder_q-layer.8": 18315.1211, "encoder_q-layer.9": 17814.6641, "epoch": 0.71, "inbatch_neg_score": 39.882, "inbatch_pos_score": 40.4062, "learning_rate": 1.5e-05, "loss": 2.1906, "norm_diff": 0.0463, "num_tokens_overlap": 5.5864, "num_tokens_union": 55.0038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26869.7754, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4087, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7386, "sent_len_1": 66.7814, "sent_len_max_0": 18.9037, "sent_len_max_1": 191.1887, "stdk": 0.0466, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 73000 }, { "accuracy": 56.6406, "doc_norm": 6.4528, "encoder_q-embeddings": 16418.4551, "encoder_q-layer.0": 11792.1035, "encoder_q-layer.1": 12203.0664, "encoder_q-layer.10": 19509.6738, "encoder_q-layer.11": 37654.5234, "encoder_q-layer.2": 13160.7012, "encoder_q-layer.3": 13455.3994, "encoder_q-layer.4": 14010.9678, "encoder_q-layer.5": 14084.1641, "encoder_q-layer.6": 14983.8555, "encoder_q-layer.7": 15966.4727, "encoder_q-layer.8": 17306.7832, "encoder_q-layer.9": 16539.5898, "epoch": 0.71, "inbatch_neg_score": 39.854, "inbatch_pos_score": 40.375, "learning_rate": 1.4944444444444444e-05, "loss": 2.1853, "norm_diff": 0.0463, "num_tokens_overlap": 5.5782, "num_tokens_union": 54.9656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26223.3904, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4065, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.693, "sent_len_1": 66.7665, "sent_len_max_0": 18.7937, "sent_len_max_1": 191.535, "stdk": 0.0444, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 73100 }, { "accuracy": 61.3281, "doc_norm": 6.4513, "encoder_q-embeddings": 15992.6406, "encoder_q-layer.0": 11842.1816, "encoder_q-layer.1": 12019.5713, "encoder_q-layer.10": 19616.584, "encoder_q-layer.11": 39639.5312, "encoder_q-layer.2": 13157.2188, "encoder_q-layer.3": 13048.793, "encoder_q-layer.4": 13570.7939, "encoder_q-layer.5": 13954.8525, "encoder_q-layer.6": 14708.6719, "encoder_q-layer.7": 15700.292, "encoder_q-layer.8": 17502.9922, "encoder_q-layer.9": 15799.7861, "epoch": 0.71, "inbatch_neg_score": 39.8461, "inbatch_pos_score": 40.375, "learning_rate": 1.4888888888888888e-05, "loss": 2.2071, "norm_diff": 0.0437, "num_tokens_overlap": 5.5814, "num_tokens_union": 54.9905, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26436.2546, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4076, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7382, "sent_len_1": 66.7173, "sent_len_max_0": 18.79, "sent_len_max_1": 187.3925, "stdk": 0.0454, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 73200 }, { "accuracy": 57.9102, "doc_norm": 6.455, "encoder_q-embeddings": 18170.7598, "encoder_q-layer.0": 12788.0361, "encoder_q-layer.1": 12802.3828, "encoder_q-layer.10": 20321.8633, "encoder_q-layer.11": 39762.0625, "encoder_q-layer.2": 13965.8535, "encoder_q-layer.3": 13853.668, "encoder_q-layer.4": 14476.7412, "encoder_q-layer.5": 14481.1895, "encoder_q-layer.6": 15320.4258, "encoder_q-layer.7": 16042.1279, "encoder_q-layer.8": 18148.8594, "encoder_q-layer.9": 16821.7832, "epoch": 0.72, "inbatch_neg_score": 39.8341, "inbatch_pos_score": 40.375, "learning_rate": 1.4833333333333336e-05, "loss": 2.2322, "norm_diff": 0.0491, "num_tokens_overlap": 5.5786, "num_tokens_union": 54.9835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27822.5601, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4059, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.725, "sent_len_1": 66.7641, "sent_len_max_0": 18.8412, "sent_len_max_1": 189.1712, "stdk": 0.0465, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 73300 }, { "accuracy": 56.8359, "doc_norm": 6.4483, "encoder_q-embeddings": 17495.8105, "encoder_q-layer.0": 12369.0703, "encoder_q-layer.1": 12751.7197, "encoder_q-layer.10": 19932.6328, "encoder_q-layer.11": 38335.0703, "encoder_q-layer.2": 13865.2559, "encoder_q-layer.3": 13719.1904, "encoder_q-layer.4": 14272.918, "encoder_q-layer.5": 14449.6797, "encoder_q-layer.6": 14943.0469, "encoder_q-layer.7": 15503.2744, "encoder_q-layer.8": 18642.2441, "encoder_q-layer.9": 16351.6953, "epoch": 0.72, "inbatch_neg_score": 39.8315, "inbatch_pos_score": 40.3438, "learning_rate": 1.477777777777778e-05, "loss": 2.2232, "norm_diff": 0.0448, "num_tokens_overlap": 5.5779, "num_tokens_union": 54.971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26720.2107, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4036, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7187, "sent_len_1": 66.7123, "sent_len_max_0": 18.8313, "sent_len_max_1": 190.2325, "stdk": 0.0445, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 73400 }, { "accuracy": 55.4688, "doc_norm": 6.4541, "encoder_q-embeddings": 17915.3809, "encoder_q-layer.0": 12534.2998, "encoder_q-layer.1": 13103.9385, "encoder_q-layer.10": 20998.6543, "encoder_q-layer.11": 40644.8008, "encoder_q-layer.2": 14626.8037, "encoder_q-layer.3": 14594.2441, "encoder_q-layer.4": 14830.542, "encoder_q-layer.5": 14449.0723, "encoder_q-layer.6": 15591.0078, "encoder_q-layer.7": 16213.5684, "encoder_q-layer.8": 18482.8535, "encoder_q-layer.9": 17061.3965, "epoch": 0.72, "inbatch_neg_score": 39.8175, "inbatch_pos_score": 40.3438, "learning_rate": 1.4722222222222224e-05, "loss": 2.2355, "norm_diff": 0.0496, "num_tokens_overlap": 5.5668, "num_tokens_union": 55.0167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27627.1229, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4045, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6862, "sent_len_1": 66.8083, "sent_len_max_0": 18.8487, "sent_len_max_1": 190.1238, "stdk": 0.0468, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 73500 }, { "accuracy": 57.5195, "doc_norm": 6.4516, "encoder_q-embeddings": 16607.3203, "encoder_q-layer.0": 11959.3105, "encoder_q-layer.1": 12388.1406, "encoder_q-layer.10": 19599.8184, "encoder_q-layer.11": 36853.8555, "encoder_q-layer.2": 13452.5908, "encoder_q-layer.3": 13211.0996, "encoder_q-layer.4": 13918.5576, "encoder_q-layer.5": 14033.4375, "encoder_q-layer.6": 14850.8555, "encoder_q-layer.7": 15390.5225, "encoder_q-layer.8": 16643.418, "encoder_q-layer.9": 15437.7988, "epoch": 0.72, "inbatch_neg_score": 39.8281, "inbatch_pos_score": 40.3438, "learning_rate": 1.4666666666666668e-05, "loss": 2.225, "norm_diff": 0.0471, "num_tokens_overlap": 5.5707, "num_tokens_union": 54.9642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25878.6055, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4045, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7125, "sent_len_1": 66.6758, "sent_len_max_0": 18.9237, "sent_len_max_1": 190.3262, "stdk": 0.0462, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 73600 }, { "accuracy": 59.375, "doc_norm": 6.4527, "encoder_q-embeddings": 17305.7793, "encoder_q-layer.0": 12063.4023, "encoder_q-layer.1": 12575.793, "encoder_q-layer.10": 19989.0996, "encoder_q-layer.11": 43059.8672, "encoder_q-layer.2": 13457.5762, "encoder_q-layer.3": 13351.2295, "encoder_q-layer.4": 14121.1904, "encoder_q-layer.5": 14042.1074, "encoder_q-layer.6": 14899.6631, "encoder_q-layer.7": 16326.7207, "encoder_q-layer.8": 17362.5762, "encoder_q-layer.9": 15633.0605, "epoch": 0.72, "inbatch_neg_score": 39.8281, "inbatch_pos_score": 40.375, "learning_rate": 1.4611111111111112e-05, "loss": 2.1928, "norm_diff": 0.0478, "num_tokens_overlap": 5.5709, "num_tokens_union": 55.0496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27271.2918, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.405, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7259, "sent_len_1": 66.7805, "sent_len_max_0": 18.7563, "sent_len_max_1": 189.6887, "stdk": 0.0459, "stdq": 0.039, "stdqueue_k": 0.0, "step": 73700 }, { "accuracy": 58.3984, "doc_norm": 6.4535, "encoder_q-embeddings": 17557.6973, "encoder_q-layer.0": 12626.5889, "encoder_q-layer.1": 13545.6299, "encoder_q-layer.10": 21957.9453, "encoder_q-layer.11": 37843.082, "encoder_q-layer.2": 14914.29, "encoder_q-layer.3": 14961.8896, "encoder_q-layer.4": 15184.0186, "encoder_q-layer.5": 15132.8193, "encoder_q-layer.6": 15992.3965, "encoder_q-layer.7": 16663.3105, "encoder_q-layer.8": 18332.1504, "encoder_q-layer.9": 17209.1934, "epoch": 0.72, "inbatch_neg_score": 39.8177, "inbatch_pos_score": 40.3438, "learning_rate": 1.4555555555555556e-05, "loss": 2.2312, "norm_diff": 0.0495, "num_tokens_overlap": 5.5793, "num_tokens_union": 54.8278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27410.8628, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4039, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.699, "sent_len_1": 66.5291, "sent_len_max_0": 18.8475, "sent_len_max_1": 190.6287, "stdk": 0.0461, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 73800 }, { "accuracy": 56.0547, "doc_norm": 6.447, "encoder_q-embeddings": 17877.4766, "encoder_q-layer.0": 12658.6738, "encoder_q-layer.1": 12850.0137, "encoder_q-layer.10": 20401.7559, "encoder_q-layer.11": 41514.3398, "encoder_q-layer.2": 14074.2578, "encoder_q-layer.3": 14226.8955, "encoder_q-layer.4": 14448.7217, "encoder_q-layer.5": 14456.084, "encoder_q-layer.6": 15522.0625, "encoder_q-layer.7": 16818.1836, "encoder_q-layer.8": 18863.4883, "encoder_q-layer.9": 16646.3594, "epoch": 0.72, "inbatch_neg_score": 39.8017, "inbatch_pos_score": 40.3125, "learning_rate": 1.45e-05, "loss": 2.1851, "norm_diff": 0.0446, "num_tokens_overlap": 5.5782, "num_tokens_union": 55.006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27690.9135, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4024, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7356, "sent_len_1": 66.6809, "sent_len_max_0": 18.8287, "sent_len_max_1": 187.2975, "stdk": 0.0451, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 73900 }, { "accuracy": 57.2266, "doc_norm": 6.4502, "encoder_q-embeddings": 16675.0898, "encoder_q-layer.0": 12069.1113, "encoder_q-layer.1": 12590.9072, "encoder_q-layer.10": 21919.543, "encoder_q-layer.11": 43209.8984, "encoder_q-layer.2": 13938.9365, "encoder_q-layer.3": 13704.4492, "encoder_q-layer.4": 14563.5654, "encoder_q-layer.5": 14469.4355, "encoder_q-layer.6": 15169.2314, "encoder_q-layer.7": 16448.2266, "encoder_q-layer.8": 17374.6484, "encoder_q-layer.9": 16354.6836, "epoch": 0.72, "inbatch_neg_score": 39.8299, "inbatch_pos_score": 40.3438, "learning_rate": 1.4444444444444444e-05, "loss": 2.1824, "norm_diff": 0.0471, "num_tokens_overlap": 5.5728, "num_tokens_union": 54.9084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27813.3755, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.4032, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7232, "sent_len_1": 66.6313, "sent_len_max_0": 18.8513, "sent_len_max_1": 189.97, "stdk": 0.0455, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 74000 }, { "accuracy": 58.5938, "doc_norm": 6.4488, "encoder_q-embeddings": 17086.0996, "encoder_q-layer.0": 12341.3047, "encoder_q-layer.1": 12797.6455, "encoder_q-layer.10": 19175.4785, "encoder_q-layer.11": 38193.2578, "encoder_q-layer.2": 14419.9766, "encoder_q-layer.3": 13860.6875, "encoder_q-layer.4": 14103.999, "encoder_q-layer.5": 13642.1826, "encoder_q-layer.6": 14355.3018, "encoder_q-layer.7": 15080.2627, "encoder_q-layer.8": 16788.9805, "encoder_q-layer.9": 15710.6357, "epoch": 0.72, "inbatch_neg_score": 39.8203, "inbatch_pos_score": 40.3438, "learning_rate": 1.438888888888889e-05, "loss": 2.2183, "norm_diff": 0.0461, "num_tokens_overlap": 5.5679, "num_tokens_union": 54.9129, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26426.046, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4028, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6988, "sent_len_1": 66.693, "sent_len_max_0": 18.8575, "sent_len_max_1": 189.1475, "stdk": 0.0458, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 74100 }, { "accuracy": 57.1289, "doc_norm": 6.4483, "encoder_q-embeddings": 17899.9512, "encoder_q-layer.0": 12857.5508, "encoder_q-layer.1": 13670.4053, "encoder_q-layer.10": 19201.8613, "encoder_q-layer.11": 37247.2383, "encoder_q-layer.2": 14426.6387, "encoder_q-layer.3": 14548.7432, "encoder_q-layer.4": 15114.3789, "encoder_q-layer.5": 14426.4736, "encoder_q-layer.6": 15344.4473, "encoder_q-layer.7": 16521.9688, "encoder_q-layer.8": 17907.4082, "encoder_q-layer.9": 16692.6523, "epoch": 0.72, "inbatch_neg_score": 39.8028, "inbatch_pos_score": 40.3125, "learning_rate": 1.4333333333333334e-05, "loss": 2.2223, "norm_diff": 0.0459, "num_tokens_overlap": 5.5893, "num_tokens_union": 55.0778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27011.3007, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4024, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7138, "sent_len_1": 66.9209, "sent_len_max_0": 18.915, "sent_len_max_1": 192.0175, "stdk": 0.0462, "stdq": 0.039, "stdqueue_k": 0.0, "step": 74200 }, { "accuracy": 60.4492, "doc_norm": 6.4483, "encoder_q-embeddings": 16570.8301, "encoder_q-layer.0": 11870.3232, "encoder_q-layer.1": 12225.5977, "encoder_q-layer.10": 19907.9355, "encoder_q-layer.11": 40014.5039, "encoder_q-layer.2": 13084.6016, "encoder_q-layer.3": 13416.9209, "encoder_q-layer.4": 13624.1748, "encoder_q-layer.5": 13623.127, "encoder_q-layer.6": 15116.8311, "encoder_q-layer.7": 15804.5078, "encoder_q-layer.8": 17419.3047, "encoder_q-layer.9": 16024.1562, "epoch": 0.73, "inbatch_neg_score": 39.7975, "inbatch_pos_score": 40.3438, "learning_rate": 1.427777777777778e-05, "loss": 2.2043, "norm_diff": 0.046, "num_tokens_overlap": 5.5647, "num_tokens_union": 54.9089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26655.5735, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.4023, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.725, "sent_len_1": 66.5798, "sent_len_max_0": 18.9088, "sent_len_max_1": 188.4688, "stdk": 0.0446, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 74300 }, { "accuracy": 57.4219, "doc_norm": 6.4465, "encoder_q-embeddings": 17315.9824, "encoder_q-layer.0": 12408.2275, "encoder_q-layer.1": 12696.2676, "encoder_q-layer.10": 22861.8496, "encoder_q-layer.11": 39591.2891, "encoder_q-layer.2": 13687.8809, "encoder_q-layer.3": 13687.6055, "encoder_q-layer.4": 13725.6426, "encoder_q-layer.5": 13392.1172, "encoder_q-layer.6": 14148.8623, "encoder_q-layer.7": 15198.0527, "encoder_q-layer.8": 17882.9492, "encoder_q-layer.9": 16609.1719, "epoch": 0.73, "inbatch_neg_score": 39.763, "inbatch_pos_score": 40.2812, "learning_rate": 1.4222222222222224e-05, "loss": 2.225, "norm_diff": 0.0479, "num_tokens_overlap": 5.5664, "num_tokens_union": 54.9421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26879.9727, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3986, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7265, "sent_len_1": 66.6622, "sent_len_max_0": 18.8912, "sent_len_max_1": 189.8963, "stdk": 0.0451, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 74400 }, { "accuracy": 56.25, "doc_norm": 6.4496, "encoder_q-embeddings": 18007.4746, "encoder_q-layer.0": 12698.417, "encoder_q-layer.1": 13084.626, "encoder_q-layer.10": 22421.9043, "encoder_q-layer.11": 41796.8242, "encoder_q-layer.2": 14063.0742, "encoder_q-layer.3": 14410.1406, "encoder_q-layer.4": 14550.5625, "encoder_q-layer.5": 14407.9033, "encoder_q-layer.6": 15315.3525, "encoder_q-layer.7": 16142.6035, "encoder_q-layer.8": 17816.8203, "encoder_q-layer.9": 17057.6758, "epoch": 0.73, "inbatch_neg_score": 39.7477, "inbatch_pos_score": 40.2812, "learning_rate": 1.4166666666666668e-05, "loss": 2.222, "norm_diff": 0.0508, "num_tokens_overlap": 5.5793, "num_tokens_union": 54.9228, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28078.0482, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3988, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7195, "sent_len_1": 66.633, "sent_len_max_0": 18.7525, "sent_len_max_1": 189.4263, "stdk": 0.0464, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 74500 }, { "accuracy": 53.418, "doc_norm": 6.4396, "encoder_q-embeddings": 18976.5586, "encoder_q-layer.0": 13308.6689, "encoder_q-layer.1": 13551.9678, "encoder_q-layer.10": 21628.418, "encoder_q-layer.11": 46253.7188, "encoder_q-layer.2": 15090.834, "encoder_q-layer.3": 15085.9424, "encoder_q-layer.4": 15364.7529, "encoder_q-layer.5": 15275.6758, "encoder_q-layer.6": 16527.5957, "encoder_q-layer.7": 18299.4785, "encoder_q-layer.8": 19538.752, "encoder_q-layer.9": 18578.7148, "epoch": 0.73, "inbatch_neg_score": 39.73, "inbatch_pos_score": 40.2188, "learning_rate": 1.4111111111111112e-05, "loss": 2.2014, "norm_diff": 0.0441, "num_tokens_overlap": 5.5744, "num_tokens_union": 55.0555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29578.79, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3955, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7253, "sent_len_1": 66.8457, "sent_len_max_0": 18.8425, "sent_len_max_1": 190.1562, "stdk": 0.0446, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 74600 }, { "accuracy": 58.6914, "doc_norm": 6.4434, "encoder_q-embeddings": 18444.1816, "encoder_q-layer.0": 13044.46, "encoder_q-layer.1": 13107.998, "encoder_q-layer.10": 21405.459, "encoder_q-layer.11": 40921.9297, "encoder_q-layer.2": 14260.2812, "encoder_q-layer.3": 14450.0029, "encoder_q-layer.4": 15049.8633, "encoder_q-layer.5": 14465.3848, "encoder_q-layer.6": 15595.418, "encoder_q-layer.7": 16948.3125, "encoder_q-layer.8": 18124.3574, "encoder_q-layer.9": 17173.2871, "epoch": 0.73, "inbatch_neg_score": 39.7171, "inbatch_pos_score": 40.25, "learning_rate": 1.4055555555555556e-05, "loss": 2.2672, "norm_diff": 0.0459, "num_tokens_overlap": 5.5722, "num_tokens_union": 54.9038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28612.0822, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3975, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7079, "sent_len_1": 66.6357, "sent_len_max_0": 18.8575, "sent_len_max_1": 190.4487, "stdk": 0.0456, "stdq": 0.039, "stdqueue_k": 0.0, "step": 74700 }, { "accuracy": 56.9336, "doc_norm": 6.4399, "encoder_q-embeddings": 16962.4766, "encoder_q-layer.0": 11960.6016, "encoder_q-layer.1": 12501.5713, "encoder_q-layer.10": 20012.2676, "encoder_q-layer.11": 40560.2969, "encoder_q-layer.2": 13796.4355, "encoder_q-layer.3": 14080.8184, "encoder_q-layer.4": 14823.5635, "encoder_q-layer.5": 14370.9277, "encoder_q-layer.6": 14908.7031, "encoder_q-layer.7": 16006.4443, "encoder_q-layer.8": 17922.5918, "encoder_q-layer.9": 16411.8008, "epoch": 0.73, "inbatch_neg_score": 39.6878, "inbatch_pos_score": 40.1875, "learning_rate": 1.4000000000000001e-05, "loss": 2.1805, "norm_diff": 0.0468, "num_tokens_overlap": 5.5818, "num_tokens_union": 55.0133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27092.2746, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3931, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7349, "sent_len_1": 66.7951, "sent_len_max_0": 18.7138, "sent_len_max_1": 188.8988, "stdk": 0.0453, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 74800 }, { "accuracy": 55.8594, "doc_norm": 6.4412, "encoder_q-embeddings": 19694.2188, "encoder_q-layer.0": 13608.3848, "encoder_q-layer.1": 13951.5889, "encoder_q-layer.10": 21347.5918, "encoder_q-layer.11": 40604.5742, "encoder_q-layer.2": 15180.7881, "encoder_q-layer.3": 15373.8105, "encoder_q-layer.4": 16041.7891, "encoder_q-layer.5": 15593.8242, "encoder_q-layer.6": 16750.666, "encoder_q-layer.7": 17240.2012, "encoder_q-layer.8": 19589.5352, "encoder_q-layer.9": 16537.4316, "epoch": 0.73, "inbatch_neg_score": 39.6669, "inbatch_pos_score": 40.1875, "learning_rate": 1.3944444444444446e-05, "loss": 2.1755, "norm_diff": 0.048, "num_tokens_overlap": 5.5835, "num_tokens_union": 54.9721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28679.7769, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3931, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7049, "sent_len_1": 66.7683, "sent_len_max_0": 18.755, "sent_len_max_1": 189.5913, "stdk": 0.0463, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 74900 }, { "accuracy": 56.9336, "doc_norm": 6.4351, "encoder_q-embeddings": 17274.6562, "encoder_q-layer.0": 12281.6201, "encoder_q-layer.1": 12680.7393, "encoder_q-layer.10": 20117.6641, "encoder_q-layer.11": 36944.3633, "encoder_q-layer.2": 13741.0332, "encoder_q-layer.3": 14097.5332, "encoder_q-layer.4": 14670.6748, "encoder_q-layer.5": 14420.418, "encoder_q-layer.6": 15244.8311, "encoder_q-layer.7": 17104.0703, "encoder_q-layer.8": 17721.748, "encoder_q-layer.9": 16390.6523, "epoch": 0.73, "inbatch_neg_score": 39.6313, "inbatch_pos_score": 40.1562, "learning_rate": 1.388888888888889e-05, "loss": 2.2102, "norm_diff": 0.0454, "num_tokens_overlap": 5.5911, "num_tokens_union": 55.0643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26659.1027, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3897, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7131, "sent_len_1": 66.8646, "sent_len_max_0": 18.8038, "sent_len_max_1": 188.5725, "stdk": 0.0459, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 75000 }, { "accuracy": 57.6172, "doc_norm": 6.4361, "encoder_q-embeddings": 17945.2422, "encoder_q-layer.0": 12622.1328, "encoder_q-layer.1": 13201.6328, "encoder_q-layer.10": 19749.4727, "encoder_q-layer.11": 38814.0703, "encoder_q-layer.2": 14540.8887, "encoder_q-layer.3": 14271.373, "encoder_q-layer.4": 14812.7705, "encoder_q-layer.5": 14442.168, "encoder_q-layer.6": 15188.9541, "encoder_q-layer.7": 16403.1328, "encoder_q-layer.8": 18444.0996, "encoder_q-layer.9": 16385.6367, "epoch": 0.73, "inbatch_neg_score": 39.6169, "inbatch_pos_score": 40.125, "learning_rate": 1.3833333333333334e-05, "loss": 2.184, "norm_diff": 0.0462, "num_tokens_overlap": 5.5853, "num_tokens_union": 54.9817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27404.6923, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3899, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7223, "sent_len_1": 66.7613, "sent_len_max_0": 18.9513, "sent_len_max_1": 190.92, "stdk": 0.0463, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 75100 }, { "accuracy": 56.6406, "doc_norm": 6.4353, "encoder_q-embeddings": 18755.873, "encoder_q-layer.0": 13143.1992, "encoder_q-layer.1": 13550.9375, "encoder_q-layer.10": 20610.0586, "encoder_q-layer.11": 39374.3828, "encoder_q-layer.2": 14676.1035, "encoder_q-layer.3": 14852.2881, "encoder_q-layer.4": 15189.1182, "encoder_q-layer.5": 14469.9375, "encoder_q-layer.6": 15229.3467, "encoder_q-layer.7": 15648.6787, "encoder_q-layer.8": 18823.8984, "encoder_q-layer.9": 16747.877, "epoch": 0.73, "inbatch_neg_score": 39.6256, "inbatch_pos_score": 40.1562, "learning_rate": 1.3777777777777778e-05, "loss": 2.2143, "norm_diff": 0.046, "num_tokens_overlap": 5.5794, "num_tokens_union": 55.11, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27794.4301, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3893, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7234, "sent_len_1": 66.9536, "sent_len_max_0": 18.7987, "sent_len_max_1": 190.1513, "stdk": 0.0458, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 75200 }, { "accuracy": 55.3711, "doc_norm": 6.4315, "encoder_q-embeddings": 18132.125, "encoder_q-layer.0": 12790.3613, "encoder_q-layer.1": 13343.2432, "encoder_q-layer.10": 20916.9551, "encoder_q-layer.11": 42983.3594, "encoder_q-layer.2": 14722.0596, "encoder_q-layer.3": 14803.6377, "encoder_q-layer.4": 15379.0244, "encoder_q-layer.5": 14701.3506, "encoder_q-layer.6": 15782.3613, "encoder_q-layer.7": 16622.748, "encoder_q-layer.8": 18341.6348, "encoder_q-layer.9": 17375.0996, "epoch": 0.74, "inbatch_neg_score": 39.6038, "inbatch_pos_score": 40.125, "learning_rate": 1.3722222222222222e-05, "loss": 2.1952, "norm_diff": 0.0446, "num_tokens_overlap": 5.5829, "num_tokens_union": 55.035, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28369.618, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3869, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6894, "sent_len_1": 66.8855, "sent_len_max_0": 18.785, "sent_len_max_1": 190.7262, "stdk": 0.0449, "stdq": 0.039, "stdqueue_k": 0.0, "step": 75300 }, { "accuracy": 55.8594, "doc_norm": 6.431, "encoder_q-embeddings": 18068.9395, "encoder_q-layer.0": 12536.0674, "encoder_q-layer.1": 13147.5928, "encoder_q-layer.10": 24041.2754, "encoder_q-layer.11": 43660.2969, "encoder_q-layer.2": 14296.1113, "encoder_q-layer.3": 14481.4404, "encoder_q-layer.4": 14510.6064, "encoder_q-layer.5": 14707.1787, "encoder_q-layer.6": 15451.5205, "encoder_q-layer.7": 16776.6816, "encoder_q-layer.8": 18567.832, "encoder_q-layer.9": 17926.7969, "epoch": 0.74, "inbatch_neg_score": 39.5828, "inbatch_pos_score": 40.0938, "learning_rate": 1.3666666666666666e-05, "loss": 2.1732, "norm_diff": 0.0451, "num_tokens_overlap": 5.575, "num_tokens_union": 54.9012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28629.63, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3859, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6806, "sent_len_1": 66.6376, "sent_len_max_0": 18.7725, "sent_len_max_1": 189.2287, "stdk": 0.0457, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 75400 }, { "accuracy": 57.2266, "doc_norm": 6.435, "encoder_q-embeddings": 16805.498, "encoder_q-layer.0": 12184.7207, "encoder_q-layer.1": 12577.3359, "encoder_q-layer.10": 20337.3965, "encoder_q-layer.11": 41361.4336, "encoder_q-layer.2": 13898.9502, "encoder_q-layer.3": 13826.4404, "encoder_q-layer.4": 14742.2725, "encoder_q-layer.5": 14339.3662, "encoder_q-layer.6": 15538.1191, "encoder_q-layer.7": 16919.1992, "encoder_q-layer.8": 18797.418, "encoder_q-layer.9": 16861.2285, "epoch": 0.74, "inbatch_neg_score": 39.5814, "inbatch_pos_score": 40.125, "learning_rate": 1.3611111111111111e-05, "loss": 2.1725, "norm_diff": 0.0478, "num_tokens_overlap": 5.5872, "num_tokens_union": 54.984, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27400.3016, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3872, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7332, "sent_len_1": 66.7189, "sent_len_max_0": 18.7575, "sent_len_max_1": 189.3562, "stdk": 0.0458, "stdq": 0.039, "stdqueue_k": 0.0, "step": 75500 }, { "accuracy": 59.668, "doc_norm": 6.435, "encoder_q-embeddings": 17905.4492, "encoder_q-layer.0": 12401.0498, "encoder_q-layer.1": 12811.0957, "encoder_q-layer.10": 20868.5137, "encoder_q-layer.11": 43295.3516, "encoder_q-layer.2": 13742.4971, "encoder_q-layer.3": 14663.9492, "encoder_q-layer.4": 14708.2725, "encoder_q-layer.5": 14113.1699, "encoder_q-layer.6": 15094.7656, "encoder_q-layer.7": 15792.2617, "encoder_q-layer.8": 17618.584, "encoder_q-layer.9": 16682.5293, "epoch": 0.74, "inbatch_neg_score": 39.6198, "inbatch_pos_score": 40.125, "learning_rate": 1.3555555555555557e-05, "loss": 2.2184, "norm_diff": 0.0468, "num_tokens_overlap": 5.5838, "num_tokens_union": 54.9375, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28162.6659, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3882, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.726, "sent_len_1": 66.6509, "sent_len_max_0": 18.805, "sent_len_max_1": 189.9288, "stdk": 0.0449, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 75600 }, { "accuracy": 58.2031, "doc_norm": 6.4343, "encoder_q-embeddings": 17803.8555, "encoder_q-layer.0": 12682.874, "encoder_q-layer.1": 12751.668, "encoder_q-layer.10": 20129.2695, "encoder_q-layer.11": 36249.1172, "encoder_q-layer.2": 14041.915, "encoder_q-layer.3": 13853.9326, "encoder_q-layer.4": 14524.6162, "encoder_q-layer.5": 14155.9404, "encoder_q-layer.6": 15107.4766, "encoder_q-layer.7": 16345.0518, "encoder_q-layer.8": 17698.5508, "encoder_q-layer.9": 15891.1406, "epoch": 0.74, "inbatch_neg_score": 39.5968, "inbatch_pos_score": 40.125, "learning_rate": 1.3500000000000001e-05, "loss": 2.1716, "norm_diff": 0.0474, "num_tokens_overlap": 5.5891, "num_tokens_union": 55.0177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26675.8707, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3869, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7279, "sent_len_1": 66.8179, "sent_len_max_0": 18.8888, "sent_len_max_1": 186.4013, "stdk": 0.0446, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 75700 }, { "accuracy": 58.3008, "doc_norm": 6.4295, "encoder_q-embeddings": 17849.2812, "encoder_q-layer.0": 12452.6201, "encoder_q-layer.1": 12966.377, "encoder_q-layer.10": 25591.8672, "encoder_q-layer.11": 45922.7617, "encoder_q-layer.2": 14163.5557, "encoder_q-layer.3": 14132.9053, "encoder_q-layer.4": 14705.9473, "encoder_q-layer.5": 14895.7549, "encoder_q-layer.6": 15456.8955, "encoder_q-layer.7": 16017.6348, "encoder_q-layer.8": 18334.4336, "encoder_q-layer.9": 17848.0273, "epoch": 0.74, "inbatch_neg_score": 39.5665, "inbatch_pos_score": 40.0938, "learning_rate": 1.3444444444444445e-05, "loss": 2.2008, "norm_diff": 0.0447, "num_tokens_overlap": 5.6018, "num_tokens_union": 55.1817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28697.2763, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3848, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7572, "sent_len_1": 67.0408, "sent_len_max_0": 18.8875, "sent_len_max_1": 190.3063, "stdk": 0.0456, "stdq": 0.0398, "stdqueue_k": 0.0, "step": 75800 }, { "accuracy": 58.7891, "doc_norm": 6.4323, "encoder_q-embeddings": 16814.1875, "encoder_q-layer.0": 12162.6533, "encoder_q-layer.1": 12693.7549, "encoder_q-layer.10": 19443.9238, "encoder_q-layer.11": 38425.3164, "encoder_q-layer.2": 13942.2754, "encoder_q-layer.3": 13890.415, "encoder_q-layer.4": 14446.6025, "encoder_q-layer.5": 14058.6494, "encoder_q-layer.6": 14833.4561, "encoder_q-layer.7": 16157.9434, "encoder_q-layer.8": 17949.418, "encoder_q-layer.9": 16334.6514, "epoch": 0.74, "inbatch_neg_score": 39.5597, "inbatch_pos_score": 40.0938, "learning_rate": 1.338888888888889e-05, "loss": 2.1837, "norm_diff": 0.048, "num_tokens_overlap": 5.5777, "num_tokens_union": 54.9704, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26613.6538, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3843, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7417, "sent_len_1": 66.6845, "sent_len_max_0": 18.9262, "sent_len_max_1": 192.3237, "stdk": 0.046, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 75900 }, { "accuracy": 56.9336, "doc_norm": 6.4322, "encoder_q-embeddings": 17546.3359, "encoder_q-layer.0": 12493.9443, "encoder_q-layer.1": 12934.5312, "encoder_q-layer.10": 27124.166, "encoder_q-layer.11": 46078.3828, "encoder_q-layer.2": 14199.3496, "encoder_q-layer.3": 14036.6201, "encoder_q-layer.4": 14427.9609, "encoder_q-layer.5": 14162.3535, "encoder_q-layer.6": 14993.708, "encoder_q-layer.7": 17967.9355, "encoder_q-layer.8": 20372.4277, "encoder_q-layer.9": 18425.1484, "epoch": 0.74, "inbatch_neg_score": 39.5457, "inbatch_pos_score": 40.0625, "learning_rate": 1.3333333333333333e-05, "loss": 2.1978, "norm_diff": 0.049, "num_tokens_overlap": 5.5786, "num_tokens_union": 55.0629, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29691.408, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3832, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7182, "sent_len_1": 66.9203, "sent_len_max_0": 18.7987, "sent_len_max_1": 191.8225, "stdk": 0.0467, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 76000 }, { "accuracy": 58.6914, "doc_norm": 6.4329, "encoder_q-embeddings": 17257.6055, "encoder_q-layer.0": 12155.5, "encoder_q-layer.1": 12669.6641, "encoder_q-layer.10": 23753.0352, "encoder_q-layer.11": 47733.0117, "encoder_q-layer.2": 14039.9111, "encoder_q-layer.3": 14134.4551, "encoder_q-layer.4": 15060.5225, "encoder_q-layer.5": 15605.7432, "encoder_q-layer.6": 15531.8926, "encoder_q-layer.7": 17028.6797, "encoder_q-layer.8": 20543.0098, "encoder_q-layer.9": 17308.0059, "epoch": 0.74, "inbatch_neg_score": 39.5129, "inbatch_pos_score": 40.0625, "learning_rate": 1.3277777777777777e-05, "loss": 2.2345, "norm_diff": 0.0511, "num_tokens_overlap": 5.5654, "num_tokens_union": 55.0044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29101.3191, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3818, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6905, "sent_len_1": 66.7939, "sent_len_max_0": 18.8038, "sent_len_max_1": 189.235, "stdk": 0.0463, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 76100 }, { "accuracy": 57.2266, "doc_norm": 6.4296, "encoder_q-embeddings": 17645.6035, "encoder_q-layer.0": 12859.6006, "encoder_q-layer.1": 13523.0332, "encoder_q-layer.10": 19846.9258, "encoder_q-layer.11": 37782.8828, "encoder_q-layer.2": 14712.2246, "encoder_q-layer.3": 14463.0645, "encoder_q-layer.4": 14746.6719, "encoder_q-layer.5": 14505.1934, "encoder_q-layer.6": 16390.5488, "encoder_q-layer.7": 16875.4043, "encoder_q-layer.8": 18543.4102, "encoder_q-layer.9": 16544.5273, "epoch": 0.74, "inbatch_neg_score": 39.4993, "inbatch_pos_score": 40.0312, "learning_rate": 1.3222222222222221e-05, "loss": 2.1889, "norm_diff": 0.0505, "num_tokens_overlap": 5.5798, "num_tokens_union": 54.836, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27063.1331, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.379, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7162, "sent_len_1": 66.4749, "sent_len_max_0": 18.8788, "sent_len_max_1": 187.5838, "stdk": 0.0469, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 76200 }, { "accuracy": 59.5703, "doc_norm": 6.4205, "encoder_q-embeddings": 17821.2559, "encoder_q-layer.0": 12530.2969, "encoder_q-layer.1": 13212.5889, "encoder_q-layer.10": 20167.3496, "encoder_q-layer.11": 37977.0391, "encoder_q-layer.2": 14312.2021, "encoder_q-layer.3": 14162.3926, "encoder_q-layer.4": 14369.4619, "encoder_q-layer.5": 14250.8896, "encoder_q-layer.6": 15377.6631, "encoder_q-layer.7": 15990.6914, "encoder_q-layer.8": 18173.8105, "encoder_q-layer.9": 16471.9258, "epoch": 0.74, "inbatch_neg_score": 39.4782, "inbatch_pos_score": 40.0, "learning_rate": 1.3166666666666665e-05, "loss": 2.1916, "norm_diff": 0.0443, "num_tokens_overlap": 5.5758, "num_tokens_union": 55.0261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26729.3027, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3762, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7106, "sent_len_1": 66.7863, "sent_len_max_0": 18.9138, "sent_len_max_1": 189.4825, "stdk": 0.0453, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 76300 }, { "accuracy": 56.4453, "doc_norm": 6.4224, "encoder_q-embeddings": 17533.207, "encoder_q-layer.0": 12839.1045, "encoder_q-layer.1": 13405.1211, "encoder_q-layer.10": 21927.3789, "encoder_q-layer.11": 44978.5195, "encoder_q-layer.2": 14055.0986, "encoder_q-layer.3": 13876.1611, "encoder_q-layer.4": 14698.1973, "encoder_q-layer.5": 14773.1592, "encoder_q-layer.6": 15787.7549, "encoder_q-layer.7": 16326.1006, "encoder_q-layer.8": 17797.0527, "encoder_q-layer.9": 16748.541, "epoch": 0.75, "inbatch_neg_score": 39.4406, "inbatch_pos_score": 39.9688, "learning_rate": 1.3111111111111113e-05, "loss": 2.201, "norm_diff": 0.0477, "num_tokens_overlap": 5.5731, "num_tokens_union": 54.9828, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28465.718, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3747, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7137, "sent_len_1": 66.7585, "sent_len_max_0": 18.9, "sent_len_max_1": 191.02, "stdk": 0.0451, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 76400 }, { "accuracy": 58.2031, "doc_norm": 6.422, "encoder_q-embeddings": 17048.4512, "encoder_q-layer.0": 12088.1924, "encoder_q-layer.1": 12856.957, "encoder_q-layer.10": 20888.9551, "encoder_q-layer.11": 41823.125, "encoder_q-layer.2": 14157.6621, "encoder_q-layer.3": 14330.3916, "encoder_q-layer.4": 14282.7637, "encoder_q-layer.5": 14568.0752, "encoder_q-layer.6": 15764.293, "encoder_q-layer.7": 16695.5996, "encoder_q-layer.8": 18403.5254, "encoder_q-layer.9": 16936.2949, "epoch": 0.75, "inbatch_neg_score": 39.4447, "inbatch_pos_score": 39.9688, "learning_rate": 1.3055555555555557e-05, "loss": 2.1933, "norm_diff": 0.0479, "num_tokens_overlap": 5.5801, "num_tokens_union": 55.0837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27962.5051, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.374, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7144, "sent_len_1": 66.9155, "sent_len_max_0": 18.83, "sent_len_max_1": 190.3525, "stdk": 0.044, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 76500 }, { "accuracy": 59.5703, "doc_norm": 6.4219, "encoder_q-embeddings": 16697.3184, "encoder_q-layer.0": 12199.6084, "encoder_q-layer.1": 12625.4092, "encoder_q-layer.10": 21276.2949, "encoder_q-layer.11": 35803.8242, "encoder_q-layer.2": 14075.3145, "encoder_q-layer.3": 14059.1172, "encoder_q-layer.4": 14554.5107, "encoder_q-layer.5": 14298.0674, "encoder_q-layer.6": 15016.1113, "encoder_q-layer.7": 16284.9082, "encoder_q-layer.8": 17714.7832, "encoder_q-layer.9": 17101.7148, "epoch": 0.75, "inbatch_neg_score": 39.4316, "inbatch_pos_score": 39.9688, "learning_rate": 1.3000000000000001e-05, "loss": 2.1976, "norm_diff": 0.0491, "num_tokens_overlap": 5.5768, "num_tokens_union": 54.9909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26289.4747, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3729, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.748, "sent_len_1": 66.7211, "sent_len_max_0": 18.97, "sent_len_max_1": 190.36, "stdk": 0.0465, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 76600 }, { "accuracy": 58.3984, "doc_norm": 6.4168, "encoder_q-embeddings": 16541.4277, "encoder_q-layer.0": 11649.1943, "encoder_q-layer.1": 12080.9795, "encoder_q-layer.10": 19022.709, "encoder_q-layer.11": 41289.6133, "encoder_q-layer.2": 13571.8438, "encoder_q-layer.3": 13975.0869, "encoder_q-layer.4": 13812.6758, "encoder_q-layer.5": 13878.1055, "encoder_q-layer.6": 15109.957, "encoder_q-layer.7": 15853.9238, "encoder_q-layer.8": 16809.3125, "encoder_q-layer.9": 15817.4678, "epoch": 0.75, "inbatch_neg_score": 39.4075, "inbatch_pos_score": 39.9375, "learning_rate": 1.2944444444444445e-05, "loss": 2.2317, "norm_diff": 0.0459, "num_tokens_overlap": 5.5714, "num_tokens_union": 54.9745, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26684.4588, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3709, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7153, "sent_len_1": 66.7433, "sent_len_max_0": 18.855, "sent_len_max_1": 189.3725, "stdk": 0.0453, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 76700 }, { "accuracy": 57.6172, "doc_norm": 6.4209, "encoder_q-embeddings": 17606.8594, "encoder_q-layer.0": 12414.6816, "encoder_q-layer.1": 12986.4102, "encoder_q-layer.10": 19584.6172, "encoder_q-layer.11": 36857.1484, "encoder_q-layer.2": 14327.71, "encoder_q-layer.3": 14241.7539, "encoder_q-layer.4": 14783.3662, "encoder_q-layer.5": 15031.3359, "encoder_q-layer.6": 16010.5264, "encoder_q-layer.7": 16271.6084, "encoder_q-layer.8": 17147.2871, "encoder_q-layer.9": 15886.959, "epoch": 0.75, "inbatch_neg_score": 39.404, "inbatch_pos_score": 39.9375, "learning_rate": 1.2888888888888889e-05, "loss": 2.1923, "norm_diff": 0.0501, "num_tokens_overlap": 5.5721, "num_tokens_union": 54.9121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26758.4724, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3708, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7138, "sent_len_1": 66.6654, "sent_len_max_0": 18.8125, "sent_len_max_1": 187.9412, "stdk": 0.0457, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 76800 }, { "accuracy": 55.3711, "doc_norm": 6.4188, "encoder_q-embeddings": 18664.5215, "encoder_q-layer.0": 12804.4971, "encoder_q-layer.1": 13383.2266, "encoder_q-layer.10": 23550.5781, "encoder_q-layer.11": 45387.2344, "encoder_q-layer.2": 14418.4775, "encoder_q-layer.3": 14496.5459, "encoder_q-layer.4": 14759.3506, "encoder_q-layer.5": 14766.3291, "encoder_q-layer.6": 15752.623, "encoder_q-layer.7": 17287.0176, "encoder_q-layer.8": 19113.2168, "encoder_q-layer.9": 17995.9785, "epoch": 0.75, "inbatch_neg_score": 39.4026, "inbatch_pos_score": 39.9375, "learning_rate": 1.2833333333333333e-05, "loss": 2.231, "norm_diff": 0.0462, "num_tokens_overlap": 5.5765, "num_tokens_union": 55.0257, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29370.6997, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3726, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7394, "sent_len_1": 66.7597, "sent_len_max_0": 18.81, "sent_len_max_1": 189.8388, "stdk": 0.0451, "stdq": 0.039, "stdqueue_k": 0.0, "step": 76900 }, { "accuracy": 58.8867, "doc_norm": 6.4181, "encoder_q-embeddings": 17198.4863, "encoder_q-layer.0": 12102.5107, "encoder_q-layer.1": 12710.833, "encoder_q-layer.10": 21313.623, "encoder_q-layer.11": 39393.2266, "encoder_q-layer.2": 13837.4707, "encoder_q-layer.3": 13864.8408, "encoder_q-layer.4": 14191.3994, "encoder_q-layer.5": 14226.4121, "encoder_q-layer.6": 15051.4648, "encoder_q-layer.7": 16092.5459, "encoder_q-layer.8": 18270.9512, "encoder_q-layer.9": 17585.0039, "epoch": 0.75, "inbatch_neg_score": 39.4129, "inbatch_pos_score": 39.9375, "learning_rate": 1.2777777777777777e-05, "loss": 2.2144, "norm_diff": 0.0466, "num_tokens_overlap": 5.5881, "num_tokens_union": 55.0018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27498.1641, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3715, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7175, "sent_len_1": 66.7426, "sent_len_max_0": 18.8287, "sent_len_max_1": 189.0888, "stdk": 0.0453, "stdq": 0.039, "stdqueue_k": 0.0, "step": 77000 }, { "accuracy": 55.5664, "doc_norm": 6.4204, "encoder_q-embeddings": 18196.7109, "encoder_q-layer.0": 12794.4912, "encoder_q-layer.1": 12930.4404, "encoder_q-layer.10": 21423.3574, "encoder_q-layer.11": 39000.5234, "encoder_q-layer.2": 13622.3799, "encoder_q-layer.3": 14030.9141, "encoder_q-layer.4": 14695.7197, "encoder_q-layer.5": 14575.1621, "encoder_q-layer.6": 16006.5107, "encoder_q-layer.7": 17950.7188, "encoder_q-layer.8": 19645.0645, "encoder_q-layer.9": 18292.7793, "epoch": 0.75, "inbatch_neg_score": 39.4133, "inbatch_pos_score": 39.9375, "learning_rate": 1.2722222222222221e-05, "loss": 2.1555, "norm_diff": 0.0479, "num_tokens_overlap": 5.5872, "num_tokens_union": 55.0649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27991.3126, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3726, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7308, "sent_len_1": 66.8741, "sent_len_max_0": 18.8212, "sent_len_max_1": 192.0263, "stdk": 0.0461, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 77100 }, { "accuracy": 58.6914, "doc_norm": 6.4168, "encoder_q-embeddings": 17398.1621, "encoder_q-layer.0": 12506.0957, "encoder_q-layer.1": 12997.8271, "encoder_q-layer.10": 19500.3574, "encoder_q-layer.11": 36182.2734, "encoder_q-layer.2": 14087.4541, "encoder_q-layer.3": 13974.2529, "encoder_q-layer.4": 14455.8398, "encoder_q-layer.5": 14536.7832, "encoder_q-layer.6": 15655.1367, "encoder_q-layer.7": 15800.585, "encoder_q-layer.8": 17209.4473, "encoder_q-layer.9": 16692.8398, "epoch": 0.75, "inbatch_neg_score": 39.4098, "inbatch_pos_score": 39.9375, "learning_rate": 1.2666666666666668e-05, "loss": 2.1936, "norm_diff": 0.0454, "num_tokens_overlap": 5.5801, "num_tokens_union": 55.0846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26631.9857, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3714, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7143, "sent_len_1": 66.9294, "sent_len_max_0": 18.8487, "sent_len_max_1": 189.535, "stdk": 0.0456, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 77200 }, { "accuracy": 57.4219, "doc_norm": 6.4191, "encoder_q-embeddings": 17761.2188, "encoder_q-layer.0": 12448.7139, "encoder_q-layer.1": 12960.1318, "encoder_q-layer.10": 19646.8965, "encoder_q-layer.11": 37886.8867, "encoder_q-layer.2": 13810.1133, "encoder_q-layer.3": 13834.6299, "encoder_q-layer.4": 14303.0752, "encoder_q-layer.5": 14017.3047, "encoder_q-layer.6": 15039.2939, "encoder_q-layer.7": 15962.3604, "encoder_q-layer.8": 18247.875, "encoder_q-layer.9": 16256.8379, "epoch": 0.75, "inbatch_neg_score": 39.3887, "inbatch_pos_score": 39.9062, "learning_rate": 1.2611111111111113e-05, "loss": 2.2243, "norm_diff": 0.0496, "num_tokens_overlap": 5.5765, "num_tokens_union": 55.042, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26673.5471, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3694, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7136, "sent_len_1": 66.887, "sent_len_max_0": 18.7037, "sent_len_max_1": 193.8725, "stdk": 0.046, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 77300 }, { "accuracy": 56.1523, "doc_norm": 6.4141, "encoder_q-embeddings": 18193.4062, "encoder_q-layer.0": 12605.5059, "encoder_q-layer.1": 13155.4873, "encoder_q-layer.10": 20642.5547, "encoder_q-layer.11": 43837.1328, "encoder_q-layer.2": 14453.5039, "encoder_q-layer.3": 14708.3252, "encoder_q-layer.4": 14955.9189, "encoder_q-layer.5": 14826.3301, "encoder_q-layer.6": 15671.3955, "encoder_q-layer.7": 17023.4453, "encoder_q-layer.8": 18625.5879, "encoder_q-layer.9": 17602.6406, "epoch": 0.76, "inbatch_neg_score": 39.3694, "inbatch_pos_score": 39.875, "learning_rate": 1.2555555555555557e-05, "loss": 2.1768, "norm_diff": 0.0448, "num_tokens_overlap": 5.582, "num_tokens_union": 54.9991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28886.7933, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3693, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7406, "sent_len_1": 66.734, "sent_len_max_0": 18.8938, "sent_len_max_1": 189.88, "stdk": 0.0462, "stdq": 0.0377, "stdqueue_k": 0.0, "step": 77400 }, { "accuracy": 59.2773, "doc_norm": 6.4149, "encoder_q-embeddings": 17091.4082, "encoder_q-layer.0": 12354.8076, "encoder_q-layer.1": 12572.209, "encoder_q-layer.10": 19968.0, "encoder_q-layer.11": 39090.5273, "encoder_q-layer.2": 13528.7256, "encoder_q-layer.3": 13995.2051, "encoder_q-layer.4": 14363.7139, "encoder_q-layer.5": 14171.6592, "encoder_q-layer.6": 15206.583, "encoder_q-layer.7": 16011.7197, "encoder_q-layer.8": 17925.0488, "encoder_q-layer.9": 16516.1406, "epoch": 0.76, "inbatch_neg_score": 39.3442, "inbatch_pos_score": 39.875, "learning_rate": 1.25e-05, "loss": 2.1944, "norm_diff": 0.0477, "num_tokens_overlap": 5.5816, "num_tokens_union": 55.0391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26717.6028, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3672, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7402, "sent_len_1": 66.8583, "sent_len_max_0": 18.8487, "sent_len_max_1": 191.8212, "stdk": 0.0459, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 77500 }, { "accuracy": 56.543, "doc_norm": 6.414, "encoder_q-embeddings": 17863.1816, "encoder_q-layer.0": 12281.4355, "encoder_q-layer.1": 12842.9766, "encoder_q-layer.10": 21170.2402, "encoder_q-layer.11": 39225.4219, "encoder_q-layer.2": 13986.9854, "encoder_q-layer.3": 14418.21, "encoder_q-layer.4": 14804.8301, "encoder_q-layer.5": 14419.1074, "encoder_q-layer.6": 15066.5664, "encoder_q-layer.7": 16376.8867, "encoder_q-layer.8": 18107.9668, "encoder_q-layer.9": 16709.416, "epoch": 0.76, "inbatch_neg_score": 39.3521, "inbatch_pos_score": 39.875, "learning_rate": 1.2444444444444445e-05, "loss": 2.1822, "norm_diff": 0.0481, "num_tokens_overlap": 5.5806, "num_tokens_union": 54.9087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27565.7811, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3659, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7212, "sent_len_1": 66.5823, "sent_len_max_0": 18.8675, "sent_len_max_1": 188.0062, "stdk": 0.0461, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 77600 }, { "accuracy": 56.543, "doc_norm": 6.4102, "encoder_q-embeddings": 17257.6406, "encoder_q-layer.0": 12195.5693, "encoder_q-layer.1": 13136.54, "encoder_q-layer.10": 20934.9453, "encoder_q-layer.11": 47465.1367, "encoder_q-layer.2": 14204.2939, "encoder_q-layer.3": 14471.9248, "encoder_q-layer.4": 14119.5479, "encoder_q-layer.5": 13937.5293, "encoder_q-layer.6": 15094.1289, "encoder_q-layer.7": 15765.1064, "encoder_q-layer.8": 18492.2109, "encoder_q-layer.9": 16720.9043, "epoch": 0.76, "inbatch_neg_score": 39.3359, "inbatch_pos_score": 39.8438, "learning_rate": 1.238888888888889e-05, "loss": 2.1871, "norm_diff": 0.0448, "num_tokens_overlap": 5.5629, "num_tokens_union": 54.9546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28745.7974, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3655, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6939, "sent_len_1": 66.7567, "sent_len_max_0": 18.8888, "sent_len_max_1": 190.8363, "stdk": 0.0459, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 77700 }, { "accuracy": 57.1289, "doc_norm": 6.411, "encoder_q-embeddings": 17846.4492, "encoder_q-layer.0": 12953.1572, "encoder_q-layer.1": 13504.7666, "encoder_q-layer.10": 19966.9668, "encoder_q-layer.11": 36989.5352, "encoder_q-layer.2": 14391.373, "encoder_q-layer.3": 14772.6748, "encoder_q-layer.4": 14536.6797, "encoder_q-layer.5": 14224.4932, "encoder_q-layer.6": 15281.6016, "encoder_q-layer.7": 17376.1367, "encoder_q-layer.8": 18974.9023, "encoder_q-layer.9": 16931.1934, "epoch": 0.76, "inbatch_neg_score": 39.2951, "inbatch_pos_score": 39.8125, "learning_rate": 1.2333333333333334e-05, "loss": 2.2059, "norm_diff": 0.0472, "num_tokens_overlap": 5.5706, "num_tokens_union": 54.9627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27210.3074, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3637, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7131, "sent_len_1": 66.6861, "sent_len_max_0": 18.7213, "sent_len_max_1": 189.43, "stdk": 0.0464, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 77800 }, { "accuracy": 56.25, "doc_norm": 6.4109, "encoder_q-embeddings": 17987.2793, "encoder_q-layer.0": 12406.8164, "encoder_q-layer.1": 12903.8926, "encoder_q-layer.10": 30774.1797, "encoder_q-layer.11": 52466.9648, "encoder_q-layer.2": 14212.8291, "encoder_q-layer.3": 14543.3594, "encoder_q-layer.4": 15033.5469, "encoder_q-layer.5": 15361.46, "encoder_q-layer.6": 16324.5176, "encoder_q-layer.7": 18338.373, "encoder_q-layer.8": 22098.4414, "encoder_q-layer.9": 20797.2949, "epoch": 0.76, "inbatch_neg_score": 39.2946, "inbatch_pos_score": 39.8125, "learning_rate": 1.2277777777777778e-05, "loss": 2.2206, "norm_diff": 0.0472, "num_tokens_overlap": 5.5656, "num_tokens_union": 54.8801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31874.0715, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3637, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6929, "sent_len_1": 66.5442, "sent_len_max_0": 18.9075, "sent_len_max_1": 190.6387, "stdk": 0.0464, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 77900 }, { "accuracy": 59.7656, "doc_norm": 6.4143, "encoder_q-embeddings": 17668.8301, "encoder_q-layer.0": 12380.8291, "encoder_q-layer.1": 12940.6445, "encoder_q-layer.10": 19329.3398, "encoder_q-layer.11": 37917.8086, "encoder_q-layer.2": 13741.1777, "encoder_q-layer.3": 13845.3936, "encoder_q-layer.4": 14206.8584, "encoder_q-layer.5": 14008.5771, "encoder_q-layer.6": 15402.8545, "encoder_q-layer.7": 16469.543, "encoder_q-layer.8": 18132.9785, "encoder_q-layer.9": 16111.3809, "epoch": 0.76, "inbatch_neg_score": 39.2804, "inbatch_pos_score": 39.8438, "learning_rate": 1.2222222222222222e-05, "loss": 2.1646, "norm_diff": 0.0503, "num_tokens_overlap": 5.5776, "num_tokens_union": 55.0114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26774.9968, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.364, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7081, "sent_len_1": 66.7855, "sent_len_max_0": 18.915, "sent_len_max_1": 188.695, "stdk": 0.0464, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 78000 }, { "accuracy": 57.0312, "doc_norm": 6.4057, "encoder_q-embeddings": 17163.4121, "encoder_q-layer.0": 12157.7461, "encoder_q-layer.1": 12533.0059, "encoder_q-layer.10": 22528.9492, "encoder_q-layer.11": 45848.1094, "encoder_q-layer.2": 13674.9033, "encoder_q-layer.3": 14220.8564, "encoder_q-layer.4": 14202.8184, "encoder_q-layer.5": 13873.9336, "encoder_q-layer.6": 14779.5967, "encoder_q-layer.7": 15646.7715, "encoder_q-layer.8": 17685.4941, "encoder_q-layer.9": 16762.1953, "epoch": 0.76, "inbatch_neg_score": 39.2637, "inbatch_pos_score": 39.7812, "learning_rate": 1.2166666666666668e-05, "loss": 2.1981, "norm_diff": 0.0478, "num_tokens_overlap": 5.5716, "num_tokens_union": 54.9501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28379.9188, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3579, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7068, "sent_len_1": 66.6923, "sent_len_max_0": 18.7525, "sent_len_max_1": 190.145, "stdk": 0.0451, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 78100 }, { "accuracy": 55.8594, "doc_norm": 6.4056, "encoder_q-embeddings": 17285.2207, "encoder_q-layer.0": 12635.8496, "encoder_q-layer.1": 12867.7266, "encoder_q-layer.10": 19993.2012, "encoder_q-layer.11": 38622.8516, "encoder_q-layer.2": 14175.8438, "encoder_q-layer.3": 14432.5176, "encoder_q-layer.4": 14774.0977, "encoder_q-layer.5": 14644.915, "encoder_q-layer.6": 15253.3955, "encoder_q-layer.7": 15838.4287, "encoder_q-layer.8": 18393.4609, "encoder_q-layer.9": 16292.6436, "epoch": 0.76, "inbatch_neg_score": 39.2665, "inbatch_pos_score": 39.7812, "learning_rate": 1.2111111111111112e-05, "loss": 2.1855, "norm_diff": 0.046, "num_tokens_overlap": 5.5755, "num_tokens_union": 54.9451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27158.1606, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3596, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7252, "sent_len_1": 66.7084, "sent_len_max_0": 18.8687, "sent_len_max_1": 190.7725, "stdk": 0.0451, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 78200 }, { "accuracy": 55.5664, "doc_norm": 6.4098, "encoder_q-embeddings": 17445.3203, "encoder_q-layer.0": 12268.1279, "encoder_q-layer.1": 13034.6299, "encoder_q-layer.10": 20061.8652, "encoder_q-layer.11": 39720.832, "encoder_q-layer.2": 13803.9219, "encoder_q-layer.3": 14048.7705, "encoder_q-layer.4": 14181.6924, "encoder_q-layer.5": 13902.0273, "encoder_q-layer.6": 15261.6006, "encoder_q-layer.7": 16298.4639, "encoder_q-layer.8": 18133.2578, "encoder_q-layer.9": 16404.4238, "epoch": 0.76, "inbatch_neg_score": 39.295, "inbatch_pos_score": 39.8125, "learning_rate": 1.2055555555555556e-05, "loss": 2.2108, "norm_diff": 0.0473, "num_tokens_overlap": 5.5804, "num_tokens_union": 55.007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27196.125, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3625, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7517, "sent_len_1": 66.7442, "sent_len_max_0": 18.8525, "sent_len_max_1": 188.1425, "stdk": 0.0456, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 78300 }, { "accuracy": 57.5195, "doc_norm": 6.407, "encoder_q-embeddings": 16971.6777, "encoder_q-layer.0": 11859.3574, "encoder_q-layer.1": 12605.1543, "encoder_q-layer.10": 19580.2188, "encoder_q-layer.11": 37212.6914, "encoder_q-layer.2": 13392.6113, "encoder_q-layer.3": 13176.1426, "encoder_q-layer.4": 13469.1357, "encoder_q-layer.5": 13419.6016, "encoder_q-layer.6": 14624.4609, "encoder_q-layer.7": 15734.4385, "encoder_q-layer.8": 17024.8613, "encoder_q-layer.9": 15446.9912, "epoch": 0.77, "inbatch_neg_score": 39.2662, "inbatch_pos_score": 39.7812, "learning_rate": 1.2e-05, "loss": 2.1902, "norm_diff": 0.0476, "num_tokens_overlap": 5.5717, "num_tokens_union": 54.8878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26200.0036, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3595, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7124, "sent_len_1": 66.5959, "sent_len_max_0": 18.76, "sent_len_max_1": 190.1262, "stdk": 0.0449, "stdq": 0.039, "stdqueue_k": 0.0, "step": 78400 }, { "accuracy": 58.0078, "doc_norm": 6.4083, "encoder_q-embeddings": 35281.6875, "encoder_q-layer.0": 24639.0176, "encoder_q-layer.1": 26072.7578, "encoder_q-layer.10": 43003.8477, "encoder_q-layer.11": 84979.3906, "encoder_q-layer.2": 29439.3496, "encoder_q-layer.3": 28766.9258, "encoder_q-layer.4": 28709.5117, "encoder_q-layer.5": 28643.9434, "encoder_q-layer.6": 30560.7012, "encoder_q-layer.7": 32782.3945, "encoder_q-layer.8": 36250.3945, "encoder_q-layer.9": 34906.9492, "epoch": 0.77, "inbatch_neg_score": 39.2453, "inbatch_pos_score": 39.7812, "learning_rate": 1.1944444444444446e-05, "loss": 2.1806, "norm_diff": 0.05, "num_tokens_overlap": 5.5785, "num_tokens_union": 55.1181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 55767.7183, "preclip_grad_norm_avg": 0.0005, "query_norm": 6.3583, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7041, "sent_len_1": 67.0001, "sent_len_max_0": 18.7875, "sent_len_max_1": 190.8575, "stdk": 0.0466, "stdq": 0.038, "stdqueue_k": 0.0, "step": 78500 }, { "accuracy": 58.2031, "doc_norm": 6.4047, "encoder_q-embeddings": 18147.0312, "encoder_q-layer.0": 12956.3057, "encoder_q-layer.1": 13148.0293, "encoder_q-layer.10": 21150.3926, "encoder_q-layer.11": 41415.1797, "encoder_q-layer.2": 13988.5762, "encoder_q-layer.3": 14381.6992, "encoder_q-layer.4": 14701.0234, "encoder_q-layer.5": 14127.0137, "encoder_q-layer.6": 15046.4902, "encoder_q-layer.7": 16520.9453, "encoder_q-layer.8": 19356.5137, "encoder_q-layer.9": 17138.3027, "epoch": 0.77, "inbatch_neg_score": 39.236, "inbatch_pos_score": 39.75, "learning_rate": 1.188888888888889e-05, "loss": 2.209, "norm_diff": 0.0471, "num_tokens_overlap": 5.5758, "num_tokens_union": 55.0839, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28033.7464, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3576, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7409, "sent_len_1": 66.8766, "sent_len_max_0": 18.7688, "sent_len_max_1": 191.0488, "stdk": 0.0444, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 78600 }, { "accuracy": 58.2031, "doc_norm": 6.4074, "encoder_q-embeddings": 16746.9004, "encoder_q-layer.0": 12222.3516, "encoder_q-layer.1": 12917.9004, "encoder_q-layer.10": 20609.4473, "encoder_q-layer.11": 41020.918, "encoder_q-layer.2": 14197.958, "encoder_q-layer.3": 13912.25, "encoder_q-layer.4": 14887.4043, "encoder_q-layer.5": 14294.4727, "encoder_q-layer.6": 15638.8809, "encoder_q-layer.7": 16398.9141, "encoder_q-layer.8": 18154.8066, "encoder_q-layer.9": 17325.5371, "epoch": 0.77, "inbatch_neg_score": 39.2213, "inbatch_pos_score": 39.75, "learning_rate": 1.1833333333333334e-05, "loss": 2.1294, "norm_diff": 0.0494, "num_tokens_overlap": 5.584, "num_tokens_union": 55.0392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27524.1702, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.358, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7342, "sent_len_1": 66.8151, "sent_len_max_0": 18.77, "sent_len_max_1": 192.2562, "stdk": 0.0455, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 78700 }, { "accuracy": 57.7148, "doc_norm": 6.4058, "encoder_q-embeddings": 17392.2266, "encoder_q-layer.0": 12294.9375, "encoder_q-layer.1": 12963.9014, "encoder_q-layer.10": 22806.3711, "encoder_q-layer.11": 40078.5039, "encoder_q-layer.2": 14161.7881, "encoder_q-layer.3": 13957.5293, "encoder_q-layer.4": 14542.4473, "encoder_q-layer.5": 14346.3398, "encoder_q-layer.6": 16041.5332, "encoder_q-layer.7": 17192.6113, "encoder_q-layer.8": 20119.877, "encoder_q-layer.9": 17936.6855, "epoch": 0.77, "inbatch_neg_score": 39.1944, "inbatch_pos_score": 39.7188, "learning_rate": 1.1777777777777778e-05, "loss": 2.2133, "norm_diff": 0.0487, "num_tokens_overlap": 5.5661, "num_tokens_union": 54.9984, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28061.0237, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3571, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7184, "sent_len_1": 66.7234, "sent_len_max_0": 18.77, "sent_len_max_1": 187.9875, "stdk": 0.0466, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 78800 }, { "accuracy": 57.3242, "doc_norm": 6.4013, "encoder_q-embeddings": 17527.8828, "encoder_q-layer.0": 12063.2197, "encoder_q-layer.1": 12678.9561, "encoder_q-layer.10": 19793.3301, "encoder_q-layer.11": 38772.3203, "encoder_q-layer.2": 13804.5146, "encoder_q-layer.3": 13885.3906, "encoder_q-layer.4": 14383.1719, "encoder_q-layer.5": 14415.1953, "encoder_q-layer.6": 15224.4355, "encoder_q-layer.7": 16415.6992, "encoder_q-layer.8": 17649.6348, "encoder_q-layer.9": 16454.3633, "epoch": 0.77, "inbatch_neg_score": 39.1984, "inbatch_pos_score": 39.7188, "learning_rate": 1.1722222222222224e-05, "loss": 2.1806, "norm_diff": 0.0461, "num_tokens_overlap": 5.5808, "num_tokens_union": 54.9837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27088.0797, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3552, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7476, "sent_len_1": 66.7228, "sent_len_max_0": 18.8362, "sent_len_max_1": 188.4888, "stdk": 0.0457, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 78900 }, { "accuracy": 58.1055, "doc_norm": 6.4055, "encoder_q-embeddings": 18118.8496, "encoder_q-layer.0": 13194.3828, "encoder_q-layer.1": 13624.8027, "encoder_q-layer.10": 23433.666, "encoder_q-layer.11": 44556.832, "encoder_q-layer.2": 14819.5586, "encoder_q-layer.3": 15212.8633, "encoder_q-layer.4": 15123.3311, "encoder_q-layer.5": 14023.0459, "encoder_q-layer.6": 15285.9443, "encoder_q-layer.7": 16691.8711, "encoder_q-layer.8": 18498.5898, "encoder_q-layer.9": 17515.6055, "epoch": 0.77, "inbatch_neg_score": 39.1939, "inbatch_pos_score": 39.7188, "learning_rate": 1.1666666666666668e-05, "loss": 2.1737, "norm_diff": 0.0519, "num_tokens_overlap": 5.5879, "num_tokens_union": 55.0633, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28780.1094, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3536, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7199, "sent_len_1": 66.9532, "sent_len_max_0": 18.935, "sent_len_max_1": 191.1775, "stdk": 0.0459, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 79000 }, { "accuracy": 57.0312, "doc_norm": 6.4044, "encoder_q-embeddings": 18010.668, "encoder_q-layer.0": 12966.3877, "encoder_q-layer.1": 13459.1035, "encoder_q-layer.10": 21084.25, "encoder_q-layer.11": 39654.4414, "encoder_q-layer.2": 14386.3037, "encoder_q-layer.3": 13884.4805, "encoder_q-layer.4": 14763.9912, "encoder_q-layer.5": 14737.709, "encoder_q-layer.6": 14951.7266, "encoder_q-layer.7": 16320.7773, "encoder_q-layer.8": 17660.2285, "encoder_q-layer.9": 16908.834, "epoch": 0.77, "inbatch_neg_score": 39.1899, "inbatch_pos_score": 39.7188, "learning_rate": 1.1611111111111112e-05, "loss": 2.2025, "norm_diff": 0.0489, "num_tokens_overlap": 5.5762, "num_tokens_union": 54.9755, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27457.9505, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3555, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7172, "sent_len_1": 66.7189, "sent_len_max_0": 18.755, "sent_len_max_1": 189.305, "stdk": 0.0468, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 79100 }, { "accuracy": 58.2031, "doc_norm": 6.4033, "encoder_q-embeddings": 17436.1758, "encoder_q-layer.0": 12327.6172, "encoder_q-layer.1": 12778.6572, "encoder_q-layer.10": 20628.2891, "encoder_q-layer.11": 37262.793, "encoder_q-layer.2": 13826.0977, "encoder_q-layer.3": 13721.7656, "encoder_q-layer.4": 14104.832, "encoder_q-layer.5": 13836.0391, "encoder_q-layer.6": 14948.877, "encoder_q-layer.7": 16405.7812, "encoder_q-layer.8": 17708.4492, "encoder_q-layer.9": 16297.2529, "epoch": 0.77, "inbatch_neg_score": 39.1781, "inbatch_pos_score": 39.7188, "learning_rate": 1.1555555555555556e-05, "loss": 2.1647, "norm_diff": 0.05, "num_tokens_overlap": 5.5759, "num_tokens_union": 54.9789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26699.7718, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3533, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7315, "sent_len_1": 66.6967, "sent_len_max_0": 18.89, "sent_len_max_1": 190.3125, "stdk": 0.0459, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 79200 }, { "accuracy": 56.7383, "doc_norm": 6.4044, "encoder_q-embeddings": 19020.0723, "encoder_q-layer.0": 13098.1016, "encoder_q-layer.1": 13471.5088, "encoder_q-layer.10": 26249.127, "encoder_q-layer.11": 45961.7227, "encoder_q-layer.2": 14855.9316, "encoder_q-layer.3": 14978.3047, "encoder_q-layer.4": 15480.1602, "encoder_q-layer.5": 15507.7861, "encoder_q-layer.6": 16894.2129, "encoder_q-layer.7": 17803.7285, "encoder_q-layer.8": 21561.5742, "encoder_q-layer.9": 19539.6641, "epoch": 0.77, "inbatch_neg_score": 39.1646, "inbatch_pos_score": 39.6875, "learning_rate": 1.1500000000000002e-05, "loss": 2.1792, "norm_diff": 0.0505, "num_tokens_overlap": 5.5701, "num_tokens_union": 54.8849, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30827.1056, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3539, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7083, "sent_len_1": 66.545, "sent_len_max_0": 18.8612, "sent_len_max_1": 189.0925, "stdk": 0.0464, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 79300 }, { "accuracy": 54.7852, "doc_norm": 6.3988, "encoder_q-embeddings": 17780.6641, "encoder_q-layer.0": 12485.5068, "encoder_q-layer.1": 13062.5439, "encoder_q-layer.10": 19860.3379, "encoder_q-layer.11": 38968.0078, "encoder_q-layer.2": 14450.0381, "encoder_q-layer.3": 14672.5645, "encoder_q-layer.4": 15416.8018, "encoder_q-layer.5": 14432.041, "encoder_q-layer.6": 15241.0781, "encoder_q-layer.7": 15911.3086, "encoder_q-layer.8": 17800.3711, "encoder_q-layer.9": 16266.2207, "epoch": 0.78, "inbatch_neg_score": 39.1817, "inbatch_pos_score": 39.6875, "learning_rate": 1.1444444444444446e-05, "loss": 2.176, "norm_diff": 0.0473, "num_tokens_overlap": 5.5808, "num_tokens_union": 54.9368, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27260.275, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3516, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7225, "sent_len_1": 66.63, "sent_len_max_0": 18.7375, "sent_len_max_1": 189.0025, "stdk": 0.0448, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 79400 }, { "accuracy": 58.6914, "doc_norm": 6.4046, "encoder_q-embeddings": 18920.0352, "encoder_q-layer.0": 13085.0654, "encoder_q-layer.1": 13780.7852, "encoder_q-layer.10": 25284.6094, "encoder_q-layer.11": 44778.7891, "encoder_q-layer.2": 14537.0938, "encoder_q-layer.3": 14486.6621, "encoder_q-layer.4": 15048.8525, "encoder_q-layer.5": 15093.6484, "encoder_q-layer.6": 16524.5762, "encoder_q-layer.7": 16718.6582, "encoder_q-layer.8": 19765.3086, "encoder_q-layer.9": 17812.6543, "epoch": 0.78, "inbatch_neg_score": 39.1903, "inbatch_pos_score": 39.75, "learning_rate": 1.138888888888889e-05, "loss": 2.1876, "norm_diff": 0.0494, "num_tokens_overlap": 5.5801, "num_tokens_union": 54.9831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29525.4452, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3553, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7212, "sent_len_1": 66.7468, "sent_len_max_0": 18.8225, "sent_len_max_1": 188.7125, "stdk": 0.0458, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 79500 }, { "accuracy": 57.8125, "doc_norm": 6.3988, "encoder_q-embeddings": 17751.3945, "encoder_q-layer.0": 12409.4502, "encoder_q-layer.1": 12907.4141, "encoder_q-layer.10": 19511.5703, "encoder_q-layer.11": 37659.6016, "encoder_q-layer.2": 13958.8379, "encoder_q-layer.3": 13966.3516, "encoder_q-layer.4": 14365.3184, "encoder_q-layer.5": 14455.8857, "encoder_q-layer.6": 14781.0449, "encoder_q-layer.7": 15519.0244, "encoder_q-layer.8": 17042.2305, "encoder_q-layer.9": 16125.915, "epoch": 0.78, "inbatch_neg_score": 39.1655, "inbatch_pos_score": 39.6875, "learning_rate": 1.1333333333333334e-05, "loss": 2.1917, "norm_diff": 0.0449, "num_tokens_overlap": 5.5725, "num_tokens_union": 54.9624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26405.9125, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3538, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.722, "sent_len_1": 66.7439, "sent_len_max_0": 18.9337, "sent_len_max_1": 189.7287, "stdk": 0.0462, "stdq": 0.039, "stdqueue_k": 0.0, "step": 79600 }, { "accuracy": 57.4219, "doc_norm": 6.4009, "encoder_q-embeddings": 17033.5215, "encoder_q-layer.0": 12014.374, "encoder_q-layer.1": 12515.2871, "encoder_q-layer.10": 19742.3066, "encoder_q-layer.11": 41093.1289, "encoder_q-layer.2": 13930.6367, "encoder_q-layer.3": 14089.3242, "encoder_q-layer.4": 14591.9717, "encoder_q-layer.5": 14441.1729, "encoder_q-layer.6": 16307.8184, "encoder_q-layer.7": 16842.7578, "encoder_q-layer.8": 19952.7207, "encoder_q-layer.9": 16864.8613, "epoch": 0.78, "inbatch_neg_score": 39.1627, "inbatch_pos_score": 39.6875, "learning_rate": 1.127777777777778e-05, "loss": 2.1954, "norm_diff": 0.049, "num_tokens_overlap": 5.5841, "num_tokens_union": 55.0188, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27628.6436, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3519, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7147, "sent_len_1": 66.8963, "sent_len_max_0": 18.8637, "sent_len_max_1": 191.32, "stdk": 0.045, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 79700 }, { "accuracy": 56.3477, "doc_norm": 6.4013, "encoder_q-embeddings": 16809.3242, "encoder_q-layer.0": 12019.2979, "encoder_q-layer.1": 12645.2109, "encoder_q-layer.10": 20141.5781, "encoder_q-layer.11": 41347.1914, "encoder_q-layer.2": 14052.6533, "encoder_q-layer.3": 13941.8633, "encoder_q-layer.4": 14213.7168, "encoder_q-layer.5": 14028.0557, "encoder_q-layer.6": 16242.3398, "encoder_q-layer.7": 16688.1074, "encoder_q-layer.8": 18618.5879, "encoder_q-layer.9": 16432.5957, "epoch": 0.78, "inbatch_neg_score": 39.1592, "inbatch_pos_score": 39.6875, "learning_rate": 1.1222222222222224e-05, "loss": 2.2331, "norm_diff": 0.0511, "num_tokens_overlap": 5.5843, "num_tokens_union": 54.9876, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27354.3757, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3502, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7291, "sent_len_1": 66.7811, "sent_len_max_0": 18.9675, "sent_len_max_1": 190.1362, "stdk": 0.045, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 79800 }, { "accuracy": 60.4492, "doc_norm": 6.3978, "encoder_q-embeddings": 17769.9199, "encoder_q-layer.0": 12642.252, "encoder_q-layer.1": 12845.875, "encoder_q-layer.10": 20640.5312, "encoder_q-layer.11": 42399.25, "encoder_q-layer.2": 13872.4287, "encoder_q-layer.3": 13592.5879, "encoder_q-layer.4": 14203.0322, "encoder_q-layer.5": 13640.8223, "encoder_q-layer.6": 14510.8682, "encoder_q-layer.7": 16146.8105, "encoder_q-layer.8": 17218.6895, "encoder_q-layer.9": 16515.4648, "epoch": 0.78, "inbatch_neg_score": 39.1492, "inbatch_pos_score": 39.6875, "learning_rate": 1.1166666666666668e-05, "loss": 2.1483, "norm_diff": 0.0464, "num_tokens_overlap": 5.5811, "num_tokens_union": 55.0058, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27308.3038, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3514, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7302, "sent_len_1": 66.7653, "sent_len_max_0": 18.8987, "sent_len_max_1": 190.355, "stdk": 0.0467, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 79900 }, { "accuracy": 58.3008, "doc_norm": 6.396, "encoder_q-embeddings": 16629.7266, "encoder_q-layer.0": 11971.2227, "encoder_q-layer.1": 12396.3418, "encoder_q-layer.10": 23713.5273, "encoder_q-layer.11": 38630.2109, "encoder_q-layer.2": 13860.3887, "encoder_q-layer.3": 13671.3721, "encoder_q-layer.4": 14242.458, "encoder_q-layer.5": 14119.2178, "encoder_q-layer.6": 15192.2295, "encoder_q-layer.7": 16435.4551, "encoder_q-layer.8": 18785.8379, "encoder_q-layer.9": 17501.1211, "epoch": 0.78, "inbatch_neg_score": 39.134, "inbatch_pos_score": 39.6562, "learning_rate": 1.1111111111111112e-05, "loss": 2.1807, "norm_diff": 0.0471, "num_tokens_overlap": 5.5771, "num_tokens_union": 54.9355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27299.9775, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3489, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7257, "sent_len_1": 66.6687, "sent_len_max_0": 18.8275, "sent_len_max_1": 192.6875, "stdk": 0.0458, "stdq": 0.039, "stdqueue_k": 0.0, "step": 80000 }, { "dev_runtime": 26.5952, "dev_samples_per_second": 2.406, "dev_steps_per_second": 0.038, "epoch": 0.78, "step": 80000, "test_accuracy": 8.65936279296875, "test_doc_norm": 6.387241840362549, "test_inbatch_neg_score": 39.689430236816406, "test_inbatch_pos_score": 40.472900390625, "test_loss": 4.133596897125244, "test_norm_diff": 0.0019421204924583435, "test_query_norm": 6.3870849609375, "test_queue_k_norm": 0.0, "test_stdk": 0.03742000088095665, "test_stdq": 0.03737448900938034, "test_stdqueue_k": 0.0 }, { "dev_runtime": 26.5952, "dev_samples_per_second": 2.406, "dev_steps_per_second": 0.038, "epoch": 0.78, "eval_beir-arguana_ndcg@10": 0.35881, "eval_beir-arguana_recall@10": 0.63727, "eval_beir-arguana_recall@100": 0.95946, "eval_beir-arguana_recall@20": 0.8165, "eval_beir-avg_ndcg@10": 0.38213791666666663, "eval_beir-avg_recall@10": 0.4529377500000001, "eval_beir-avg_recall@100": 0.6434675, "eval_beir-avg_recall@20": 0.5240486666666666, "eval_beir-cqadupstack_ndcg@10": 0.29531916666666663, "eval_beir-cqadupstack_recall@10": 0.39480750000000003, "eval_beir-cqadupstack_recall@100": 0.6292449999999999, "eval_beir-cqadupstack_recall@20": 0.46509666666666666, "eval_beir-fiqa_ndcg@10": 0.26752, "eval_beir-fiqa_recall@10": 0.33705, "eval_beir-fiqa_recall@100": 0.60946, "eval_beir-fiqa_recall@20": 0.4144, "eval_beir-nfcorpus_ndcg@10": 0.33574, "eval_beir-nfcorpus_recall@10": 0.16612, "eval_beir-nfcorpus_recall@100": 0.31861, "eval_beir-nfcorpus_recall@20": 0.20272, "eval_beir-nq_ndcg@10": 0.26854, "eval_beir-nq_recall@10": 0.44542, "eval_beir-nq_recall@100": 0.78718, "eval_beir-nq_recall@20": 0.57307, "eval_beir-quora_ndcg@10": 0.78659, "eval_beir-quora_recall@10": 0.88854, "eval_beir-quora_recall@100": 0.97659, "eval_beir-quora_recall@20": 0.92787, "eval_beir-scidocs_ndcg@10": 0.15829, "eval_beir-scidocs_recall@10": 0.16708, "eval_beir-scidocs_recall@100": 0.3856, "eval_beir-scidocs_recall@20": 0.22505, "eval_beir-scifact_ndcg@10": 0.60592, "eval_beir-scifact_recall@10": 0.75678, "eval_beir-scifact_recall@100": 0.911, "eval_beir-scifact_recall@20": 0.84078, "eval_beir-trec-covid_ndcg@10": 0.56273, "eval_beir-trec-covid_recall@10": 0.608, "eval_beir-trec-covid_recall@100": 0.4292, "eval_beir-trec-covid_recall@20": 0.559, "eval_beir-webis-touche2020_ndcg@10": 0.18192, "eval_beir-webis-touche2020_recall@10": 0.12831, "eval_beir-webis-touche2020_recall@100": 0.42833, "eval_beir-webis-touche2020_recall@20": 0.216, "eval_senteval-avg_sts": 0.7416287823391498, "eval_senteval-sickr_spearman": 0.7334141214930575, "eval_senteval-stsb_spearman": 0.749843443185242, "step": 80000, "test_accuracy": 8.65936279296875, "test_doc_norm": 6.387241840362549, "test_inbatch_neg_score": 39.689430236816406, "test_inbatch_pos_score": 40.472900390625, "test_loss": 4.133596897125244, "test_norm_diff": 0.0019421204924583435, "test_query_norm": 6.3870849609375, "test_queue_k_norm": 0.0, "test_stdk": 0.03742000088095665, "test_stdq": 0.03737448900938034, "test_stdqueue_k": 0.0 }, { "accuracy": 57.4219, "doc_norm": 6.3936, "encoder_q-embeddings": 17044.8945, "encoder_q-layer.0": 11984.498, "encoder_q-layer.1": 12753.0234, "encoder_q-layer.10": 20691.3887, "encoder_q-layer.11": 42715.6211, "encoder_q-layer.2": 13668.4053, "encoder_q-layer.3": 13865.4648, "encoder_q-layer.4": 14717.9512, "encoder_q-layer.5": 14718.7949, "encoder_q-layer.6": 15834.1797, "encoder_q-layer.7": 16726.4766, "encoder_q-layer.8": 18624.8047, "encoder_q-layer.9": 16524.9668, "epoch": 0.78, "inbatch_neg_score": 39.1224, "inbatch_pos_score": 39.625, "learning_rate": 1.1055555555555556e-05, "loss": 2.2095, "norm_diff": 0.0459, "num_tokens_overlap": 5.5769, "num_tokens_union": 54.9707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27553.6299, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3478, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7325, "sent_len_1": 66.6976, "sent_len_max_0": 18.7862, "sent_len_max_1": 188.9925, "stdk": 0.0462, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 80100 }, { "accuracy": 58.9844, "doc_norm": 6.3921, "encoder_q-embeddings": 16771.3652, "encoder_q-layer.0": 12249.7959, "encoder_q-layer.1": 12853.0488, "encoder_q-layer.10": 23829.166, "encoder_q-layer.11": 39540.3711, "encoder_q-layer.2": 13984.3535, "encoder_q-layer.3": 13958.9551, "encoder_q-layer.4": 13861.2607, "encoder_q-layer.5": 14039.4365, "encoder_q-layer.6": 14180.6035, "encoder_q-layer.7": 15763.0049, "encoder_q-layer.8": 17003.7734, "encoder_q-layer.9": 16391.502, "epoch": 0.78, "inbatch_neg_score": 39.1057, "inbatch_pos_score": 39.625, "learning_rate": 1.1000000000000001e-05, "loss": 2.1732, "norm_diff": 0.0457, "num_tokens_overlap": 5.5815, "num_tokens_union": 54.955, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27255.4921, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3464, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7271, "sent_len_1": 66.7376, "sent_len_max_0": 18.8825, "sent_len_max_1": 192.1262, "stdk": 0.0448, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 80200 }, { "accuracy": 57.6172, "doc_norm": 6.3935, "encoder_q-embeddings": 17258.8438, "encoder_q-layer.0": 12469.9854, "encoder_q-layer.1": 13274.4844, "encoder_q-layer.10": 20869.3633, "encoder_q-layer.11": 40173.3906, "encoder_q-layer.2": 14062.6816, "encoder_q-layer.3": 14460.9482, "encoder_q-layer.4": 15052.8887, "encoder_q-layer.5": 14994.1562, "encoder_q-layer.6": 15915.7314, "encoder_q-layer.7": 16759.7715, "encoder_q-layer.8": 19490.1738, "encoder_q-layer.9": 17573.1855, "epoch": 0.78, "inbatch_neg_score": 39.1016, "inbatch_pos_score": 39.625, "learning_rate": 1.0944444444444445e-05, "loss": 2.1847, "norm_diff": 0.0455, "num_tokens_overlap": 5.5949, "num_tokens_union": 55.1674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27791.4336, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.348, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7156, "sent_len_1": 67.0521, "sent_len_max_0": 18.855, "sent_len_max_1": 188.99, "stdk": 0.0451, "stdq": 0.039, "stdqueue_k": 0.0, "step": 80300 }, { "accuracy": 57.4219, "doc_norm": 6.3955, "encoder_q-embeddings": 18303.0762, "encoder_q-layer.0": 12673.2373, "encoder_q-layer.1": 13351.9385, "encoder_q-layer.10": 20841.5664, "encoder_q-layer.11": 38841.2266, "encoder_q-layer.2": 14428.7686, "encoder_q-layer.3": 14420.3477, "encoder_q-layer.4": 15093.6143, "encoder_q-layer.5": 14336.1689, "encoder_q-layer.6": 15329.0703, "encoder_q-layer.7": 17058.1367, "encoder_q-layer.8": 18833.3398, "encoder_q-layer.9": 16642.3906, "epoch": 0.78, "inbatch_neg_score": 39.0995, "inbatch_pos_score": 39.625, "learning_rate": 1.088888888888889e-05, "loss": 2.1881, "norm_diff": 0.047, "num_tokens_overlap": 5.5797, "num_tokens_union": 54.9146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27712.7747, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3484, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7287, "sent_len_1": 66.6363, "sent_len_max_0": 18.8238, "sent_len_max_1": 190.9325, "stdk": 0.0452, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 80400 }, { "accuracy": 57.5195, "doc_norm": 6.3971, "encoder_q-embeddings": 17269.9258, "encoder_q-layer.0": 12125.668, "encoder_q-layer.1": 12817.2383, "encoder_q-layer.10": 21540.5957, "encoder_q-layer.11": 40468.1055, "encoder_q-layer.2": 14284.9863, "encoder_q-layer.3": 14430.8682, "encoder_q-layer.4": 14639.8027, "encoder_q-layer.5": 14507.043, "encoder_q-layer.6": 15745.5674, "encoder_q-layer.7": 16203.4248, "encoder_q-layer.8": 18821.7227, "encoder_q-layer.9": 17422.3223, "epoch": 0.79, "inbatch_neg_score": 39.0812, "inbatch_pos_score": 39.625, "learning_rate": 1.0833333333333334e-05, "loss": 2.2011, "norm_diff": 0.0498, "num_tokens_overlap": 5.5865, "num_tokens_union": 55.1124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27890.1743, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3473, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7295, "sent_len_1": 66.8836, "sent_len_max_0": 18.9187, "sent_len_max_1": 187.2625, "stdk": 0.0459, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 80500 }, { "accuracy": 59.668, "doc_norm": 6.3949, "encoder_q-embeddings": 18081.7168, "encoder_q-layer.0": 12678.0947, "encoder_q-layer.1": 13867.7363, "encoder_q-layer.10": 21904.625, "encoder_q-layer.11": 44529.1016, "encoder_q-layer.2": 14807.1387, "encoder_q-layer.3": 14393.1758, "encoder_q-layer.4": 14788.3408, "encoder_q-layer.5": 14668.1875, "encoder_q-layer.6": 15752.6484, "encoder_q-layer.7": 16110.0781, "encoder_q-layer.8": 18350.457, "encoder_q-layer.9": 17080.1055, "epoch": 0.79, "inbatch_neg_score": 39.0513, "inbatch_pos_score": 39.5625, "learning_rate": 1.0777777777777778e-05, "loss": 2.1975, "norm_diff": 0.0508, "num_tokens_overlap": 5.5885, "num_tokens_union": 55.1092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28649.8248, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.344, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7279, "sent_len_1": 66.9086, "sent_len_max_0": 18.8738, "sent_len_max_1": 189.5062, "stdk": 0.0464, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 80600 }, { "accuracy": 56.0547, "doc_norm": 6.3973, "encoder_q-embeddings": 17352.6367, "encoder_q-layer.0": 12336.7422, "encoder_q-layer.1": 13135.874, "encoder_q-layer.10": 25548.998, "encoder_q-layer.11": 44424.5625, "encoder_q-layer.2": 14226.8672, "encoder_q-layer.3": 14537.915, "encoder_q-layer.4": 14865.1514, "encoder_q-layer.5": 14674.1719, "encoder_q-layer.6": 15380.8291, "encoder_q-layer.7": 16657.3867, "encoder_q-layer.8": 19817.625, "encoder_q-layer.9": 18045.3184, "epoch": 0.79, "inbatch_neg_score": 39.0537, "inbatch_pos_score": 39.5938, "learning_rate": 1.0722222222222222e-05, "loss": 2.1773, "norm_diff": 0.053, "num_tokens_overlap": 5.5724, "num_tokens_union": 55.1603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29130.3384, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3443, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7175, "sent_len_1": 66.9777, "sent_len_max_0": 18.9662, "sent_len_max_1": 188.8913, "stdk": 0.0463, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 80700 }, { "accuracy": 58.2031, "doc_norm": 6.393, "encoder_q-embeddings": 17777.5293, "encoder_q-layer.0": 12576.957, "encoder_q-layer.1": 12661.5918, "encoder_q-layer.10": 19524.1602, "encoder_q-layer.11": 41612.4883, "encoder_q-layer.2": 14010.0879, "encoder_q-layer.3": 14015.1895, "encoder_q-layer.4": 14643.2578, "encoder_q-layer.5": 14404.4736, "encoder_q-layer.6": 15468.4258, "encoder_q-layer.7": 16572.4844, "encoder_q-layer.8": 18130.8594, "encoder_q-layer.9": 16632.9043, "epoch": 0.79, "inbatch_neg_score": 39.0309, "inbatch_pos_score": 39.5625, "learning_rate": 1.0666666666666667e-05, "loss": 2.2335, "norm_diff": 0.0513, "num_tokens_overlap": 5.5671, "num_tokens_union": 55.1023, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27603.204, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3417, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7213, "sent_len_1": 66.8947, "sent_len_max_0": 18.8075, "sent_len_max_1": 190.8475, "stdk": 0.0457, "stdq": 0.038, "stdqueue_k": 0.0, "step": 80800 }, { "accuracy": 59.2773, "doc_norm": 6.3905, "encoder_q-embeddings": 16610.0371, "encoder_q-layer.0": 11852.2539, "encoder_q-layer.1": 12789.2764, "encoder_q-layer.10": 18950.4258, "encoder_q-layer.11": 38058.1445, "encoder_q-layer.2": 13437.2988, "encoder_q-layer.3": 13851.3809, "encoder_q-layer.4": 14012.7314, "encoder_q-layer.5": 13406.1357, "encoder_q-layer.6": 14858.7793, "encoder_q-layer.7": 15942.0215, "encoder_q-layer.8": 17157.5195, "encoder_q-layer.9": 16263.1836, "epoch": 0.79, "inbatch_neg_score": 39.036, "inbatch_pos_score": 39.5625, "learning_rate": 1.0611111111111111e-05, "loss": 2.191, "norm_diff": 0.048, "num_tokens_overlap": 5.5622, "num_tokens_union": 54.9116, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26235.6619, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3424, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7372, "sent_len_1": 66.5252, "sent_len_max_0": 18.945, "sent_len_max_1": 190.915, "stdk": 0.0463, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 80900 }, { "accuracy": 57.9102, "doc_norm": 6.3926, "encoder_q-embeddings": 18650.1152, "encoder_q-layer.0": 13018.9756, "encoder_q-layer.1": 13411.4531, "encoder_q-layer.10": 20949.6855, "encoder_q-layer.11": 41703.9297, "encoder_q-layer.2": 14743.6172, "encoder_q-layer.3": 14902.8027, "encoder_q-layer.4": 16039.7725, "encoder_q-layer.5": 15187.1064, "encoder_q-layer.6": 15722.6816, "encoder_q-layer.7": 16552.666, "encoder_q-layer.8": 18607.5156, "encoder_q-layer.9": 17481.2617, "epoch": 0.79, "inbatch_neg_score": 39.0045, "inbatch_pos_score": 39.5625, "learning_rate": 1.0555555555555555e-05, "loss": 2.2151, "norm_diff": 0.0504, "num_tokens_overlap": 5.5686, "num_tokens_union": 54.908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28621.0534, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3422, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7006, "sent_len_1": 66.6056, "sent_len_max_0": 18.8575, "sent_len_max_1": 186.7763, "stdk": 0.0463, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 81000 }, { "accuracy": 58.3008, "doc_norm": 6.3872, "encoder_q-embeddings": 16962.0859, "encoder_q-layer.0": 11914.8379, "encoder_q-layer.1": 12544.4141, "encoder_q-layer.10": 22009.0898, "encoder_q-layer.11": 44832.6289, "encoder_q-layer.2": 13514.0195, "encoder_q-layer.3": 13750.9932, "encoder_q-layer.4": 14347.9609, "encoder_q-layer.5": 14246.5381, "encoder_q-layer.6": 15448.3271, "encoder_q-layer.7": 15940.9131, "encoder_q-layer.8": 18443.5391, "encoder_q-layer.9": 17054.4609, "epoch": 0.79, "inbatch_neg_score": 39.0155, "inbatch_pos_score": 39.5312, "learning_rate": 1.05e-05, "loss": 2.1799, "norm_diff": 0.0471, "num_tokens_overlap": 5.5727, "num_tokens_union": 55.128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27731.1479, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3401, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6912, "sent_len_1": 67.0316, "sent_len_max_0": 18.7225, "sent_len_max_1": 191.65, "stdk": 0.0455, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 81100 }, { "accuracy": 58.0078, "doc_norm": 6.3856, "encoder_q-embeddings": 17825.9336, "encoder_q-layer.0": 12104.7295, "encoder_q-layer.1": 12917.2998, "encoder_q-layer.10": 20222.5918, "encoder_q-layer.11": 41372.0938, "encoder_q-layer.2": 14079.9238, "encoder_q-layer.3": 14694.5859, "encoder_q-layer.4": 15063.7314, "encoder_q-layer.5": 14598.7266, "encoder_q-layer.6": 15299.0391, "encoder_q-layer.7": 16380.9082, "encoder_q-layer.8": 18006.5117, "encoder_q-layer.9": 16850.5664, "epoch": 0.79, "inbatch_neg_score": 39.0048, "inbatch_pos_score": 39.5312, "learning_rate": 1.0444444444444445e-05, "loss": 2.1839, "norm_diff": 0.0461, "num_tokens_overlap": 5.5965, "num_tokens_union": 55.2504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27928.4073, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3396, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7161, "sent_len_1": 67.1736, "sent_len_max_0": 18.8388, "sent_len_max_1": 188.8288, "stdk": 0.0459, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 81200 }, { "accuracy": 57.9102, "doc_norm": 6.3881, "encoder_q-embeddings": 16804.8262, "encoder_q-layer.0": 12216.3467, "encoder_q-layer.1": 12330.6445, "encoder_q-layer.10": 21527.2734, "encoder_q-layer.11": 41481.3984, "encoder_q-layer.2": 13510.4209, "encoder_q-layer.3": 13708.8535, "encoder_q-layer.4": 14331.3867, "encoder_q-layer.5": 14237.3076, "encoder_q-layer.6": 14956.7344, "encoder_q-layer.7": 15615.1143, "encoder_q-layer.8": 18177.8828, "encoder_q-layer.9": 16794.5352, "epoch": 0.79, "inbatch_neg_score": 38.9916, "inbatch_pos_score": 39.5312, "learning_rate": 1.038888888888889e-05, "loss": 2.1766, "norm_diff": 0.0486, "num_tokens_overlap": 5.5734, "num_tokens_union": 54.9921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27575.5971, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3395, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7017, "sent_len_1": 66.7225, "sent_len_max_0": 19.0388, "sent_len_max_1": 189.3975, "stdk": 0.0472, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 81300 }, { "accuracy": 59.082, "doc_norm": 6.3859, "encoder_q-embeddings": 15528.0156, "encoder_q-layer.0": 11520.6436, "encoder_q-layer.1": 12441.165, "encoder_q-layer.10": 19768.3652, "encoder_q-layer.11": 43013.5508, "encoder_q-layer.2": 13559.4092, "encoder_q-layer.3": 13954.4922, "encoder_q-layer.4": 14321.8652, "encoder_q-layer.5": 13786.6748, "encoder_q-layer.6": 14274.21, "encoder_q-layer.7": 15216.2686, "encoder_q-layer.8": 17749.5293, "encoder_q-layer.9": 16727.2012, "epoch": 0.79, "inbatch_neg_score": 38.9848, "inbatch_pos_score": 39.5, "learning_rate": 1.0333333333333333e-05, "loss": 2.2045, "norm_diff": 0.0487, "num_tokens_overlap": 5.5866, "num_tokens_union": 55.1154, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26815.479, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3373, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7292, "sent_len_1": 66.9428, "sent_len_max_0": 18.8875, "sent_len_max_1": 191.8313, "stdk": 0.0456, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 81400 }, { "accuracy": 59.668, "doc_norm": 6.3867, "encoder_q-embeddings": 16948.2637, "encoder_q-layer.0": 12140.3076, "encoder_q-layer.1": 12350.1406, "encoder_q-layer.10": 19141.0098, "encoder_q-layer.11": 37946.1211, "encoder_q-layer.2": 13260.3164, "encoder_q-layer.3": 13187.4365, "encoder_q-layer.4": 13872.9482, "encoder_q-layer.5": 13762.2334, "encoder_q-layer.6": 14278.4531, "encoder_q-layer.7": 15070.5156, "encoder_q-layer.8": 17427.0566, "encoder_q-layer.9": 16113.8105, "epoch": 0.8, "inbatch_neg_score": 39.0021, "inbatch_pos_score": 39.5312, "learning_rate": 1.0277777777777777e-05, "loss": 2.1778, "norm_diff": 0.0469, "num_tokens_overlap": 5.5952, "num_tokens_union": 55.11, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26034.7528, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3398, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7358, "sent_len_1": 66.9667, "sent_len_max_0": 18.8337, "sent_len_max_1": 190.0025, "stdk": 0.0462, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 81500 }, { "accuracy": 56.8359, "doc_norm": 6.3882, "encoder_q-embeddings": 18022.459, "encoder_q-layer.0": 12820.1621, "encoder_q-layer.1": 13096.959, "encoder_q-layer.10": 21808.541, "encoder_q-layer.11": 40716.4297, "encoder_q-layer.2": 14207.6006, "encoder_q-layer.3": 14435.6914, "encoder_q-layer.4": 15080.0957, "encoder_q-layer.5": 14576.3467, "encoder_q-layer.6": 15663.4385, "encoder_q-layer.7": 16673.4277, "encoder_q-layer.8": 18178.1465, "encoder_q-layer.9": 17314.3008, "epoch": 0.8, "inbatch_neg_score": 39.0024, "inbatch_pos_score": 39.5, "learning_rate": 1.0222222222222223e-05, "loss": 2.2132, "norm_diff": 0.0491, "num_tokens_overlap": 5.5896, "num_tokens_union": 55.1954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28103.1772, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3391, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7297, "sent_len_1": 67.0282, "sent_len_max_0": 18.9112, "sent_len_max_1": 189.2425, "stdk": 0.0463, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 81600 }, { "accuracy": 57.1289, "doc_norm": 6.3886, "encoder_q-embeddings": 17853.8477, "encoder_q-layer.0": 12597.7051, "encoder_q-layer.1": 13170.1084, "encoder_q-layer.10": 19665.8066, "encoder_q-layer.11": 37410.6758, "encoder_q-layer.2": 14164.3506, "encoder_q-layer.3": 13797.2207, "encoder_q-layer.4": 14336.1416, "encoder_q-layer.5": 14242.917, "encoder_q-layer.6": 15011.9482, "encoder_q-layer.7": 16403.2363, "encoder_q-layer.8": 18528.7441, "encoder_q-layer.9": 16281.2061, "epoch": 0.8, "inbatch_neg_score": 38.9915, "inbatch_pos_score": 39.5, "learning_rate": 1.0166666666666667e-05, "loss": 2.1833, "norm_diff": 0.0496, "num_tokens_overlap": 5.5788, "num_tokens_union": 54.9939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26969.7906, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.339, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7046, "sent_len_1": 66.7278, "sent_len_max_0": 18.9812, "sent_len_max_1": 188.0112, "stdk": 0.045, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 81700 }, { "accuracy": 56.25, "doc_norm": 6.3857, "encoder_q-embeddings": 17782.8613, "encoder_q-layer.0": 12278.4316, "encoder_q-layer.1": 13170.4912, "encoder_q-layer.10": 19813.4316, "encoder_q-layer.11": 36550.6797, "encoder_q-layer.2": 14174.5898, "encoder_q-layer.3": 14036.1963, "encoder_q-layer.4": 14473.1631, "encoder_q-layer.5": 14502.5928, "encoder_q-layer.6": 15727.8008, "encoder_q-layer.7": 16235.9365, "encoder_q-layer.8": 17568.9219, "encoder_q-layer.9": 16762.7715, "epoch": 0.8, "inbatch_neg_score": 38.9668, "inbatch_pos_score": 39.5, "learning_rate": 1.0111111111111111e-05, "loss": 2.1588, "norm_diff": 0.0494, "num_tokens_overlap": 5.5791, "num_tokens_union": 55.1247, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26760.1232, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3362, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7203, "sent_len_1": 66.9938, "sent_len_max_0": 18.8175, "sent_len_max_1": 188.6475, "stdk": 0.0457, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 81800 }, { "accuracy": 58.0078, "doc_norm": 6.3826, "encoder_q-embeddings": 18289.8945, "encoder_q-layer.0": 12985.7295, "encoder_q-layer.1": 12823.4746, "encoder_q-layer.10": 21356.041, "encoder_q-layer.11": 38625.5, "encoder_q-layer.2": 13954.9824, "encoder_q-layer.3": 14197.0215, "encoder_q-layer.4": 15051.2451, "encoder_q-layer.5": 14765.8525, "encoder_q-layer.6": 15656.2373, "encoder_q-layer.7": 16583.2402, "encoder_q-layer.8": 18518.3691, "encoder_q-layer.9": 17052.6738, "epoch": 0.8, "inbatch_neg_score": 38.9574, "inbatch_pos_score": 39.4688, "learning_rate": 1.0055555555555555e-05, "loss": 2.1991, "norm_diff": 0.0468, "num_tokens_overlap": 5.5752, "num_tokens_union": 54.9903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27584.4002, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3357, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.737, "sent_len_1": 66.7506, "sent_len_max_0": 18.8863, "sent_len_max_1": 189.2113, "stdk": 0.0458, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 81900 }, { "accuracy": 56.1523, "doc_norm": 6.3875, "encoder_q-embeddings": 17960.8203, "encoder_q-layer.0": 12849.2773, "encoder_q-layer.1": 13137.5801, "encoder_q-layer.10": 21360.4277, "encoder_q-layer.11": 38657.0312, "encoder_q-layer.2": 14340.2627, "encoder_q-layer.3": 14303.9023, "encoder_q-layer.4": 14979.416, "encoder_q-layer.5": 14699.4346, "encoder_q-layer.6": 15478.002, "encoder_q-layer.7": 16901.6973, "encoder_q-layer.8": 18719.0664, "encoder_q-layer.9": 16998.4492, "epoch": 0.8, "inbatch_neg_score": 38.9525, "inbatch_pos_score": 39.4688, "learning_rate": 1e-05, "loss": 2.1508, "norm_diff": 0.0506, "num_tokens_overlap": 5.5833, "num_tokens_union": 54.9586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27680.183, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3369, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7019, "sent_len_1": 66.7198, "sent_len_max_0": 18.7025, "sent_len_max_1": 191.2713, "stdk": 0.0454, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 82000 }, { "accuracy": 55.957, "doc_norm": 6.3829, "encoder_q-embeddings": 17743.5605, "encoder_q-layer.0": 12521.6328, "encoder_q-layer.1": 12920.3545, "encoder_q-layer.10": 21113.3125, "encoder_q-layer.11": 40372.6719, "encoder_q-layer.2": 14131.3389, "encoder_q-layer.3": 14196.5801, "encoder_q-layer.4": 14544.3096, "encoder_q-layer.5": 14418.5264, "encoder_q-layer.6": 15688.7012, "encoder_q-layer.7": 16172.8252, "encoder_q-layer.8": 18342.0742, "encoder_q-layer.9": 17228.5703, "epoch": 0.8, "inbatch_neg_score": 38.9401, "inbatch_pos_score": 39.4375, "learning_rate": 9.944444444444445e-06, "loss": 2.1378, "norm_diff": 0.0486, "num_tokens_overlap": 5.5769, "num_tokens_union": 55.0523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27619.1581, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3343, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7362, "sent_len_1": 66.7271, "sent_len_max_0": 18.7862, "sent_len_max_1": 188.0712, "stdk": 0.0466, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 82100 }, { "accuracy": 56.25, "doc_norm": 6.3844, "encoder_q-embeddings": 18354.7715, "encoder_q-layer.0": 13078.9316, "encoder_q-layer.1": 13476.7266, "encoder_q-layer.10": 24436.832, "encoder_q-layer.11": 49185.9453, "encoder_q-layer.2": 14489.7725, "encoder_q-layer.3": 14450.4775, "encoder_q-layer.4": 14884.5264, "encoder_q-layer.5": 15224.2686, "encoder_q-layer.6": 15583.6377, "encoder_q-layer.7": 17236.9395, "encoder_q-layer.8": 19664.1699, "encoder_q-layer.9": 18182.373, "epoch": 0.8, "inbatch_neg_score": 38.9362, "inbatch_pos_score": 39.4375, "learning_rate": 9.888888888888889e-06, "loss": 2.2231, "norm_diff": 0.0501, "num_tokens_overlap": 5.5787, "num_tokens_union": 55.0582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29958.9925, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3343, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7091, "sent_len_1": 66.8587, "sent_len_max_0": 18.8588, "sent_len_max_1": 189.9062, "stdk": 0.0457, "stdq": 0.039, "stdqueue_k": 0.0, "step": 82200 }, { "accuracy": 55.0781, "doc_norm": 6.3834, "encoder_q-embeddings": 18158.4395, "encoder_q-layer.0": 12745.7266, "encoder_q-layer.1": 13376.582, "encoder_q-layer.10": 21788.0156, "encoder_q-layer.11": 45761.2539, "encoder_q-layer.2": 14463.0742, "encoder_q-layer.3": 14627.123, "encoder_q-layer.4": 15359.4453, "encoder_q-layer.5": 15305.501, "encoder_q-layer.6": 16174.7998, "encoder_q-layer.7": 17346.3262, "encoder_q-layer.8": 20885.2305, "encoder_q-layer.9": 17901.2871, "epoch": 0.8, "inbatch_neg_score": 38.9118, "inbatch_pos_score": 39.4375, "learning_rate": 9.833333333333333e-06, "loss": 2.2003, "norm_diff": 0.0503, "num_tokens_overlap": 5.5857, "num_tokens_union": 55.1386, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29667.5991, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3331, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.75, "sent_len_1": 66.9828, "sent_len_max_0": 18.9125, "sent_len_max_1": 191.375, "stdk": 0.0465, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 82300 }, { "accuracy": 59.9609, "doc_norm": 6.3811, "encoder_q-embeddings": 16789.5293, "encoder_q-layer.0": 11895.7246, "encoder_q-layer.1": 12366.6494, "encoder_q-layer.10": 22869.8828, "encoder_q-layer.11": 38038.3945, "encoder_q-layer.2": 13822.3975, "encoder_q-layer.3": 13298.208, "encoder_q-layer.4": 13880.8857, "encoder_q-layer.5": 13849.9277, "encoder_q-layer.6": 14885.7676, "encoder_q-layer.7": 15865.5449, "encoder_q-layer.8": 19356.7109, "encoder_q-layer.9": 17343.8164, "epoch": 0.8, "inbatch_neg_score": 38.9052, "inbatch_pos_score": 39.4375, "learning_rate": 9.777777777777779e-06, "loss": 2.1431, "norm_diff": 0.0502, "num_tokens_overlap": 5.5932, "num_tokens_union": 54.956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26886.9794, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3309, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.741, "sent_len_1": 66.69, "sent_len_max_0": 18.8212, "sent_len_max_1": 189.79, "stdk": 0.0465, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 82400 }, { "accuracy": 58.3008, "doc_norm": 6.3774, "encoder_q-embeddings": 16507.2832, "encoder_q-layer.0": 11712.415, "encoder_q-layer.1": 12567.6133, "encoder_q-layer.10": 21561.0391, "encoder_q-layer.11": 39559.4492, "encoder_q-layer.2": 13730.3877, "encoder_q-layer.3": 13819.7881, "encoder_q-layer.4": 14118.5879, "encoder_q-layer.5": 14280.2334, "encoder_q-layer.6": 15054.8936, "encoder_q-layer.7": 15234.2246, "encoder_q-layer.8": 18166.8086, "encoder_q-layer.9": 16692.6523, "epoch": 0.81, "inbatch_neg_score": 38.9024, "inbatch_pos_score": 39.4062, "learning_rate": 9.722222222222223e-06, "loss": 2.2015, "norm_diff": 0.047, "num_tokens_overlap": 5.5793, "num_tokens_union": 54.8948, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27069.8857, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3304, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7136, "sent_len_1": 66.5437, "sent_len_max_0": 18.8263, "sent_len_max_1": 188.0012, "stdk": 0.0451, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 82500 }, { "accuracy": 58.0078, "doc_norm": 6.3834, "encoder_q-embeddings": 18326.9902, "encoder_q-layer.0": 12655.8369, "encoder_q-layer.1": 13464.9443, "encoder_q-layer.10": 21501.9609, "encoder_q-layer.11": 48652.3555, "encoder_q-layer.2": 14370.6621, "encoder_q-layer.3": 14269.5078, "encoder_q-layer.4": 14907.5225, "encoder_q-layer.5": 14658.4316, "encoder_q-layer.6": 15857.2256, "encoder_q-layer.7": 17012.1953, "encoder_q-layer.8": 19802.1738, "encoder_q-layer.9": 21366.6719, "epoch": 0.81, "inbatch_neg_score": 38.8956, "inbatch_pos_score": 39.4375, "learning_rate": 9.666666666666667e-06, "loss": 2.1997, "norm_diff": 0.0524, "num_tokens_overlap": 5.5893, "num_tokens_union": 55.1721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30418.8191, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.331, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7399, "sent_len_1": 67.0065, "sent_len_max_0": 19.0413, "sent_len_max_1": 190.655, "stdk": 0.0467, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 82600 }, { "accuracy": 56.543, "doc_norm": 6.3782, "encoder_q-embeddings": 17114.1309, "encoder_q-layer.0": 12305.0801, "encoder_q-layer.1": 12903.7822, "encoder_q-layer.10": 19647.1445, "encoder_q-layer.11": 40933.8359, "encoder_q-layer.2": 13810.0566, "encoder_q-layer.3": 13680.792, "encoder_q-layer.4": 14144.8535, "encoder_q-layer.5": 14116.7529, "encoder_q-layer.6": 14986.7822, "encoder_q-layer.7": 16656.1699, "encoder_q-layer.8": 17328.9199, "encoder_q-layer.9": 16468.3047, "epoch": 0.81, "inbatch_neg_score": 38.8841, "inbatch_pos_score": 39.4062, "learning_rate": 9.61111111111111e-06, "loss": 2.1896, "norm_diff": 0.0482, "num_tokens_overlap": 5.5714, "num_tokens_union": 54.8806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27025.878, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.33, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7064, "sent_len_1": 66.588, "sent_len_max_0": 18.8762, "sent_len_max_1": 189.355, "stdk": 0.0463, "stdq": 0.039, "stdqueue_k": 0.0, "step": 82700 }, { "accuracy": 59.1797, "doc_norm": 6.3786, "encoder_q-embeddings": 18859.3965, "encoder_q-layer.0": 12984.9512, "encoder_q-layer.1": 13268.0518, "encoder_q-layer.10": 21959.3594, "encoder_q-layer.11": 40442.6836, "encoder_q-layer.2": 14072.3789, "encoder_q-layer.3": 14275.9951, "encoder_q-layer.4": 15108.582, "encoder_q-layer.5": 14479.542, "encoder_q-layer.6": 15200.04, "encoder_q-layer.7": 16479.0312, "encoder_q-layer.8": 18339.3945, "encoder_q-layer.9": 17068.7109, "epoch": 0.81, "inbatch_neg_score": 38.8537, "inbatch_pos_score": 39.375, "learning_rate": 9.555555555555556e-06, "loss": 2.1862, "norm_diff": 0.0506, "num_tokens_overlap": 5.5666, "num_tokens_union": 54.9528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28321.1961, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.328, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7277, "sent_len_1": 66.6633, "sent_len_max_0": 18.8562, "sent_len_max_1": 189.2837, "stdk": 0.046, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 82800 }, { "accuracy": 58.1055, "doc_norm": 6.3771, "encoder_q-embeddings": 17500.1934, "encoder_q-layer.0": 12625.0352, "encoder_q-layer.1": 13188.3311, "encoder_q-layer.10": 21221.3555, "encoder_q-layer.11": 39859.6641, "encoder_q-layer.2": 14096.374, "encoder_q-layer.3": 14354.2109, "encoder_q-layer.4": 14972.1855, "encoder_q-layer.5": 15081.0576, "encoder_q-layer.6": 15846.1816, "encoder_q-layer.7": 16097.3926, "encoder_q-layer.8": 18006.2305, "encoder_q-layer.9": 16639.8418, "epoch": 0.81, "inbatch_neg_score": 38.8318, "inbatch_pos_score": 39.375, "learning_rate": 9.5e-06, "loss": 2.2021, "norm_diff": 0.0504, "num_tokens_overlap": 5.5846, "num_tokens_union": 55.0746, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27559.8689, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3267, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7254, "sent_len_1": 66.8934, "sent_len_max_0": 18.8712, "sent_len_max_1": 190.58, "stdk": 0.046, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 82900 }, { "accuracy": 56.8359, "doc_norm": 6.3738, "encoder_q-embeddings": 16593.7617, "encoder_q-layer.0": 11763.4482, "encoder_q-layer.1": 12776.8965, "encoder_q-layer.10": 22317.1777, "encoder_q-layer.11": 41373.8711, "encoder_q-layer.2": 13862.2529, "encoder_q-layer.3": 13883.249, "encoder_q-layer.4": 14115.8984, "encoder_q-layer.5": 14120.4365, "encoder_q-layer.6": 15433.6729, "encoder_q-layer.7": 16555.5273, "encoder_q-layer.8": 19107.5156, "encoder_q-layer.9": 16680.627, "epoch": 0.81, "inbatch_neg_score": 38.8305, "inbatch_pos_score": 39.3438, "learning_rate": 9.444444444444445e-06, "loss": 2.1991, "norm_diff": 0.0504, "num_tokens_overlap": 5.5782, "num_tokens_union": 54.9925, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27300.5879, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3234, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7329, "sent_len_1": 66.7468, "sent_len_max_0": 18.8188, "sent_len_max_1": 189.4688, "stdk": 0.0452, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 83000 }, { "accuracy": 59.4727, "doc_norm": 6.3769, "encoder_q-embeddings": 16580.7871, "encoder_q-layer.0": 11762.9141, "encoder_q-layer.1": 12421.0977, "encoder_q-layer.10": 19905.8867, "encoder_q-layer.11": 39324.832, "encoder_q-layer.2": 13531.8389, "encoder_q-layer.3": 14003.1025, "encoder_q-layer.4": 14548.6797, "encoder_q-layer.5": 14022.0713, "encoder_q-layer.6": 14953.0508, "encoder_q-layer.7": 15839.1504, "encoder_q-layer.8": 17582.959, "encoder_q-layer.9": 16665.2246, "epoch": 0.81, "inbatch_neg_score": 38.8257, "inbatch_pos_score": 39.3438, "learning_rate": 9.388888888888889e-06, "loss": 2.2092, "norm_diff": 0.0506, "num_tokens_overlap": 5.5765, "num_tokens_union": 54.98, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26732.7875, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3264, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7, "sent_len_1": 66.7729, "sent_len_max_0": 18.9213, "sent_len_max_1": 190.3762, "stdk": 0.0452, "stdq": 0.039, "stdqueue_k": 0.0, "step": 83100 }, { "accuracy": 58.4961, "doc_norm": 6.3737, "encoder_q-embeddings": 17263.6465, "encoder_q-layer.0": 11887.9727, "encoder_q-layer.1": 12720.0029, "encoder_q-layer.10": 21404.0391, "encoder_q-layer.11": 40589.7422, "encoder_q-layer.2": 13838.043, "encoder_q-layer.3": 14083.1816, "encoder_q-layer.4": 14733.4834, "encoder_q-layer.5": 14598.9746, "encoder_q-layer.6": 14590.9902, "encoder_q-layer.7": 16286.3066, "encoder_q-layer.8": 17176.1426, "encoder_q-layer.9": 16562.6953, "epoch": 0.81, "inbatch_neg_score": 38.8107, "inbatch_pos_score": 39.3438, "learning_rate": 9.333333333333334e-06, "loss": 2.1681, "norm_diff": 0.0502, "num_tokens_overlap": 5.5697, "num_tokens_union": 54.9067, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27373.4439, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3235, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6945, "sent_len_1": 66.6611, "sent_len_max_0": 18.8062, "sent_len_max_1": 188.9475, "stdk": 0.0456, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 83200 }, { "accuracy": 57.3242, "doc_norm": 6.3739, "encoder_q-embeddings": 17132.041, "encoder_q-layer.0": 12302.9062, "encoder_q-layer.1": 12646.3359, "encoder_q-layer.10": 19492.3418, "encoder_q-layer.11": 38483.7891, "encoder_q-layer.2": 13925.416, "encoder_q-layer.3": 14104.5303, "encoder_q-layer.4": 14594.7168, "encoder_q-layer.5": 14514.9541, "encoder_q-layer.6": 15198.1543, "encoder_q-layer.7": 16767.9102, "encoder_q-layer.8": 17668.9258, "encoder_q-layer.9": 16125.04, "epoch": 0.81, "inbatch_neg_score": 38.8189, "inbatch_pos_score": 39.3438, "learning_rate": 9.277777777777778e-06, "loss": 2.2281, "norm_diff": 0.0499, "num_tokens_overlap": 5.5662, "num_tokens_union": 54.9833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26677.397, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.324, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7175, "sent_len_1": 66.7289, "sent_len_max_0": 18.8263, "sent_len_max_1": 190.7512, "stdk": 0.0453, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 83300 }, { "accuracy": 59.082, "doc_norm": 6.3736, "encoder_q-embeddings": 16528.6465, "encoder_q-layer.0": 11765.0967, "encoder_q-layer.1": 12379.5791, "encoder_q-layer.10": 26659.1211, "encoder_q-layer.11": 43452.7578, "encoder_q-layer.2": 13706.2061, "encoder_q-layer.3": 14055.0459, "encoder_q-layer.4": 14116.5918, "encoder_q-layer.5": 14149.7734, "encoder_q-layer.6": 15278.9062, "encoder_q-layer.7": 17739.0039, "encoder_q-layer.8": 20502.752, "encoder_q-layer.9": 17769.9043, "epoch": 0.81, "inbatch_neg_score": 38.8099, "inbatch_pos_score": 39.3438, "learning_rate": 9.222222222222222e-06, "loss": 2.1674, "norm_diff": 0.0494, "num_tokens_overlap": 5.5827, "num_tokens_union": 54.9973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28616.0707, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3242, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7202, "sent_len_1": 66.8226, "sent_len_max_0": 18.725, "sent_len_max_1": 191.6238, "stdk": 0.046, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 83400 }, { "accuracy": 55.1758, "doc_norm": 6.3741, "encoder_q-embeddings": 18318.5488, "encoder_q-layer.0": 12352.8281, "encoder_q-layer.1": 12838.0723, "encoder_q-layer.10": 21234.4062, "encoder_q-layer.11": 41643.8984, "encoder_q-layer.2": 14424.4805, "encoder_q-layer.3": 14320.709, "encoder_q-layer.4": 15090.583, "encoder_q-layer.5": 15240.6445, "encoder_q-layer.6": 16030.7617, "encoder_q-layer.7": 17220.4844, "encoder_q-layer.8": 18381.4609, "encoder_q-layer.9": 17547.6309, "epoch": 0.82, "inbatch_neg_score": 38.8047, "inbatch_pos_score": 39.3125, "learning_rate": 9.166666666666666e-06, "loss": 2.1387, "norm_diff": 0.0507, "num_tokens_overlap": 5.5901, "num_tokens_union": 54.9043, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28473.3047, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3234, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7524, "sent_len_1": 66.5739, "sent_len_max_0": 18.8725, "sent_len_max_1": 189.3363, "stdk": 0.0456, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 83500 }, { "accuracy": 57.1289, "doc_norm": 6.371, "encoder_q-embeddings": 18150.1582, "encoder_q-layer.0": 13354.6338, "encoder_q-layer.1": 13662.3311, "encoder_q-layer.10": 22142.8184, "encoder_q-layer.11": 40161.1562, "encoder_q-layer.2": 14856.3867, "encoder_q-layer.3": 15060.7383, "encoder_q-layer.4": 15409.4121, "encoder_q-layer.5": 14806.8223, "encoder_q-layer.6": 15509.916, "encoder_q-layer.7": 16975.3066, "encoder_q-layer.8": 18905.291, "encoder_q-layer.9": 17468.3965, "epoch": 0.82, "inbatch_neg_score": 38.7816, "inbatch_pos_score": 39.3125, "learning_rate": 9.111111111111112e-06, "loss": 2.2133, "norm_diff": 0.0486, "num_tokens_overlap": 5.5834, "num_tokens_union": 55.0854, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28409.1304, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3224, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7137, "sent_len_1": 66.9211, "sent_len_max_0": 18.8275, "sent_len_max_1": 189.8038, "stdk": 0.0461, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 83600 }, { "accuracy": 57.3242, "doc_norm": 6.373, "encoder_q-embeddings": 16352.9297, "encoder_q-layer.0": 11737.2598, "encoder_q-layer.1": 12323.2539, "encoder_q-layer.10": 21308.7441, "encoder_q-layer.11": 42129.0977, "encoder_q-layer.2": 13599.1982, "encoder_q-layer.3": 13404.5244, "encoder_q-layer.4": 14280.3438, "encoder_q-layer.5": 14001.5537, "encoder_q-layer.6": 15376.6133, "encoder_q-layer.7": 16843.3984, "encoder_q-layer.8": 18340.5195, "encoder_q-layer.9": 16813.5137, "epoch": 0.82, "inbatch_neg_score": 38.7689, "inbatch_pos_score": 39.3125, "learning_rate": 9.055555555555556e-06, "loss": 2.1386, "norm_diff": 0.0525, "num_tokens_overlap": 5.5747, "num_tokens_union": 54.873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27004.5573, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3205, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6985, "sent_len_1": 66.5701, "sent_len_max_0": 18.9062, "sent_len_max_1": 188.9525, "stdk": 0.047, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 83700 }, { "accuracy": 56.3477, "doc_norm": 6.3685, "encoder_q-embeddings": 16778.1602, "encoder_q-layer.0": 11963.0312, "encoder_q-layer.1": 12499.8867, "encoder_q-layer.10": 24801.75, "encoder_q-layer.11": 49546.4648, "encoder_q-layer.2": 13592.8936, "encoder_q-layer.3": 13821.5859, "encoder_q-layer.4": 14496.3164, "encoder_q-layer.5": 14159.1182, "encoder_q-layer.6": 15636.1982, "encoder_q-layer.7": 17328.082, "encoder_q-layer.8": 19422.7852, "encoder_q-layer.9": 18406.7988, "epoch": 0.82, "inbatch_neg_score": 38.7555, "inbatch_pos_score": 39.25, "learning_rate": 9e-06, "loss": 2.1385, "norm_diff": 0.0495, "num_tokens_overlap": 5.5761, "num_tokens_union": 55.142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29667.2031, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3189, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7156, "sent_len_1": 66.9987, "sent_len_max_0": 18.925, "sent_len_max_1": 189.535, "stdk": 0.0453, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 83800 }, { "accuracy": 58.0078, "doc_norm": 6.372, "encoder_q-embeddings": 18279.9922, "encoder_q-layer.0": 13264.1543, "encoder_q-layer.1": 13655.2305, "encoder_q-layer.10": 21250.4844, "encoder_q-layer.11": 43817.1172, "encoder_q-layer.2": 14505.7705, "encoder_q-layer.3": 14348.6885, "encoder_q-layer.4": 14647.6797, "encoder_q-layer.5": 14423.3604, "encoder_q-layer.6": 15737.8643, "encoder_q-layer.7": 16147.5869, "encoder_q-layer.8": 18351.0371, "encoder_q-layer.9": 17252.7051, "epoch": 0.82, "inbatch_neg_score": 38.7359, "inbatch_pos_score": 39.25, "learning_rate": 8.944444444444444e-06, "loss": 2.1936, "norm_diff": 0.0537, "num_tokens_overlap": 5.5793, "num_tokens_union": 54.958, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28239.4641, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3183, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7223, "sent_len_1": 66.6887, "sent_len_max_0": 18.8, "sent_len_max_1": 188.4625, "stdk": 0.0462, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 83900 }, { "accuracy": 59.9609, "doc_norm": 6.3682, "encoder_q-embeddings": 17186.3828, "encoder_q-layer.0": 12046.4473, "encoder_q-layer.1": 12550.4043, "encoder_q-layer.10": 20667.0, "encoder_q-layer.11": 39081.7617, "encoder_q-layer.2": 13595.8721, "encoder_q-layer.3": 13781.334, "encoder_q-layer.4": 14650.2822, "encoder_q-layer.5": 13833.333, "encoder_q-layer.6": 14777.8379, "encoder_q-layer.7": 15800.2783, "encoder_q-layer.8": 18270.2656, "encoder_q-layer.9": 16683.5586, "epoch": 0.82, "inbatch_neg_score": 38.7438, "inbatch_pos_score": 39.2812, "learning_rate": 8.88888888888889e-06, "loss": 2.1696, "norm_diff": 0.049, "num_tokens_overlap": 5.58, "num_tokens_union": 55.0777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26643.9862, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3192, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6844, "sent_len_1": 66.9774, "sent_len_max_0": 18.78, "sent_len_max_1": 188.6213, "stdk": 0.0457, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 84000 }, { "accuracy": 57.8125, "doc_norm": 6.3703, "encoder_q-embeddings": 17286.4023, "encoder_q-layer.0": 12274.959, "encoder_q-layer.1": 12868.6064, "encoder_q-layer.10": 21404.2559, "encoder_q-layer.11": 43297.8281, "encoder_q-layer.2": 13865.3037, "encoder_q-layer.3": 14150.3799, "encoder_q-layer.4": 14860.5166, "encoder_q-layer.5": 14663.2979, "encoder_q-layer.6": 15401.4639, "encoder_q-layer.7": 16337.6982, "encoder_q-layer.8": 18952.4609, "encoder_q-layer.9": 18625.4062, "epoch": 0.82, "inbatch_neg_score": 38.7349, "inbatch_pos_score": 39.25, "learning_rate": 8.833333333333334e-06, "loss": 2.1714, "norm_diff": 0.0523, "num_tokens_overlap": 5.5843, "num_tokens_union": 55.2288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27882.6013, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.318, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7343, "sent_len_1": 67.0839, "sent_len_max_0": 18.8362, "sent_len_max_1": 191.9187, "stdk": 0.0455, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 84100 }, { "accuracy": 57.4219, "doc_norm": 6.3643, "encoder_q-embeddings": 17853.0332, "encoder_q-layer.0": 12738.4551, "encoder_q-layer.1": 13423.3789, "encoder_q-layer.10": 24315.6289, "encoder_q-layer.11": 38563.0312, "encoder_q-layer.2": 14368.5488, "encoder_q-layer.3": 14224.9434, "encoder_q-layer.4": 14778.166, "encoder_q-layer.5": 14615.3398, "encoder_q-layer.6": 15964.1104, "encoder_q-layer.7": 17152.4648, "encoder_q-layer.8": 18762.1406, "encoder_q-layer.9": 17231.3945, "epoch": 0.82, "inbatch_neg_score": 38.7187, "inbatch_pos_score": 39.25, "learning_rate": 8.777777777777778e-06, "loss": 2.1842, "norm_diff": 0.0471, "num_tokens_overlap": 5.5705, "num_tokens_union": 55.125, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28037.1827, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3171, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6968, "sent_len_1": 66.9863, "sent_len_max_0": 18.8062, "sent_len_max_1": 191.2012, "stdk": 0.0456, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 84200 }, { "accuracy": 58.7891, "doc_norm": 6.368, "encoder_q-embeddings": 18248.6543, "encoder_q-layer.0": 12674.0, "encoder_q-layer.1": 13163.3252, "encoder_q-layer.10": 20083.2852, "encoder_q-layer.11": 38110.2969, "encoder_q-layer.2": 14538.6475, "encoder_q-layer.3": 14714.6084, "encoder_q-layer.4": 15187.8906, "encoder_q-layer.5": 14753.8662, "encoder_q-layer.6": 15917.998, "encoder_q-layer.7": 19133.6797, "encoder_q-layer.8": 19702.3516, "encoder_q-layer.9": 18207.2969, "epoch": 0.82, "inbatch_neg_score": 38.7248, "inbatch_pos_score": 39.25, "learning_rate": 8.722222222222224e-06, "loss": 2.2126, "norm_diff": 0.0515, "num_tokens_overlap": 5.5843, "num_tokens_union": 55.0589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27972.5657, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3165, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7241, "sent_len_1": 66.9347, "sent_len_max_0": 18.8125, "sent_len_max_1": 191.385, "stdk": 0.0464, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 84300 }, { "accuracy": 56.7383, "doc_norm": 6.3684, "encoder_q-embeddings": 17401.0957, "encoder_q-layer.0": 12178.9736, "encoder_q-layer.1": 12736.6123, "encoder_q-layer.10": 20163.3125, "encoder_q-layer.11": 40450.832, "encoder_q-layer.2": 14250.4795, "encoder_q-layer.3": 14334.375, "encoder_q-layer.4": 15472.7891, "encoder_q-layer.5": 14820.6582, "encoder_q-layer.6": 15457.5, "encoder_q-layer.7": 15670.5732, "encoder_q-layer.8": 17323.9707, "encoder_q-layer.9": 16550.459, "epoch": 0.82, "inbatch_neg_score": 38.6936, "inbatch_pos_score": 39.2188, "learning_rate": 8.666666666666668e-06, "loss": 2.1867, "norm_diff": 0.053, "num_tokens_overlap": 5.5709, "num_tokens_union": 55.0564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27423.8072, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3153, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6939, "sent_len_1": 66.8687, "sent_len_max_0": 18.8188, "sent_len_max_1": 188.4625, "stdk": 0.0456, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 84400 }, { "accuracy": 56.8359, "doc_norm": 6.3672, "encoder_q-embeddings": 17478.1445, "encoder_q-layer.0": 12576.7441, "encoder_q-layer.1": 13153.8477, "encoder_q-layer.10": 20074.0547, "encoder_q-layer.11": 38615.6172, "encoder_q-layer.2": 14354.3809, "encoder_q-layer.3": 14365.9746, "encoder_q-layer.4": 14650.4639, "encoder_q-layer.5": 14405.0498, "encoder_q-layer.6": 15476.7666, "encoder_q-layer.7": 16687.4453, "encoder_q-layer.8": 18065.0566, "encoder_q-layer.9": 16094.8105, "epoch": 0.82, "inbatch_neg_score": 38.6948, "inbatch_pos_score": 39.2188, "learning_rate": 8.611111111111112e-06, "loss": 2.1491, "norm_diff": 0.0519, "num_tokens_overlap": 5.582, "num_tokens_union": 55.1951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27342.4105, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3153, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7262, "sent_len_1": 67.0653, "sent_len_max_0": 18.92, "sent_len_max_1": 188.3162, "stdk": 0.0469, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 84500 }, { "accuracy": 57.9102, "doc_norm": 6.3674, "encoder_q-embeddings": 17718.582, "encoder_q-layer.0": 12689.2324, "encoder_q-layer.1": 13364.7686, "encoder_q-layer.10": 19460.4766, "encoder_q-layer.11": 39150.0664, "encoder_q-layer.2": 14508.7148, "encoder_q-layer.3": 14346.6455, "encoder_q-layer.4": 14762.7812, "encoder_q-layer.5": 14788.8066, "encoder_q-layer.6": 15241.6318, "encoder_q-layer.7": 16994.0742, "encoder_q-layer.8": 17551.8848, "encoder_q-layer.9": 16458.7812, "epoch": 0.83, "inbatch_neg_score": 38.6721, "inbatch_pos_score": 39.2188, "learning_rate": 8.555555555555556e-06, "loss": 2.1793, "norm_diff": 0.0532, "num_tokens_overlap": 5.5731, "num_tokens_union": 54.9858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27386.5065, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3142, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.707, "sent_len_1": 66.7401, "sent_len_max_0": 18.8062, "sent_len_max_1": 187.2225, "stdk": 0.0469, "stdq": 0.039, "stdqueue_k": 0.0, "step": 84600 }, { "accuracy": 58.3984, "doc_norm": 6.3605, "encoder_q-embeddings": 16602.4707, "encoder_q-layer.0": 11956.0811, "encoder_q-layer.1": 12677.2773, "encoder_q-layer.10": 22023.9961, "encoder_q-layer.11": 42595.2422, "encoder_q-layer.2": 13517.1787, "encoder_q-layer.3": 13359.04, "encoder_q-layer.4": 13758.4375, "encoder_q-layer.5": 13649.8389, "encoder_q-layer.6": 14475.6523, "encoder_q-layer.7": 15732.4375, "encoder_q-layer.8": 18346.4648, "encoder_q-layer.9": 16653.7383, "epoch": 0.83, "inbatch_neg_score": 38.6667, "inbatch_pos_score": 39.1875, "learning_rate": 8.500000000000002e-06, "loss": 2.1828, "norm_diff": 0.047, "num_tokens_overlap": 5.5645, "num_tokens_union": 55.0675, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27200.1273, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3134, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7073, "sent_len_1": 66.8653, "sent_len_max_0": 18.76, "sent_len_max_1": 190.575, "stdk": 0.0454, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 84700 }, { "accuracy": 57.9102, "doc_norm": 6.36, "encoder_q-embeddings": 16563.0215, "encoder_q-layer.0": 11954.3047, "encoder_q-layer.1": 12425.3232, "encoder_q-layer.10": 20718.2539, "encoder_q-layer.11": 44184.0078, "encoder_q-layer.2": 13661.0889, "encoder_q-layer.3": 14096.7305, "encoder_q-layer.4": 14580.3828, "encoder_q-layer.5": 14069.7246, "encoder_q-layer.6": 15371.0225, "encoder_q-layer.7": 16375.209, "encoder_q-layer.8": 18928.9473, "encoder_q-layer.9": 17021.8438, "epoch": 0.83, "inbatch_neg_score": 38.6412, "inbatch_pos_score": 39.1562, "learning_rate": 8.444444444444446e-06, "loss": 2.1984, "norm_diff": 0.0485, "num_tokens_overlap": 5.5652, "num_tokens_union": 55.049, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27785.2284, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3115, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7168, "sent_len_1": 66.7373, "sent_len_max_0": 18.8087, "sent_len_max_1": 187.4137, "stdk": 0.0461, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 84800 }, { "accuracy": 56.6406, "doc_norm": 6.3599, "encoder_q-embeddings": 17597.3867, "encoder_q-layer.0": 12463.4268, "encoder_q-layer.1": 12821.2891, "encoder_q-layer.10": 21807.9609, "encoder_q-layer.11": 44286.7188, "encoder_q-layer.2": 14166.1816, "encoder_q-layer.3": 14584.1953, "encoder_q-layer.4": 14723.5078, "encoder_q-layer.5": 15002.0752, "encoder_q-layer.6": 16761.0195, "encoder_q-layer.7": 17787.1543, "encoder_q-layer.8": 20003.5488, "encoder_q-layer.9": 17979.9492, "epoch": 0.83, "inbatch_neg_score": 38.6004, "inbatch_pos_score": 39.125, "learning_rate": 8.38888888888889e-06, "loss": 2.1647, "norm_diff": 0.0526, "num_tokens_overlap": 5.5884, "num_tokens_union": 55.156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28727.3151, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3072, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7329, "sent_len_1": 66.976, "sent_len_max_0": 18.82, "sent_len_max_1": 189.2287, "stdk": 0.0474, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 84900 }, { "accuracy": 61.9141, "doc_norm": 6.3568, "encoder_q-embeddings": 17849.3242, "encoder_q-layer.0": 12236.6855, "encoder_q-layer.1": 12602.1807, "encoder_q-layer.10": 22982.9414, "encoder_q-layer.11": 49797.8906, "encoder_q-layer.2": 13498.6709, "encoder_q-layer.3": 13548.5215, "encoder_q-layer.4": 13896.5303, "encoder_q-layer.5": 14157.1953, "encoder_q-layer.6": 15322.3301, "encoder_q-layer.7": 16446.0078, "encoder_q-layer.8": 18233.2598, "encoder_q-layer.9": 18328.4941, "epoch": 0.83, "inbatch_neg_score": 38.5932, "inbatch_pos_score": 39.125, "learning_rate": 8.333333333333334e-06, "loss": 2.1775, "norm_diff": 0.0501, "num_tokens_overlap": 5.5851, "num_tokens_union": 55.0499, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29549.5552, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3067, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7239, "sent_len_1": 66.833, "sent_len_max_0": 18.86, "sent_len_max_1": 189.0538, "stdk": 0.0468, "stdq": 0.039, "stdqueue_k": 0.0, "step": 85000 }, { "accuracy": 57.7148, "doc_norm": 6.3591, "encoder_q-embeddings": 17378.5762, "encoder_q-layer.0": 12452.8096, "encoder_q-layer.1": 12537.0381, "encoder_q-layer.10": 26344.0312, "encoder_q-layer.11": 49970.8867, "encoder_q-layer.2": 13998.7686, "encoder_q-layer.3": 14310.4561, "encoder_q-layer.4": 14491.8857, "encoder_q-layer.5": 14744.2744, "encoder_q-layer.6": 16087.7236, "encoder_q-layer.7": 17796.5156, "encoder_q-layer.8": 20572.8457, "encoder_q-layer.9": 19032.8125, "epoch": 0.83, "inbatch_neg_score": 38.6349, "inbatch_pos_score": 39.1562, "learning_rate": 8.27777777777778e-06, "loss": 2.1696, "norm_diff": 0.0508, "num_tokens_overlap": 5.5827, "num_tokens_union": 54.9033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29925.6702, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3083, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7192, "sent_len_1": 66.5829, "sent_len_max_0": 18.8238, "sent_len_max_1": 187.24, "stdk": 0.0461, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 85100 }, { "accuracy": 58.0078, "doc_norm": 6.3597, "encoder_q-embeddings": 16587.9277, "encoder_q-layer.0": 12131.1611, "encoder_q-layer.1": 13010.833, "encoder_q-layer.10": 18918.6016, "encoder_q-layer.11": 36456.7891, "encoder_q-layer.2": 13944.1973, "encoder_q-layer.3": 13731.7451, "encoder_q-layer.4": 13946.5762, "encoder_q-layer.5": 13766.127, "encoder_q-layer.6": 14830.8867, "encoder_q-layer.7": 16091.6543, "encoder_q-layer.8": 18730.9141, "encoder_q-layer.9": 16643.1328, "epoch": 0.83, "inbatch_neg_score": 38.6353, "inbatch_pos_score": 39.1562, "learning_rate": 8.222222222222223e-06, "loss": 2.1693, "norm_diff": 0.0502, "num_tokens_overlap": 5.5662, "num_tokens_union": 54.8614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26403.7328, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3095, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6872, "sent_len_1": 66.5763, "sent_len_max_0": 18.8375, "sent_len_max_1": 189.5625, "stdk": 0.045, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 85200 }, { "accuracy": 58.5938, "doc_norm": 6.3605, "encoder_q-embeddings": 18068.7988, "encoder_q-layer.0": 12565.2266, "encoder_q-layer.1": 13388.1035, "encoder_q-layer.10": 19767.4766, "encoder_q-layer.11": 39081.8945, "encoder_q-layer.2": 14797.3096, "encoder_q-layer.3": 15167.4189, "encoder_q-layer.4": 15042.7793, "encoder_q-layer.5": 14727.8779, "encoder_q-layer.6": 15517.8027, "encoder_q-layer.7": 16309.499, "encoder_q-layer.8": 18179.6855, "encoder_q-layer.9": 16605.2676, "epoch": 0.83, "inbatch_neg_score": 38.607, "inbatch_pos_score": 39.125, "learning_rate": 8.166666666666668e-06, "loss": 2.1889, "norm_diff": 0.0515, "num_tokens_overlap": 5.5697, "num_tokens_union": 54.8892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27456.1311, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.309, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6798, "sent_len_1": 66.636, "sent_len_max_0": 18.845, "sent_len_max_1": 190.5888, "stdk": 0.045, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 85300 }, { "accuracy": 59.082, "doc_norm": 6.3568, "encoder_q-embeddings": 16640.2383, "encoder_q-layer.0": 11899.251, "encoder_q-layer.1": 12624.9922, "encoder_q-layer.10": 22199.7031, "encoder_q-layer.11": 41488.4492, "encoder_q-layer.2": 13888.2959, "encoder_q-layer.3": 14240.5244, "encoder_q-layer.4": 14599.8262, "encoder_q-layer.5": 13876.626, "encoder_q-layer.6": 15057.8301, "encoder_q-layer.7": 16040.8428, "encoder_q-layer.8": 17701.584, "encoder_q-layer.9": 16898.3516, "epoch": 0.83, "inbatch_neg_score": 38.6041, "inbatch_pos_score": 39.125, "learning_rate": 8.111111111111112e-06, "loss": 2.1997, "norm_diff": 0.0495, "num_tokens_overlap": 5.5649, "num_tokens_union": 54.8886, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27332.1842, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3072, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6963, "sent_len_1": 66.6402, "sent_len_max_0": 18.9187, "sent_len_max_1": 189.8913, "stdk": 0.0461, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 85400 }, { "accuracy": 56.6406, "doc_norm": 6.3583, "encoder_q-embeddings": 16698.0801, "encoder_q-layer.0": 12374.9043, "encoder_q-layer.1": 12779.5703, "encoder_q-layer.10": 19892.0996, "encoder_q-layer.11": 39486.3086, "encoder_q-layer.2": 13923.2324, "encoder_q-layer.3": 13767.1602, "encoder_q-layer.4": 14371.252, "encoder_q-layer.5": 14692.0576, "encoder_q-layer.6": 15338.3896, "encoder_q-layer.7": 16750.0879, "encoder_q-layer.8": 17918.9688, "encoder_q-layer.9": 16421.7793, "epoch": 0.83, "inbatch_neg_score": 38.6006, "inbatch_pos_score": 39.125, "learning_rate": 8.055555555555557e-06, "loss": 2.1868, "norm_diff": 0.0491, "num_tokens_overlap": 5.5795, "num_tokens_union": 55.1053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27182.5866, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3092, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7069, "sent_len_1": 66.925, "sent_len_max_0": 18.8762, "sent_len_max_1": 189.6362, "stdk": 0.0462, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 85500 }, { "accuracy": 59.7656, "doc_norm": 6.3583, "encoder_q-embeddings": 18040.459, "encoder_q-layer.0": 12636.2305, "encoder_q-layer.1": 13062.7295, "encoder_q-layer.10": 19987.8867, "encoder_q-layer.11": 37481.9219, "encoder_q-layer.2": 13993.5186, "encoder_q-layer.3": 14024.6504, "encoder_q-layer.4": 14718.9404, "encoder_q-layer.5": 14604.2217, "encoder_q-layer.6": 15260.1943, "encoder_q-layer.7": 16870.1523, "encoder_q-layer.8": 18416.5918, "encoder_q-layer.9": 16503.2617, "epoch": 0.84, "inbatch_neg_score": 38.61, "inbatch_pos_score": 39.1562, "learning_rate": 8.000000000000001e-06, "loss": 2.1683, "norm_diff": 0.049, "num_tokens_overlap": 5.5788, "num_tokens_union": 55.0192, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27284.3763, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3093, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.71, "sent_len_1": 66.8359, "sent_len_max_0": 18.8362, "sent_len_max_1": 188.7488, "stdk": 0.0463, "stdq": 0.0392, "stdqueue_k": 0.0, "step": 85600 }, { "accuracy": 58.2031, "doc_norm": 6.3585, "encoder_q-embeddings": 16595.8574, "encoder_q-layer.0": 12071.2129, "encoder_q-layer.1": 12915.5947, "encoder_q-layer.10": 22606.3789, "encoder_q-layer.11": 44562.5859, "encoder_q-layer.2": 13791.2764, "encoder_q-layer.3": 14045.0693, "encoder_q-layer.4": 14370.9922, "encoder_q-layer.5": 14337.2549, "encoder_q-layer.6": 15191.0127, "encoder_q-layer.7": 16078.8018, "encoder_q-layer.8": 19892.1621, "encoder_q-layer.9": 17443.6387, "epoch": 0.84, "inbatch_neg_score": 38.6115, "inbatch_pos_score": 39.125, "learning_rate": 7.944444444444445e-06, "loss": 2.2081, "norm_diff": 0.0502, "num_tokens_overlap": 5.5783, "num_tokens_union": 54.9182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28074.8768, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3083, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.727, "sent_len_1": 66.6387, "sent_len_max_0": 18.7475, "sent_len_max_1": 191.3738, "stdk": 0.0463, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 85700 }, { "accuracy": 56.3477, "doc_norm": 6.3596, "encoder_q-embeddings": 18310.8848, "encoder_q-layer.0": 12536.5225, "encoder_q-layer.1": 13040.6689, "encoder_q-layer.10": 21382.2773, "encoder_q-layer.11": 38916.918, "encoder_q-layer.2": 14466.0889, "encoder_q-layer.3": 14678.252, "encoder_q-layer.4": 15113.0781, "encoder_q-layer.5": 15020.2354, "encoder_q-layer.6": 15979.001, "encoder_q-layer.7": 16718.7129, "encoder_q-layer.8": 18411.3164, "encoder_q-layer.9": 17510.7969, "epoch": 0.84, "inbatch_neg_score": 38.609, "inbatch_pos_score": 39.125, "learning_rate": 7.88888888888889e-06, "loss": 2.1898, "norm_diff": 0.0522, "num_tokens_overlap": 5.5728, "num_tokens_union": 55.0504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27940.4547, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3074, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7022, "sent_len_1": 66.8358, "sent_len_max_0": 18.8238, "sent_len_max_1": 187.85, "stdk": 0.0457, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 85800 }, { "accuracy": 59.2773, "doc_norm": 6.3582, "encoder_q-embeddings": 17137.6348, "encoder_q-layer.0": 11610.7744, "encoder_q-layer.1": 12218.3516, "encoder_q-layer.10": 19477.8164, "encoder_q-layer.11": 39628.3867, "encoder_q-layer.2": 13417.083, "encoder_q-layer.3": 13703.8584, "encoder_q-layer.4": 13892.2158, "encoder_q-layer.5": 13668.8545, "encoder_q-layer.6": 14804.0674, "encoder_q-layer.7": 15184.7393, "encoder_q-layer.8": 17626.6211, "encoder_q-layer.9": 15748.7666, "epoch": 0.84, "inbatch_neg_score": 38.5813, "inbatch_pos_score": 39.125, "learning_rate": 7.833333333333333e-06, "loss": 2.2038, "norm_diff": 0.0518, "num_tokens_overlap": 5.5696, "num_tokens_union": 55.0564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26474.5074, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3064, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7129, "sent_len_1": 66.8339, "sent_len_max_0": 18.8813, "sent_len_max_1": 189.7413, "stdk": 0.0464, "stdq": 0.039, "stdqueue_k": 0.0, "step": 85900 }, { "accuracy": 59.9609, "doc_norm": 6.3575, "encoder_q-embeddings": 17382.6816, "encoder_q-layer.0": 12139.8398, "encoder_q-layer.1": 12546.5908, "encoder_q-layer.10": 20821.8652, "encoder_q-layer.11": 38117.7031, "encoder_q-layer.2": 13649.2041, "encoder_q-layer.3": 14369.6445, "encoder_q-layer.4": 15214.6055, "encoder_q-layer.5": 14140.9844, "encoder_q-layer.6": 14939.8281, "encoder_q-layer.7": 15386.5254, "encoder_q-layer.8": 17107.8125, "encoder_q-layer.9": 16022.0127, "epoch": 0.84, "inbatch_neg_score": 38.575, "inbatch_pos_score": 39.125, "learning_rate": 7.777777777777777e-06, "loss": 2.1897, "norm_diff": 0.0503, "num_tokens_overlap": 5.5813, "num_tokens_union": 55.0716, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26867.8816, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3071, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7299, "sent_len_1": 66.8567, "sent_len_max_0": 18.9, "sent_len_max_1": 188.0075, "stdk": 0.0469, "stdq": 0.0397, "stdqueue_k": 0.0, "step": 86000 }, { "accuracy": 55.4688, "doc_norm": 6.3563, "encoder_q-embeddings": 19049.0, "encoder_q-layer.0": 12930.1064, "encoder_q-layer.1": 13108.2236, "encoder_q-layer.10": 19716.6113, "encoder_q-layer.11": 36435.832, "encoder_q-layer.2": 14570.1699, "encoder_q-layer.3": 14732.0303, "encoder_q-layer.4": 14759.3447, "encoder_q-layer.5": 14336.8369, "encoder_q-layer.6": 15000.4463, "encoder_q-layer.7": 15775.2178, "encoder_q-layer.8": 17019.0215, "encoder_q-layer.9": 15882.9971, "epoch": 0.84, "inbatch_neg_score": 38.5803, "inbatch_pos_score": 39.0938, "learning_rate": 7.722222222222223e-06, "loss": 2.1388, "norm_diff": 0.0501, "num_tokens_overlap": 5.5852, "num_tokens_union": 55.015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27051.5907, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3062, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7378, "sent_len_1": 66.7643, "sent_len_max_0": 18.8637, "sent_len_max_1": 191.5062, "stdk": 0.045, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 86100 }, { "accuracy": 57.1289, "doc_norm": 6.3563, "encoder_q-embeddings": 17443.3184, "encoder_q-layer.0": 12305.1406, "encoder_q-layer.1": 13214.9775, "encoder_q-layer.10": 20783.8281, "encoder_q-layer.11": 38369.8047, "encoder_q-layer.2": 14132.4639, "encoder_q-layer.3": 14418.666, "encoder_q-layer.4": 15291.7793, "encoder_q-layer.5": 14698.4033, "encoder_q-layer.6": 15610.6455, "encoder_q-layer.7": 16681.832, "encoder_q-layer.8": 18980.4082, "encoder_q-layer.9": 17085.4668, "epoch": 0.84, "inbatch_neg_score": 38.5764, "inbatch_pos_score": 39.0938, "learning_rate": 7.666666666666667e-06, "loss": 2.1529, "norm_diff": 0.0501, "num_tokens_overlap": 5.5781, "num_tokens_union": 55.0373, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27685.0593, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3062, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7192, "sent_len_1": 66.808, "sent_len_max_0": 18.905, "sent_len_max_1": 189.6275, "stdk": 0.0441, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 86200 }, { "accuracy": 59.1797, "doc_norm": 6.3563, "encoder_q-embeddings": 16602.7578, "encoder_q-layer.0": 11999.6338, "encoder_q-layer.1": 12472.0762, "encoder_q-layer.10": 20845.1211, "encoder_q-layer.11": 41974.2852, "encoder_q-layer.2": 13794.0645, "encoder_q-layer.3": 13862.752, "encoder_q-layer.4": 14090.8584, "encoder_q-layer.5": 13565.2832, "encoder_q-layer.6": 14804.9053, "encoder_q-layer.7": 15674.2881, "encoder_q-layer.8": 18118.0039, "encoder_q-layer.9": 16425.0527, "epoch": 0.84, "inbatch_neg_score": 38.5729, "inbatch_pos_score": 39.0938, "learning_rate": 7.611111111111112e-06, "loss": 2.1442, "norm_diff": 0.0508, "num_tokens_overlap": 5.5774, "num_tokens_union": 54.9717, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27064.6453, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3054, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7261, "sent_len_1": 66.6559, "sent_len_max_0": 18.8538, "sent_len_max_1": 186.0325, "stdk": 0.0466, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 86300 }, { "accuracy": 58.1055, "doc_norm": 6.3557, "encoder_q-embeddings": 17070.9629, "encoder_q-layer.0": 12051.3867, "encoder_q-layer.1": 12621.7139, "encoder_q-layer.10": 20385.9863, "encoder_q-layer.11": 38180.6289, "encoder_q-layer.2": 13617.6328, "encoder_q-layer.3": 14118.4961, "encoder_q-layer.4": 14327.9658, "encoder_q-layer.5": 13666.7129, "encoder_q-layer.6": 14367.8125, "encoder_q-layer.7": 15792.9805, "encoder_q-layer.8": 17443.8496, "encoder_q-layer.9": 16922.582, "epoch": 0.84, "inbatch_neg_score": 38.5651, "inbatch_pos_score": 39.0938, "learning_rate": 7.555555555555556e-06, "loss": 2.1483, "norm_diff": 0.0504, "num_tokens_overlap": 5.5807, "num_tokens_union": 55.0825, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26760.2103, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3053, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7298, "sent_len_1": 66.8526, "sent_len_max_0": 18.8025, "sent_len_max_1": 188.2788, "stdk": 0.0452, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 86400 }, { "accuracy": 57.1289, "doc_norm": 6.357, "encoder_q-embeddings": 17275.2617, "encoder_q-layer.0": 12192.2764, "encoder_q-layer.1": 12743.3701, "encoder_q-layer.10": 19956.4043, "encoder_q-layer.11": 44092.3516, "encoder_q-layer.2": 14199.9697, "encoder_q-layer.3": 14217.7158, "encoder_q-layer.4": 15075.1182, "encoder_q-layer.5": 15383.3789, "encoder_q-layer.6": 15846.3027, "encoder_q-layer.7": 17723.0879, "encoder_q-layer.8": 18481.748, "encoder_q-layer.9": 16917.6074, "epoch": 0.84, "inbatch_neg_score": 38.5693, "inbatch_pos_score": 39.0938, "learning_rate": 7.5e-06, "loss": 2.2235, "norm_diff": 0.0522, "num_tokens_overlap": 5.5614, "num_tokens_union": 55.0593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28545.7269, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3048, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7096, "sent_len_1": 66.8426, "sent_len_max_0": 18.8463, "sent_len_max_1": 189.6488, "stdk": 0.0464, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 86500 }, { "accuracy": 55.6641, "doc_norm": 6.3564, "encoder_q-embeddings": 17412.2461, "encoder_q-layer.0": 12318.7891, "encoder_q-layer.1": 13033.0303, "encoder_q-layer.10": 23821.0273, "encoder_q-layer.11": 44892.9336, "encoder_q-layer.2": 14395.7393, "encoder_q-layer.3": 14379.0674, "encoder_q-layer.4": 15072.6152, "encoder_q-layer.5": 15109.8369, "encoder_q-layer.6": 15543.6113, "encoder_q-layer.7": 17024.7441, "encoder_q-layer.8": 19504.8848, "encoder_q-layer.9": 18562.7715, "epoch": 0.85, "inbatch_neg_score": 38.5722, "inbatch_pos_score": 39.0938, "learning_rate": 7.444444444444444e-06, "loss": 2.1896, "norm_diff": 0.0507, "num_tokens_overlap": 5.5814, "num_tokens_union": 54.9981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29095.6062, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3057, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7152, "sent_len_1": 66.7311, "sent_len_max_0": 18.7937, "sent_len_max_1": 188.765, "stdk": 0.0457, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 86600 }, { "accuracy": 61.2305, "doc_norm": 6.3552, "encoder_q-embeddings": 17450.1484, "encoder_q-layer.0": 12659.4893, "encoder_q-layer.1": 13142.9102, "encoder_q-layer.10": 20475.7148, "encoder_q-layer.11": 38322.125, "encoder_q-layer.2": 14349.6475, "encoder_q-layer.3": 14226.2158, "encoder_q-layer.4": 14810.7949, "encoder_q-layer.5": 14153.4678, "encoder_q-layer.6": 15370.0674, "encoder_q-layer.7": 16063.8887, "encoder_q-layer.8": 18502.5703, "encoder_q-layer.9": 16928.2793, "epoch": 0.85, "inbatch_neg_score": 38.5497, "inbatch_pos_score": 39.0938, "learning_rate": 7.38888888888889e-06, "loss": 2.1531, "norm_diff": 0.0501, "num_tokens_overlap": 5.5937, "num_tokens_union": 55.0941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26961.3195, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3051, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7317, "sent_len_1": 66.9095, "sent_len_max_0": 18.825, "sent_len_max_1": 188.5588, "stdk": 0.047, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 86700 }, { "accuracy": 59.8633, "doc_norm": 6.3595, "encoder_q-embeddings": 16766.9805, "encoder_q-layer.0": 11840.749, "encoder_q-layer.1": 12398.2939, "encoder_q-layer.10": 21051.8145, "encoder_q-layer.11": 39031.2266, "encoder_q-layer.2": 13485.1924, "encoder_q-layer.3": 13403.4355, "encoder_q-layer.4": 13750.4639, "encoder_q-layer.5": 13262.9639, "encoder_q-layer.6": 14274.9912, "encoder_q-layer.7": 15394.1807, "encoder_q-layer.8": 17407.1465, "encoder_q-layer.9": 15740.7148, "epoch": 0.85, "inbatch_neg_score": 38.5552, "inbatch_pos_score": 39.0938, "learning_rate": 7.333333333333334e-06, "loss": 2.1848, "norm_diff": 0.0561, "num_tokens_overlap": 5.5823, "num_tokens_union": 54.871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26638.8821, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3034, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7286, "sent_len_1": 66.5643, "sent_len_max_0": 18.8362, "sent_len_max_1": 191.3075, "stdk": 0.0463, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 86800 }, { "accuracy": 58.3984, "doc_norm": 6.3539, "encoder_q-embeddings": 17349.0195, "encoder_q-layer.0": 12759.4082, "encoder_q-layer.1": 13134.3076, "encoder_q-layer.10": 22704.8105, "encoder_q-layer.11": 41685.9102, "encoder_q-layer.2": 14438.9941, "encoder_q-layer.3": 14257.6611, "encoder_q-layer.4": 14684.7012, "encoder_q-layer.5": 14479.5967, "encoder_q-layer.6": 15379.1592, "encoder_q-layer.7": 16501.4766, "encoder_q-layer.8": 18823.4473, "encoder_q-layer.9": 17393.1523, "epoch": 0.85, "inbatch_neg_score": 38.5551, "inbatch_pos_score": 39.0938, "learning_rate": 7.277777777777778e-06, "loss": 2.1984, "norm_diff": 0.049, "num_tokens_overlap": 5.5726, "num_tokens_union": 54.8875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28213.2983, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3049, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7385, "sent_len_1": 66.5218, "sent_len_max_0": 18.8188, "sent_len_max_1": 187.2237, "stdk": 0.0465, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 86900 }, { "accuracy": 58.8867, "doc_norm": 6.3557, "encoder_q-embeddings": 16969.002, "encoder_q-layer.0": 11819.0498, "encoder_q-layer.1": 12156.085, "encoder_q-layer.10": 23666.9121, "encoder_q-layer.11": 42469.0625, "encoder_q-layer.2": 13468.7949, "encoder_q-layer.3": 13701.8799, "encoder_q-layer.4": 14307.0605, "encoder_q-layer.5": 14244.6504, "encoder_q-layer.6": 15398.9658, "encoder_q-layer.7": 18197.6641, "encoder_q-layer.8": 19028.9961, "encoder_q-layer.9": 17485.1816, "epoch": 0.85, "inbatch_neg_score": 38.5404, "inbatch_pos_score": 39.0625, "learning_rate": 7.222222222222222e-06, "loss": 2.1811, "norm_diff": 0.0534, "num_tokens_overlap": 5.59, "num_tokens_union": 55.0172, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28129.0704, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3023, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.732, "sent_len_1": 66.7796, "sent_len_max_0": 18.8, "sent_len_max_1": 189.3225, "stdk": 0.0473, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 87000 }, { "accuracy": 59.7656, "doc_norm": 6.3579, "encoder_q-embeddings": 18483.5742, "encoder_q-layer.0": 12502.6582, "encoder_q-layer.1": 12572.7979, "encoder_q-layer.10": 21424.7891, "encoder_q-layer.11": 41851.2109, "encoder_q-layer.2": 13687.5264, "encoder_q-layer.3": 14150.7529, "encoder_q-layer.4": 14433.6855, "encoder_q-layer.5": 14862.6377, "encoder_q-layer.6": 15745.4834, "encoder_q-layer.7": 18336.0625, "encoder_q-layer.8": 20755.0156, "encoder_q-layer.9": 17522.6426, "epoch": 0.85, "inbatch_neg_score": 38.5264, "inbatch_pos_score": 39.0625, "learning_rate": 7.166666666666667e-06, "loss": 2.1367, "norm_diff": 0.0538, "num_tokens_overlap": 5.5994, "num_tokens_union": 55.0794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28593.7213, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3041, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7306, "sent_len_1": 66.8893, "sent_len_max_0": 18.8163, "sent_len_max_1": 191.4263, "stdk": 0.0469, "stdq": 0.0396, "stdqueue_k": 0.0, "step": 87100 }, { "accuracy": 59.9609, "doc_norm": 6.3527, "encoder_q-embeddings": 17404.5215, "encoder_q-layer.0": 12515.2842, "encoder_q-layer.1": 12791.1064, "encoder_q-layer.10": 22033.418, "encoder_q-layer.11": 43438.4609, "encoder_q-layer.2": 13739.4834, "encoder_q-layer.3": 14004.5781, "encoder_q-layer.4": 14654.5566, "encoder_q-layer.5": 14313.0566, "encoder_q-layer.6": 15558.3125, "encoder_q-layer.7": 16759.5977, "encoder_q-layer.8": 19737.2598, "encoder_q-layer.9": 17462.1836, "epoch": 0.85, "inbatch_neg_score": 38.5108, "inbatch_pos_score": 39.0625, "learning_rate": 7.111111111111112e-06, "loss": 2.135, "norm_diff": 0.0506, "num_tokens_overlap": 5.5745, "num_tokens_union": 54.955, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28061.8884, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3021, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7004, "sent_len_1": 66.7381, "sent_len_max_0": 18.9125, "sent_len_max_1": 188.5588, "stdk": 0.046, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 87200 }, { "accuracy": 57.7148, "doc_norm": 6.3515, "encoder_q-embeddings": 16661.7012, "encoder_q-layer.0": 11824.1777, "encoder_q-layer.1": 12877.3701, "encoder_q-layer.10": 20907.0, "encoder_q-layer.11": 39379.9297, "encoder_q-layer.2": 13810.8232, "encoder_q-layer.3": 14058.3057, "encoder_q-layer.4": 14269.1719, "encoder_q-layer.5": 13871.9404, "encoder_q-layer.6": 15273.793, "encoder_q-layer.7": 15976.7363, "encoder_q-layer.8": 19652.9531, "encoder_q-layer.9": 16948.834, "epoch": 0.85, "inbatch_neg_score": 38.5054, "inbatch_pos_score": 39.0312, "learning_rate": 7.055555555555556e-06, "loss": 2.1662, "norm_diff": 0.0514, "num_tokens_overlap": 5.583, "num_tokens_union": 54.8718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27220.3223, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.3002, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7416, "sent_len_1": 66.4989, "sent_len_max_0": 18.9363, "sent_len_max_1": 187.82, "stdk": 0.0466, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 87300 }, { "accuracy": 56.8359, "doc_norm": 6.3523, "encoder_q-embeddings": 17129.2324, "encoder_q-layer.0": 12102.9355, "encoder_q-layer.1": 12534.2334, "encoder_q-layer.10": 23292.2207, "encoder_q-layer.11": 41827.2031, "encoder_q-layer.2": 13837.4355, "encoder_q-layer.3": 13882.1328, "encoder_q-layer.4": 14341.8613, "encoder_q-layer.5": 14645.1172, "encoder_q-layer.6": 15708.998, "encoder_q-layer.7": 16045.5127, "encoder_q-layer.8": 18086.75, "encoder_q-layer.9": 16815.5078, "epoch": 0.85, "inbatch_neg_score": 38.5108, "inbatch_pos_score": 39.0312, "learning_rate": 7.000000000000001e-06, "loss": 2.1856, "norm_diff": 0.0529, "num_tokens_overlap": 5.5771, "num_tokens_union": 54.9631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27777.3113, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2994, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7103, "sent_len_1": 66.7199, "sent_len_max_0": 18.8188, "sent_len_max_1": 189.2812, "stdk": 0.0457, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 87400 }, { "accuracy": 55.2734, "doc_norm": 6.3513, "encoder_q-embeddings": 18402.0977, "encoder_q-layer.0": 13133.8359, "encoder_q-layer.1": 13405.457, "encoder_q-layer.10": 20694.1797, "encoder_q-layer.11": 41978.7148, "encoder_q-layer.2": 14740.2871, "encoder_q-layer.3": 15092.2764, "encoder_q-layer.4": 15386.6436, "encoder_q-layer.5": 14647.8691, "encoder_q-layer.6": 15436.7031, "encoder_q-layer.7": 16749.8887, "encoder_q-layer.8": 18082.0801, "encoder_q-layer.9": 16602.8223, "epoch": 0.85, "inbatch_neg_score": 38.4877, "inbatch_pos_score": 39.0, "learning_rate": 6.944444444444445e-06, "loss": 2.158, "norm_diff": 0.0511, "num_tokens_overlap": 5.5787, "num_tokens_union": 54.9682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28333.8091, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.3002, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7181, "sent_len_1": 66.7282, "sent_len_max_0": 18.8337, "sent_len_max_1": 188.4837, "stdk": 0.0469, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 87500 }, { "accuracy": 57.3242, "doc_norm": 6.3432, "encoder_q-embeddings": 17333.9863, "encoder_q-layer.0": 12006.3672, "encoder_q-layer.1": 12633.4102, "encoder_q-layer.10": 19945.0273, "encoder_q-layer.11": 36494.7969, "encoder_q-layer.2": 13662.8125, "encoder_q-layer.3": 14343.8652, "encoder_q-layer.4": 14528.458, "encoder_q-layer.5": 14357.4854, "encoder_q-layer.6": 15456.2754, "encoder_q-layer.7": 16068.2637, "encoder_q-layer.8": 18066.9844, "encoder_q-layer.9": 16869.5938, "epoch": 0.86, "inbatch_neg_score": 38.4693, "inbatch_pos_score": 39.0, "learning_rate": 6.888888888888889e-06, "loss": 2.1592, "norm_diff": 0.0478, "num_tokens_overlap": 5.5716, "num_tokens_union": 54.9738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26706.7312, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2954, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7095, "sent_len_1": 66.6973, "sent_len_max_0": 18.8912, "sent_len_max_1": 189.7775, "stdk": 0.0457, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 87600 }, { "accuracy": 57.7148, "doc_norm": 6.3485, "encoder_q-embeddings": 17409.002, "encoder_q-layer.0": 12306.291, "encoder_q-layer.1": 13189.5117, "encoder_q-layer.10": 21724.0195, "encoder_q-layer.11": 41184.6211, "encoder_q-layer.2": 14531.3564, "encoder_q-layer.3": 14896.9512, "encoder_q-layer.4": 14815.6074, "encoder_q-layer.5": 15402.5996, "encoder_q-layer.6": 16014.6201, "encoder_q-layer.7": 16736.7305, "encoder_q-layer.8": 19234.8555, "encoder_q-layer.9": 18156.5879, "epoch": 0.86, "inbatch_neg_score": 38.4567, "inbatch_pos_score": 39.0, "learning_rate": 6.833333333333333e-06, "loss": 2.1577, "norm_diff": 0.0515, "num_tokens_overlap": 5.5897, "num_tokens_union": 55.2392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28396.5283, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.297, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7353, "sent_len_1": 67.1457, "sent_len_max_0": 18.7162, "sent_len_max_1": 189.4125, "stdk": 0.0457, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 87700 }, { "accuracy": 57.0312, "doc_norm": 6.3469, "encoder_q-embeddings": 16855.2148, "encoder_q-layer.0": 12454.7266, "encoder_q-layer.1": 12536.915, "encoder_q-layer.10": 23831.2617, "encoder_q-layer.11": 45926.5586, "encoder_q-layer.2": 13762.4336, "encoder_q-layer.3": 14019.8867, "encoder_q-layer.4": 14536.4092, "encoder_q-layer.5": 14515.3838, "encoder_q-layer.6": 16133.5254, "encoder_q-layer.7": 17463.6406, "encoder_q-layer.8": 21928.1016, "encoder_q-layer.9": 19317.5938, "epoch": 0.86, "inbatch_neg_score": 38.452, "inbatch_pos_score": 38.9688, "learning_rate": 6.777777777777779e-06, "loss": 2.1396, "norm_diff": 0.0519, "num_tokens_overlap": 5.5712, "num_tokens_union": 55.0495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29013.0372, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.295, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7335, "sent_len_1": 66.8084, "sent_len_max_0": 18.8125, "sent_len_max_1": 189.1087, "stdk": 0.0461, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 87800 }, { "accuracy": 58.9844, "doc_norm": 6.3463, "encoder_q-embeddings": 17035.0879, "encoder_q-layer.0": 12250.0, "encoder_q-layer.1": 12830.6934, "encoder_q-layer.10": 20494.6719, "encoder_q-layer.11": 41299.5352, "encoder_q-layer.2": 13777.8311, "encoder_q-layer.3": 13730.3047, "encoder_q-layer.4": 14326.082, "encoder_q-layer.5": 13855.3242, "encoder_q-layer.6": 14910.8184, "encoder_q-layer.7": 16004.9131, "encoder_q-layer.8": 18134.6445, "encoder_q-layer.9": 16127.7715, "epoch": 0.86, "inbatch_neg_score": 38.4292, "inbatch_pos_score": 38.9688, "learning_rate": 6.722222222222223e-06, "loss": 2.1275, "norm_diff": 0.0509, "num_tokens_overlap": 5.5837, "num_tokens_union": 55.0787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27383.1986, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2954, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7088, "sent_len_1": 66.8914, "sent_len_max_0": 18.9288, "sent_len_max_1": 189.5213, "stdk": 0.0472, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 87900 }, { "accuracy": 58.3008, "doc_norm": 6.3459, "encoder_q-embeddings": 17791.3555, "encoder_q-layer.0": 12406.7158, "encoder_q-layer.1": 13333.3379, "encoder_q-layer.10": 20336.332, "encoder_q-layer.11": 40813.418, "encoder_q-layer.2": 14297.3604, "encoder_q-layer.3": 14294.1934, "encoder_q-layer.4": 14866.4229, "encoder_q-layer.5": 14804.8193, "encoder_q-layer.6": 15057.6611, "encoder_q-layer.7": 15980.2764, "encoder_q-layer.8": 18385.4375, "encoder_q-layer.9": 16527.4863, "epoch": 0.86, "inbatch_neg_score": 38.4373, "inbatch_pos_score": 38.9688, "learning_rate": 6.666666666666667e-06, "loss": 2.1568, "norm_diff": 0.0508, "num_tokens_overlap": 5.5722, "num_tokens_union": 54.8898, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27662.6282, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2951, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7179, "sent_len_1": 66.5541, "sent_len_max_0": 18.74, "sent_len_max_1": 188.2413, "stdk": 0.0462, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 88000 }, { "accuracy": 59.375, "doc_norm": 6.3449, "encoder_q-embeddings": 17351.1543, "encoder_q-layer.0": 12214.6846, "encoder_q-layer.1": 12527.1582, "encoder_q-layer.10": 19894.1191, "encoder_q-layer.11": 39906.6602, "encoder_q-layer.2": 13649.8262, "encoder_q-layer.3": 13785.6709, "encoder_q-layer.4": 14473.0703, "encoder_q-layer.5": 14016.7715, "encoder_q-layer.6": 15027.835, "encoder_q-layer.7": 16146.0039, "encoder_q-layer.8": 19446.3574, "encoder_q-layer.9": 17189.0254, "epoch": 0.86, "inbatch_neg_score": 38.4222, "inbatch_pos_score": 38.9375, "learning_rate": 6.611111111111111e-06, "loss": 2.187, "norm_diff": 0.0509, "num_tokens_overlap": 5.5957, "num_tokens_union": 55.0021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27249.0625, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2939, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7413, "sent_len_1": 66.7647, "sent_len_max_0": 18.8637, "sent_len_max_1": 190.4525, "stdk": 0.0456, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 88100 }, { "accuracy": 60.8398, "doc_norm": 6.3468, "encoder_q-embeddings": 16733.9824, "encoder_q-layer.0": 11775.916, "encoder_q-layer.1": 12349.2783, "encoder_q-layer.10": 18984.4531, "encoder_q-layer.11": 36739.0781, "encoder_q-layer.2": 13731.0146, "encoder_q-layer.3": 13591.7119, "encoder_q-layer.4": 13962.2412, "encoder_q-layer.5": 14181.3105, "encoder_q-layer.6": 15734.2012, "encoder_q-layer.7": 17548.7559, "encoder_q-layer.8": 18265.1719, "encoder_q-layer.9": 16330.4004, "epoch": 0.86, "inbatch_neg_score": 38.4072, "inbatch_pos_score": 38.9375, "learning_rate": 6.555555555555556e-06, "loss": 2.1642, "norm_diff": 0.0542, "num_tokens_overlap": 5.5769, "num_tokens_union": 54.9772, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26278.9719, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2926, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.716, "sent_len_1": 66.7136, "sent_len_max_0": 18.815, "sent_len_max_1": 190.4462, "stdk": 0.0462, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 88200 }, { "accuracy": 57.3242, "doc_norm": 6.3465, "encoder_q-embeddings": 17553.8125, "encoder_q-layer.0": 12217.1533, "encoder_q-layer.1": 12726.248, "encoder_q-layer.10": 24530.1504, "encoder_q-layer.11": 46477.3984, "encoder_q-layer.2": 13954.9639, "encoder_q-layer.3": 13914.2891, "encoder_q-layer.4": 15172.4395, "encoder_q-layer.5": 14788.5156, "encoder_q-layer.6": 16280.0996, "encoder_q-layer.7": 19516.3926, "encoder_q-layer.8": 19897.6406, "encoder_q-layer.9": 18472.5859, "epoch": 0.86, "inbatch_neg_score": 38.41, "inbatch_pos_score": 38.9375, "learning_rate": 6.5000000000000004e-06, "loss": 2.2166, "norm_diff": 0.054, "num_tokens_overlap": 5.5753, "num_tokens_union": 55.0502, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29606.5554, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2925, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7333, "sent_len_1": 66.7581, "sent_len_max_0": 18.8863, "sent_len_max_1": 188.465, "stdk": 0.0471, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 88300 }, { "accuracy": 57.6172, "doc_norm": 6.3441, "encoder_q-embeddings": 17055.5293, "encoder_q-layer.0": 12118.8291, "encoder_q-layer.1": 12738.3857, "encoder_q-layer.10": 20191.0977, "encoder_q-layer.11": 38048.6055, "encoder_q-layer.2": 13707.498, "encoder_q-layer.3": 13883.0527, "encoder_q-layer.4": 14526.6162, "encoder_q-layer.5": 14680.3242, "encoder_q-layer.6": 15316.5537, "encoder_q-layer.7": 16549.2598, "encoder_q-layer.8": 17508.3516, "encoder_q-layer.9": 16272.749, "epoch": 0.86, "inbatch_neg_score": 38.4137, "inbatch_pos_score": 38.9375, "learning_rate": 6.4444444444444445e-06, "loss": 2.2251, "norm_diff": 0.0511, "num_tokens_overlap": 5.5692, "num_tokens_union": 54.9112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26808.1784, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2929, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7313, "sent_len_1": 66.5491, "sent_len_max_0": 18.8487, "sent_len_max_1": 189.8663, "stdk": 0.0449, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 88400 }, { "accuracy": 55.6641, "doc_norm": 6.3419, "encoder_q-embeddings": 17601.8633, "encoder_q-layer.0": 12341.0586, "encoder_q-layer.1": 12897.2275, "encoder_q-layer.10": 21974.2031, "encoder_q-layer.11": 44665.9883, "encoder_q-layer.2": 14435.0762, "encoder_q-layer.3": 14300.3398, "encoder_q-layer.4": 14918.5322, "encoder_q-layer.5": 14734.0732, "encoder_q-layer.6": 15312.9395, "encoder_q-layer.7": 16290.8262, "encoder_q-layer.8": 18918.2305, "encoder_q-layer.9": 17423.916, "epoch": 0.86, "inbatch_neg_score": 38.3905, "inbatch_pos_score": 38.9062, "learning_rate": 6.3888888888888885e-06, "loss": 2.2, "norm_diff": 0.052, "num_tokens_overlap": 5.562, "num_tokens_union": 54.997, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28350.4909, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2899, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6981, "sent_len_1": 66.7792, "sent_len_max_0": 18.8263, "sent_len_max_1": 189.345, "stdk": 0.0466, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 88500 }, { "accuracy": 57.2266, "doc_norm": 6.3401, "encoder_q-embeddings": 16666.2578, "encoder_q-layer.0": 11884.3242, "encoder_q-layer.1": 12336.5342, "encoder_q-layer.10": 21166.0625, "encoder_q-layer.11": 42341.2305, "encoder_q-layer.2": 13575.3799, "encoder_q-layer.3": 13644.6729, "encoder_q-layer.4": 14806.1865, "encoder_q-layer.5": 14101.6963, "encoder_q-layer.6": 15464.1016, "encoder_q-layer.7": 16607.6973, "encoder_q-layer.8": 18850.3203, "encoder_q-layer.9": 17965.9648, "epoch": 0.87, "inbatch_neg_score": 38.3707, "inbatch_pos_score": 38.9062, "learning_rate": 6.333333333333334e-06, "loss": 2.1809, "norm_diff": 0.0513, "num_tokens_overlap": 5.5788, "num_tokens_union": 55.0434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27683.8513, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2888, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7248, "sent_len_1": 66.8149, "sent_len_max_0": 18.8912, "sent_len_max_1": 190.2113, "stdk": 0.0465, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 88600 }, { "accuracy": 59.7656, "doc_norm": 6.3371, "encoder_q-embeddings": 17265.0977, "encoder_q-layer.0": 12285.0459, "encoder_q-layer.1": 12849.3311, "encoder_q-layer.10": 27003.0371, "encoder_q-layer.11": 44643.1719, "encoder_q-layer.2": 13839.1719, "encoder_q-layer.3": 13724.0527, "encoder_q-layer.4": 14530.2998, "encoder_q-layer.5": 14839.3828, "encoder_q-layer.6": 15591.7949, "encoder_q-layer.7": 16603.8867, "encoder_q-layer.8": 20107.1191, "encoder_q-layer.9": 19135.7676, "epoch": 0.87, "inbatch_neg_score": 38.3741, "inbatch_pos_score": 38.9062, "learning_rate": 6.277777777777778e-06, "loss": 2.1856, "norm_diff": 0.0498, "num_tokens_overlap": 5.5821, "num_tokens_union": 55.0777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29093.9972, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2874, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7431, "sent_len_1": 66.8955, "sent_len_max_0": 18.9763, "sent_len_max_1": 189.1438, "stdk": 0.0452, "stdq": 0.0375, "stdqueue_k": 0.0, "step": 88700 }, { "accuracy": 58.3008, "doc_norm": 6.3396, "encoder_q-embeddings": 17457.6035, "encoder_q-layer.0": 11924.5459, "encoder_q-layer.1": 12549.6104, "encoder_q-layer.10": 23771.1816, "encoder_q-layer.11": 43304.8711, "encoder_q-layer.2": 13864.0342, "encoder_q-layer.3": 13704.1562, "encoder_q-layer.4": 14368.3682, "encoder_q-layer.5": 14038.9209, "encoder_q-layer.6": 14957.8389, "encoder_q-layer.7": 16279.6367, "encoder_q-layer.8": 18059.7344, "encoder_q-layer.9": 16638.4277, "epoch": 0.87, "inbatch_neg_score": 38.3654, "inbatch_pos_score": 38.9062, "learning_rate": 6.222222222222222e-06, "loss": 2.2353, "norm_diff": 0.0504, "num_tokens_overlap": 5.5768, "num_tokens_union": 54.8504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28353.0722, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2892, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7122, "sent_len_1": 66.5765, "sent_len_max_0": 18.8513, "sent_len_max_1": 189.1375, "stdk": 0.0462, "stdq": 0.039, "stdqueue_k": 0.0, "step": 88800 }, { "accuracy": 58.0078, "doc_norm": 6.341, "encoder_q-embeddings": 17068.4453, "encoder_q-layer.0": 12072.2803, "encoder_q-layer.1": 12535.251, "encoder_q-layer.10": 21614.9004, "encoder_q-layer.11": 41236.3906, "encoder_q-layer.2": 13641.6475, "encoder_q-layer.3": 13911.7256, "encoder_q-layer.4": 14703.7656, "encoder_q-layer.5": 14323.2578, "encoder_q-layer.6": 15097.3467, "encoder_q-layer.7": 15439.0488, "encoder_q-layer.8": 18640.9707, "encoder_q-layer.9": 16807.3555, "epoch": 0.87, "inbatch_neg_score": 38.3712, "inbatch_pos_score": 38.9062, "learning_rate": 6.166666666666667e-06, "loss": 2.1528, "norm_diff": 0.0513, "num_tokens_overlap": 5.571, "num_tokens_union": 55.0223, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27606.8113, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2897, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7157, "sent_len_1": 66.8251, "sent_len_max_0": 18.9488, "sent_len_max_1": 191.05, "stdk": 0.0467, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 88900 }, { "accuracy": 55.6641, "doc_norm": 6.3403, "encoder_q-embeddings": 19637.4141, "encoder_q-layer.0": 13322.8252, "encoder_q-layer.1": 13717.6992, "encoder_q-layer.10": 23014.9453, "encoder_q-layer.11": 40331.8789, "encoder_q-layer.2": 14917.1953, "encoder_q-layer.3": 14905.1494, "encoder_q-layer.4": 15742.4395, "encoder_q-layer.5": 15028.084, "encoder_q-layer.6": 16703.8574, "encoder_q-layer.7": 17200.4824, "encoder_q-layer.8": 19591.293, "encoder_q-layer.9": 18419.3652, "epoch": 0.87, "inbatch_neg_score": 38.3774, "inbatch_pos_score": 38.875, "learning_rate": 6.111111111111111e-06, "loss": 2.1743, "norm_diff": 0.0504, "num_tokens_overlap": 5.588, "num_tokens_union": 55.0602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28772.6155, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2898, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7347, "sent_len_1": 66.9083, "sent_len_max_0": 18.9187, "sent_len_max_1": 190.8487, "stdk": 0.047, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 89000 }, { "accuracy": 57.2266, "doc_norm": 6.3429, "encoder_q-embeddings": 17614.6602, "encoder_q-layer.0": 12305.8516, "encoder_q-layer.1": 12631.2422, "encoder_q-layer.10": 23539.2051, "encoder_q-layer.11": 39182.4219, "encoder_q-layer.2": 14450.8311, "encoder_q-layer.3": 13857.6094, "encoder_q-layer.4": 14318.3018, "encoder_q-layer.5": 14884.5303, "encoder_q-layer.6": 15610.376, "encoder_q-layer.7": 17817.3555, "encoder_q-layer.8": 18775.5059, "encoder_q-layer.9": 17995.0469, "epoch": 0.87, "inbatch_neg_score": 38.367, "inbatch_pos_score": 38.9062, "learning_rate": 6.055555555555556e-06, "loss": 2.1789, "norm_diff": 0.0539, "num_tokens_overlap": 5.587, "num_tokens_union": 55.0879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27892.889, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.289, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.734, "sent_len_1": 66.9409, "sent_len_max_0": 18.78, "sent_len_max_1": 190.2988, "stdk": 0.0458, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 89100 }, { "accuracy": 58.3008, "doc_norm": 6.3424, "encoder_q-embeddings": 17379.6641, "encoder_q-layer.0": 12365.874, "encoder_q-layer.1": 13126.0938, "encoder_q-layer.10": 19343.7852, "encoder_q-layer.11": 38100.8438, "encoder_q-layer.2": 14046.4014, "encoder_q-layer.3": 13894.2207, "encoder_q-layer.4": 14638.6484, "encoder_q-layer.5": 14503.4551, "encoder_q-layer.6": 15635.6875, "encoder_q-layer.7": 16466.8281, "encoder_q-layer.8": 18435.0273, "encoder_q-layer.9": 16689.5605, "epoch": 0.87, "inbatch_neg_score": 38.3575, "inbatch_pos_score": 38.9062, "learning_rate": 6e-06, "loss": 2.1638, "norm_diff": 0.053, "num_tokens_overlap": 5.5768, "num_tokens_union": 54.9889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26833.5592, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2894, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7228, "sent_len_1": 66.7906, "sent_len_max_0": 18.9662, "sent_len_max_1": 190.4013, "stdk": 0.0465, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 89200 }, { "accuracy": 58.0078, "doc_norm": 6.3384, "encoder_q-embeddings": 17637.1133, "encoder_q-layer.0": 12535.4531, "encoder_q-layer.1": 12783.5273, "encoder_q-layer.10": 23397.8867, "encoder_q-layer.11": 42024.6953, "encoder_q-layer.2": 13806.6055, "encoder_q-layer.3": 14475.4375, "encoder_q-layer.4": 14806.7158, "encoder_q-layer.5": 14140.9678, "encoder_q-layer.6": 15731.7979, "encoder_q-layer.7": 16466.9316, "encoder_q-layer.8": 19358.3359, "encoder_q-layer.9": 18673.4277, "epoch": 0.87, "inbatch_neg_score": 38.3626, "inbatch_pos_score": 38.875, "learning_rate": 5.944444444444445e-06, "loss": 2.1579, "norm_diff": 0.0482, "num_tokens_overlap": 5.5733, "num_tokens_union": 54.9962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28614.9106, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2902, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7107, "sent_len_1": 66.7522, "sent_len_max_0": 18.81, "sent_len_max_1": 188.785, "stdk": 0.0466, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 89300 }, { "accuracy": 57.7148, "doc_norm": 6.3386, "encoder_q-embeddings": 17076.9688, "encoder_q-layer.0": 11794.8281, "encoder_q-layer.1": 12575.4844, "encoder_q-layer.10": 19932.7871, "encoder_q-layer.11": 38960.7539, "encoder_q-layer.2": 13664.7715, "encoder_q-layer.3": 13195.0879, "encoder_q-layer.4": 14048.3369, "encoder_q-layer.5": 14341.4551, "encoder_q-layer.6": 15108.1084, "encoder_q-layer.7": 16514.3906, "encoder_q-layer.8": 17689.1152, "encoder_q-layer.9": 16477.168, "epoch": 0.87, "inbatch_neg_score": 38.3669, "inbatch_pos_score": 38.875, "learning_rate": 5.888888888888889e-06, "loss": 2.208, "norm_diff": 0.0498, "num_tokens_overlap": 5.5698, "num_tokens_union": 55.0859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27040.105, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2888, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6827, "sent_len_1": 66.8135, "sent_len_max_0": 18.7613, "sent_len_max_1": 188.525, "stdk": 0.0461, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 89400 }, { "accuracy": 57.4219, "doc_norm": 6.3379, "encoder_q-embeddings": 17973.2793, "encoder_q-layer.0": 12187.5068, "encoder_q-layer.1": 12674.4805, "encoder_q-layer.10": 21288.7109, "encoder_q-layer.11": 41144.4297, "encoder_q-layer.2": 13915.3965, "encoder_q-layer.3": 13935.7402, "encoder_q-layer.4": 13920.6064, "encoder_q-layer.5": 14072.5469, "encoder_q-layer.6": 15259.751, "encoder_q-layer.7": 16310.6084, "encoder_q-layer.8": 17609.4551, "encoder_q-layer.9": 16901.25, "epoch": 0.87, "inbatch_neg_score": 38.3649, "inbatch_pos_score": 38.875, "learning_rate": 5.833333333333334e-06, "loss": 2.1985, "norm_diff": 0.0495, "num_tokens_overlap": 5.5651, "num_tokens_union": 54.8336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27241.6154, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2884, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6949, "sent_len_1": 66.5464, "sent_len_max_0": 18.9237, "sent_len_max_1": 189.365, "stdk": 0.0448, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 89500 }, { "accuracy": 60.4492, "doc_norm": 6.3401, "encoder_q-embeddings": 16903.1523, "encoder_q-layer.0": 12206.9941, "encoder_q-layer.1": 12990.5215, "encoder_q-layer.10": 22420.5215, "encoder_q-layer.11": 41283.3672, "encoder_q-layer.2": 14561.6162, "encoder_q-layer.3": 13928.9912, "encoder_q-layer.4": 14477.459, "encoder_q-layer.5": 14062.4531, "encoder_q-layer.6": 15188.1836, "encoder_q-layer.7": 16684.8535, "encoder_q-layer.8": 18854.0371, "encoder_q-layer.9": 16899.8281, "epoch": 0.87, "inbatch_neg_score": 38.3609, "inbatch_pos_score": 38.875, "learning_rate": 5.777777777777778e-06, "loss": 2.1835, "norm_diff": 0.0515, "num_tokens_overlap": 5.5754, "num_tokens_union": 54.9986, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27578.3732, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2886, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7042, "sent_len_1": 66.8045, "sent_len_max_0": 18.7025, "sent_len_max_1": 190.775, "stdk": 0.0456, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 89600 }, { "accuracy": 57.9102, "doc_norm": 6.3397, "encoder_q-embeddings": 17773.8652, "encoder_q-layer.0": 12474.6094, "encoder_q-layer.1": 13118.3193, "encoder_q-layer.10": 23422.5039, "encoder_q-layer.11": 42281.8789, "encoder_q-layer.2": 14392.0195, "encoder_q-layer.3": 14572.5117, "encoder_q-layer.4": 15076.2031, "encoder_q-layer.5": 14639.5439, "encoder_q-layer.6": 15952.8867, "encoder_q-layer.7": 17677.3613, "encoder_q-layer.8": 19776.25, "encoder_q-layer.9": 17339.7285, "epoch": 0.88, "inbatch_neg_score": 38.3561, "inbatch_pos_score": 38.875, "learning_rate": 5.722222222222223e-06, "loss": 2.193, "norm_diff": 0.0508, "num_tokens_overlap": 5.5717, "num_tokens_union": 54.9887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28782.2089, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2888, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7348, "sent_len_1": 66.7108, "sent_len_max_0": 18.8513, "sent_len_max_1": 188.9675, "stdk": 0.0445, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 89700 }, { "accuracy": 58.9844, "doc_norm": 6.3388, "encoder_q-embeddings": 17487.6523, "encoder_q-layer.0": 12241.3604, "encoder_q-layer.1": 12582.7607, "encoder_q-layer.10": 19288.9766, "encoder_q-layer.11": 37589.6875, "encoder_q-layer.2": 13712.1943, "encoder_q-layer.3": 13757.3906, "encoder_q-layer.4": 14521.8916, "encoder_q-layer.5": 14556.0215, "encoder_q-layer.6": 15166.1377, "encoder_q-layer.7": 16542.8535, "encoder_q-layer.8": 17609.1465, "encoder_q-layer.9": 15950.2402, "epoch": 0.88, "inbatch_neg_score": 38.3558, "inbatch_pos_score": 38.875, "learning_rate": 5.666666666666667e-06, "loss": 2.1749, "norm_diff": 0.052, "num_tokens_overlap": 5.5878, "num_tokens_union": 54.9942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26595.0809, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2868, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7317, "sent_len_1": 66.7175, "sent_len_max_0": 18.92, "sent_len_max_1": 190.2463, "stdk": 0.0476, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 89800 }, { "accuracy": 59.8633, "doc_norm": 6.3391, "encoder_q-embeddings": 17417.4629, "encoder_q-layer.0": 12733.2148, "encoder_q-layer.1": 12944.998, "encoder_q-layer.10": 20173.5215, "encoder_q-layer.11": 37666.2578, "encoder_q-layer.2": 13565.9521, "encoder_q-layer.3": 13512.1309, "encoder_q-layer.4": 13847.6143, "encoder_q-layer.5": 13692.7061, "encoder_q-layer.6": 14346.5381, "encoder_q-layer.7": 15382.5996, "encoder_q-layer.8": 17270.9453, "encoder_q-layer.9": 16093.5654, "epoch": 0.88, "inbatch_neg_score": 38.3389, "inbatch_pos_score": 38.875, "learning_rate": 5.611111111111112e-06, "loss": 2.195, "norm_diff": 0.0518, "num_tokens_overlap": 5.5775, "num_tokens_union": 55.0332, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26487.7789, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2873, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7059, "sent_len_1": 66.8372, "sent_len_max_0": 18.7862, "sent_len_max_1": 189.2413, "stdk": 0.0467, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 89900 }, { "accuracy": 58.3984, "doc_norm": 6.335, "encoder_q-embeddings": 17171.3281, "encoder_q-layer.0": 12061.6875, "encoder_q-layer.1": 12324.8369, "encoder_q-layer.10": 20179.9961, "encoder_q-layer.11": 41096.5508, "encoder_q-layer.2": 13703.0332, "encoder_q-layer.3": 13792.4482, "encoder_q-layer.4": 14498.999, "encoder_q-layer.5": 14578.6445, "encoder_q-layer.6": 15468.0508, "encoder_q-layer.7": 16321.6582, "encoder_q-layer.8": 18711.3203, "encoder_q-layer.9": 16957.1172, "epoch": 0.88, "inbatch_neg_score": 38.3424, "inbatch_pos_score": 38.875, "learning_rate": 5.555555555555556e-06, "loss": 2.1931, "norm_diff": 0.0481, "num_tokens_overlap": 5.5643, "num_tokens_union": 54.9956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27608.8904, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2869, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7038, "sent_len_1": 66.7422, "sent_len_max_0": 18.7512, "sent_len_max_1": 189.5675, "stdk": 0.0464, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 90000 }, { "dev_runtime": 27.208, "dev_samples_per_second": 2.352, "dev_steps_per_second": 0.037, "epoch": 0.88, "step": 90000, "test_accuracy": 8.6883544921875, "test_doc_norm": 6.327967643737793, "test_inbatch_neg_score": 38.90996551513672, "test_inbatch_pos_score": 39.71258544921875, "test_loss": 4.136532306671143, "test_norm_diff": 0.0020042583346366882, "test_query_norm": 6.327746391296387, "test_queue_k_norm": 0.0, "test_stdk": 0.03785919398069382, "test_stdq": 0.03780497610569, "test_stdqueue_k": 0.0 }, { "dev_runtime": 27.208, "dev_samples_per_second": 2.352, "dev_steps_per_second": 0.037, "epoch": 0.88, "eval_beir-arguana_ndcg@10": 0.36385, "eval_beir-arguana_recall@10": 0.65078, "eval_beir-arguana_recall@100": 0.96302, "eval_beir-arguana_recall@20": 0.81863, "eval_beir-avg_ndcg@10": 0.38864308333333336, "eval_beir-avg_recall@10": 0.4607023333333333, "eval_beir-avg_recall@100": 0.6492880000000001, "eval_beir-avg_recall@20": 0.5297275, "eval_beir-cqadupstack_ndcg@10": 0.3050408333333333, "eval_beir-cqadupstack_recall@10": 0.4079733333333333, "eval_beir-cqadupstack_recall@100": 0.6394799999999999, "eval_beir-cqadupstack_recall@20": 0.47464500000000015, "eval_beir-fiqa_ndcg@10": 0.2796, "eval_beir-fiqa_recall@10": 0.35224, "eval_beir-fiqa_recall@100": 0.6126, "eval_beir-fiqa_recall@20": 0.42243, "eval_beir-nfcorpus_ndcg@10": 0.33906, "eval_beir-nfcorpus_recall@10": 0.1676, "eval_beir-nfcorpus_recall@100": 0.32554, "eval_beir-nfcorpus_recall@20": 0.21141, "eval_beir-nq_ndcg@10": 0.27559, "eval_beir-nq_recall@10": 0.46405, "eval_beir-nq_recall@100": 0.79391, "eval_beir-nq_recall@20": 0.58408, "eval_beir-quora_ndcg@10": 0.78451, "eval_beir-quora_recall@10": 0.88676, "eval_beir-quora_recall@100": 0.97548, "eval_beir-quora_recall@20": 0.92687, "eval_beir-scidocs_ndcg@10": 0.16081, "eval_beir-scidocs_recall@10": 0.16873, "eval_beir-scidocs_recall@100": 0.38908, "eval_beir-scidocs_recall@20": 0.22872, "eval_beir-scifact_ndcg@10": 0.61159, "eval_beir-scifact_recall@10": 0.75667, "eval_beir-scifact_recall@100": 0.91433, "eval_beir-scifact_recall@20": 0.83356, "eval_beir-trec-covid_ndcg@10": 0.5757, "eval_beir-trec-covid_recall@10": 0.614, "eval_beir-trec-covid_recall@100": 0.4372, "eval_beir-trec-covid_recall@20": 0.591, "eval_beir-webis-touche2020_ndcg@10": 0.19068, "eval_beir-webis-touche2020_recall@10": 0.13822, "eval_beir-webis-touche2020_recall@100": 0.44224, "eval_beir-webis-touche2020_recall@20": 0.20593, "eval_senteval-avg_sts": 0.7425354546010247, "eval_senteval-sickr_spearman": 0.7377094468324651, "eval_senteval-stsb_spearman": 0.7473614623695842, "step": 90000, "test_accuracy": 8.6883544921875, "test_doc_norm": 6.327967643737793, "test_inbatch_neg_score": 38.90996551513672, "test_inbatch_pos_score": 39.71258544921875, "test_loss": 4.136532306671143, "test_norm_diff": 0.0020042583346366882, "test_query_norm": 6.327746391296387, "test_queue_k_norm": 0.0, "test_stdk": 0.03785919398069382, "test_stdq": 0.03780497610569, "test_stdqueue_k": 0.0 }, { "accuracy": 57.9102, "doc_norm": 6.3397, "encoder_q-embeddings": 16528.2578, "encoder_q-layer.0": 11816.5508, "encoder_q-layer.1": 12144.1719, "encoder_q-layer.10": 20821.6328, "encoder_q-layer.11": 39491.2852, "encoder_q-layer.2": 13523.8506, "encoder_q-layer.3": 13612.377, "encoder_q-layer.4": 14119.0479, "encoder_q-layer.5": 14279.209, "encoder_q-layer.6": 15074.502, "encoder_q-layer.7": 16841.957, "encoder_q-layer.8": 18198.7227, "encoder_q-layer.9": 16835.1016, "epoch": 0.88, "inbatch_neg_score": 38.3321, "inbatch_pos_score": 38.875, "learning_rate": 5.500000000000001e-06, "loss": 2.1789, "norm_diff": 0.0534, "num_tokens_overlap": 5.5858, "num_tokens_union": 55.0657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27100.8155, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2863, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7334, "sent_len_1": 66.8817, "sent_len_max_0": 18.96, "sent_len_max_1": 191.0087, "stdk": 0.0473, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 90100 }, { "accuracy": 58.3008, "doc_norm": 6.337, "encoder_q-embeddings": 16306.7578, "encoder_q-layer.0": 11745.3779, "encoder_q-layer.1": 12416.5527, "encoder_q-layer.10": 20946.6445, "encoder_q-layer.11": 38411.3594, "encoder_q-layer.2": 13498.1826, "encoder_q-layer.3": 13601.2266, "encoder_q-layer.4": 14210.1514, "encoder_q-layer.5": 14105.6826, "encoder_q-layer.6": 15053.6562, "encoder_q-layer.7": 16173.6143, "encoder_q-layer.8": 18303.5332, "encoder_q-layer.9": 16680.1133, "epoch": 0.88, "inbatch_neg_score": 38.3172, "inbatch_pos_score": 38.8438, "learning_rate": 5.444444444444445e-06, "loss": 2.153, "norm_diff": 0.0505, "num_tokens_overlap": 5.5757, "num_tokens_union": 54.8859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26787.325, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2864, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7077, "sent_len_1": 66.6208, "sent_len_max_0": 18.86, "sent_len_max_1": 189.7512, "stdk": 0.0467, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 90200 }, { "accuracy": 55.0781, "doc_norm": 6.339, "encoder_q-embeddings": 17387.2598, "encoder_q-layer.0": 12950.5166, "encoder_q-layer.1": 13241.6104, "encoder_q-layer.10": 20438.8047, "encoder_q-layer.11": 39299.2227, "encoder_q-layer.2": 14339.9648, "encoder_q-layer.3": 14041.832, "encoder_q-layer.4": 14558.6045, "encoder_q-layer.5": 14555.9688, "encoder_q-layer.6": 15987.7705, "encoder_q-layer.7": 16433.7988, "encoder_q-layer.8": 18798.2344, "encoder_q-layer.9": 17090.1016, "epoch": 0.88, "inbatch_neg_score": 38.322, "inbatch_pos_score": 38.8438, "learning_rate": 5.388888888888889e-06, "loss": 2.1377, "norm_diff": 0.053, "num_tokens_overlap": 5.5748, "num_tokens_union": 55.0539, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27534.1356, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.286, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7153, "sent_len_1": 66.8217, "sent_len_max_0": 18.8788, "sent_len_max_1": 189.7138, "stdk": 0.0455, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 90300 }, { "accuracy": 57.7148, "doc_norm": 6.3361, "encoder_q-embeddings": 16195.8281, "encoder_q-layer.0": 11940.9492, "encoder_q-layer.1": 12412.7539, "encoder_q-layer.10": 20381.666, "encoder_q-layer.11": 40134.0508, "encoder_q-layer.2": 13535.7734, "encoder_q-layer.3": 13591.3115, "encoder_q-layer.4": 14299.9131, "encoder_q-layer.5": 13726.4609, "encoder_q-layer.6": 14637.3486, "encoder_q-layer.7": 15421.6807, "encoder_q-layer.8": 17138.3711, "encoder_q-layer.9": 16490.4492, "epoch": 0.88, "inbatch_neg_score": 38.3145, "inbatch_pos_score": 38.8438, "learning_rate": 5.333333333333334e-06, "loss": 2.1792, "norm_diff": 0.0523, "num_tokens_overlap": 5.5737, "num_tokens_union": 54.8824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26839.6076, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2837, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7034, "sent_len_1": 66.6493, "sent_len_max_0": 18.88, "sent_len_max_1": 191.1875, "stdk": 0.0457, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 90400 }, { "accuracy": 58.3984, "doc_norm": 6.3364, "encoder_q-embeddings": 16983.9902, "encoder_q-layer.0": 11908.7842, "encoder_q-layer.1": 12454.7715, "encoder_q-layer.10": 20073.4121, "encoder_q-layer.11": 43600.1875, "encoder_q-layer.2": 13906.8398, "encoder_q-layer.3": 13704.5537, "encoder_q-layer.4": 14494.8994, "encoder_q-layer.5": 13959.7197, "encoder_q-layer.6": 14667.1777, "encoder_q-layer.7": 16005.7842, "encoder_q-layer.8": 18456.2812, "encoder_q-layer.9": 17373.0859, "epoch": 0.88, "inbatch_neg_score": 38.3092, "inbatch_pos_score": 38.8438, "learning_rate": 5.277777777777778e-06, "loss": 2.1807, "norm_diff": 0.0509, "num_tokens_overlap": 5.5766, "num_tokens_union": 54.8994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27510.6263, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2855, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7136, "sent_len_1": 66.5408, "sent_len_max_0": 18.8788, "sent_len_max_1": 187.585, "stdk": 0.0468, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 90500 }, { "accuracy": 55.957, "doc_norm": 6.3326, "encoder_q-embeddings": 16097.0596, "encoder_q-layer.0": 11684.9033, "encoder_q-layer.1": 12203.6699, "encoder_q-layer.10": 20537.3203, "encoder_q-layer.11": 41281.2344, "encoder_q-layer.2": 13327.542, "encoder_q-layer.3": 13617.8145, "encoder_q-layer.4": 14477.9111, "encoder_q-layer.5": 14272.6758, "encoder_q-layer.6": 15254.5322, "encoder_q-layer.7": 16808.7363, "encoder_q-layer.8": 18285.4492, "encoder_q-layer.9": 16604.9805, "epoch": 0.88, "inbatch_neg_score": 38.3153, "inbatch_pos_score": 38.8125, "learning_rate": 5.2222222222222226e-06, "loss": 2.1988, "norm_diff": 0.05, "num_tokens_overlap": 5.5748, "num_tokens_union": 54.8305, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27138.2361, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2826, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7255, "sent_len_1": 66.5287, "sent_len_max_0": 18.84, "sent_len_max_1": 190.9425, "stdk": 0.0456, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 90600 }, { "accuracy": 58.1055, "doc_norm": 6.3361, "encoder_q-embeddings": 16499.25, "encoder_q-layer.0": 11912.1133, "encoder_q-layer.1": 12264.207, "encoder_q-layer.10": 20070.3906, "encoder_q-layer.11": 40164.7539, "encoder_q-layer.2": 13360.8438, "encoder_q-layer.3": 13716.2637, "encoder_q-layer.4": 14357.1572, "encoder_q-layer.5": 14003.0723, "encoder_q-layer.6": 14873.1943, "encoder_q-layer.7": 15866.1904, "encoder_q-layer.8": 17010.3516, "encoder_q-layer.9": 16730.5059, "epoch": 0.89, "inbatch_neg_score": 38.3102, "inbatch_pos_score": 38.8438, "learning_rate": 5.166666666666667e-06, "loss": 2.2169, "norm_diff": 0.0522, "num_tokens_overlap": 5.579, "num_tokens_union": 55.0343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26871.9657, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.284, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7273, "sent_len_1": 66.8119, "sent_len_max_0": 18.82, "sent_len_max_1": 191.4313, "stdk": 0.0475, "stdq": 0.039, "stdqueue_k": 0.0, "step": 90700 }, { "accuracy": 60.3516, "doc_norm": 6.336, "encoder_q-embeddings": 17630.0566, "encoder_q-layer.0": 12477.751, "encoder_q-layer.1": 12929.2744, "encoder_q-layer.10": 19971.2246, "encoder_q-layer.11": 40467.0781, "encoder_q-layer.2": 13954.8574, "encoder_q-layer.3": 14029.5986, "encoder_q-layer.4": 14557.998, "encoder_q-layer.5": 14237.5586, "encoder_q-layer.6": 15322.1172, "encoder_q-layer.7": 15999.502, "encoder_q-layer.8": 17682.957, "encoder_q-layer.9": 16352.2695, "epoch": 0.89, "inbatch_neg_score": 38.2859, "inbatch_pos_score": 38.8125, "learning_rate": 5.1111111111111115e-06, "loss": 2.1643, "norm_diff": 0.0529, "num_tokens_overlap": 5.581, "num_tokens_union": 55.0155, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27307.5253, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2831, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7179, "sent_len_1": 66.7434, "sent_len_max_0": 18.7913, "sent_len_max_1": 188.57, "stdk": 0.0458, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 90800 }, { "accuracy": 56.543, "doc_norm": 6.3329, "encoder_q-embeddings": 16951.2344, "encoder_q-layer.0": 12358.1162, "encoder_q-layer.1": 12633.4111, "encoder_q-layer.10": 21453.207, "encoder_q-layer.11": 40501.2852, "encoder_q-layer.2": 13874.2012, "encoder_q-layer.3": 13971.3047, "encoder_q-layer.4": 14896.4238, "encoder_q-layer.5": 14438.8115, "encoder_q-layer.6": 15523.8252, "encoder_q-layer.7": 17284.625, "encoder_q-layer.8": 19669.0566, "encoder_q-layer.9": 17837.7773, "epoch": 0.89, "inbatch_neg_score": 38.2912, "inbatch_pos_score": 38.8125, "learning_rate": 5.0555555555555555e-06, "loss": 2.1635, "norm_diff": 0.0499, "num_tokens_overlap": 5.5803, "num_tokens_union": 55.0816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27593.3419, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.283, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.72, "sent_len_1": 66.8892, "sent_len_max_0": 18.8175, "sent_len_max_1": 190.6225, "stdk": 0.0457, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 90900 }, { "accuracy": 55.7617, "doc_norm": 6.333, "encoder_q-embeddings": 18026.8848, "encoder_q-layer.0": 12881.9004, "encoder_q-layer.1": 13160.1738, "encoder_q-layer.10": 23127.3965, "encoder_q-layer.11": 40819.0156, "encoder_q-layer.2": 14572.9287, "encoder_q-layer.3": 14678.0361, "encoder_q-layer.4": 15119.6543, "encoder_q-layer.5": 14595.2607, "encoder_q-layer.6": 15711.001, "encoder_q-layer.7": 17393.1211, "encoder_q-layer.8": 19302.6875, "encoder_q-layer.9": 18140.6816, "epoch": 0.89, "inbatch_neg_score": 38.3073, "inbatch_pos_score": 38.8125, "learning_rate": 5e-06, "loss": 2.1489, "norm_diff": 0.0498, "num_tokens_overlap": 5.5829, "num_tokens_union": 55.2007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28469.9857, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2832, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7192, "sent_len_1": 67.0321, "sent_len_max_0": 18.9013, "sent_len_max_1": 189.0513, "stdk": 0.044, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 91000 }, { "accuracy": 60.8398, "doc_norm": 6.3388, "encoder_q-embeddings": 16168.3926, "encoder_q-layer.0": 11308.9248, "encoder_q-layer.1": 12010.2217, "encoder_q-layer.10": 20852.1094, "encoder_q-layer.11": 40196.4375, "encoder_q-layer.2": 13229.9707, "encoder_q-layer.3": 13443.1191, "encoder_q-layer.4": 14069.7959, "encoder_q-layer.5": 13821.6836, "encoder_q-layer.6": 14990.623, "encoder_q-layer.7": 16926.0625, "encoder_q-layer.8": 18350.5879, "encoder_q-layer.9": 16527.0938, "epoch": 0.89, "inbatch_neg_score": 38.2764, "inbatch_pos_score": 38.8438, "learning_rate": 4.9444444444444444e-06, "loss": 2.1624, "norm_diff": 0.0538, "num_tokens_overlap": 5.5836, "num_tokens_union": 55.0844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26660.2391, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.285, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7399, "sent_len_1": 66.8255, "sent_len_max_0": 18.8837, "sent_len_max_1": 189.2287, "stdk": 0.0467, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 91100 }, { "accuracy": 56.4453, "doc_norm": 6.3347, "encoder_q-embeddings": 17865.8867, "encoder_q-layer.0": 12549.6436, "encoder_q-layer.1": 12887.8887, "encoder_q-layer.10": 20178.0625, "encoder_q-layer.11": 40867.25, "encoder_q-layer.2": 13962.7471, "encoder_q-layer.3": 14086.5391, "encoder_q-layer.4": 14769.7646, "encoder_q-layer.5": 14295.1963, "encoder_q-layer.6": 15381.1396, "encoder_q-layer.7": 16233.5566, "encoder_q-layer.8": 18154.4355, "encoder_q-layer.9": 16555.0059, "epoch": 0.89, "inbatch_neg_score": 38.2846, "inbatch_pos_score": 38.8125, "learning_rate": 4.888888888888889e-06, "loss": 2.1966, "norm_diff": 0.051, "num_tokens_overlap": 5.572, "num_tokens_union": 54.9758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27626.3265, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2838, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7134, "sent_len_1": 66.6995, "sent_len_max_0": 18.8, "sent_len_max_1": 190.415, "stdk": 0.0454, "stdq": 0.0394, "stdqueue_k": 0.0, "step": 91200 }, { "accuracy": 56.0547, "doc_norm": 6.3329, "encoder_q-embeddings": 16350.8535, "encoder_q-layer.0": 11858.9961, "encoder_q-layer.1": 12476.3281, "encoder_q-layer.10": 20930.3047, "encoder_q-layer.11": 40636.625, "encoder_q-layer.2": 13841.0352, "encoder_q-layer.3": 13614.9297, "encoder_q-layer.4": 14663.0918, "encoder_q-layer.5": 14639.7832, "encoder_q-layer.6": 14950.8203, "encoder_q-layer.7": 16218.416, "encoder_q-layer.8": 19481.625, "encoder_q-layer.9": 16601.3789, "epoch": 0.89, "inbatch_neg_score": 38.2824, "inbatch_pos_score": 38.8125, "learning_rate": 4.833333333333333e-06, "loss": 2.194, "norm_diff": 0.0508, "num_tokens_overlap": 5.5738, "num_tokens_union": 54.9999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27291.8045, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2821, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7147, "sent_len_1": 66.8036, "sent_len_max_0": 18.82, "sent_len_max_1": 189.5075, "stdk": 0.0454, "stdq": 0.038, "stdqueue_k": 0.0, "step": 91300 }, { "accuracy": 56.8359, "doc_norm": 6.3371, "encoder_q-embeddings": 17771.2559, "encoder_q-layer.0": 12571.0928, "encoder_q-layer.1": 13105.127, "encoder_q-layer.10": 25149.7598, "encoder_q-layer.11": 45528.1055, "encoder_q-layer.2": 14140.5518, "encoder_q-layer.3": 14243.7568, "encoder_q-layer.4": 14632.7266, "encoder_q-layer.5": 14734.6816, "encoder_q-layer.6": 16043.3252, "encoder_q-layer.7": 17289.5605, "encoder_q-layer.8": 20328.4082, "encoder_q-layer.9": 18847.8828, "epoch": 0.89, "inbatch_neg_score": 38.2791, "inbatch_pos_score": 38.8125, "learning_rate": 4.777777777777778e-06, "loss": 2.1479, "norm_diff": 0.054, "num_tokens_overlap": 5.5779, "num_tokens_union": 55.1168, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29800.1353, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.283, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7252, "sent_len_1": 66.9463, "sent_len_max_0": 18.855, "sent_len_max_1": 190.905, "stdk": 0.0466, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 91400 }, { "accuracy": 56.25, "doc_norm": 6.3331, "encoder_q-embeddings": 16462.3555, "encoder_q-layer.0": 12098.8594, "encoder_q-layer.1": 12665.7217, "encoder_q-layer.10": 22601.4395, "encoder_q-layer.11": 43371.8242, "encoder_q-layer.2": 13794.2529, "encoder_q-layer.3": 13812.2461, "encoder_q-layer.4": 14611.4307, "encoder_q-layer.5": 14429.3213, "encoder_q-layer.6": 15249.9854, "encoder_q-layer.7": 16539.293, "encoder_q-layer.8": 18232.7031, "encoder_q-layer.9": 17187.8945, "epoch": 0.89, "inbatch_neg_score": 38.2823, "inbatch_pos_score": 38.7812, "learning_rate": 4.722222222222222e-06, "loss": 2.1588, "norm_diff": 0.0505, "num_tokens_overlap": 5.5796, "num_tokens_union": 55.1014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27649.114, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2826, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7029, "sent_len_1": 66.9524, "sent_len_max_0": 18.8475, "sent_len_max_1": 190.2125, "stdk": 0.0459, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 91500 }, { "accuracy": 56.1523, "doc_norm": 6.333, "encoder_q-embeddings": 17674.5898, "encoder_q-layer.0": 12475.6309, "encoder_q-layer.1": 12901.5615, "encoder_q-layer.10": 21657.3184, "encoder_q-layer.11": 43444.5391, "encoder_q-layer.2": 13893.3701, "encoder_q-layer.3": 14207.2529, "encoder_q-layer.4": 14643.3818, "encoder_q-layer.5": 14232.7637, "encoder_q-layer.6": 15963.5195, "encoder_q-layer.7": 15990.9766, "encoder_q-layer.8": 18644.1758, "encoder_q-layer.9": 17274.3438, "epoch": 0.89, "inbatch_neg_score": 38.2771, "inbatch_pos_score": 38.8125, "learning_rate": 4.666666666666667e-06, "loss": 2.1593, "norm_diff": 0.0508, "num_tokens_overlap": 5.5717, "num_tokens_union": 55.0468, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28286.3934, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2822, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6977, "sent_len_1": 66.905, "sent_len_max_0": 18.9525, "sent_len_max_1": 191.37, "stdk": 0.0457, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 91600 }, { "accuracy": 56.9336, "doc_norm": 6.3324, "encoder_q-embeddings": 17467.1895, "encoder_q-layer.0": 12521.0029, "encoder_q-layer.1": 12957.0352, "encoder_q-layer.10": 20018.7891, "encoder_q-layer.11": 39085.6211, "encoder_q-layer.2": 14051.7344, "encoder_q-layer.3": 13676.873, "encoder_q-layer.4": 14405.1924, "encoder_q-layer.5": 13995.2441, "encoder_q-layer.6": 14832.7168, "encoder_q-layer.7": 16105.8506, "encoder_q-layer.8": 18100.5469, "encoder_q-layer.9": 16590.6699, "epoch": 0.9, "inbatch_neg_score": 38.2694, "inbatch_pos_score": 38.7812, "learning_rate": 4.611111111111111e-06, "loss": 2.1295, "norm_diff": 0.0518, "num_tokens_overlap": 5.5767, "num_tokens_union": 55.1094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27046.835, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2806, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.744, "sent_len_1": 66.9741, "sent_len_max_0": 19.0813, "sent_len_max_1": 190.4812, "stdk": 0.0458, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 91700 }, { "accuracy": 61.1328, "doc_norm": 6.3341, "encoder_q-embeddings": 16289.9199, "encoder_q-layer.0": 11734.5049, "encoder_q-layer.1": 12026.457, "encoder_q-layer.10": 21428.4629, "encoder_q-layer.11": 39480.9297, "encoder_q-layer.2": 12816.0234, "encoder_q-layer.3": 13074.9951, "encoder_q-layer.4": 13889.7178, "encoder_q-layer.5": 13907.3789, "encoder_q-layer.6": 15742.8105, "encoder_q-layer.7": 16571.3574, "encoder_q-layer.8": 17713.8066, "encoder_q-layer.9": 16881.2031, "epoch": 0.9, "inbatch_neg_score": 38.2504, "inbatch_pos_score": 38.8125, "learning_rate": 4.555555555555556e-06, "loss": 2.1707, "norm_diff": 0.0533, "num_tokens_overlap": 5.5804, "num_tokens_union": 54.9641, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26646.4885, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2808, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7339, "sent_len_1": 66.7073, "sent_len_max_0": 18.905, "sent_len_max_1": 187.9525, "stdk": 0.0462, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 91800 }, { "accuracy": 58.6914, "doc_norm": 6.3325, "encoder_q-embeddings": 18210.7617, "encoder_q-layer.0": 12918.6885, "encoder_q-layer.1": 13245.0322, "encoder_q-layer.10": 20805.1719, "encoder_q-layer.11": 38466.7891, "encoder_q-layer.2": 14707.5625, "encoder_q-layer.3": 14648.5059, "encoder_q-layer.4": 14975.2324, "encoder_q-layer.5": 15068.2939, "encoder_q-layer.6": 15464.9336, "encoder_q-layer.7": 16099.0615, "encoder_q-layer.8": 18432.8828, "encoder_q-layer.9": 17432.5742, "epoch": 0.9, "inbatch_neg_score": 38.2407, "inbatch_pos_score": 38.7812, "learning_rate": 4.5e-06, "loss": 2.168, "norm_diff": 0.0532, "num_tokens_overlap": 5.5741, "num_tokens_union": 54.9367, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27615.8686, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2793, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7049, "sent_len_1": 66.6663, "sent_len_max_0": 18.76, "sent_len_max_1": 187.3787, "stdk": 0.0446, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 91900 }, { "accuracy": 60.7422, "doc_norm": 6.3335, "encoder_q-embeddings": 16414.1445, "encoder_q-layer.0": 11896.0742, "encoder_q-layer.1": 12182.0342, "encoder_q-layer.10": 19866.8594, "encoder_q-layer.11": 37861.9883, "encoder_q-layer.2": 13333.0967, "encoder_q-layer.3": 13257.3877, "encoder_q-layer.4": 13464.8174, "encoder_q-layer.5": 13536.3057, "encoder_q-layer.6": 14624.8711, "encoder_q-layer.7": 15308.5693, "encoder_q-layer.8": 17222.5703, "encoder_q-layer.9": 16463.2793, "epoch": 0.9, "inbatch_neg_score": 38.2345, "inbatch_pos_score": 38.7812, "learning_rate": 4.444444444444445e-06, "loss": 2.163, "norm_diff": 0.0525, "num_tokens_overlap": 5.5813, "num_tokens_union": 55.0443, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26051.0243, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.281, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.732, "sent_len_1": 66.8285, "sent_len_max_0": 18.8188, "sent_len_max_1": 189.045, "stdk": 0.0473, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 92000 }, { "accuracy": 60.4492, "doc_norm": 6.3308, "encoder_q-embeddings": 17216.4316, "encoder_q-layer.0": 12056.0254, "encoder_q-layer.1": 12654.415, "encoder_q-layer.10": 21733.752, "encoder_q-layer.11": 40916.9336, "encoder_q-layer.2": 13760.0469, "encoder_q-layer.3": 14014.291, "encoder_q-layer.4": 14794.6533, "encoder_q-layer.5": 14179.2129, "encoder_q-layer.6": 15714.8125, "encoder_q-layer.7": 16381.1318, "encoder_q-layer.8": 18025.0059, "encoder_q-layer.9": 16966.2871, "epoch": 0.9, "inbatch_neg_score": 38.2467, "inbatch_pos_score": 38.7812, "learning_rate": 4.388888888888889e-06, "loss": 2.1785, "norm_diff": 0.0529, "num_tokens_overlap": 5.5821, "num_tokens_union": 55.0078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27593.8069, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2779, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7224, "sent_len_1": 66.7827, "sent_len_max_0": 18.83, "sent_len_max_1": 189.9075, "stdk": 0.0462, "stdq": 0.0375, "stdqueue_k": 0.0, "step": 92100 }, { "accuracy": 56.0547, "doc_norm": 6.3313, "encoder_q-embeddings": 17629.9453, "encoder_q-layer.0": 12184.6973, "encoder_q-layer.1": 12721.5615, "encoder_q-layer.10": 20425.1016, "encoder_q-layer.11": 39785.0273, "encoder_q-layer.2": 14280.4316, "encoder_q-layer.3": 14396.0498, "encoder_q-layer.4": 14767.7422, "encoder_q-layer.5": 14459.2637, "encoder_q-layer.6": 14773.2461, "encoder_q-layer.7": 15921.0059, "encoder_q-layer.8": 17726.5234, "encoder_q-layer.9": 16456.7188, "epoch": 0.9, "inbatch_neg_score": 38.2434, "inbatch_pos_score": 38.75, "learning_rate": 4.333333333333334e-06, "loss": 2.1579, "norm_diff": 0.0532, "num_tokens_overlap": 5.5818, "num_tokens_union": 54.93, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27145.8522, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2781, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7249, "sent_len_1": 66.6605, "sent_len_max_0": 18.9337, "sent_len_max_1": 189.8, "stdk": 0.046, "stdq": 0.0375, "stdqueue_k": 0.0, "step": 92200 }, { "accuracy": 59.082, "doc_norm": 6.3308, "encoder_q-embeddings": 24912.5352, "encoder_q-layer.0": 19135.4316, "encoder_q-layer.1": 15715.6982, "encoder_q-layer.10": 21034.5742, "encoder_q-layer.11": 43994.8984, "encoder_q-layer.2": 16417.6445, "encoder_q-layer.3": 14298.0908, "encoder_q-layer.4": 14204.4355, "encoder_q-layer.5": 13985.7793, "encoder_q-layer.6": 14730.3076, "encoder_q-layer.7": 15492.7529, "encoder_q-layer.8": 18406.8047, "encoder_q-layer.9": 17022.7109, "epoch": 0.9, "inbatch_neg_score": 38.2521, "inbatch_pos_score": 38.7812, "learning_rate": 4.277777777777778e-06, "loss": 2.2049, "norm_diff": 0.0512, "num_tokens_overlap": 5.5729, "num_tokens_union": 54.9282, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30890.9499, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2796, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7083, "sent_len_1": 66.7096, "sent_len_max_0": 18.72, "sent_len_max_1": 187.2688, "stdk": 0.0467, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 92300 }, { "accuracy": 57.3242, "doc_norm": 6.3278, "encoder_q-embeddings": 17420.6445, "encoder_q-layer.0": 12255.0078, "encoder_q-layer.1": 12850.1074, "encoder_q-layer.10": 20560.4707, "encoder_q-layer.11": 38637.9219, "encoder_q-layer.2": 13969.5557, "encoder_q-layer.3": 13710.8789, "encoder_q-layer.4": 14119.1426, "encoder_q-layer.5": 13984.5996, "encoder_q-layer.6": 14940.583, "encoder_q-layer.7": 15504.3506, "encoder_q-layer.8": 17620.6016, "encoder_q-layer.9": 15880.5381, "epoch": 0.9, "inbatch_neg_score": 38.2552, "inbatch_pos_score": 38.7812, "learning_rate": 4.222222222222223e-06, "loss": 2.2026, "norm_diff": 0.0501, "num_tokens_overlap": 5.5756, "num_tokens_union": 55.0247, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26918.8052, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2777, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7156, "sent_len_1": 66.8004, "sent_len_max_0": 18.8113, "sent_len_max_1": 188.4375, "stdk": 0.0456, "stdq": 0.0376, "stdqueue_k": 0.0, "step": 92400 }, { "accuracy": 57.2266, "doc_norm": 6.3315, "encoder_q-embeddings": 16627.1133, "encoder_q-layer.0": 11885.9111, "encoder_q-layer.1": 12915.3945, "encoder_q-layer.10": 20792.9785, "encoder_q-layer.11": 41166.3477, "encoder_q-layer.2": 14015.4756, "encoder_q-layer.3": 14437.5439, "encoder_q-layer.4": 14876.418, "encoder_q-layer.5": 14761.9551, "encoder_q-layer.6": 15243.0967, "encoder_q-layer.7": 16332.2285, "encoder_q-layer.8": 17808.4102, "encoder_q-layer.9": 16811.0957, "epoch": 0.9, "inbatch_neg_score": 38.234, "inbatch_pos_score": 38.75, "learning_rate": 4.166666666666667e-06, "loss": 2.1366, "norm_diff": 0.0542, "num_tokens_overlap": 5.574, "num_tokens_union": 54.8098, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27636.779, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2773, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7095, "sent_len_1": 66.4191, "sent_len_max_0": 18.88, "sent_len_max_1": 187.9225, "stdk": 0.0456, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 92500 }, { "accuracy": 57.2266, "doc_norm": 6.3308, "encoder_q-embeddings": 33719.7422, "encoder_q-layer.0": 24513.8906, "encoder_q-layer.1": 25954.6055, "encoder_q-layer.10": 42557.8477, "encoder_q-layer.11": 81203.8672, "encoder_q-layer.2": 27386.5547, "encoder_q-layer.3": 27307.8438, "encoder_q-layer.4": 29001.3145, "encoder_q-layer.5": 29003.5137, "encoder_q-layer.6": 31048.2949, "encoder_q-layer.7": 32258.0352, "encoder_q-layer.8": 36418.207, "encoder_q-layer.9": 34077.4961, "epoch": 0.9, "inbatch_neg_score": 38.2248, "inbatch_pos_score": 38.75, "learning_rate": 4.111111111111112e-06, "loss": 2.155, "norm_diff": 0.0538, "num_tokens_overlap": 5.5816, "num_tokens_union": 54.9647, "postclip_grad_norm": 1.0, "preclip_grad_norm": 54739.525, "preclip_grad_norm_avg": 0.0005, "query_norm": 6.2769, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7222, "sent_len_1": 66.7368, "sent_len_max_0": 18.7638, "sent_len_max_1": 190.5462, "stdk": 0.0457, "stdq": 0.038, "stdqueue_k": 0.0, "step": 92600 }, { "accuracy": 56.7383, "doc_norm": 6.3303, "encoder_q-embeddings": 18305.6543, "encoder_q-layer.0": 12790.8926, "encoder_q-layer.1": 13635.2402, "encoder_q-layer.10": 24238.3184, "encoder_q-layer.11": 45974.0352, "encoder_q-layer.2": 14558.3096, "encoder_q-layer.3": 14859.2324, "encoder_q-layer.4": 15301.1738, "encoder_q-layer.5": 14937.4863, "encoder_q-layer.6": 15504.9014, "encoder_q-layer.7": 16521.5859, "encoder_q-layer.8": 20048.2969, "encoder_q-layer.9": 18178.6035, "epoch": 0.91, "inbatch_neg_score": 38.2127, "inbatch_pos_score": 38.75, "learning_rate": 4.055555555555556e-06, "loss": 2.1587, "norm_diff": 0.0536, "num_tokens_overlap": 5.5911, "num_tokens_union": 55.0088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29592.6257, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2767, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7426, "sent_len_1": 66.7453, "sent_len_max_0": 18.8813, "sent_len_max_1": 190.495, "stdk": 0.0468, "stdq": 0.039, "stdqueue_k": 0.0, "step": 92700 }, { "accuracy": 58.7891, "doc_norm": 6.3264, "encoder_q-embeddings": 17730.873, "encoder_q-layer.0": 12731.5967, "encoder_q-layer.1": 12951.9219, "encoder_q-layer.10": 21014.6348, "encoder_q-layer.11": 41696.0703, "encoder_q-layer.2": 13922.6797, "encoder_q-layer.3": 13964.4912, "encoder_q-layer.4": 14672.3457, "encoder_q-layer.5": 14336.1279, "encoder_q-layer.6": 15148.0869, "encoder_q-layer.7": 16321.5371, "encoder_q-layer.8": 18208.0977, "encoder_q-layer.9": 16865.3848, "epoch": 0.91, "inbatch_neg_score": 38.1984, "inbatch_pos_score": 38.7188, "learning_rate": 4.000000000000001e-06, "loss": 2.1718, "norm_diff": 0.0526, "num_tokens_overlap": 5.5722, "num_tokens_union": 55.0663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27987.3643, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2739, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7, "sent_len_1": 66.9336, "sent_len_max_0": 18.8438, "sent_len_max_1": 193.01, "stdk": 0.0459, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 92800 }, { "accuracy": 54.7852, "doc_norm": 6.3272, "encoder_q-embeddings": 16993.248, "encoder_q-layer.0": 12438.0088, "encoder_q-layer.1": 13137.9443, "encoder_q-layer.10": 21221.2812, "encoder_q-layer.11": 39110.6328, "encoder_q-layer.2": 13877.0332, "encoder_q-layer.3": 13583.915, "encoder_q-layer.4": 14436.5312, "encoder_q-layer.5": 14270.0615, "encoder_q-layer.6": 15545.8428, "encoder_q-layer.7": 16904.6289, "encoder_q-layer.8": 17899.2363, "encoder_q-layer.9": 16891.293, "epoch": 0.91, "inbatch_neg_score": 38.1947, "inbatch_pos_score": 38.7188, "learning_rate": 3.944444444444445e-06, "loss": 2.181, "norm_diff": 0.0538, "num_tokens_overlap": 5.5766, "num_tokens_union": 54.987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27105.7771, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2734, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7011, "sent_len_1": 66.7605, "sent_len_max_0": 18.8813, "sent_len_max_1": 190.0675, "stdk": 0.0463, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 92900 }, { "accuracy": 61.9141, "doc_norm": 6.3312, "encoder_q-embeddings": 17114.6328, "encoder_q-layer.0": 12218.959, "encoder_q-layer.1": 13031.8047, "encoder_q-layer.10": 20474.209, "encoder_q-layer.11": 39199.4531, "encoder_q-layer.2": 14127.8701, "encoder_q-layer.3": 14142.0107, "encoder_q-layer.4": 14625.7314, "encoder_q-layer.5": 14389.9834, "encoder_q-layer.6": 15406.2119, "encoder_q-layer.7": 16464.3262, "encoder_q-layer.8": 18034.0293, "encoder_q-layer.9": 16728.9512, "epoch": 0.91, "inbatch_neg_score": 38.1697, "inbatch_pos_score": 38.7188, "learning_rate": 3.888888888888889e-06, "loss": 2.1222, "norm_diff": 0.0572, "num_tokens_overlap": 5.5761, "num_tokens_union": 55.0704, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27508.3076, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.274, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7258, "sent_len_1": 66.9085, "sent_len_max_0": 18.9488, "sent_len_max_1": 190.9025, "stdk": 0.0469, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 93000 }, { "accuracy": 57.8125, "doc_norm": 6.3241, "encoder_q-embeddings": 17241.8477, "encoder_q-layer.0": 12111.6943, "encoder_q-layer.1": 12440.7783, "encoder_q-layer.10": 20693.9727, "encoder_q-layer.11": 38958.0508, "encoder_q-layer.2": 13588.8721, "encoder_q-layer.3": 13839.834, "encoder_q-layer.4": 14760.2949, "encoder_q-layer.5": 14585.3477, "encoder_q-layer.6": 15463.4824, "encoder_q-layer.7": 16507.2207, "encoder_q-layer.8": 18443.7207, "encoder_q-layer.9": 17112.5898, "epoch": 0.91, "inbatch_neg_score": 38.1739, "inbatch_pos_score": 38.6875, "learning_rate": 3.833333333333334e-06, "loss": 2.146, "norm_diff": 0.0509, "num_tokens_overlap": 5.5907, "num_tokens_union": 55.1451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27173.117, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2731, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7234, "sent_len_1": 67.0467, "sent_len_max_0": 18.8738, "sent_len_max_1": 190.9425, "stdk": 0.0458, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 93100 }, { "accuracy": 58.4961, "doc_norm": 6.3271, "encoder_q-embeddings": 16637.3398, "encoder_q-layer.0": 11914.3486, "encoder_q-layer.1": 12188.6855, "encoder_q-layer.10": 20980.2852, "encoder_q-layer.11": 42474.9766, "encoder_q-layer.2": 13340.1211, "encoder_q-layer.3": 13335.1484, "encoder_q-layer.4": 13725.1768, "encoder_q-layer.5": 13916.7471, "encoder_q-layer.6": 15144.6211, "encoder_q-layer.7": 16359.3926, "encoder_q-layer.8": 18532.4238, "encoder_q-layer.9": 17195.5098, "epoch": 0.91, "inbatch_neg_score": 38.1636, "inbatch_pos_score": 38.6875, "learning_rate": 3.777777777777778e-06, "loss": 2.1943, "norm_diff": 0.0557, "num_tokens_overlap": 5.5911, "num_tokens_union": 54.9461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27500.3431, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2714, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7575, "sent_len_1": 66.6715, "sent_len_max_0": 18.8538, "sent_len_max_1": 188.6025, "stdk": 0.0473, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 93200 }, { "accuracy": 60.5469, "doc_norm": 6.328, "encoder_q-embeddings": 16807.4551, "encoder_q-layer.0": 12167.541, "encoder_q-layer.1": 12441.2734, "encoder_q-layer.10": 22840.8984, "encoder_q-layer.11": 41524.2617, "encoder_q-layer.2": 13456.8555, "encoder_q-layer.3": 13891.4365, "encoder_q-layer.4": 14650.0801, "encoder_q-layer.5": 14566.2344, "encoder_q-layer.6": 15872.6455, "encoder_q-layer.7": 16997.3809, "encoder_q-layer.8": 18945.9609, "encoder_q-layer.9": 17729.207, "epoch": 0.91, "inbatch_neg_score": 38.15, "inbatch_pos_score": 38.6875, "learning_rate": 3.722222222222222e-06, "loss": 2.1459, "norm_diff": 0.0544, "num_tokens_overlap": 5.5742, "num_tokens_union": 54.9652, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28145.924, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2736, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7243, "sent_len_1": 66.7095, "sent_len_max_0": 18.8362, "sent_len_max_1": 190.4725, "stdk": 0.0462, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 93300 }, { "accuracy": 57.4219, "doc_norm": 6.3237, "encoder_q-embeddings": 17515.6074, "encoder_q-layer.0": 12323.332, "encoder_q-layer.1": 12790.7275, "encoder_q-layer.10": 19843.9707, "encoder_q-layer.11": 39009.4922, "encoder_q-layer.2": 13734.3408, "encoder_q-layer.3": 14125.2832, "encoder_q-layer.4": 14354.0605, "encoder_q-layer.5": 14229.8701, "encoder_q-layer.6": 15404.2988, "encoder_q-layer.7": 16498.5234, "encoder_q-layer.8": 17848.8613, "encoder_q-layer.9": 16751.082, "epoch": 0.91, "inbatch_neg_score": 38.1591, "inbatch_pos_score": 38.6875, "learning_rate": 3.666666666666667e-06, "loss": 2.163, "norm_diff": 0.0525, "num_tokens_overlap": 5.5739, "num_tokens_union": 54.9636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27111.6278, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2712, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6974, "sent_len_1": 66.7427, "sent_len_max_0": 18.9462, "sent_len_max_1": 189.4525, "stdk": 0.0456, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 93400 }, { "accuracy": 58.7891, "doc_norm": 6.3264, "encoder_q-embeddings": 17353.9473, "encoder_q-layer.0": 12433.4551, "encoder_q-layer.1": 12998.3477, "encoder_q-layer.10": 25157.0254, "encoder_q-layer.11": 42000.3398, "encoder_q-layer.2": 13951.8955, "encoder_q-layer.3": 13692.792, "encoder_q-layer.4": 13756.4922, "encoder_q-layer.5": 13698.0645, "encoder_q-layer.6": 14812.1094, "encoder_q-layer.7": 15713.6289, "encoder_q-layer.8": 18538.7715, "encoder_q-layer.9": 18013.2656, "epoch": 0.91, "inbatch_neg_score": 38.144, "inbatch_pos_score": 38.6875, "learning_rate": 3.611111111111111e-06, "loss": 2.1753, "norm_diff": 0.0543, "num_tokens_overlap": 5.5915, "num_tokens_union": 55.0525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28085.8946, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2721, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7487, "sent_len_1": 66.8167, "sent_len_max_0": 18.8925, "sent_len_max_1": 191.0863, "stdk": 0.0466, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 93500 }, { "accuracy": 58.6914, "doc_norm": 6.3272, "encoder_q-embeddings": 17285.4746, "encoder_q-layer.0": 12368.2812, "encoder_q-layer.1": 13070.2744, "encoder_q-layer.10": 19871.8809, "encoder_q-layer.11": 38581.5078, "encoder_q-layer.2": 14114.4863, "encoder_q-layer.3": 14147.21, "encoder_q-layer.4": 14244.1699, "encoder_q-layer.5": 14123.46, "encoder_q-layer.6": 15108.6445, "encoder_q-layer.7": 15961.4678, "encoder_q-layer.8": 17328.418, "encoder_q-layer.9": 16045.3389, "epoch": 0.91, "inbatch_neg_score": 38.149, "inbatch_pos_score": 38.6875, "learning_rate": 3.555555555555556e-06, "loss": 2.1545, "norm_diff": 0.0562, "num_tokens_overlap": 5.5941, "num_tokens_union": 55.0452, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26815.3752, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.271, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7358, "sent_len_1": 66.8443, "sent_len_max_0": 18.8425, "sent_len_max_1": 188.5362, "stdk": 0.0474, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 93600 }, { "accuracy": 57.7148, "doc_norm": 6.3242, "encoder_q-embeddings": 17416.418, "encoder_q-layer.0": 12264.417, "encoder_q-layer.1": 12496.665, "encoder_q-layer.10": 20278.2422, "encoder_q-layer.11": 39577.8984, "encoder_q-layer.2": 13678.1592, "encoder_q-layer.3": 13718.2168, "encoder_q-layer.4": 13975.0596, "encoder_q-layer.5": 13743.1641, "encoder_q-layer.6": 14457.6221, "encoder_q-layer.7": 15668.1289, "encoder_q-layer.8": 17916.4492, "encoder_q-layer.9": 16995.8262, "epoch": 0.91, "inbatch_neg_score": 38.1446, "inbatch_pos_score": 38.6875, "learning_rate": 3.5000000000000004e-06, "loss": 2.1866, "norm_diff": 0.0535, "num_tokens_overlap": 5.5772, "num_tokens_union": 55.1286, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26835.4655, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2707, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6946, "sent_len_1": 66.9439, "sent_len_max_0": 18.7913, "sent_len_max_1": 187.8487, "stdk": 0.0471, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 93700 }, { "accuracy": 55.5664, "doc_norm": 6.3253, "encoder_q-embeddings": 16900.6348, "encoder_q-layer.0": 11894.4053, "encoder_q-layer.1": 12613.4658, "encoder_q-layer.10": 23306.832, "encoder_q-layer.11": 40809.293, "encoder_q-layer.2": 13933.5098, "encoder_q-layer.3": 13861.8438, "encoder_q-layer.4": 14464.6006, "encoder_q-layer.5": 14377.041, "encoder_q-layer.6": 15516.1787, "encoder_q-layer.7": 16037.8896, "encoder_q-layer.8": 20165.7031, "encoder_q-layer.9": 18030.502, "epoch": 0.92, "inbatch_neg_score": 38.1473, "inbatch_pos_score": 38.6562, "learning_rate": 3.4444444444444444e-06, "loss": 2.1624, "norm_diff": 0.0548, "num_tokens_overlap": 5.5784, "num_tokens_union": 55.0887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27936.8823, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2705, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.714, "sent_len_1": 66.9226, "sent_len_max_0": 18.7675, "sent_len_max_1": 190.2612, "stdk": 0.0446, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 93800 }, { "accuracy": 57.1289, "doc_norm": 6.3235, "encoder_q-embeddings": 17494.4785, "encoder_q-layer.0": 12179.626, "encoder_q-layer.1": 12585.4951, "encoder_q-layer.10": 19499.5273, "encoder_q-layer.11": 38282.668, "encoder_q-layer.2": 13804.9141, "encoder_q-layer.3": 13600.6191, "encoder_q-layer.4": 13951.083, "encoder_q-layer.5": 14083.54, "encoder_q-layer.6": 15296.0908, "encoder_q-layer.7": 15527.418, "encoder_q-layer.8": 18144.5586, "encoder_q-layer.9": 16259.1416, "epoch": 0.92, "inbatch_neg_score": 38.1391, "inbatch_pos_score": 38.6875, "learning_rate": 3.3888888888888893e-06, "loss": 2.1671, "norm_diff": 0.0525, "num_tokens_overlap": 5.5863, "num_tokens_union": 55.0516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26762.2183, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.271, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7109, "sent_len_1": 66.8474, "sent_len_max_0": 18.8038, "sent_len_max_1": 188.0875, "stdk": 0.046, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 93900 }, { "accuracy": 56.25, "doc_norm": 6.3264, "encoder_q-embeddings": 17355.9316, "encoder_q-layer.0": 12477.5039, "encoder_q-layer.1": 12810.7725, "encoder_q-layer.10": 19832.4922, "encoder_q-layer.11": 38824.6992, "encoder_q-layer.2": 14129.7285, "encoder_q-layer.3": 13907.46, "encoder_q-layer.4": 14798.7959, "encoder_q-layer.5": 14332.6953, "encoder_q-layer.6": 15099.8691, "encoder_q-layer.7": 16217.0195, "encoder_q-layer.8": 18719.8516, "encoder_q-layer.9": 16375.0234, "epoch": 0.92, "inbatch_neg_score": 38.1474, "inbatch_pos_score": 38.6562, "learning_rate": 3.3333333333333333e-06, "loss": 2.1533, "norm_diff": 0.0556, "num_tokens_overlap": 5.5845, "num_tokens_union": 54.9541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27160.2569, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2708, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6887, "sent_len_1": 66.7499, "sent_len_max_0": 18.7812, "sent_len_max_1": 190.7025, "stdk": 0.0468, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 94000 }, { "accuracy": 59.375, "doc_norm": 6.3234, "encoder_q-embeddings": 16569.416, "encoder_q-layer.0": 11680.3936, "encoder_q-layer.1": 12073.4229, "encoder_q-layer.10": 20840.8008, "encoder_q-layer.11": 41292.9062, "encoder_q-layer.2": 13715.1387, "encoder_q-layer.3": 13538.9355, "encoder_q-layer.4": 14102.1807, "encoder_q-layer.5": 13406.7568, "encoder_q-layer.6": 13920.4004, "encoder_q-layer.7": 15294.7822, "encoder_q-layer.8": 17312.1582, "encoder_q-layer.9": 16687.2402, "epoch": 0.92, "inbatch_neg_score": 38.1426, "inbatch_pos_score": 38.6875, "learning_rate": 3.277777777777778e-06, "loss": 2.2008, "norm_diff": 0.0537, "num_tokens_overlap": 5.5801, "num_tokens_union": 55.0755, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26445.4437, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2698, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7313, "sent_len_1": 66.8744, "sent_len_max_0": 18.8412, "sent_len_max_1": 188.5675, "stdk": 0.0464, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 94100 }, { "accuracy": 57.3242, "doc_norm": 6.3241, "encoder_q-embeddings": 17559.2656, "encoder_q-layer.0": 12753.6133, "encoder_q-layer.1": 13189.3213, "encoder_q-layer.10": 20643.5566, "encoder_q-layer.11": 39757.5938, "encoder_q-layer.2": 14438.2734, "encoder_q-layer.3": 14322.8652, "encoder_q-layer.4": 14751.3057, "encoder_q-layer.5": 14473.6768, "encoder_q-layer.6": 15679.3086, "encoder_q-layer.7": 16121.0098, "encoder_q-layer.8": 17371.3535, "encoder_q-layer.9": 17215.0293, "epoch": 0.92, "inbatch_neg_score": 38.1324, "inbatch_pos_score": 38.6562, "learning_rate": 3.2222222222222222e-06, "loss": 2.1554, "norm_diff": 0.0534, "num_tokens_overlap": 5.5849, "num_tokens_union": 55.1141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27596.7221, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2707, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7257, "sent_len_1": 66.8851, "sent_len_max_0": 18.8538, "sent_len_max_1": 188.9263, "stdk": 0.0462, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 94200 }, { "accuracy": 55.1758, "doc_norm": 6.3211, "encoder_q-embeddings": 16787.9902, "encoder_q-layer.0": 12002.1924, "encoder_q-layer.1": 12790.1289, "encoder_q-layer.10": 22543.1172, "encoder_q-layer.11": 45450.9023, "encoder_q-layer.2": 14187.1055, "encoder_q-layer.3": 14211.3193, "encoder_q-layer.4": 14796.2568, "encoder_q-layer.5": 14621.3916, "encoder_q-layer.6": 15670.1992, "encoder_q-layer.7": 17459.2012, "encoder_q-layer.8": 19113.3555, "encoder_q-layer.9": 17943.9043, "epoch": 0.92, "inbatch_neg_score": 38.1368, "inbatch_pos_score": 38.6562, "learning_rate": 3.166666666666667e-06, "loss": 2.1709, "norm_diff": 0.0505, "num_tokens_overlap": 5.5871, "num_tokens_union": 55.0435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28830.7237, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2707, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7292, "sent_len_1": 66.8463, "sent_len_max_0": 18.96, "sent_len_max_1": 189.7125, "stdk": 0.046, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 94300 }, { "accuracy": 60.1562, "doc_norm": 6.3245, "encoder_q-embeddings": 16291.9307, "encoder_q-layer.0": 11712.8105, "encoder_q-layer.1": 12099.5557, "encoder_q-layer.10": 19627.5176, "encoder_q-layer.11": 41416.9023, "encoder_q-layer.2": 13363.1318, "encoder_q-layer.3": 13627.8945, "encoder_q-layer.4": 13813.9941, "encoder_q-layer.5": 13663.126, "encoder_q-layer.6": 14783.8311, "encoder_q-layer.7": 15525.8389, "encoder_q-layer.8": 17487.373, "encoder_q-layer.9": 16031.1172, "epoch": 0.92, "inbatch_neg_score": 38.1312, "inbatch_pos_score": 38.6875, "learning_rate": 3.111111111111111e-06, "loss": 2.1376, "norm_diff": 0.0543, "num_tokens_overlap": 5.5873, "num_tokens_union": 54.8576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26443.3386, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2702, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7178, "sent_len_1": 66.5724, "sent_len_max_0": 18.78, "sent_len_max_1": 190.7775, "stdk": 0.0469, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 94400 }, { "accuracy": 58.4961, "doc_norm": 6.3213, "encoder_q-embeddings": 17173.2656, "encoder_q-layer.0": 12158.1943, "encoder_q-layer.1": 12928.0479, "encoder_q-layer.10": 20243.3516, "encoder_q-layer.11": 39250.1211, "encoder_q-layer.2": 13729.6152, "encoder_q-layer.3": 13953.5283, "encoder_q-layer.4": 14171.4131, "encoder_q-layer.5": 14493.5078, "encoder_q-layer.6": 14982.4414, "encoder_q-layer.7": 16454.4219, "encoder_q-layer.8": 17797.3086, "encoder_q-layer.9": 16547.707, "epoch": 0.92, "inbatch_neg_score": 38.1283, "inbatch_pos_score": 38.625, "learning_rate": 3.0555555555555556e-06, "loss": 2.1707, "norm_diff": 0.052, "num_tokens_overlap": 5.5762, "num_tokens_union": 55.0473, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27010.3086, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2692, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7137, "sent_len_1": 66.8753, "sent_len_max_0": 18.8137, "sent_len_max_1": 192.9475, "stdk": 0.0466, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 94500 }, { "accuracy": 58.3984, "doc_norm": 6.3252, "encoder_q-embeddings": 17598.0957, "encoder_q-layer.0": 12495.291, "encoder_q-layer.1": 12925.2363, "encoder_q-layer.10": 20069.877, "encoder_q-layer.11": 39513.4805, "encoder_q-layer.2": 13872.6445, "encoder_q-layer.3": 14050.2891, "encoder_q-layer.4": 14898.8604, "encoder_q-layer.5": 14644.3076, "encoder_q-layer.6": 15509.9746, "encoder_q-layer.7": 16398.1055, "encoder_q-layer.8": 18058.3418, "encoder_q-layer.9": 16975.5156, "epoch": 0.92, "inbatch_neg_score": 38.1082, "inbatch_pos_score": 38.625, "learning_rate": 3e-06, "loss": 2.1415, "norm_diff": 0.057, "num_tokens_overlap": 5.5814, "num_tokens_union": 55.1276, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27274.8817, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2682, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.726, "sent_len_1": 66.9387, "sent_len_max_0": 18.7375, "sent_len_max_1": 189.8212, "stdk": 0.0466, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 94600 }, { "accuracy": 59.082, "doc_norm": 6.3216, "encoder_q-embeddings": 17423.9648, "encoder_q-layer.0": 11940.9062, "encoder_q-layer.1": 12436.9238, "encoder_q-layer.10": 21473.9238, "encoder_q-layer.11": 40215.5, "encoder_q-layer.2": 13350.1611, "encoder_q-layer.3": 13748.5449, "encoder_q-layer.4": 13901.8564, "encoder_q-layer.5": 13902.9629, "encoder_q-layer.6": 14689.1865, "encoder_q-layer.7": 15442.1445, "encoder_q-layer.8": 17780.8691, "encoder_q-layer.9": 16970.9805, "epoch": 0.92, "inbatch_neg_score": 38.1187, "inbatch_pos_score": 38.6562, "learning_rate": 2.9444444444444445e-06, "loss": 2.1417, "norm_diff": 0.0522, "num_tokens_overlap": 5.5741, "num_tokens_union": 54.8818, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27294.0096, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2694, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7332, "sent_len_1": 66.4898, "sent_len_max_0": 18.8637, "sent_len_max_1": 187.4, "stdk": 0.0453, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 94700 }, { "accuracy": 58.0078, "doc_norm": 6.3214, "encoder_q-embeddings": 17310.541, "encoder_q-layer.0": 12394.6123, "encoder_q-layer.1": 13041.7227, "encoder_q-layer.10": 21682.377, "encoder_q-layer.11": 39847.6953, "encoder_q-layer.2": 14106.71, "encoder_q-layer.3": 13809.0312, "encoder_q-layer.4": 14803.7021, "encoder_q-layer.5": 14945.8906, "encoder_q-layer.6": 16223.5957, "encoder_q-layer.7": 16887.9062, "encoder_q-layer.8": 17665.5566, "encoder_q-layer.9": 16937.8477, "epoch": 0.93, "inbatch_neg_score": 38.1132, "inbatch_pos_score": 38.625, "learning_rate": 2.888888888888889e-06, "loss": 2.1587, "norm_diff": 0.0531, "num_tokens_overlap": 5.5764, "num_tokens_union": 55.0583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27639.3935, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2683, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7123, "sent_len_1": 66.8566, "sent_len_max_0": 18.8075, "sent_len_max_1": 188.8587, "stdk": 0.0452, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 94800 }, { "accuracy": 57.6172, "doc_norm": 6.3208, "encoder_q-embeddings": 18462.625, "encoder_q-layer.0": 13211.8652, "encoder_q-layer.1": 13680.0869, "encoder_q-layer.10": 21741.4004, "encoder_q-layer.11": 43889.5078, "encoder_q-layer.2": 14728.6768, "encoder_q-layer.3": 14840.7734, "encoder_q-layer.4": 15003.2432, "encoder_q-layer.5": 15101.9775, "encoder_q-layer.6": 16072.9629, "encoder_q-layer.7": 16640.9844, "encoder_q-layer.8": 19967.0469, "encoder_q-layer.9": 17466.9941, "epoch": 0.93, "inbatch_neg_score": 38.1003, "inbatch_pos_score": 38.625, "learning_rate": 2.8333333333333335e-06, "loss": 2.158, "norm_diff": 0.0531, "num_tokens_overlap": 5.5847, "num_tokens_union": 55.1435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29189.5154, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2676, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7003, "sent_len_1": 66.9915, "sent_len_max_0": 18.8125, "sent_len_max_1": 189.5387, "stdk": 0.0472, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 94900 }, { "accuracy": 54.9805, "doc_norm": 6.32, "encoder_q-embeddings": 18669.2207, "encoder_q-layer.0": 12727.6445, "encoder_q-layer.1": 13188.6709, "encoder_q-layer.10": 22697.8613, "encoder_q-layer.11": 43450.8945, "encoder_q-layer.2": 14162.5508, "encoder_q-layer.3": 14292.5654, "encoder_q-layer.4": 14917.8281, "encoder_q-layer.5": 14996.3203, "encoder_q-layer.6": 16154.4824, "encoder_q-layer.7": 16976.2402, "encoder_q-layer.8": 19457.9004, "encoder_q-layer.9": 18201.2363, "epoch": 0.93, "inbatch_neg_score": 38.0957, "inbatch_pos_score": 38.625, "learning_rate": 2.777777777777778e-06, "loss": 2.1453, "norm_diff": 0.0531, "num_tokens_overlap": 5.5892, "num_tokens_union": 55.1373, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28793.1949, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.267, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.733, "sent_len_1": 66.9579, "sent_len_max_0": 18.8612, "sent_len_max_1": 191.4712, "stdk": 0.046, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 95000 }, { "accuracy": 58.7891, "doc_norm": 6.3185, "encoder_q-embeddings": 17198.0391, "encoder_q-layer.0": 12235.4834, "encoder_q-layer.1": 12738.9385, "encoder_q-layer.10": 22181.6016, "encoder_q-layer.11": 41573.1602, "encoder_q-layer.2": 13558.8975, "encoder_q-layer.3": 13687.4775, "encoder_q-layer.4": 13971.8926, "encoder_q-layer.5": 13726.8516, "encoder_q-layer.6": 15691.1826, "encoder_q-layer.7": 17076.3066, "encoder_q-layer.8": 19003.7637, "encoder_q-layer.9": 17938.3262, "epoch": 0.93, "inbatch_neg_score": 38.0838, "inbatch_pos_score": 38.625, "learning_rate": 2.7222222222222224e-06, "loss": 2.1526, "norm_diff": 0.0521, "num_tokens_overlap": 5.5737, "num_tokens_union": 55.1204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28402.4118, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2664, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7031, "sent_len_1": 66.9834, "sent_len_max_0": 18.9425, "sent_len_max_1": 190.8075, "stdk": 0.0463, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 95100 }, { "accuracy": 55.957, "doc_norm": 6.3201, "encoder_q-embeddings": 17548.7969, "encoder_q-layer.0": 12406.2363, "encoder_q-layer.1": 13022.1201, "encoder_q-layer.10": 20852.7129, "encoder_q-layer.11": 43565.9531, "encoder_q-layer.2": 14204.6973, "encoder_q-layer.3": 14072.9443, "encoder_q-layer.4": 14896.5127, "encoder_q-layer.5": 14434.0303, "encoder_q-layer.6": 15360.4414, "encoder_q-layer.7": 16287.8066, "encoder_q-layer.8": 19075.3535, "encoder_q-layer.9": 16858.2441, "epoch": 0.93, "inbatch_neg_score": 38.0812, "inbatch_pos_score": 38.5938, "learning_rate": 2.666666666666667e-06, "loss": 2.1735, "norm_diff": 0.0558, "num_tokens_overlap": 5.5892, "num_tokens_union": 55.0829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28295.069, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2643, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7114, "sent_len_1": 66.9052, "sent_len_max_0": 18.805, "sent_len_max_1": 188.5225, "stdk": 0.0455, "stdq": 0.038, "stdqueue_k": 0.0, "step": 95200 }, { "accuracy": 59.2773, "doc_norm": 6.3184, "encoder_q-embeddings": 16713.9551, "encoder_q-layer.0": 12197.1025, "encoder_q-layer.1": 12797.3574, "encoder_q-layer.10": 21063.6523, "encoder_q-layer.11": 39268.7891, "encoder_q-layer.2": 13890.8916, "encoder_q-layer.3": 13633.6836, "encoder_q-layer.4": 14167.1025, "encoder_q-layer.5": 13936.6973, "encoder_q-layer.6": 14651.8887, "encoder_q-layer.7": 15754.4072, "encoder_q-layer.8": 19758.6895, "encoder_q-layer.9": 16486.1797, "epoch": 0.93, "inbatch_neg_score": 38.0788, "inbatch_pos_score": 38.625, "learning_rate": 2.6111111111111113e-06, "loss": 2.135, "norm_diff": 0.052, "num_tokens_overlap": 5.5683, "num_tokens_union": 54.9657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27054.7669, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2664, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.693, "sent_len_1": 66.7184, "sent_len_max_0": 18.8663, "sent_len_max_1": 188.07, "stdk": 0.0464, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 95300 }, { "accuracy": 58.6914, "doc_norm": 6.32, "encoder_q-embeddings": 17579.5215, "encoder_q-layer.0": 12422.8652, "encoder_q-layer.1": 12906.3262, "encoder_q-layer.10": 19826.1562, "encoder_q-layer.11": 39360.9102, "encoder_q-layer.2": 14023.6797, "encoder_q-layer.3": 14307.7773, "encoder_q-layer.4": 14553.0645, "encoder_q-layer.5": 14527.5371, "encoder_q-layer.6": 15680.3115, "encoder_q-layer.7": 16174.292, "encoder_q-layer.8": 18566.1289, "encoder_q-layer.9": 16717.959, "epoch": 0.93, "inbatch_neg_score": 38.0778, "inbatch_pos_score": 38.625, "learning_rate": 2.5555555555555557e-06, "loss": 2.1504, "norm_diff": 0.0537, "num_tokens_overlap": 5.59, "num_tokens_union": 55.0212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27260.2421, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2663, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7173, "sent_len_1": 66.798, "sent_len_max_0": 18.81, "sent_len_max_1": 187.2225, "stdk": 0.0467, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 95400 }, { "accuracy": 59.5703, "doc_norm": 6.3176, "encoder_q-embeddings": 16326.5596, "encoder_q-layer.0": 11906.7588, "encoder_q-layer.1": 12418.2832, "encoder_q-layer.10": 20651.6465, "encoder_q-layer.11": 42207.9297, "encoder_q-layer.2": 13463.8848, "encoder_q-layer.3": 13431.1807, "encoder_q-layer.4": 13939.7617, "encoder_q-layer.5": 13742.9404, "encoder_q-layer.6": 14552.2402, "encoder_q-layer.7": 15576.0527, "encoder_q-layer.8": 17334.1133, "encoder_q-layer.9": 15960.4639, "epoch": 0.93, "inbatch_neg_score": 38.0803, "inbatch_pos_score": 38.5938, "learning_rate": 2.5e-06, "loss": 2.1965, "norm_diff": 0.0519, "num_tokens_overlap": 5.5736, "num_tokens_union": 54.9951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26874.6916, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2656, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7082, "sent_len_1": 66.7517, "sent_len_max_0": 18.9225, "sent_len_max_1": 189.2012, "stdk": 0.0465, "stdq": 0.038, "stdqueue_k": 0.0, "step": 95500 }, { "accuracy": 60.0586, "doc_norm": 6.3191, "encoder_q-embeddings": 17978.4336, "encoder_q-layer.0": 12564.0312, "encoder_q-layer.1": 13415.707, "encoder_q-layer.10": 20241.3359, "encoder_q-layer.11": 38611.9922, "encoder_q-layer.2": 14645.6816, "encoder_q-layer.3": 14244.9111, "encoder_q-layer.4": 14571.1318, "encoder_q-layer.5": 14252.2051, "encoder_q-layer.6": 15005.6426, "encoder_q-layer.7": 16329.6367, "encoder_q-layer.8": 18268.9434, "encoder_q-layer.9": 16872.7539, "epoch": 0.93, "inbatch_neg_score": 38.0723, "inbatch_pos_score": 38.5938, "learning_rate": 2.4444444444444447e-06, "loss": 2.1848, "norm_diff": 0.0526, "num_tokens_overlap": 5.5732, "num_tokens_union": 54.9282, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27448.1718, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2665, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7408, "sent_len_1": 66.6191, "sent_len_max_0": 18.8038, "sent_len_max_1": 189.365, "stdk": 0.0452, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 95600 }, { "accuracy": 56.543, "doc_norm": 6.3206, "encoder_q-embeddings": 16829.5156, "encoder_q-layer.0": 12237.3135, "encoder_q-layer.1": 12850.3154, "encoder_q-layer.10": 21479.1426, "encoder_q-layer.11": 42423.3711, "encoder_q-layer.2": 14222.4238, "encoder_q-layer.3": 14688.7021, "encoder_q-layer.4": 15252.8691, "encoder_q-layer.5": 14906.1211, "encoder_q-layer.6": 16028.5645, "encoder_q-layer.7": 17230.3418, "encoder_q-layer.8": 19138.2188, "encoder_q-layer.9": 17142.4668, "epoch": 0.93, "inbatch_neg_score": 38.0744, "inbatch_pos_score": 38.5938, "learning_rate": 2.388888888888889e-06, "loss": 2.1348, "norm_diff": 0.0553, "num_tokens_overlap": 5.5897, "num_tokens_union": 55.058, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28213.6168, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2654, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7275, "sent_len_1": 66.8551, "sent_len_max_0": 18.9125, "sent_len_max_1": 191.6375, "stdk": 0.047, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 95700 }, { "accuracy": 59.5703, "doc_norm": 6.3177, "encoder_q-embeddings": 16695.7227, "encoder_q-layer.0": 12149.0625, "encoder_q-layer.1": 12744.4912, "encoder_q-layer.10": 21200.9492, "encoder_q-layer.11": 42211.3633, "encoder_q-layer.2": 14156.2656, "encoder_q-layer.3": 14261.8213, "encoder_q-layer.4": 15014.1123, "encoder_q-layer.5": 14866.7178, "encoder_q-layer.6": 15448.1562, "encoder_q-layer.7": 17236.1543, "encoder_q-layer.8": 17757.2598, "encoder_q-layer.9": 16417.6387, "epoch": 0.94, "inbatch_neg_score": 38.0604, "inbatch_pos_score": 38.5938, "learning_rate": 2.3333333333333336e-06, "loss": 2.1893, "norm_diff": 0.0528, "num_tokens_overlap": 5.5781, "num_tokens_union": 55.0753, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27392.449, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2649, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.749, "sent_len_1": 66.9387, "sent_len_max_0": 18.835, "sent_len_max_1": 191.5687, "stdk": 0.0474, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 95800 }, { "accuracy": 58.9844, "doc_norm": 6.3175, "encoder_q-embeddings": 16204.3584, "encoder_q-layer.0": 11465.6641, "encoder_q-layer.1": 12088.9961, "encoder_q-layer.10": 20201.6133, "encoder_q-layer.11": 38987.5625, "encoder_q-layer.2": 13113.6729, "encoder_q-layer.3": 13878.5547, "encoder_q-layer.4": 14150.2773, "encoder_q-layer.5": 13560.292, "encoder_q-layer.6": 14842.5508, "encoder_q-layer.7": 15773.6455, "encoder_q-layer.8": 18145.8438, "encoder_q-layer.9": 16826.6348, "epoch": 0.94, "inbatch_neg_score": 38.0679, "inbatch_pos_score": 38.5938, "learning_rate": 2.277777777777778e-06, "loss": 2.1628, "norm_diff": 0.0529, "num_tokens_overlap": 5.5743, "num_tokens_union": 54.9327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26520.5343, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2646, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7033, "sent_len_1": 66.6615, "sent_len_max_0": 18.7925, "sent_len_max_1": 190.1225, "stdk": 0.0456, "stdq": 0.038, "stdqueue_k": 0.0, "step": 95900 }, { "accuracy": 57.8125, "doc_norm": 6.3186, "encoder_q-embeddings": 17002.3848, "encoder_q-layer.0": 12166.0625, "encoder_q-layer.1": 12490.0957, "encoder_q-layer.10": 20018.6875, "encoder_q-layer.11": 40527.5977, "encoder_q-layer.2": 13736.6631, "encoder_q-layer.3": 13593.1543, "encoder_q-layer.4": 14225.4961, "encoder_q-layer.5": 14133.5215, "encoder_q-layer.6": 14986.8281, "encoder_q-layer.7": 15921.0938, "encoder_q-layer.8": 17620.6953, "encoder_q-layer.9": 16424.459, "epoch": 0.94, "inbatch_neg_score": 38.0653, "inbatch_pos_score": 38.5938, "learning_rate": 2.2222222222222225e-06, "loss": 2.0845, "norm_diff": 0.0539, "num_tokens_overlap": 5.585, "num_tokens_union": 55.043, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27222.9881, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2647, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7348, "sent_len_1": 66.7681, "sent_len_max_0": 18.785, "sent_len_max_1": 189.1362, "stdk": 0.0459, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 96000 }, { "accuracy": 57.1289, "doc_norm": 6.3178, "encoder_q-embeddings": 17045.3516, "encoder_q-layer.0": 12349.6914, "encoder_q-layer.1": 13061.1758, "encoder_q-layer.10": 22180.9844, "encoder_q-layer.11": 40868.457, "encoder_q-layer.2": 14280.0723, "encoder_q-layer.3": 14213.6416, "encoder_q-layer.4": 14436.0918, "encoder_q-layer.5": 13929.5488, "encoder_q-layer.6": 14998.7666, "encoder_q-layer.7": 16381.0742, "encoder_q-layer.8": 18354.8594, "encoder_q-layer.9": 17590.5859, "epoch": 0.94, "inbatch_neg_score": 38.0683, "inbatch_pos_score": 38.5938, "learning_rate": 2.166666666666667e-06, "loss": 2.1445, "norm_diff": 0.0546, "num_tokens_overlap": 5.5786, "num_tokens_union": 54.9654, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27888.2895, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2632, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7396, "sent_len_1": 66.6583, "sent_len_max_0": 18.775, "sent_len_max_1": 189.0437, "stdk": 0.0465, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 96100 }, { "accuracy": 57.0312, "doc_norm": 6.3177, "encoder_q-embeddings": 17583.9004, "encoder_q-layer.0": 12307.9092, "encoder_q-layer.1": 12904.0186, "encoder_q-layer.10": 20828.2168, "encoder_q-layer.11": 37595.1484, "encoder_q-layer.2": 14103.6182, "encoder_q-layer.3": 13878.1846, "encoder_q-layer.4": 14538.9111, "encoder_q-layer.5": 14663.3164, "encoder_q-layer.6": 15286.1816, "encoder_q-layer.7": 16684.4531, "encoder_q-layer.8": 17486.1758, "encoder_q-layer.9": 16067.541, "epoch": 0.94, "inbatch_neg_score": 38.0668, "inbatch_pos_score": 38.5938, "learning_rate": 2.1111111111111114e-06, "loss": 2.1791, "norm_diff": 0.053, "num_tokens_overlap": 5.5829, "num_tokens_union": 55.1589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27055.3059, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2647, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7202, "sent_len_1": 66.9764, "sent_len_max_0": 19.0012, "sent_len_max_1": 189.6463, "stdk": 0.0459, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 96200 }, { "accuracy": 60.2539, "doc_norm": 6.3175, "encoder_q-embeddings": 16841.2285, "encoder_q-layer.0": 12056.2451, "encoder_q-layer.1": 12430.0537, "encoder_q-layer.10": 20253.5391, "encoder_q-layer.11": 39821.3555, "encoder_q-layer.2": 13819.4756, "encoder_q-layer.3": 13947.4424, "encoder_q-layer.4": 14292.7422, "encoder_q-layer.5": 14208.6787, "encoder_q-layer.6": 15163.3164, "encoder_q-layer.7": 15880.0898, "encoder_q-layer.8": 17806.9355, "encoder_q-layer.9": 16322.5576, "epoch": 0.94, "inbatch_neg_score": 38.0596, "inbatch_pos_score": 38.5938, "learning_rate": 2.055555555555556e-06, "loss": 2.1321, "norm_diff": 0.0536, "num_tokens_overlap": 5.5874, "num_tokens_union": 55.1663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27310.0332, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2639, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.731, "sent_len_1": 67.0248, "sent_len_max_0": 18.83, "sent_len_max_1": 189.895, "stdk": 0.0459, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 96300 }, { "accuracy": 61.3281, "doc_norm": 6.3192, "encoder_q-embeddings": 15919.6709, "encoder_q-layer.0": 11322.9072, "encoder_q-layer.1": 11899.8379, "encoder_q-layer.10": 20729.3105, "encoder_q-layer.11": 39040.6875, "encoder_q-layer.2": 13241.875, "encoder_q-layer.3": 13134.3701, "encoder_q-layer.4": 13824.8428, "encoder_q-layer.5": 13824.1641, "encoder_q-layer.6": 14434.8457, "encoder_q-layer.7": 16218.1689, "encoder_q-layer.8": 17493.4395, "encoder_q-layer.9": 16295.1729, "epoch": 0.94, "inbatch_neg_score": 38.055, "inbatch_pos_score": 38.5938, "learning_rate": 2.0000000000000003e-06, "loss": 2.1702, "norm_diff": 0.0549, "num_tokens_overlap": 5.5826, "num_tokens_union": 55.1058, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26371.1386, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2643, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7105, "sent_len_1": 66.9372, "sent_len_max_0": 18.7025, "sent_len_max_1": 189.4375, "stdk": 0.0474, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 96400 }, { "accuracy": 55.3711, "doc_norm": 6.317, "encoder_q-embeddings": 17259.8789, "encoder_q-layer.0": 12611.9756, "encoder_q-layer.1": 13320.6934, "encoder_q-layer.10": 22359.1719, "encoder_q-layer.11": 40360.9219, "encoder_q-layer.2": 13827.2354, "encoder_q-layer.3": 14115.1279, "encoder_q-layer.4": 14972.1504, "encoder_q-layer.5": 14628.1465, "encoder_q-layer.6": 15886.6934, "encoder_q-layer.7": 17553.5898, "encoder_q-layer.8": 19462.2559, "encoder_q-layer.9": 18207.8164, "epoch": 0.94, "inbatch_neg_score": 38.0634, "inbatch_pos_score": 38.5625, "learning_rate": 1.9444444444444444e-06, "loss": 2.1519, "norm_diff": 0.0529, "num_tokens_overlap": 5.5817, "num_tokens_union": 55.111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28206.7121, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2641, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7142, "sent_len_1": 66.9924, "sent_len_max_0": 18.7037, "sent_len_max_1": 189.0625, "stdk": 0.0468, "stdq": 0.0382, "stdqueue_k": 0.0, "step": 96500 }, { "accuracy": 55.6641, "doc_norm": 6.3188, "encoder_q-embeddings": 17434.2852, "encoder_q-layer.0": 12362.0078, "encoder_q-layer.1": 13014.876, "encoder_q-layer.10": 28966.5801, "encoder_q-layer.11": 45035.2891, "encoder_q-layer.2": 14705.1162, "encoder_q-layer.3": 14731.3096, "encoder_q-layer.4": 15062.1318, "encoder_q-layer.5": 15455.7305, "encoder_q-layer.6": 16589.9766, "encoder_q-layer.7": 17946.2168, "encoder_q-layer.8": 20095.4473, "encoder_q-layer.9": 19863.0469, "epoch": 0.94, "inbatch_neg_score": 38.055, "inbatch_pos_score": 38.5625, "learning_rate": 1.888888888888889e-06, "loss": 2.1494, "norm_diff": 0.0546, "num_tokens_overlap": 5.5733, "num_tokens_union": 54.9567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30375.0027, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2642, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7061, "sent_len_1": 66.7487, "sent_len_max_0": 18.9425, "sent_len_max_1": 189.685, "stdk": 0.047, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 96600 }, { "accuracy": 58.3984, "doc_norm": 6.3162, "encoder_q-embeddings": 16280.543, "encoder_q-layer.0": 11641.2979, "encoder_q-layer.1": 12187.2393, "encoder_q-layer.10": 21681.3047, "encoder_q-layer.11": 42960.4414, "encoder_q-layer.2": 13433.749, "encoder_q-layer.3": 13654.8154, "encoder_q-layer.4": 14341.3896, "encoder_q-layer.5": 14533.79, "encoder_q-layer.6": 15666.8877, "encoder_q-layer.7": 16826.416, "encoder_q-layer.8": 19441.2383, "encoder_q-layer.9": 17733.1309, "epoch": 0.94, "inbatch_neg_score": 38.0519, "inbatch_pos_score": 38.5938, "learning_rate": 1.8333333333333335e-06, "loss": 2.1451, "norm_diff": 0.0526, "num_tokens_overlap": 5.5809, "num_tokens_union": 54.9785, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27821.0496, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2636, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7364, "sent_len_1": 66.7851, "sent_len_max_0": 18.9312, "sent_len_max_1": 191.2788, "stdk": 0.0465, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 96700 }, { "accuracy": 57.8125, "doc_norm": 6.3176, "encoder_q-embeddings": 17223.4082, "encoder_q-layer.0": 12306.5889, "encoder_q-layer.1": 12728.7725, "encoder_q-layer.10": 22450.0527, "encoder_q-layer.11": 41027.9648, "encoder_q-layer.2": 13989.9092, "encoder_q-layer.3": 14102.0488, "encoder_q-layer.4": 14955.0508, "encoder_q-layer.5": 14513.6152, "encoder_q-layer.6": 15805.8818, "encoder_q-layer.7": 17050.7676, "encoder_q-layer.8": 19044.0898, "encoder_q-layer.9": 17940.1387, "epoch": 0.95, "inbatch_neg_score": 38.055, "inbatch_pos_score": 38.5938, "learning_rate": 1.777777777777778e-06, "loss": 2.1763, "norm_diff": 0.0533, "num_tokens_overlap": 5.5963, "num_tokens_union": 55.1578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28004.5129, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2643, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7252, "sent_len_1": 66.981, "sent_len_max_0": 18.825, "sent_len_max_1": 188.8663, "stdk": 0.0465, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 96800 }, { "accuracy": 59.4727, "doc_norm": 6.3208, "encoder_q-embeddings": 16937.3477, "encoder_q-layer.0": 12212.0615, "encoder_q-layer.1": 12543.1621, "encoder_q-layer.10": 21318.0156, "encoder_q-layer.11": 39814.9844, "encoder_q-layer.2": 13713.7344, "encoder_q-layer.3": 13882.5576, "encoder_q-layer.4": 15386.3984, "encoder_q-layer.5": 14353.459, "encoder_q-layer.6": 15015.002, "encoder_q-layer.7": 16243.5439, "encoder_q-layer.8": 18481.9414, "encoder_q-layer.9": 16945.6289, "epoch": 0.95, "inbatch_neg_score": 38.0543, "inbatch_pos_score": 38.5938, "learning_rate": 1.7222222222222222e-06, "loss": 2.1874, "norm_diff": 0.057, "num_tokens_overlap": 5.5644, "num_tokens_union": 55.0053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27383.6171, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2638, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7099, "sent_len_1": 66.8038, "sent_len_max_0": 18.8962, "sent_len_max_1": 189.8988, "stdk": 0.0473, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 96900 }, { "accuracy": 59.1797, "doc_norm": 6.3172, "encoder_q-embeddings": 16755.4492, "encoder_q-layer.0": 12389.5391, "encoder_q-layer.1": 12312.3438, "encoder_q-layer.10": 20672.3105, "encoder_q-layer.11": 39269.9102, "encoder_q-layer.2": 13364.5225, "encoder_q-layer.3": 13229.0088, "encoder_q-layer.4": 13657.5352, "encoder_q-layer.5": 13318.3047, "encoder_q-layer.6": 14372.0127, "encoder_q-layer.7": 15758.3438, "encoder_q-layer.8": 16931.668, "encoder_q-layer.9": 16259.0928, "epoch": 0.95, "inbatch_neg_score": 38.0432, "inbatch_pos_score": 38.5625, "learning_rate": 1.6666666666666667e-06, "loss": 2.1313, "norm_diff": 0.0535, "num_tokens_overlap": 5.5605, "num_tokens_union": 54.8794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26301.8009, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2638, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6871, "sent_len_1": 66.6118, "sent_len_max_0": 18.73, "sent_len_max_1": 188.7063, "stdk": 0.0467, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 97000 }, { "accuracy": 58.3008, "doc_norm": 6.3159, "encoder_q-embeddings": 17361.5879, "encoder_q-layer.0": 12412.1689, "encoder_q-layer.1": 12932.8926, "encoder_q-layer.10": 21814.6211, "encoder_q-layer.11": 41717.3711, "encoder_q-layer.2": 14418.0527, "encoder_q-layer.3": 14197.0576, "encoder_q-layer.4": 14756.7266, "encoder_q-layer.5": 14234.4443, "encoder_q-layer.6": 15623.7715, "encoder_q-layer.7": 16857.8379, "encoder_q-layer.8": 18484.2344, "encoder_q-layer.9": 17355.0273, "epoch": 0.95, "inbatch_neg_score": 38.0509, "inbatch_pos_score": 38.5625, "learning_rate": 1.6111111111111111e-06, "loss": 2.1388, "norm_diff": 0.0522, "num_tokens_overlap": 5.5639, "num_tokens_union": 54.9736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28063.7796, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2637, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.697, "sent_len_1": 66.785, "sent_len_max_0": 18.8938, "sent_len_max_1": 189.475, "stdk": 0.0452, "stdq": 0.039, "stdqueue_k": 0.0, "step": 97100 }, { "accuracy": 58.7891, "doc_norm": 6.3198, "encoder_q-embeddings": 17046.4824, "encoder_q-layer.0": 12179.0195, "encoder_q-layer.1": 12568.3857, "encoder_q-layer.10": 21242.8887, "encoder_q-layer.11": 40142.0, "encoder_q-layer.2": 14002.2285, "encoder_q-layer.3": 14126.1191, "encoder_q-layer.4": 14583.918, "encoder_q-layer.5": 14536.1611, "encoder_q-layer.6": 15766.5186, "encoder_q-layer.7": 16041.8047, "encoder_q-layer.8": 17908.7676, "encoder_q-layer.9": 16786.5156, "epoch": 0.95, "inbatch_neg_score": 38.0419, "inbatch_pos_score": 38.5938, "learning_rate": 1.5555555555555556e-06, "loss": 2.1177, "norm_diff": 0.0558, "num_tokens_overlap": 5.5782, "num_tokens_union": 55.1404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27441.43, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.264, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7359, "sent_len_1": 66.9501, "sent_len_max_0": 18.88, "sent_len_max_1": 189.8587, "stdk": 0.0466, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 97200 }, { "accuracy": 55.1758, "doc_norm": 6.3149, "encoder_q-embeddings": 17702.5176, "encoder_q-layer.0": 12701.4561, "encoder_q-layer.1": 12993.4365, "encoder_q-layer.10": 21022.8594, "encoder_q-layer.11": 43257.5234, "encoder_q-layer.2": 14600.1855, "encoder_q-layer.3": 14634.3389, "encoder_q-layer.4": 15131.6416, "encoder_q-layer.5": 14684.2217, "encoder_q-layer.6": 15389.3857, "encoder_q-layer.7": 16781.9648, "encoder_q-layer.8": 18183.418, "encoder_q-layer.9": 16931.4355, "epoch": 0.95, "inbatch_neg_score": 38.046, "inbatch_pos_score": 38.5625, "learning_rate": 1.5e-06, "loss": 2.1635, "norm_diff": 0.052, "num_tokens_overlap": 5.5818, "num_tokens_union": 55.1122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28380.6095, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2628, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7065, "sent_len_1": 66.9654, "sent_len_max_0": 18.88, "sent_len_max_1": 189.6838, "stdk": 0.0466, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 97300 }, { "accuracy": 57.1289, "doc_norm": 6.3174, "encoder_q-embeddings": 17384.8203, "encoder_q-layer.0": 12198.3018, "encoder_q-layer.1": 12495.5889, "encoder_q-layer.10": 23044.5586, "encoder_q-layer.11": 40439.0742, "encoder_q-layer.2": 13668.833, "encoder_q-layer.3": 13980.082, "encoder_q-layer.4": 14566.0518, "encoder_q-layer.5": 14334.3896, "encoder_q-layer.6": 15864.7422, "encoder_q-layer.7": 17516.6621, "encoder_q-layer.8": 18173.4531, "encoder_q-layer.9": 16949.8379, "epoch": 0.95, "inbatch_neg_score": 38.0452, "inbatch_pos_score": 38.5625, "learning_rate": 1.4444444444444445e-06, "loss": 2.2047, "norm_diff": 0.0543, "num_tokens_overlap": 5.5779, "num_tokens_union": 54.9939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28101.3508, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.263, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7215, "sent_len_1": 66.7429, "sent_len_max_0": 18.7712, "sent_len_max_1": 189.6763, "stdk": 0.0474, "stdq": 0.0387, "stdqueue_k": 0.0, "step": 97400 }, { "accuracy": 57.5195, "doc_norm": 6.3159, "encoder_q-embeddings": 17489.8535, "encoder_q-layer.0": 12509.6924, "encoder_q-layer.1": 13006.1055, "encoder_q-layer.10": 21164.6367, "encoder_q-layer.11": 40806.3516, "encoder_q-layer.2": 14397.3301, "encoder_q-layer.3": 14305.4814, "encoder_q-layer.4": 14854.543, "encoder_q-layer.5": 15116.1738, "encoder_q-layer.6": 15668.0117, "encoder_q-layer.7": 17455.8477, "encoder_q-layer.8": 18368.5332, "encoder_q-layer.9": 16976.877, "epoch": 0.95, "inbatch_neg_score": 38.0441, "inbatch_pos_score": 38.5625, "learning_rate": 1.388888888888889e-06, "loss": 2.1956, "norm_diff": 0.0529, "num_tokens_overlap": 5.5876, "num_tokens_union": 55.1362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27918.6626, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.263, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7398, "sent_len_1": 66.9261, "sent_len_max_0": 18.87, "sent_len_max_1": 190.31, "stdk": 0.0459, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 97500 }, { "accuracy": 54.8828, "doc_norm": 6.3135, "encoder_q-embeddings": 17196.5332, "encoder_q-layer.0": 12517.7012, "encoder_q-layer.1": 12969.7959, "encoder_q-layer.10": 20360.0215, "encoder_q-layer.11": 41144.2031, "encoder_q-layer.2": 14403.4229, "encoder_q-layer.3": 14607.4502, "encoder_q-layer.4": 14803.7256, "encoder_q-layer.5": 14439.21, "encoder_q-layer.6": 15689.3457, "encoder_q-layer.7": 16701.7578, "encoder_q-layer.8": 18350.4844, "encoder_q-layer.9": 17140.4863, "epoch": 0.95, "inbatch_neg_score": 38.0508, "inbatch_pos_score": 38.5625, "learning_rate": 1.3333333333333334e-06, "loss": 2.1495, "norm_diff": 0.0503, "num_tokens_overlap": 5.5986, "num_tokens_union": 54.9612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27690.8976, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2632, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7356, "sent_len_1": 66.7063, "sent_len_max_0": 18.9187, "sent_len_max_1": 191.135, "stdk": 0.0461, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 97600 }, { "accuracy": 59.8633, "doc_norm": 6.3152, "encoder_q-embeddings": 17042.457, "encoder_q-layer.0": 12396.0586, "encoder_q-layer.1": 12546.9023, "encoder_q-layer.10": 22026.1562, "encoder_q-layer.11": 42600.0938, "encoder_q-layer.2": 13607.8965, "encoder_q-layer.3": 13822.9658, "encoder_q-layer.4": 14787.96, "encoder_q-layer.5": 14199.6719, "encoder_q-layer.6": 15224.4951, "encoder_q-layer.7": 15479.2441, "encoder_q-layer.8": 17938.8145, "encoder_q-layer.9": 16425.5391, "epoch": 0.95, "inbatch_neg_score": 38.0447, "inbatch_pos_score": 38.5625, "learning_rate": 1.2777777777777779e-06, "loss": 2.1347, "norm_diff": 0.0522, "num_tokens_overlap": 5.571, "num_tokens_union": 55.1548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27605.7685, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.263, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7379, "sent_len_1": 66.933, "sent_len_max_0": 18.8238, "sent_len_max_1": 190.1438, "stdk": 0.0477, "stdq": 0.0388, "stdqueue_k": 0.0, "step": 97700 }, { "accuracy": 58.3008, "doc_norm": 6.3163, "encoder_q-embeddings": 16389.2656, "encoder_q-layer.0": 11712.6191, "encoder_q-layer.1": 12281.6201, "encoder_q-layer.10": 19563.5938, "encoder_q-layer.11": 40635.543, "encoder_q-layer.2": 13680.5283, "encoder_q-layer.3": 13622.7754, "encoder_q-layer.4": 13879.7705, "encoder_q-layer.5": 13754.1064, "encoder_q-layer.6": 14843.8037, "encoder_q-layer.7": 16125.582, "encoder_q-layer.8": 17853.6934, "encoder_q-layer.9": 16196.3438, "epoch": 0.95, "inbatch_neg_score": 38.0441, "inbatch_pos_score": 38.5625, "learning_rate": 1.2222222222222223e-06, "loss": 2.1388, "norm_diff": 0.0534, "num_tokens_overlap": 5.5896, "num_tokens_union": 55.0363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26629.3368, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2629, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7385, "sent_len_1": 66.8516, "sent_len_max_0": 18.87, "sent_len_max_1": 192.43, "stdk": 0.0456, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 97800 }, { "accuracy": 60.2539, "doc_norm": 6.3175, "encoder_q-embeddings": 17709.6738, "encoder_q-layer.0": 12108.1621, "encoder_q-layer.1": 12750.1934, "encoder_q-layer.10": 20027.5332, "encoder_q-layer.11": 39664.5, "encoder_q-layer.2": 14157.8301, "encoder_q-layer.3": 14109.7686, "encoder_q-layer.4": 14570.8252, "encoder_q-layer.5": 14261.1807, "encoder_q-layer.6": 15375.1797, "encoder_q-layer.7": 16573.0117, "encoder_q-layer.8": 17851.8203, "encoder_q-layer.9": 16406.1309, "epoch": 0.96, "inbatch_neg_score": 38.0357, "inbatch_pos_score": 38.5625, "learning_rate": 1.1666666666666668e-06, "loss": 2.1415, "norm_diff": 0.0544, "num_tokens_overlap": 5.5906, "num_tokens_union": 55.074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27374.0027, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2631, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.73, "sent_len_1": 66.8287, "sent_len_max_0": 18.975, "sent_len_max_1": 188.6625, "stdk": 0.0471, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 97900 }, { "accuracy": 59.2773, "doc_norm": 6.318, "encoder_q-embeddings": 18544.1602, "encoder_q-layer.0": 12659.6094, "encoder_q-layer.1": 13388.7275, "encoder_q-layer.10": 21769.416, "encoder_q-layer.11": 40266.3281, "encoder_q-layer.2": 14356.1689, "encoder_q-layer.3": 14412.1289, "encoder_q-layer.4": 15169.6289, "encoder_q-layer.5": 14959.3652, "encoder_q-layer.6": 16712.623, "encoder_q-layer.7": 16671.2949, "encoder_q-layer.8": 18485.4258, "encoder_q-layer.9": 17917.2734, "epoch": 0.96, "inbatch_neg_score": 38.0369, "inbatch_pos_score": 38.5625, "learning_rate": 1.1111111111111112e-06, "loss": 2.1645, "norm_diff": 0.0551, "num_tokens_overlap": 5.5776, "num_tokens_union": 54.9683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28320.3605, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.263, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7314, "sent_len_1": 66.6899, "sent_len_max_0": 18.795, "sent_len_max_1": 188.4387, "stdk": 0.0458, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 98000 }, { "accuracy": 57.3242, "doc_norm": 6.3169, "encoder_q-embeddings": 17994.2266, "encoder_q-layer.0": 12448.123, "encoder_q-layer.1": 13150.8848, "encoder_q-layer.10": 19912.9707, "encoder_q-layer.11": 41643.3789, "encoder_q-layer.2": 14591.625, "encoder_q-layer.3": 14533.8359, "encoder_q-layer.4": 14982.6729, "encoder_q-layer.5": 14756.9873, "encoder_q-layer.6": 15115.8906, "encoder_q-layer.7": 16750.582, "encoder_q-layer.8": 18522.8711, "encoder_q-layer.9": 16652.1504, "epoch": 0.96, "inbatch_neg_score": 38.0391, "inbatch_pos_score": 38.5625, "learning_rate": 1.0555555555555557e-06, "loss": 2.1694, "norm_diff": 0.0542, "num_tokens_overlap": 5.5714, "num_tokens_union": 55.0062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27801.1644, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2627, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7037, "sent_len_1": 66.7808, "sent_len_max_0": 18.8588, "sent_len_max_1": 188.76, "stdk": 0.046, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 98100 }, { "accuracy": 56.8359, "doc_norm": 6.3134, "encoder_q-embeddings": 17145.6406, "encoder_q-layer.0": 12084.2969, "encoder_q-layer.1": 12365.7646, "encoder_q-layer.10": 23462.8828, "encoder_q-layer.11": 44365.4922, "encoder_q-layer.2": 13625.375, "encoder_q-layer.3": 13946.8643, "encoder_q-layer.4": 14219.707, "encoder_q-layer.5": 14145.4707, "encoder_q-layer.6": 14859.2432, "encoder_q-layer.7": 15957.4033, "encoder_q-layer.8": 18657.4648, "encoder_q-layer.9": 17828.6035, "epoch": 0.96, "inbatch_neg_score": 38.0454, "inbatch_pos_score": 38.5625, "learning_rate": 1.0000000000000002e-06, "loss": 2.1727, "norm_diff": 0.0522, "num_tokens_overlap": 5.5599, "num_tokens_union": 55.115, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28186.0071, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2613, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6847, "sent_len_1": 66.9183, "sent_len_max_0": 18.8375, "sent_len_max_1": 189.6738, "stdk": 0.0454, "stdq": 0.0386, "stdqueue_k": 0.0, "step": 98200 }, { "accuracy": 60.5469, "doc_norm": 6.3188, "encoder_q-embeddings": 17273.1094, "encoder_q-layer.0": 12267.7129, "encoder_q-layer.1": 13091.3154, "encoder_q-layer.10": 20692.375, "encoder_q-layer.11": 40034.9023, "encoder_q-layer.2": 14416.2285, "encoder_q-layer.3": 14388.29, "encoder_q-layer.4": 14384.248, "encoder_q-layer.5": 13935.7178, "encoder_q-layer.6": 15053.082, "encoder_q-layer.7": 15773.3408, "encoder_q-layer.8": 17662.0, "encoder_q-layer.9": 16248.2129, "epoch": 0.96, "inbatch_neg_score": 38.0368, "inbatch_pos_score": 38.5625, "learning_rate": 9.444444444444445e-07, "loss": 2.1378, "norm_diff": 0.0555, "num_tokens_overlap": 5.5716, "num_tokens_union": 54.9347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27081.6075, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2633, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7471, "sent_len_1": 66.5656, "sent_len_max_0": 18.87, "sent_len_max_1": 188.1725, "stdk": 0.0471, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 98300 }, { "accuracy": 56.8359, "doc_norm": 6.3168, "encoder_q-embeddings": 17014.8574, "encoder_q-layer.0": 12297.3076, "encoder_q-layer.1": 13069.3623, "encoder_q-layer.10": 23030.4004, "encoder_q-layer.11": 43999.2578, "encoder_q-layer.2": 14258.7021, "encoder_q-layer.3": 14670.1621, "encoder_q-layer.4": 15016.9268, "encoder_q-layer.5": 14922.3525, "encoder_q-layer.6": 15618.3623, "encoder_q-layer.7": 17913.4746, "encoder_q-layer.8": 20065.1934, "encoder_q-layer.9": 18055.2461, "epoch": 0.96, "inbatch_neg_score": 38.0396, "inbatch_pos_score": 38.5625, "learning_rate": 8.88888888888889e-07, "loss": 2.1863, "norm_diff": 0.0547, "num_tokens_overlap": 5.5926, "num_tokens_union": 55.0087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28962.1318, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2621, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7514, "sent_len_1": 66.7327, "sent_len_max_0": 18.8575, "sent_len_max_1": 191.8913, "stdk": 0.0463, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 98400 }, { "accuracy": 59.375, "doc_norm": 6.3164, "encoder_q-embeddings": 16649.0, "encoder_q-layer.0": 11598.1885, "encoder_q-layer.1": 12502.6641, "encoder_q-layer.10": 21087.5879, "encoder_q-layer.11": 37733.6836, "encoder_q-layer.2": 13309.3574, "encoder_q-layer.3": 13325.3496, "encoder_q-layer.4": 13971.5967, "encoder_q-layer.5": 14056.7754, "encoder_q-layer.6": 15430.7383, "encoder_q-layer.7": 16648.0586, "encoder_q-layer.8": 18964.8984, "encoder_q-layer.9": 16662.041, "epoch": 0.96, "inbatch_neg_score": 38.0289, "inbatch_pos_score": 38.5625, "learning_rate": 8.333333333333333e-07, "loss": 2.179, "norm_diff": 0.0553, "num_tokens_overlap": 5.58, "num_tokens_union": 55.0444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26566.9929, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2611, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7346, "sent_len_1": 66.8856, "sent_len_max_0": 18.8925, "sent_len_max_1": 188.455, "stdk": 0.0457, "stdq": 0.0378, "stdqueue_k": 0.0, "step": 98500 }, { "accuracy": 60.3516, "doc_norm": 6.3157, "encoder_q-embeddings": 17436.8887, "encoder_q-layer.0": 12227.0547, "encoder_q-layer.1": 12699.8955, "encoder_q-layer.10": 22011.8984, "encoder_q-layer.11": 40207.8125, "encoder_q-layer.2": 14164.5332, "encoder_q-layer.3": 14143.7881, "encoder_q-layer.4": 14783.0752, "encoder_q-layer.5": 14139.4531, "encoder_q-layer.6": 14999.3379, "encoder_q-layer.7": 15516.6494, "encoder_q-layer.8": 18536.9395, "encoder_q-layer.9": 17038.1973, "epoch": 0.96, "inbatch_neg_score": 38.0327, "inbatch_pos_score": 38.5625, "learning_rate": 7.777777777777778e-07, "loss": 2.1587, "norm_diff": 0.0535, "num_tokens_overlap": 5.5684, "num_tokens_union": 54.9101, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27405.7044, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2622, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7074, "sent_len_1": 66.5901, "sent_len_max_0": 18.8588, "sent_len_max_1": 189.1937, "stdk": 0.0471, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 98600 }, { "accuracy": 56.8359, "doc_norm": 6.3147, "encoder_q-embeddings": 17585.1426, "encoder_q-layer.0": 12122.7207, "encoder_q-layer.1": 12617.3096, "encoder_q-layer.10": 21131.5488, "encoder_q-layer.11": 41183.5352, "encoder_q-layer.2": 13914.3311, "encoder_q-layer.3": 14189.6934, "encoder_q-layer.4": 14325.3682, "encoder_q-layer.5": 14154.8535, "encoder_q-layer.6": 15631.7109, "encoder_q-layer.7": 16214.7012, "encoder_q-layer.8": 18307.7969, "encoder_q-layer.9": 17621.0508, "epoch": 0.96, "inbatch_neg_score": 38.0394, "inbatch_pos_score": 38.5625, "learning_rate": 7.222222222222222e-07, "loss": 2.1823, "norm_diff": 0.0523, "num_tokens_overlap": 5.5752, "num_tokens_union": 54.9385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27754.3378, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2624, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7324, "sent_len_1": 66.6134, "sent_len_max_0": 18.7838, "sent_len_max_1": 191.0625, "stdk": 0.0474, "stdq": 0.0393, "stdqueue_k": 0.0, "step": 98700 }, { "accuracy": 58.3984, "doc_norm": 6.317, "encoder_q-embeddings": 17366.4941, "encoder_q-layer.0": 12417.0635, "encoder_q-layer.1": 13137.4707, "encoder_q-layer.10": 19870.4336, "encoder_q-layer.11": 41865.4609, "encoder_q-layer.2": 14395.1123, "encoder_q-layer.3": 14177.1553, "encoder_q-layer.4": 14575.4512, "encoder_q-layer.5": 14527.9307, "encoder_q-layer.6": 15226.5605, "encoder_q-layer.7": 15857.9805, "encoder_q-layer.8": 18324.291, "encoder_q-layer.9": 15913.6885, "epoch": 0.96, "inbatch_neg_score": 38.0322, "inbatch_pos_score": 38.5625, "learning_rate": 6.666666666666667e-07, "loss": 2.176, "norm_diff": 0.0545, "num_tokens_overlap": 5.5832, "num_tokens_union": 55.122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27426.5862, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2625, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7211, "sent_len_1": 66.9271, "sent_len_max_0": 18.8175, "sent_len_max_1": 189.675, "stdk": 0.0471, "stdq": 0.0389, "stdqueue_k": 0.0, "step": 98800 }, { "accuracy": 59.8633, "doc_norm": 6.3162, "encoder_q-embeddings": 17058.3652, "encoder_q-layer.0": 12296.9043, "encoder_q-layer.1": 12802.9844, "encoder_q-layer.10": 21948.1348, "encoder_q-layer.11": 41163.5312, "encoder_q-layer.2": 14245.6387, "encoder_q-layer.3": 14088.2178, "encoder_q-layer.4": 14990.5459, "encoder_q-layer.5": 14303.1221, "encoder_q-layer.6": 14719.7432, "encoder_q-layer.7": 15583.8506, "encoder_q-layer.8": 17749.752, "encoder_q-layer.9": 16920.5742, "epoch": 0.97, "inbatch_neg_score": 38.0289, "inbatch_pos_score": 38.5625, "learning_rate": 6.111111111111112e-07, "loss": 2.1776, "norm_diff": 0.0545, "num_tokens_overlap": 5.5797, "num_tokens_union": 54.9751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27590.3453, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2617, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7198, "sent_len_1": 66.6682, "sent_len_max_0": 18.85, "sent_len_max_1": 187.8425, "stdk": 0.046, "stdq": 0.0379, "stdqueue_k": 0.0, "step": 98900 }, { "accuracy": 61.4258, "doc_norm": 6.3175, "encoder_q-embeddings": 16472.8184, "encoder_q-layer.0": 12081.9033, "encoder_q-layer.1": 12539.4424, "encoder_q-layer.10": 22040.4004, "encoder_q-layer.11": 39920.9883, "encoder_q-layer.2": 13405.7354, "encoder_q-layer.3": 13697.9678, "encoder_q-layer.4": 13896.9346, "encoder_q-layer.5": 13507.0215, "encoder_q-layer.6": 14785.4473, "encoder_q-layer.7": 16750.041, "encoder_q-layer.8": 17622.3828, "encoder_q-layer.9": 16995.6934, "epoch": 0.97, "inbatch_neg_score": 38.0285, "inbatch_pos_score": 38.5625, "learning_rate": 5.555555555555556e-07, "loss": 2.1485, "norm_diff": 0.0551, "num_tokens_overlap": 5.5826, "num_tokens_union": 54.9076, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26915.9531, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2624, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7381, "sent_len_1": 66.582, "sent_len_max_0": 18.765, "sent_len_max_1": 189.9363, "stdk": 0.0474, "stdq": 0.0391, "stdqueue_k": 0.0, "step": 99000 }, { "accuracy": 62.207, "doc_norm": 6.3191, "encoder_q-embeddings": 17517.4043, "encoder_q-layer.0": 12605.3457, "encoder_q-layer.1": 12961.1895, "encoder_q-layer.10": 21560.8652, "encoder_q-layer.11": 40303.7812, "encoder_q-layer.2": 14148.5508, "encoder_q-layer.3": 14280.4326, "encoder_q-layer.4": 15039.7119, "encoder_q-layer.5": 14469.3027, "encoder_q-layer.6": 15154.4941, "encoder_q-layer.7": 16524.6289, "encoder_q-layer.8": 19664.3281, "encoder_q-layer.9": 18129.957, "epoch": 0.97, "inbatch_neg_score": 38.0246, "inbatch_pos_score": 38.5938, "learning_rate": 5.000000000000001e-07, "loss": 2.1835, "norm_diff": 0.0557, "num_tokens_overlap": 5.58, "num_tokens_union": 55.0311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28036.6687, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2634, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.716, "sent_len_1": 66.8497, "sent_len_max_0": 18.7963, "sent_len_max_1": 190.9825, "stdk": 0.047, "stdq": 0.0385, "stdqueue_k": 0.0, "step": 99100 }, { "accuracy": 57.3242, "doc_norm": 6.317, "encoder_q-embeddings": 17228.8906, "encoder_q-layer.0": 12341.8604, "encoder_q-layer.1": 12936.6934, "encoder_q-layer.10": 19891.2539, "encoder_q-layer.11": 42321.3984, "encoder_q-layer.2": 14227.8193, "encoder_q-layer.3": 14536.1289, "encoder_q-layer.4": 14733.751, "encoder_q-layer.5": 14516.7158, "encoder_q-layer.6": 15621.6631, "encoder_q-layer.7": 16125.2314, "encoder_q-layer.8": 17342.2031, "encoder_q-layer.9": 16339.5059, "epoch": 0.97, "inbatch_neg_score": 38.0304, "inbatch_pos_score": 38.5625, "learning_rate": 4.444444444444445e-07, "loss": 2.1717, "norm_diff": 0.0546, "num_tokens_overlap": 5.5706, "num_tokens_union": 55.1266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27724.669, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2624, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.6935, "sent_len_1": 67.0247, "sent_len_max_0": 18.775, "sent_len_max_1": 191.3425, "stdk": 0.0464, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 99200 }, { "accuracy": 55.4688, "doc_norm": 6.3177, "encoder_q-embeddings": 17103.8613, "encoder_q-layer.0": 13037.2783, "encoder_q-layer.1": 13321.7139, "encoder_q-layer.10": 22699.4199, "encoder_q-layer.11": 46858.875, "encoder_q-layer.2": 14308.2773, "encoder_q-layer.3": 14668.8535, "encoder_q-layer.4": 15623.7559, "encoder_q-layer.5": 15458.6445, "encoder_q-layer.6": 16414.9668, "encoder_q-layer.7": 18003.4629, "encoder_q-layer.8": 19346.9746, "encoder_q-layer.9": 17983.9453, "epoch": 0.97, "inbatch_neg_score": 38.0357, "inbatch_pos_score": 38.5625, "learning_rate": 3.888888888888889e-07, "loss": 2.148, "norm_diff": 0.0557, "num_tokens_overlap": 5.5848, "num_tokens_union": 55.1088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29367.2469, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.262, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7116, "sent_len_1": 67.0143, "sent_len_max_0": 18.955, "sent_len_max_1": 189.27, "stdk": 0.0463, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 99300 }, { "accuracy": 60.9375, "doc_norm": 6.318, "encoder_q-embeddings": 17438.0488, "encoder_q-layer.0": 12351.6982, "encoder_q-layer.1": 12876.2627, "encoder_q-layer.10": 21793.1855, "encoder_q-layer.11": 37759.2617, "encoder_q-layer.2": 13672.3477, "encoder_q-layer.3": 13753.3779, "encoder_q-layer.4": 14546.2305, "encoder_q-layer.5": 14279.7383, "encoder_q-layer.6": 15662.6689, "encoder_q-layer.7": 15999.1953, "encoder_q-layer.8": 17658.1094, "encoder_q-layer.9": 16902.5859, "epoch": 0.97, "inbatch_neg_score": 38.0294, "inbatch_pos_score": 38.5938, "learning_rate": 3.3333333333333335e-07, "loss": 2.126, "norm_diff": 0.0548, "num_tokens_overlap": 5.5824, "num_tokens_union": 55.0679, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27151.0823, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2632, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7246, "sent_len_1": 66.9022, "sent_len_max_0": 18.7587, "sent_len_max_1": 189.6788, "stdk": 0.0468, "stdq": 0.0384, "stdqueue_k": 0.0, "step": 99400 }, { "accuracy": 59.082, "doc_norm": 6.3203, "encoder_q-embeddings": 18319.875, "encoder_q-layer.0": 12764.8086, "encoder_q-layer.1": 13349.0771, "encoder_q-layer.10": 22966.3281, "encoder_q-layer.11": 42057.9492, "encoder_q-layer.2": 14756.8955, "encoder_q-layer.3": 14667.8525, "encoder_q-layer.4": 14981.4111, "encoder_q-layer.5": 14807.6738, "encoder_q-layer.6": 15944.7188, "encoder_q-layer.7": 17683.8223, "encoder_q-layer.8": 19637.1621, "encoder_q-layer.9": 17885.8594, "epoch": 0.97, "inbatch_neg_score": 38.0304, "inbatch_pos_score": 38.5625, "learning_rate": 2.777777777777778e-07, "loss": 2.1795, "norm_diff": 0.0578, "num_tokens_overlap": 5.5825, "num_tokens_union": 55.0049, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28742.6272, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2625, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7008, "sent_len_1": 66.7509, "sent_len_max_0": 18.8725, "sent_len_max_1": 187.6425, "stdk": 0.0464, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 99500 }, { "accuracy": 58.3008, "doc_norm": 6.3149, "encoder_q-embeddings": 16968.7969, "encoder_q-layer.0": 12087.7529, "encoder_q-layer.1": 12699.7197, "encoder_q-layer.10": 19997.9316, "encoder_q-layer.11": 38588.6328, "encoder_q-layer.2": 13613.8418, "encoder_q-layer.3": 13475.4844, "encoder_q-layer.4": 14096.7373, "encoder_q-layer.5": 13636.3281, "encoder_q-layer.6": 14964.3232, "encoder_q-layer.7": 17097.2598, "encoder_q-layer.8": 18739.5176, "encoder_q-layer.9": 16834.1074, "epoch": 0.97, "inbatch_neg_score": 38.0344, "inbatch_pos_score": 38.5625, "learning_rate": 2.2222222222222224e-07, "loss": 2.1723, "norm_diff": 0.0526, "num_tokens_overlap": 5.5856, "num_tokens_union": 55.0322, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27107.5225, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2623, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7121, "sent_len_1": 66.7724, "sent_len_max_0": 18.81, "sent_len_max_1": 188.1275, "stdk": 0.0463, "stdq": 0.0381, "stdqueue_k": 0.0, "step": 99600 }, { "accuracy": 58.3008, "doc_norm": 6.317, "encoder_q-embeddings": 17495.5742, "encoder_q-layer.0": 12165.0576, "encoder_q-layer.1": 12826.2246, "encoder_q-layer.10": 20158.457, "encoder_q-layer.11": 39699.8047, "encoder_q-layer.2": 13829.8848, "encoder_q-layer.3": 14099.3818, "encoder_q-layer.4": 14763.8682, "encoder_q-layer.5": 14944.1182, "encoder_q-layer.6": 15684.71, "encoder_q-layer.7": 16065.5205, "encoder_q-layer.8": 18339.8789, "encoder_q-layer.9": 17069.2109, "epoch": 0.97, "inbatch_neg_score": 38.0295, "inbatch_pos_score": 38.5625, "learning_rate": 1.6666666666666668e-07, "loss": 2.1524, "norm_diff": 0.0552, "num_tokens_overlap": 5.5742, "num_tokens_union": 54.9515, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27504.6039, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2618, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7106, "sent_len_1": 66.698, "sent_len_max_0": 18.805, "sent_len_max_1": 189.7837, "stdk": 0.047, "stdq": 0.038, "stdqueue_k": 0.0, "step": 99700 }, { "accuracy": 57.5195, "doc_norm": 6.3141, "encoder_q-embeddings": 16699.0117, "encoder_q-layer.0": 11993.082, "encoder_q-layer.1": 12603.876, "encoder_q-layer.10": 19984.7051, "encoder_q-layer.11": 38912.4062, "encoder_q-layer.2": 14347.0693, "encoder_q-layer.3": 13968.9785, "encoder_q-layer.4": 14087.9355, "encoder_q-layer.5": 14277.4902, "encoder_q-layer.6": 15578.9531, "encoder_q-layer.7": 15972.7031, "encoder_q-layer.8": 17956.5352, "encoder_q-layer.9": 16566.7344, "epoch": 0.97, "inbatch_neg_score": 38.0358, "inbatch_pos_score": 38.5625, "learning_rate": 1.1111111111111112e-07, "loss": 2.1765, "norm_diff": 0.0531, "num_tokens_overlap": 5.5815, "num_tokens_union": 54.9739, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26559.4077, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.261, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7238, "sent_len_1": 66.7393, "sent_len_max_0": 18.7075, "sent_len_max_1": 190.67, "stdk": 0.0459, "stdq": 0.038, "stdqueue_k": 0.0, "step": 99800 }, { "accuracy": 59.8633, "doc_norm": 6.3184, "encoder_q-embeddings": 17067.9434, "encoder_q-layer.0": 12417.2354, "encoder_q-layer.1": 12881.6729, "encoder_q-layer.10": 21296.8223, "encoder_q-layer.11": 38250.2773, "encoder_q-layer.2": 13664.7314, "encoder_q-layer.3": 14066.6436, "encoder_q-layer.4": 14384.0732, "encoder_q-layer.5": 14341.666, "encoder_q-layer.6": 15286.4961, "encoder_q-layer.7": 16608.0566, "encoder_q-layer.8": 19042.1426, "encoder_q-layer.9": 17163.7676, "epoch": 0.98, "inbatch_neg_score": 38.0206, "inbatch_pos_score": 38.5625, "learning_rate": 5.555555555555556e-08, "loss": 2.1568, "norm_diff": 0.056, "num_tokens_overlap": 5.5847, "num_tokens_union": 55.0741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27058.781, "preclip_grad_norm_avg": 0.0002, "query_norm": 6.2624, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7318, "sent_len_1": 66.8855, "sent_len_max_0": 18.7888, "sent_len_max_1": 189.58, "stdk": 0.0466, "stdq": 0.039, "stdqueue_k": 0.0, "step": 99900 }, { "accuracy": 57.5195, "doc_norm": 6.3154, "encoder_q-embeddings": 17364.6621, "encoder_q-layer.0": 12427.4688, "encoder_q-layer.1": 13218.1592, "encoder_q-layer.10": 19979.6484, "encoder_q-layer.11": 45256.1172, "encoder_q-layer.2": 14226.3164, "encoder_q-layer.3": 14070.4756, "encoder_q-layer.4": 14909.583, "encoder_q-layer.5": 14849.7197, "encoder_q-layer.6": 15248.4375, "encoder_q-layer.7": 16287.5176, "encoder_q-layer.8": 18675.7754, "encoder_q-layer.9": 16853.0645, "epoch": 0.98, "inbatch_neg_score": 38.0317, "inbatch_pos_score": 38.5625, "learning_rate": 0.0, "loss": 2.1496, "norm_diff": 0.0538, "num_tokens_overlap": 5.5778, "num_tokens_union": 55.0433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28494.0764, "preclip_grad_norm_avg": 0.0003, "query_norm": 6.2615, "queue_k_norm": 0.0, "queue_ptr": 0.0, "sent_len_0": 9.7174, "sent_len_1": 66.8483, "sent_len_max_0": 18.8575, "sent_len_max_1": 189.4588, "stdk": 0.0465, "stdq": 0.0383, "stdqueue_k": 0.0, "step": 100000 }, { "dev_runtime": 28.4516, "dev_samples_per_second": 2.249, "dev_steps_per_second": 0.035, "epoch": 0.98, "step": 100000, "test_accuracy": 8.68377685546875, "test_doc_norm": 6.305839538574219, "test_inbatch_neg_score": 38.608299255371094, "test_inbatch_pos_score": 39.42621994018555, "test_loss": 4.181252956390381, "test_norm_diff": 0.002082139253616333, "test_query_norm": 6.30559778213501, "test_queue_k_norm": 0.0, "test_stdk": 0.03822837769985199, "test_stdq": 0.038172926753759384, "test_stdqueue_k": 0.0 }, { "dev_runtime": 28.4516, "dev_samples_per_second": 2.249, "dev_steps_per_second": 0.035, "epoch": 0.98, "eval_beir-arguana_ndcg@10": 0.36665, "eval_beir-arguana_recall@10": 0.65292, "eval_beir-arguana_recall@100": 0.96728, "eval_beir-arguana_recall@20": 0.81935, "eval_beir-avg_ndcg@10": 0.38986658333333335, "eval_beir-avg_recall@10": 0.46193975, "eval_beir-avg_recall@100": 0.6502833333333334, "eval_beir-avg_recall@20": 0.5300694166666666, "eval_beir-cqadupstack_ndcg@10": 0.3031158333333333, "eval_beir-cqadupstack_recall@10": 0.40381749999999994, "eval_beir-cqadupstack_recall@100": 0.6389233333333334, "eval_beir-cqadupstack_recall@20": 0.4736041666666666, "eval_beir-fiqa_ndcg@10": 0.28062, "eval_beir-fiqa_recall@10": 0.35412, "eval_beir-fiqa_recall@100": 0.61765, "eval_beir-fiqa_recall@20": 0.43214, "eval_beir-nfcorpus_ndcg@10": 0.34134, "eval_beir-nfcorpus_recall@10": 0.16925, "eval_beir-nfcorpus_recall@100": 0.31962, "eval_beir-nfcorpus_recall@20": 0.20788, "eval_beir-nq_ndcg@10": 0.27574, "eval_beir-nq_recall@10": 0.46125, "eval_beir-nq_recall@100": 0.7927, "eval_beir-nq_recall@20": 0.59007, "eval_beir-quora_ndcg@10": 0.78549, "eval_beir-quora_recall@10": 0.88855, "eval_beir-quora_recall@100": 0.9752, "eval_beir-quora_recall@20": 0.92615, "eval_beir-scidocs_ndcg@10": 0.15962, "eval_beir-scidocs_recall@10": 0.16823, "eval_beir-scidocs_recall@100": 0.39283, "eval_beir-scidocs_recall@20": 0.22532, "eval_beir-scifact_ndcg@10": 0.618, "eval_beir-scifact_recall@10": 0.76, "eval_beir-scifact_recall@100": 0.911, "eval_beir-scifact_recall@20": 0.82856, "eval_beir-trec-covid_ndcg@10": 0.58244, "eval_beir-trec-covid_recall@10": 0.628, "eval_beir-trec-covid_recall@100": 0.4492, "eval_beir-trec-covid_recall@20": 0.588, "eval_beir-webis-touche2020_ndcg@10": 0.18565, "eval_beir-webis-touche2020_recall@10": 0.13326, "eval_beir-webis-touche2020_recall@100": 0.43843, "eval_beir-webis-touche2020_recall@20": 0.20962, "eval_senteval-avg_sts": 0.7377226490903321, "eval_senteval-sickr_spearman": 0.7323591663810709, "eval_senteval-stsb_spearman": 0.7430861317995934, "step": 100000, "test_accuracy": 8.68377685546875, "test_doc_norm": 6.305839538574219, "test_inbatch_neg_score": 38.608299255371094, "test_inbatch_pos_score": 39.42621994018555, "test_loss": 4.181252956390381, "test_norm_diff": 0.002082139253616333, "test_query_norm": 6.30559778213501, "test_queue_k_norm": 0.0, "test_stdk": 0.03822837769985199, "test_stdq": 0.038172926753759384, "test_stdqueue_k": 0.0 }, { "epoch": 0.98, "step": 100000, "total_flos": 0, "train_runtime": 82414.2278, "train_samples_per_second": 1.213 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }